cgcpu.pas 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. This unit implements the code generator for the PowerPC
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$I fpcdefs.inc}
  19. interface
  20. uses
  21. globtype, symtype, symdef,
  22. cgbase, cgobj,
  23. aasmbase, aasmcpu, aasmtai,
  24. cpubase, cpuinfo, cgutils, rgcpu,
  25. parabase;
  26. type
  27. tcgppc = class(tcg)
  28. procedure init_register_allocators; override;
  29. procedure done_register_allocators; override;
  30. { passing parameters, per default the parameter is pushed }
  31. { nr gives the number of the parameter (enumerated from }
  32. { left to right), this allows to move the parameter to }
  33. { register, if the cpu supports register calling }
  34. { conventions }
  35. procedure a_param_const(list: taasmoutput; size: tcgsize; a: aint; const
  36. paraloc: tcgpara); override;
  37. procedure a_param_ref(list: taasmoutput; size: tcgsize; const r: treference;
  38. const paraloc: tcgpara); override;
  39. procedure a_paramaddr_ref(list: taasmoutput; const r: treference; const
  40. paraloc: tcgpara); override;
  41. procedure a_call_name(list: taasmoutput; const s: string); override;
  42. procedure a_call_reg(list: taasmoutput; reg: tregister); override;
  43. procedure a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a:
  44. aint; reg: TRegister); override;
  45. procedure a_op_reg_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; src,
  46. dst: TRegister); override;
  47. procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
  48. size: tcgsize; a: aint; src, dst: tregister); override;
  49. procedure a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
  50. size: tcgsize; src1, src2, dst: tregister); override;
  51. { move instructions }
  52. procedure a_load_const_reg(list: taasmoutput; size: tcgsize; a: aint; reg:
  53. tregister); override;
  54. { stores the contents of register reg to the memory location described by
  55. ref }
  56. procedure a_load_reg_ref(list: taasmoutput; fromsize, tosize: tcgsize; reg:
  57. tregister; const ref: treference); override;
  58. { loads the memory pointed to by ref into register reg }
  59. procedure a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize; const
  60. Ref: treference; reg: tregister); override;
  61. procedure a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize; reg1,
  62. reg2: tregister); override;
  63. { fpu move instructions }
  64. procedure a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2:
  65. tregister); override;
  66. procedure a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref:
  67. treference; reg: tregister); override;
  68. procedure a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
  69. tregister; const ref: treference); override;
  70. { comparison operations }
  71. procedure a_cmp_const_reg_label(list: taasmoutput; size: tcgsize; cmp_op:
  72. topcmp; a: aint; reg: tregister;
  73. l: tasmlabel); override;
  74. procedure a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize; cmp_op:
  75. topcmp; reg1, reg2: tregister; l: tasmlabel); override;
  76. procedure a_jmp_name(list: taasmoutput; const s: string); override;
  77. procedure a_jmp_always(list: taasmoutput; l: tasmlabel); override;
  78. procedure a_jmp_flags(list: taasmoutput; const f: TResFlags; l: tasmlabel);
  79. override;
  80. procedure g_flags2reg(list: taasmoutput; size: TCgSize; const f: TResFlags;
  81. reg: TRegister); override;
  82. procedure g_proc_entry(list: taasmoutput; localsize: longint; nostackframe:
  83. boolean); override;
  84. procedure g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
  85. boolean); override;
  86. procedure g_save_standard_registers(list: Taasmoutput); override;
  87. procedure g_restore_standard_registers(list: Taasmoutput); override;
  88. procedure a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r:
  89. tregister); override;
  90. procedure g_concatcopy(list: taasmoutput; const source, dest: treference;
  91. len: aint); override;
  92. procedure g_overflowcheck(list: taasmoutput; const l: tlocation; def: tdef);
  93. override;
  94. procedure a_jmp_cond(list: taasmoutput; cond: TOpCmp; l: tasmlabel);
  95. procedure g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
  96. labelname: string; ioffset: longint); override;
  97. private
  98. { Make sure ref is a valid reference for the PowerPC and sets the }
  99. { base to the value of the index if (base = R_NO). }
  100. { Returns true if the reference contained a base, index and an }
  101. { offset or symbol, in which case the base will have been changed }
  102. { to a tempreg (which has to be freed by the caller) containing }
  103. { the sum of part of the original reference }
  104. function fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
  105. { returns whether a reference can be used immediately in a powerpc }
  106. { instruction }
  107. function issimpleref(const ref: treference): boolean;
  108. { contains the common code of a_load_reg_ref and a_load_ref_reg }
  109. procedure a_load_store(list: taasmoutput; op: tasmop; reg: tregister;
  110. ref: treference);
  111. { creates the correct branch instruction for a given combination }
  112. { of asmcondflags and destination addressing mode }
  113. procedure a_jmp(list: taasmoutput; op: tasmop;
  114. c: tasmcondflag; crval: longint; l: tasmlabel);
  115. { returns the lowest numbered FP register in use, and the number of used FP registers
  116. for the current procedure }
  117. procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  118. { returns the lowest numbered GP register in use, and the number of used GP registers
  119. for the current procedure }
  120. procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  121. { returns true if the offset of the given reference can not be represented by a 16 bit
  122. immediate as required by some PowerPC instructions }
  123. function hasLargeOffset(const ref : TReference) : Boolean; inline;
  124. procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
  125. end;
  126. const
  127. TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
  128. (A_SRAWI, A_SLWI, A_SRWI), (A_SRADI, A_SLDI, A_SRDI)
  129. );
  130. TOpCmp2AsmCond: array[topcmp] of TAsmCondFlag = (C_NONE, C_EQ, C_GT,
  131. C_LT, C_GE, C_LE, C_NE, C_LE, C_LT, C_GE, C_GT);
  132. implementation
  133. uses
  134. sysutils,
  135. globals, verbose, systems, cutils,
  136. symconst, symsym, fmodule,
  137. rgobj, tgobj, cpupi, procinfo, paramgr;
  138. { helper function which calculate "magic" values for replacement of unsigned
  139. division by constant operation by multiplication. See the PowerPC compiler
  140. developer manual for more information }
  141. procedure getmagic_unsignedN(const N : byte; const d : aWord;
  142. out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
  143. var
  144. p : aInt;
  145. nc, delta, q1, r1, q2, r2, two_N_minus_1 : aWord;
  146. begin
  147. assert(d > 0);
  148. two_N_minus_1 := aWord(1) shl (N-1);
  149. magic_add := false;
  150. nc := - 1 - (-d) mod d;
  151. p := N-1; { initialize p }
  152. q1 := two_N_minus_1 div nc; { initialize q1 = 2p/nc }
  153. r1 := two_N_minus_1 - q1*nc; { initialize r1 = rem(2p,nc) }
  154. q2 := (two_N_minus_1-1) div d; { initialize q2 = (2p-1)/d }
  155. r2 := (two_N_minus_1-1) - q2*d; { initialize r2 = rem((2p-1),d) }
  156. repeat
  157. inc(p);
  158. if (r1 >= (nc - r1)) then begin
  159. q1 := 2 * q1 + 1; { update q1 }
  160. r1 := 2*r1 - nc; { update r1 }
  161. end else begin
  162. q1 := 2*q1; { update q1 }
  163. r1 := 2*r1; { update r1 }
  164. end;
  165. if ((r2 + 1) >= (d - r2)) then begin
  166. if (q2 >= (two_N_minus_1-1)) then
  167. magic_add := true;
  168. q2 := 2*q2 + 1; { update q2 }
  169. r2 := 2*r2 + 1 - d; { update r2 }
  170. end else begin
  171. if (q2 >= two_N_minus_1) then
  172. magic_add := true;
  173. q2 := 2*q2; { update q2 }
  174. r2 := 2*r2 + 1; { update r2 }
  175. end;
  176. delta := d - 1 - r2;
  177. until not ((p < (2*N)) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
  178. magic_m := q2 + 1; { resulting magic number }
  179. magic_shift := p - N; { resulting shift }
  180. end;
  181. { helper function which calculate "magic" values for replacement of signed
  182. division by constant operation by multiplication. See the PowerPC compiler
  183. developer manual for more information }
  184. procedure getmagic_signedN(const N : byte; const d : aInt;
  185. out magic_m : aInt; out magic_s : aInt);
  186. var
  187. p : aInt;
  188. ad, anc, delta, q1, r1, q2, r2, t : aWord;
  189. two_N_minus_1 : aWord;
  190. begin
  191. assert((d < -1) or (d > 1));
  192. two_N_minus_1 := aWord(1) shl (N-1);
  193. ad := abs(d);
  194. t := two_N_minus_1 + (aWord(d) shr (N-1));
  195. anc := t - 1 - t mod ad; { absolute value of nc }
  196. p := (N-1); { initialize p }
  197. q1 := two_N_minus_1 div anc; { initialize q1 = 2p/abs(nc) }
  198. r1 := two_N_minus_1 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
  199. q2 := two_N_minus_1 div ad; { initialize q2 = 2p/abs(d) }
  200. r2 := two_N_minus_1 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
  201. repeat
  202. inc(p);
  203. q1 := 2*q1; { update q1 = 2p/abs(nc) }
  204. r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
  205. if (r1 >= anc) then begin { must be unsigned comparison }
  206. inc(q1);
  207. dec(r1, anc);
  208. end;
  209. q2 := 2*q2; { update q2 = 2p/abs(d) }
  210. r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
  211. if (r2 >= ad) then begin { must be unsigned comparison }
  212. inc(q2);
  213. dec(r2, ad);
  214. end;
  215. delta := ad - r2;
  216. until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
  217. magic_m := q2 + 1;
  218. if (d < 0) then begin
  219. magic_m := -magic_m; { resulting magic number }
  220. end;
  221. magic_s := p - N; { resulting shift }
  222. end;
  223. { finds positive and negative powers of two of the given value, returning the
  224. power and whether it's a negative power or not in addition to the actual result
  225. of the function }
  226. function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
  227. var
  228. i : longint;
  229. hl : aInt;
  230. begin
  231. neg := false;
  232. { also try to find negative power of two's by negating if the
  233. value is negative. low(aInt) is special because it can not be
  234. negated. Simply return the appropriate values for it }
  235. if (value < 0) then begin
  236. neg := true;
  237. if (value = low(aInt)) then begin
  238. power := sizeof(aInt)*8-1;
  239. result := true;
  240. exit;
  241. end;
  242. value := -value;
  243. end;
  244. if ((value and (value-1)) <> 0) then begin
  245. result := false;
  246. exit;
  247. end;
  248. hl := 1;
  249. for i := 0 to (sizeof(aInt)*8-1) do begin
  250. if (hl = value) then begin
  251. result := true;
  252. power := i;
  253. exit;
  254. end;
  255. hl := hl shl 1;
  256. end;
  257. end;
  258. procedure tcgppc.init_register_allocators;
  259. begin
  260. inherited init_register_allocators;
  261. rg[R_INTREGISTER] := trgcpu.create(R_INTREGISTER, R_SUBWHOLE,
  262. [RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8,
  263. RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29,
  264. RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22,
  265. RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15,
  266. RS_R14, RS_R13], first_int_imreg, []);
  267. rg[R_FPUREGISTER] := trgcpu.create(R_FPUREGISTER, R_SUBNONE,
  268. [RS_F0, RS_F1, RS_F2, RS_F3, RS_F4, RS_F5, RS_F6, RS_F7, RS_F8, RS_F9,
  269. RS_F10, RS_F11, RS_F12, RS_F13, RS_F31, RS_F30, RS_F29, RS_F28, RS_F27,
  270. RS_F26, RS_F25, RS_F24, RS_F23, RS_F22, RS_F21, RS_F20, RS_F19, RS_F18,
  271. RS_F17, RS_F16, RS_F15, RS_F14], first_fpu_imreg, []);
  272. {$WARNING FIX ME}
  273. rg[R_MMREGISTER] := trgcpu.create(R_MMREGISTER, R_SUBNONE,
  274. [RS_M0, RS_M1, RS_M2], first_mm_imreg, []);
  275. end;
  276. procedure tcgppc.done_register_allocators;
  277. begin
  278. rg[R_INTREGISTER].free;
  279. rg[R_FPUREGISTER].free;
  280. rg[R_MMREGISTER].free;
  281. inherited done_register_allocators;
  282. end;
  283. procedure tcgppc.a_param_const(list: taasmoutput; size: tcgsize; a: aint; const
  284. paraloc: tcgpara);
  285. var
  286. ref: treference;
  287. begin
  288. paraloc.check_simple_location;
  289. case paraloc.location^.loc of
  290. LOC_REGISTER, LOC_CREGISTER:
  291. a_load_const_reg(list, size, a, paraloc.location^.register);
  292. LOC_REFERENCE:
  293. begin
  294. reference_reset(ref);
  295. ref.base := paraloc.location^.reference.index;
  296. ref.offset := paraloc.location^.reference.offset;
  297. a_load_const_ref(list, size, a, ref);
  298. end;
  299. else
  300. internalerror(2002081101);
  301. end;
  302. end;
  303. procedure tcgppc.a_param_ref(list: taasmoutput; size: tcgsize; const r:
  304. treference; const paraloc: tcgpara);
  305. var
  306. tmpref, ref: treference;
  307. location: pcgparalocation;
  308. sizeleft: aint;
  309. adjusttail : boolean;
  310. begin
  311. location := paraloc.location;
  312. tmpref := r;
  313. sizeleft := paraloc.intsize;
  314. adjusttail := false;
  315. while assigned(location) do begin
  316. case location^.loc of
  317. LOC_REGISTER, LOC_CREGISTER:
  318. begin
  319. if (size <> OS_NO) then
  320. a_load_ref_reg(list, size, location^.size, tmpref,
  321. location^.register)
  322. else
  323. {$IFDEF extdebug}
  324. list.concat(tai_comment.create(strpnew('a_param_ref with OS_NO')));
  325. {$ENDIF extdebug}
  326. { load non-integral sized memory location into register. This
  327. memory location be 1-sizeleft byte sized.
  328. Always assume that this memory area is properly aligned, eg. start
  329. loading the larger quantities for "odd" quantities first }
  330. case sizeleft of
  331. 1,2,4,8 :
  332. a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref,
  333. location^.register);
  334. 3 : begin
  335. a_reg_alloc(list, NR_R12);
  336. a_load_ref_reg(list, OS_16, location^.size, tmpref,
  337. NR_R12);
  338. inc(tmpref.offset, tcgsize2size[OS_16]);
  339. a_load_ref_reg(list, OS_8, location^.size, tmpref,
  340. location^.register);
  341. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40));
  342. a_reg_dealloc(list, NR_R12);
  343. end;
  344. 5 : begin
  345. a_reg_alloc(list, NR_R12);
  346. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  347. inc(tmpref.offset, tcgsize2size[OS_32]);
  348. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  349. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24));
  350. a_reg_dealloc(list, NR_R12);
  351. end;
  352. 6 : begin
  353. a_reg_alloc(list, NR_R12);
  354. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  355. inc(tmpref.offset, tcgsize2size[OS_32]);
  356. a_load_ref_reg(list, OS_16, location^.size, tmpref, location^.register);
  357. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 16, 16));
  358. a_reg_dealloc(list, NR_R12);
  359. end;
  360. 7 : begin
  361. a_reg_alloc(list, NR_R12);
  362. a_reg_alloc(list, NR_R0);
  363. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  364. inc(tmpref.offset, tcgsize2size[OS_32]);
  365. a_load_ref_reg(list, OS_16, location^.size, tmpref, NR_R0);
  366. inc(tmpref.offset, tcgsize2size[OS_16]);
  367. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  368. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
  369. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8));
  370. a_reg_dealloc(list, NR_R0);
  371. a_reg_dealloc(list, NR_R12);
  372. end;
  373. else
  374. { still > 8 bytes to load, so load data single register now }
  375. a_load_ref_reg(list, location^.size, location^.size, tmpref,
  376. location^.register);
  377. { the block is > 8 bytes, so we have to store any bytes not
  378. a multiple of the register size beginning with the MSB }
  379. adjusttail := true;
  380. end;
  381. (*
  382. { Comment this in (for gcc compat) and be prepared for a whole bunch of errors :/ }
  383. if (adjusttail) and (sizeleft < tcgsize2size[OS_INT]) then
  384. a_op_const_reg(list, OP_SHL, OS_INT,
  385. (tcgsize2size[OS_INT] - sizeleft) * tcgsize2size[OS_INT],
  386. location^.register);
  387. *)
  388. end;
  389. LOC_REFERENCE:
  390. begin
  391. reference_reset_base(ref, location^.reference.index,
  392. location^.reference.offset);
  393. g_concatcopy(list, tmpref, ref, sizeleft);
  394. if assigned(location^.next) then
  395. internalerror(2005010710);
  396. end;
  397. LOC_FPUREGISTER, LOC_CFPUREGISTER:
  398. case location^.size of
  399. OS_F32, OS_F64:
  400. a_loadfpu_ref_reg(list, location^.size, tmpref, location^.register);
  401. else
  402. internalerror(2002072801);
  403. end;
  404. LOC_VOID:
  405. { nothing to do }
  406. ;
  407. else
  408. internalerror(2002081103);
  409. end;
  410. inc(tmpref.offset, tcgsize2size[location^.size]);
  411. dec(sizeleft, tcgsize2size[location^.size]);
  412. location := location^.next;
  413. end;
  414. end;
  415. procedure tcgppc.a_paramaddr_ref(list: taasmoutput; const r: treference; const
  416. paraloc: tcgpara);
  417. var
  418. ref: treference;
  419. tmpreg: tregister;
  420. begin
  421. paraloc.check_simple_location;
  422. case paraloc.location^.loc of
  423. LOC_REGISTER, LOC_CREGISTER:
  424. a_loadaddr_ref_reg(list, r, paraloc.location^.register);
  425. LOC_REFERENCE:
  426. begin
  427. reference_reset(ref);
  428. ref.base := paraloc.location^.reference.index;
  429. ref.offset := paraloc.location^.reference.offset;
  430. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  431. a_loadaddr_ref_reg(list, r, tmpreg);
  432. a_load_reg_ref(list, OS_ADDR, OS_ADDR, tmpreg, ref);
  433. end;
  434. else
  435. internalerror(2002080701);
  436. end;
  437. end;
  438. { calling a procedure by name }
  439. procedure tcgppc.a_call_name(list: taasmoutput; const s: string);
  440. begin
  441. a_call_name_direct(list, s, true, true);
  442. end;
  443. procedure tcgppc.a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
  444. begin
  445. if (prependDot) then
  446. s := '.' + s;
  447. list.concat(taicpu.op_sym(A_BL, objectlibrary.newasmsymbol(s, AB_EXTERNAL,
  448. AT_FUNCTION)));
  449. if (addNOP) then
  450. list.concat(taicpu.op_none(A_NOP));
  451. { the compiler does not properly set this flag anymore in pass 1, and
  452. for now we only need it after pass 2 (I hope) (JM) }
  453. include(current_procinfo.flags, pi_do_call);
  454. end;
  455. { calling a procedure by address }
  456. procedure tcgppc.a_call_reg(list: taasmoutput; reg: tregister);
  457. var
  458. tmpref: treference;
  459. begin
  460. if (not (cs_littlesize in aktglobalswitches)) then begin
  461. { load actual function entry (reg contains the reference to the function descriptor)
  462. into R0 }
  463. reference_reset_base(tmpref, reg, 0);
  464. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R0);
  465. { save TOC pointer in stackframe }
  466. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
  467. a_load_reg_ref(list, OS_ADDR, OS_ADDR, NR_RTOC, tmpref);
  468. { move actual function pointer to CTR register }
  469. list.concat(taicpu.op_reg(A_MTCTR, NR_R0));
  470. { load new TOC pointer from function descriptor into RTOC register }
  471. reference_reset_base(tmpref, reg, tcgsize2size[OS_ADDR]);
  472. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  473. { load new environment pointer from function descriptor into R11 register }
  474. reference_reset_base(tmpref, reg, 2*tcgsize2size[OS_ADDR]);
  475. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R11);
  476. { call function }
  477. list.concat(taicpu.op_none(A_BCTRL));
  478. end else begin
  479. { call ptrgl helper routine which expects the pointer to the function descriptor
  480. in R11 }
  481. a_load_reg_reg(list, OS_ADDR, OS_ADDR, reg, NR_R11);
  482. a_call_name_direct(list, 'ptrgl', true, false);
  483. end;
  484. { we need to load the old RTOC from stackframe because we changed it}
  485. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
  486. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  487. include(current_procinfo.flags, pi_do_call);
  488. end;
  489. {********************** load instructions ********************}
  490. procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
  491. reg: TRegister);
  492. { loads a 32 bit constant into the given register, using an optimal instruction sequence.
  493. This is either LIS, LI or LI+ADDIS.
  494. Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
  495. sign extension was performed) }
  496. function load32bitconstant(list : taasmoutput; size : TCGSize; a : longint;
  497. reg : TRegister) : boolean;
  498. var
  499. is_half_signed : byte;
  500. begin
  501. { if the lower 16 bits are zero, do a single LIS }
  502. if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
  503. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(hi(a))));
  504. load32bitconstant := longint(a) < 0;
  505. end else begin
  506. is_half_signed := ord(smallint(lo(a)) < 0);
  507. list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a and $ffff)));
  508. if smallint(hi(a) + is_half_signed) <> 0 then begin
  509. list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, smallint(hi(a) + is_half_signed)));
  510. end;
  511. load32bitconstant := (smallint(a) < 0) or (a < 0);
  512. end;
  513. end;
  514. { R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
  515. the return value. Unused until further testing shows that it is not really necessary;
  516. loading the upper 32 bits of a value is now done using R12, which does not require
  517. special treatment }
  518. procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
  519. reg : TRegister);
  520. begin
  521. { only 16 bit constant? (-2^15 <= a <= +2^15-1) }
  522. if (a >= low(smallint)) and (a <= high(smallint)) then begin
  523. list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
  524. end else begin
  525. { check if we have to start with LI or LIS, load as 32 bit constant }
  526. if ((a and $FFFF) <> 0) then begin
  527. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
  528. list.concat(taicpu.op_reg_reg_const(A_ORI, reg, reg, word(a)));
  529. end else begin
  530. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
  531. end;
  532. end;
  533. end;
  534. var
  535. extendssign : boolean;
  536. {$IFDEF EXTDEBUG}
  537. astring : string;
  538. {$ENDIF EXTDEBUG}
  539. begin
  540. {$IFDEF EXTDEBUG}
  541. astring := 'a_load_const reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]);
  542. list.concat(tai_comment.create(strpnew(astring)));
  543. {$ENDIF EXTDEBUG}
  544. if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  545. internalerror(2002090902);
  546. if (lo(a) = 0) and (hi(a) <> 0) then begin
  547. { load only upper 32 bits, and shift }
  548. load32bitconstant(list, size, hi(a), reg);
  549. list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));
  550. end else begin
  551. { load lower 32 bits }
  552. extendssign := load32bitconstant(list, size, lo(a), reg);
  553. if (extendssign) and (hi(a) = 0) then
  554. { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic
  555. sign extension, clear those bits }
  556. a_load_reg_reg(list, OS_32, OS_64, reg, reg)
  557. else if (not
  558. ((extendssign and (longint(hi(a)) = -1)) or
  559. ((not extendssign) and (hi(a)=0)))
  560. ) then begin
  561. { only load the upper 32 bits, if the automatic sign extension is not okay,
  562. that is, _not_ if
  563. - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper
  564. 32 bits should contain -1
  565. - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
  566. 32 bits should contain 0 }
  567. load32bitconstant(list, size, hi(a), NR_R12);
  568. { combine both registers }
  569. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R12, 32, 0));
  570. end;
  571. end;
  572. end;
  573. procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
  574. reg: tregister; const ref: treference);
  575. const
  576. StoreInstr: array[OS_8..OS_64, boolean, boolean] of TAsmOp =
  577. { indexed? updating?}
  578. (((A_STB, A_STBU), (A_STBX, A_STBUX)),
  579. ((A_STH, A_STHU), (A_STHX, A_STHUX)),
  580. ((A_STW, A_STWU), (A_STWX, A_STWUX)),
  581. ((A_STD, A_STDU), (A_STDX, A_STDUX))
  582. );
  583. var
  584. op: TAsmOp;
  585. ref2: TReference;
  586. begin
  587. ref2 := ref;
  588. fixref(list, ref2, tosize);
  589. if tosize in [OS_S8..OS_S64] then
  590. { storing is the same for signed and unsigned values }
  591. tosize := tcgsize(ord(tosize) - (ord(OS_S8) - ord(OS_8)));
  592. op := storeinstr[tcgsize2unsigned[tosize], ref2.index <> NR_NO, false];
  593. a_load_store(list, op, reg, ref2);
  594. end;
  595. procedure tcgppc.a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize;
  596. const ref: treference; reg: tregister);
  597. const
  598. LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
  599. { indexed? updating? }
  600. (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  601. ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
  602. ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
  603. ((A_LD, A_LDU), (A_LDX, A_LDUX)),
  604. { 128bit stuff too }
  605. ((A_NONE, A_NONE), (A_NONE, A_NONE)),
  606. { there's no load-byte-with-sign-extend :( }
  607. ((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  608. ((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)),
  609. { there's no load-word-arithmetic-indexed with update, simulate it in code :( }
  610. ((A_LWA, A_NOP), (A_LWAX, A_LWAUX)),
  611. ((A_LD, A_LDU), (A_LDX, A_LDUX))
  612. );
  613. var
  614. op: tasmop;
  615. ref2: treference;
  616. begin
  617. if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  618. internalerror(2002090902);
  619. ref2 := ref;
  620. fixref(list, ref2, tosize);
  621. { the caller is expected to have adjusted the reference already
  622. in this case }
  623. if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
  624. fromsize := tosize;
  625. op := loadinstr[fromsize, ref2.index <> NR_NO, false];
  626. { there is no LWAU instruction, simulate using ADDI and LWA }
  627. if (op = A_NOP) then begin
  628. list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
  629. ref2.offset := 0;
  630. op := A_LWA;
  631. end;
  632. a_load_store(list, op, reg, ref2);
  633. { sign extend shortint if necessary, since there is no
  634. load instruction that does that automatically (JM) }
  635. if fromsize = OS_S8 then
  636. list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg));
  637. end;
  638. procedure tcgppc.a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize;
  639. reg1, reg2: tregister);
  640. const
  641. movemap : array[OS_8..OS_S128, OS_8..OS_S128] of tasmop = (
  642. { to -> OS_8 OS_16 OS_32 OS_64 OS_128 OS_S8 OS_S16 OS_S32 OS_S64 OS_S128 }
  643. { from }
  644. { OS_8 } (A_MR, A_RLDICL, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  645. { OS_16 } (A_RLDICL, A_MR, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  646. { OS_32 } (A_RLDICL, A_RLDICL, A_MR, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  647. { OS_64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  648. { OS_128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP ),
  649. { OS_S8 } (A_EXTSB, A_EXTSB, A_EXTSB, A_EXTSB, A_NONE, A_MR, A_EXTSB, A_EXTSB, A_EXTSB, A_NOP ),
  650. { OS_S16 } (A_RLDICL, A_EXTSH, A_EXTSH, A_EXTSH, A_NONE, A_EXTSB, A_MR, A_EXTSH, A_EXTSH, A_NOP ),
  651. { OS_S32 } (A_RLDICL, A_RLDICL, A_EXTSW, A_EXTSW, A_NONE, A_EXTSB, A_EXTSH, A_MR, A_EXTSW, A_NOP ),
  652. { OS_S64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_EXTSB, A_EXTSH, A_EXTSW, A_MR, A_NOP ),
  653. { OS_S128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP )
  654. );
  655. var
  656. instr: taicpu;
  657. op : tasmop;
  658. begin
  659. op := movemap[fromsize, tosize];
  660. case op of
  661. A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1);
  662. A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8);
  663. else
  664. internalerror(2002090901);
  665. end;
  666. list.concat(instr);
  667. rg[R_INTREGISTER].add_move_instruction(instr);
  668. end;
  669. procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize;
  670. reg1, reg2: tregister);
  671. var
  672. instr: taicpu;
  673. begin
  674. instr := taicpu.op_reg_reg(A_FMR, reg2, reg1);
  675. list.concat(instr);
  676. rg[R_FPUREGISTER].add_move_instruction(instr);
  677. end;
  678. procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize;
  679. const ref: treference; reg: tregister);
  680. const
  681. FpuLoadInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
  682. { indexed? updating?}
  683. (((A_LFS, A_LFSU), (A_LFSX, A_LFSUX)),
  684. ((A_LFD, A_LFDU), (A_LFDX, A_LFDUX)));
  685. var
  686. op: tasmop;
  687. ref2: treference;
  688. begin
  689. { several functions call this procedure with OS_32 or OS_64
  690. so this makes life easier (FK) }
  691. case size of
  692. OS_32, OS_F32:
  693. size := OS_F32;
  694. OS_64, OS_F64, OS_C64:
  695. size := OS_F64;
  696. else
  697. internalerror(200201121);
  698. end;
  699. ref2 := ref;
  700. fixref(list, ref2, size);
  701. op := fpuloadinstr[size, ref2.index <> NR_NO, false];
  702. a_load_store(list, op, reg, ref2);
  703. end;
  704. procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
  705. tregister; const ref: treference);
  706. const
  707. FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
  708. { indexed? updating? }
  709. (((A_STFS, A_STFSU), (A_STFSX, A_STFSUX)),
  710. ((A_STFD, A_STFDU), (A_STFDX, A_STFDUX)));
  711. var
  712. op: tasmop;
  713. ref2: treference;
  714. begin
  715. if not (size in [OS_F32, OS_F64]) then
  716. internalerror(200201122);
  717. ref2 := ref;
  718. fixref(list, ref2, size);
  719. op := fpustoreinstr[size, ref2.index <> NR_NO, false];
  720. a_load_store(list, op, reg, ref2);
  721. end;
  722. procedure tcgppc.a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a:
  723. aint; reg: TRegister);
  724. begin
  725. a_op_const_reg_reg(list, op, size, a, reg, reg);
  726. end;
  727. procedure tcgppc.a_op_reg_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; src,
  728. dst: TRegister);
  729. begin
  730. a_op_reg_reg_reg(list, op, size, src, dst, dst);
  731. end;
  732. procedure tcgppc.a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
  733. size: tcgsize; a: aint; src, dst: tregister);
  734. var
  735. useReg : boolean;
  736. procedure do_lo_hi(loOp, hiOp : TAsmOp);
  737. begin
  738. { Optimization for logical ops (excluding AND), trying to do this as efficiently
  739. as possible by only generating code for the affected halfwords. Note that all
  740. the instructions handled here must have "X op 0 = X" for every halfword. }
  741. usereg := false;
  742. if (aword(a) > high(dword)) then begin
  743. usereg := true;
  744. end else begin
  745. if (word(a) <> 0) then begin
  746. list.concat(taicpu.op_reg_reg_const(loOp, dst, src, word(a)));
  747. if (word(a shr 16) <> 0) then
  748. list.concat(taicpu.op_reg_reg_const(hiOp, dst, dst, word(a shr 16)));
  749. end else if (word(a shr 16) <> 0) then
  750. list.concat(taicpu.op_reg_reg_const(hiOp, dst, src, word(a shr 16)));
  751. end;
  752. end;
  753. procedure do_lo_hi_and;
  754. begin
  755. { optimization logical and with immediate: only use "andi." for 16 bit
  756. ands, otherwise use register method. Doing this for 32 bit constants
  757. would not give any advantage to the register method (via useReg := true),
  758. requiring a scratch register and three instructions. }
  759. usereg := false;
  760. if (aword(a) > high(word)) then
  761. usereg := true
  762. else
  763. list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
  764. end;
  765. procedure do_constant_div(list : taasmoutput; size : TCgSize; a : aint; src, dst : TRegister;
  766. signed : boolean);
  767. const
  768. negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
  769. var
  770. magic, shift : int64;
  771. u_magic : qword;
  772. u_shift : byte;
  773. u_add : boolean;
  774. power : byte;
  775. isNegPower : boolean;
  776. divreg : tregister;
  777. begin
  778. if (a = 0) then begin
  779. internalerror(2005061701);
  780. end else if (a = 1) then begin
  781. cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, src, dst);
  782. end else if (a = -1) then begin
  783. { note: only in the signed case possible..., may overflow }
  784. exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], dst, src));
  785. end else if (ispowerof2(a, power, isNegPower)) then begin
  786. if (signed) then begin
  787. { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
  788. cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power,
  789. src, dst);
  790. exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
  791. if (isNegPower) then
  792. exprasmlist.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  793. end else begin
  794. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, src, dst)
  795. end;
  796. end else begin
  797. { replace division by multiplication, both implementations }
  798. { from "The PowerPC Compiler Writer's Guide" pg. 53ff }
  799. divreg := cg.getintregister(exprasmlist, OS_INT);
  800. if (signed) then begin
  801. getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
  802. { load magic value }
  803. cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg);
  804. { multiply }
  805. exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
  806. { add/subtract numerator }
  807. if (a > 0) and (magic < 0) then begin
  808. cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, src, dst, dst);
  809. end else if (a < 0) and (magic > 0) then begin
  810. cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, src, dst, dst);
  811. end;
  812. { shift shift places to the right (arithmetic) }
  813. cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, dst, dst);
  814. { extract and add sign bit }
  815. if (a >= 0) then begin
  816. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, src, divreg);
  817. end else begin
  818. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, dst, divreg);
  819. end;
  820. cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, dst, divreg, dst);
  821. end else begin
  822. getmagic_unsignedN(sizeof(aWord)*8, a, u_magic, u_add, u_shift);
  823. { load magic in divreg }
  824. cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg);
  825. exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
  826. if (u_add) then begin
  827. cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, dst, src, divreg);
  828. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 1, divreg, divreg);
  829. cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, dst, divreg);
  830. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, dst);
  831. end else begin
  832. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, dst, dst);
  833. end;
  834. end;
  835. end;
  836. end;
  837. var
  838. scratchreg: tregister;
  839. shift : byte;
  840. shiftmask : longint;
  841. isneg : boolean;
  842. begin
  843. { subtraction is the same as addition with negative constant }
  844. if op = OP_SUB then begin
  845. a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst);
  846. exit;
  847. end;
  848. { This case includes some peephole optimizations for the various operations,
  849. (e.g. AND, OR, XOR, ..) - can't this be done at some higher level,
  850. independent of architecture? }
  851. { assume that we do not need a scratch register for the operation }
  852. useReg := false;
  853. case (op) of
  854. OP_DIV, OP_IDIV:
  855. if (cs_slowoptimize in aktglobalswitches) then
  856. do_constant_div(list, size, a, src, dst, op = OP_IDIV)
  857. else
  858. usereg := true;
  859. OP_IMUL, OP_MUL:
  860. { idea: factorize constant multiplicands and use adds/shifts with few factors;
  861. however, even a 64 bit multiply is already quite fast on PPC64 }
  862. if (a = 0) then
  863. a_load_const_reg(list, size, 0, dst)
  864. else if (a = -1) then
  865. list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
  866. else if (a = 1) then
  867. a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
  868. else if ispowerof2(a, shift, isneg) then begin
  869. list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
  870. if (isneg) then
  871. exprasmlist.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  872. end else if (a >= low(smallint)) and (a <= high(smallint)) then
  873. list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
  874. smallint(a)))
  875. else
  876. usereg := true;
  877. OP_ADD:
  878. if (a = 0) then
  879. a_load_reg_reg(list, size, size, src, dst)
  880. else if (a >= low(smallint)) and (a <= high(smallint)) then
  881. list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)))
  882. else
  883. useReg := true;
  884. OP_OR:
  885. if (a = 0) then
  886. a_load_reg_reg(list, size, size, src, dst)
  887. else if (a = -1) then
  888. a_load_const_reg(list, size, -1, dst)
  889. else
  890. do_lo_hi(A_ORI, A_ORIS);
  891. OP_AND:
  892. if (a = 0) then
  893. a_load_const_reg(list, size, 0, dst)
  894. else if (a = -1) then
  895. a_load_reg_reg(list, size, size, src, dst)
  896. else
  897. do_lo_hi_and;
  898. OP_XOR:
  899. if (a = 0) then
  900. a_load_reg_reg(list, size, size, src, dst)
  901. else if (a = -1) then
  902. list.concat(taicpu.op_reg_reg(A_NOT, dst, src))
  903. else
  904. do_lo_hi(A_XORI, A_XORIS);
  905. OP_SHL, OP_SHR, OP_SAR:
  906. begin
  907. if (size in [OS_64, OS_S64]) then
  908. shift := 6
  909. else
  910. shift := 5;
  911. shiftmask := (1 shl shift)-1;
  912. if (a and shiftmask) <> 0 then
  913. list.concat(taicpu.op_reg_reg_const(
  914. TShiftOpCG2AsmOpConst[size in [OS_64, OS_S64], op], dst, src, a and shiftmask))
  915. else
  916. a_load_reg_reg(list, size, size, src, dst);
  917. if ((a shr shift) <> 0) then
  918. internalError(68991);
  919. end
  920. else
  921. internalerror(200109091);
  922. end;
  923. { if all else failed, load the constant in a register and then
  924. perform the operation }
  925. if (useReg) then begin
  926. scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  927. a_load_const_reg(list, size, a, scratchreg);
  928. a_op_reg_reg_reg(list, op, size, scratchreg, src, dst);
  929. end;
  930. end;
  931. procedure tcgppc.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
  932. size: tcgsize; src1, src2, dst: tregister);
  933. const
  934. op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
  935. (A_NONE, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
  936. A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR);
  937. op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
  938. (A_NONE, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
  939. A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
  940. begin
  941. case op of
  942. OP_NEG, OP_NOT:
  943. begin
  944. list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1));
  945. if (op = OP_NOT) and not (size in [OS_64, OS_S64]) then
  946. { zero/sign extend result again, fromsize is not important here }
  947. a_load_reg_reg(list, OS_S64, size, dst, dst)
  948. end;
  949. else
  950. if (size in [OS_64, OS_S64]) then begin
  951. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
  952. src1));
  953. end else begin
  954. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
  955. src1));
  956. end;
  957. end;
  958. end;
  959. {*************** compare instructructions ****************}
  960. procedure tcgppc.a_cmp_const_reg_label(list: taasmoutput; size: tcgsize;
  961. cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel);
  962. var
  963. scratch_register: TRegister;
  964. signed: boolean;
  965. begin
  966. signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
  967. { in the following case, we generate more efficient code when }
  968. { signed is true }
  969. if (cmp_op in [OC_EQ, OC_NE]) and
  970. (aword(a) > $FFFF) then
  971. signed := true;
  972. if signed then
  973. if (a >= low(smallint)) and (a <= high(smallint)) then
  974. list.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR0, reg, a))
  975. else begin
  976. scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  977. a_load_const_reg(list, OS_64, a, scratch_register);
  978. list.concat(taicpu.op_reg_reg_reg(A_CMPD, NR_CR0, reg, scratch_register));
  979. end
  980. else if (aword(a) <= $FFFF) then
  981. list.concat(taicpu.op_reg_reg_const(A_CMPLDI, NR_CR0, reg, aword(a)))
  982. else begin
  983. scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  984. a_load_const_reg(list, OS_64, a, scratch_register);
  985. list.concat(taicpu.op_reg_reg_reg(A_CMPLD, NR_CR0, reg,
  986. scratch_register));
  987. end;
  988. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  989. end;
  990. procedure tcgppc.a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize;
  991. cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
  992. var
  993. op: tasmop;
  994. begin
  995. if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then
  996. if (size in [OS_64, OS_S64]) then
  997. op := A_CMPD
  998. else
  999. op := A_CMPW
  1000. else
  1001. if (size in [OS_64, OS_S64]) then
  1002. op := A_CMPLD
  1003. else
  1004. op := A_CMPLW;
  1005. list.concat(taicpu.op_reg_reg_reg(op, NR_CR0, reg2, reg1));
  1006. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  1007. end;
  1008. procedure tcgppc.a_jmp_cond(list: taasmoutput; cond: TOpCmp; l: tasmlabel);
  1009. begin
  1010. a_jmp(list, A_BC, TOpCmp2AsmCond[cond], 0, l);
  1011. end;
  1012. procedure tcgppc.a_jmp_name(list: taasmoutput; const s: string);
  1013. var
  1014. p: taicpu;
  1015. begin
  1016. p := taicpu.op_sym(A_B, objectlibrary.newasmsymbol(s, AB_EXTERNAL,
  1017. AT_LABEL));
  1018. p.is_jmp := true;
  1019. list.concat(p)
  1020. end;
  1021. procedure tcgppc.a_jmp_always(list: taasmoutput; l: tasmlabel);
  1022. begin
  1023. a_jmp(list, A_B, C_None, 0, l);
  1024. end;
  1025. procedure tcgppc.a_jmp_flags(list: taasmoutput; const f: TResFlags; l:
  1026. tasmlabel);
  1027. var
  1028. c: tasmcond;
  1029. begin
  1030. c := flags_to_cond(f);
  1031. a_jmp(list, A_BC, c.cond, c.cr - RS_CR0, l);
  1032. end;
  1033. procedure tcgppc.g_flags2reg(list: taasmoutput; size: TCgSize; const f:
  1034. TResFlags; reg: TRegister);
  1035. var
  1036. testbit: byte;
  1037. bitvalue: boolean;
  1038. begin
  1039. { get the bit to extract from the conditional register + its requested value (0 or 1) }
  1040. testbit := ((f.cr - RS_CR0) * 4);
  1041. case f.flag of
  1042. F_EQ, F_NE:
  1043. begin
  1044. inc(testbit, 2);
  1045. bitvalue := f.flag = F_EQ;
  1046. end;
  1047. F_LT, F_GE:
  1048. begin
  1049. bitvalue := f.flag = F_LT;
  1050. end;
  1051. F_GT, F_LE:
  1052. begin
  1053. inc(testbit);
  1054. bitvalue := f.flag = F_GT;
  1055. end;
  1056. else
  1057. internalerror(200112261);
  1058. end;
  1059. { load the conditional register in the destination reg }
  1060. list.concat(taicpu.op_reg(A_MFCR, reg));
  1061. { we will move the bit that has to be tested to bit 0 by rotating left }
  1062. testbit := (testbit + 1) and 31;
  1063. { extract bit }
  1064. list.concat(taicpu.op_reg_reg_const_const_const(
  1065. A_RLWINM,reg,reg,testbit,31,31));
  1066. { if we need the inverse, xor with 1 }
  1067. if not bitvalue then
  1068. list.concat(taicpu.op_reg_reg_const(A_XORI, reg, reg, 1));
  1069. end;
  1070. { *********** entry/exit code and address loading ************ }
  1071. procedure tcgppc.g_save_standard_registers(list: Taasmoutput);
  1072. begin
  1073. { this work is done in g_proc_entry }
  1074. end;
  1075. procedure tcgppc.g_restore_standard_registers(list: Taasmoutput);
  1076. begin
  1077. { this work is done in g_proc_exit }
  1078. end;
  1079. procedure tcgppc.calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  1080. var
  1081. reg : TSuperRegister;
  1082. begin
  1083. fprcount := 0;
  1084. firstfpr := RS_F31;
  1085. if not (po_assembler in current_procinfo.procdef.procoptions) then begin
  1086. for reg := RS_F14 to RS_F31 do begin
  1087. if reg in rg[R_FPUREGISTER].used_in_proc then begin
  1088. fprcount := ord(RS_F31)-ord(reg)+1;
  1089. firstfpr := reg;
  1090. break;
  1091. end;
  1092. end;
  1093. end;
  1094. end;
  1095. procedure tcgppc.calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  1096. var
  1097. reg : TSuperRegister;
  1098. begin
  1099. gprcount := 0;
  1100. firstgpr := RS_R31;
  1101. if not (po_assembler in current_procinfo.procdef.procoptions) then begin
  1102. for reg := RS_R14 to RS_R31 do begin
  1103. if reg in rg[R_INTREGISTER].used_in_proc then begin
  1104. gprcount := ord(RS_R31)-ord(reg)+1;
  1105. firstgpr := reg;
  1106. break;
  1107. end;
  1108. end;
  1109. end;
  1110. end;
  1111. procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint;
  1112. nostackframe: boolean);
  1113. { generated the entry code of a procedure/function. Note: localsize is the
  1114. sum of the size necessary for local variables and the maximum possible
  1115. combined size of ALL the parameters of a procedure called by the current
  1116. one.
  1117. This procedure may be called before, as well as after g_return_from_proc
  1118. is called. NOTE registers are not to be allocated through the register
  1119. allocator here, because the register colouring has already occured !! }
  1120. var
  1121. firstregfpu, firstreggpr: TSuperRegister;
  1122. href: treference;
  1123. needslinkreg: boolean;
  1124. regcount : TSuperRegister;
  1125. fprcount, gprcount : aint;
  1126. begin
  1127. { CR and LR only have to be saved in case they are modified by the current
  1128. procedure, but currently this isn't checked, so save them always
  1129. following is the entry code as described in "Altivec Programming
  1130. Interface Manual", bar the saving of AltiVec registers }
  1131. a_reg_alloc(list, NR_STACK_POINTER_REG);
  1132. a_reg_alloc(list, NR_R0);
  1133. calcFirstUsedFPR(firstregfpu, fprcount);
  1134. calcFirstUsedGPR(firstreggpr, gprcount);
  1135. { calculate real stack frame size }
  1136. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1137. gprcount, fprcount);
  1138. { determine whether we need to save the link register }
  1139. needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
  1140. (pi_do_call in current_procinfo.flags));
  1141. { move link register to r0 }
  1142. if (needslinkreg) then begin
  1143. list.concat(taicpu.op_reg(A_MFLR, NR_R0));
  1144. end;
  1145. { save old stack frame pointer }
  1146. if (localsize > 0) then begin
  1147. a_reg_alloc(list, NR_OLD_STACK_POINTER_REG);
  1148. list.concat(taicpu.op_reg_reg(A_MR, NR_OLD_STACK_POINTER_REG, NR_STACK_POINTER_REG));
  1149. end;
  1150. { save registers, FPU first, then GPR }
  1151. reference_reset_base(href, NR_STACK_POINTER_REG, -8);
  1152. if (fprcount > 0) then begin
  1153. for regcount := RS_F31 downto firstregfpu do begin
  1154. a_loadfpu_reg_ref(list, OS_FLOAT, newreg(R_FPUREGISTER, regcount,
  1155. R_SUBNONE), href);
  1156. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1157. end;
  1158. end;
  1159. if (gprcount > 0) then begin
  1160. for regcount := RS_R31 downto firstreggpr do begin
  1161. a_load_reg_ref(list, OS_INT, OS_INT, newreg(R_INTREGISTER, regcount,
  1162. R_SUBNONE), href);
  1163. dec(href.offset, tcgsize2size[OS_INT]);
  1164. end;
  1165. end;
  1166. { VMX registers not supported by FPC atm }
  1167. { we may need to store R0 (=LR) ourselves }
  1168. if (needslinkreg) then begin
  1169. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
  1170. list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
  1171. end;
  1172. { create stack frame }
  1173. if (not nostackframe) and (localsize > 0) then begin
  1174. if (localsize <= high(smallint)) then begin
  1175. reference_reset_base(href, NR_STACK_POINTER_REG, -localsize);
  1176. a_load_store(list, A_STDU, NR_STACK_POINTER_REG, href);
  1177. end else begin
  1178. reference_reset_base(href, NR_NO, -localsize);
  1179. { use R0 for loading the constant (which is definitely > 32k when entering
  1180. this branch)
  1181. Inlined at this position because it must not use temp registers because
  1182. register allocations have already been done :( }
  1183. { Code template:
  1184. lis r0,ofs@highest
  1185. ori r0,r0,ofs@higher
  1186. sldi r0,r0,32
  1187. oris r0,r0,ofs@h
  1188. ori r0,r0,ofs@l
  1189. }
  1190. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1191. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1192. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1193. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1194. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1195. list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
  1196. end;
  1197. end;
  1198. { CR register not used by FPC atm }
  1199. { keep R1 allocated??? }
  1200. a_reg_dealloc(list, NR_R0);
  1201. end;
  1202. procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
  1203. boolean);
  1204. { This procedure may be called before, as well as after g_stackframe_entry }
  1205. { is called. NOTE registers are not to be allocated through the register }
  1206. { allocator here, because the register colouring has already occured !! }
  1207. var
  1208. regcount, firstregfpu, firstreggpr: TSuperRegister;
  1209. href: treference;
  1210. needslinkreg : boolean;
  1211. localsize,
  1212. fprcount, gprcount: aint;
  1213. begin
  1214. calcFirstUsedFPR(firstregfpu, fprcount);
  1215. calcFirstUsedGPR(firstreggpr, gprcount);
  1216. { determine whether we need to restore the link register }
  1217. needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
  1218. (pi_do_call in current_procinfo.flags));
  1219. { calculate stack frame }
  1220. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1221. gprcount, fprcount);
  1222. { CR register not supported }
  1223. { restore stack pointer }
  1224. if (not nostackframe) and (localsize > 0) then begin
  1225. if (localsize <= high(smallint)) then begin
  1226. list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
  1227. end else begin
  1228. reference_reset_base(href, NR_NO, localsize);
  1229. { use R0 for loading the constant (which is definitely > 32k when entering
  1230. this branch)
  1231. Inlined because it must not use temp registers because register allocations
  1232. have already been done :( }
  1233. { Code template:
  1234. lis r0,ofs@highest
  1235. ori r0,ofs@higher
  1236. sldi r0,r0,32
  1237. oris r0,r0,ofs@h
  1238. ori r0,r0,ofs@l
  1239. }
  1240. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1241. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1242. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1243. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1244. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1245. list.concat(taicpu.op_reg_reg_reg(A_ADD, NR_R1, NR_R1, NR_R0));
  1246. end;
  1247. end;
  1248. { load registers, FPR first, then GPR }
  1249. {$note ts:todo change order of loading}
  1250. reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]);
  1251. if (fprcount > 0) then begin
  1252. for regcount := RS_F31 downto firstregfpu do begin
  1253. a_loadfpu_ref_reg(list, OS_FLOAT, href, newreg(R_FPUREGISTER, regcount,
  1254. R_SUBNONE));
  1255. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1256. end;
  1257. end;
  1258. if (gprcount > 0) then begin
  1259. for regcount := RS_R31 downto firstreggpr do begin
  1260. a_load_ref_reg(list, OS_INT, OS_INT, href, newreg(R_INTREGISTER, regcount,
  1261. R_SUBNONE));
  1262. dec(href.offset, tcgsize2size[OS_INT]);
  1263. end;
  1264. end;
  1265. { VMX not supported... }
  1266. { restore LR (if needed) }
  1267. if (needslinkreg) then begin
  1268. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
  1269. list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
  1270. list.concat(taicpu.op_reg(A_MTLR, NR_R0));
  1271. end;
  1272. { generate return instruction }
  1273. list.concat(taicpu.op_none(A_BLR));
  1274. end;
  1275. procedure tcgppc.a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r:
  1276. tregister);
  1277. var
  1278. ref2, tmpref: treference;
  1279. { register used to construct address }
  1280. tempreg : TRegister;
  1281. begin
  1282. ref2 := ref;
  1283. fixref(list, ref2, OS_64);
  1284. { load a symbol }
  1285. if assigned(ref2.symbol) or (hasLargeOffset(ref2)) then begin
  1286. { add the symbol's value to the base of the reference, and if the }
  1287. { reference doesn't have a base, create one }
  1288. reference_reset(tmpref);
  1289. tmpref.offset := ref2.offset;
  1290. tmpref.symbol := ref2.symbol;
  1291. tmpref.relsymbol := ref2.relsymbol;
  1292. { load 64 bit reference into r. If the reference already has a base register,
  1293. first load the 64 bit value into a temp register, then add it to the result
  1294. register rD }
  1295. if (ref2.base <> NR_NO) then begin
  1296. { already have a base register, so allocate a new one }
  1297. tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1298. end else begin
  1299. tempreg := r;
  1300. end;
  1301. { code for loading a reference from a symbol into a register rD }
  1302. (*
  1303. lis rX,SYM@highest
  1304. ori rX,SYM@higher
  1305. sldi rX,rX,32
  1306. oris rX,rX,SYM@h
  1307. ori rX,rX,SYM@l
  1308. *)
  1309. tmpref.refaddr := addr_highest;
  1310. list.concat(taicpu.op_reg_ref(A_LIS, tempreg, tmpref));
  1311. tmpref.refaddr := addr_higher;
  1312. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1313. list.concat(taicpu.op_reg_reg_const(A_SLDI, tempreg, tempreg, 32));
  1314. tmpref.refaddr := addr_high;
  1315. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tempreg, tempreg, tmpref));
  1316. tmpref.refaddr := addr_low;
  1317. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1318. { if there's already a base register, add the temp register contents to
  1319. the base register }
  1320. if (ref2.base <> NR_NO) then begin
  1321. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
  1322. end;
  1323. end else if ref2.offset <> 0 then begin
  1324. { no symbol, but offset <> 0 }
  1325. if ref2.base <> NR_NO then begin
  1326. a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
  1327. { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never
  1328. occurs, so now only ref.offset has to be loaded }
  1329. end else begin
  1330. a_load_const_reg(list, OS_64, ref2.offset, r)
  1331. end;
  1332. end else if ref.index <> NR_NO then
  1333. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, ref2.base, ref2.index))
  1334. else if (ref2.base <> NR_NO) and
  1335. (r <> ref2.base) then
  1336. a_load_reg_reg(list, OS_ADDR, OS_ADDR, ref2.base, r)
  1337. else begin
  1338. list.concat(taicpu.op_reg_const(A_LI, r, 0));
  1339. end;
  1340. end;
  1341. { ************* concatcopy ************ }
  1342. const
  1343. maxmoveunit = 8;
  1344. procedure tcgppc.g_concatcopy(list: taasmoutput; const source, dest: treference;
  1345. len: aint);
  1346. var
  1347. countreg, tempreg: TRegister;
  1348. src, dst: TReference;
  1349. lab: tasmlabel;
  1350. count, count2: longint;
  1351. size: tcgsize;
  1352. begin
  1353. {$IFDEF extdebug}
  1354. if len > high(aint) then
  1355. internalerror(2002072704);
  1356. list.concat(tai_comment.create(strpnew('g_concatcopy')));
  1357. {$ENDIF extdebug}
  1358. { make sure short loads are handled as optimally as possible;
  1359. note that the data here never overlaps, so we can do a forward
  1360. copy at all times.
  1361. NOTE: maybe use some scratch registers to pair load/store instructions
  1362. }
  1363. if (len <= maxmoveunit) then begin
  1364. src := source; dst := dest;
  1365. while (len <> 0) do begin
  1366. if (len = 8) then begin
  1367. a_load_ref_ref(list, OS_64, OS_64, src, dst);
  1368. dec(len, 8);
  1369. end else if (len >= 4) then begin
  1370. a_load_ref_ref(list, OS_32, OS_32, src, dst);
  1371. inc(src.offset, 4); inc(dst.offset, 4);
  1372. dec(len, 4);
  1373. end else if (len >= 2) then begin
  1374. a_load_ref_ref(list, OS_16, OS_16, src, dst);
  1375. inc(src.offset, 2); inc(dst.offset, 2);
  1376. dec(len, 2);
  1377. end else begin
  1378. a_load_ref_ref(list, OS_8, OS_8, src, dst);
  1379. inc(src.offset, 1); inc(dst.offset, 1);
  1380. dec(len, 1);
  1381. end;
  1382. end;
  1383. exit;
  1384. end;
  1385. count := len div maxmoveunit;
  1386. reference_reset(src);
  1387. reference_reset(dst);
  1388. { load the address of source into src.base }
  1389. if (count > 4) or
  1390. not issimpleref(source) or
  1391. ((source.index <> NR_NO) and
  1392. ((source.offset + len) > high(smallint))) then begin
  1393. src.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1394. a_loadaddr_ref_reg(list, source, src.base);
  1395. end else begin
  1396. src := source;
  1397. end;
  1398. { load the address of dest into dst.base }
  1399. if (count > 4) or
  1400. not issimpleref(dest) or
  1401. ((dest.index <> NR_NO) and
  1402. ((dest.offset + len) > high(smallint))) then begin
  1403. dst.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1404. a_loadaddr_ref_reg(list, dest, dst.base);
  1405. end else begin
  1406. dst := dest;
  1407. end;
  1408. { generate a loop }
  1409. if count > 4 then begin
  1410. { the offsets are zero after the a_loadaddress_ref_reg and just
  1411. have to be set to 8. I put an Inc there so debugging may be
  1412. easier (should offset be different from zero here, it will be
  1413. easy to notice in the generated assembler }
  1414. inc(dst.offset, 8);
  1415. inc(src.offset, 8);
  1416. list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
  1417. list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
  1418. countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1419. a_load_const_reg(list, OS_64, count, countreg);
  1420. { explicitely allocate F0 since it can be used safely here
  1421. (for holding date that's being copied) }
  1422. a_reg_alloc(list, NR_F0);
  1423. objectlibrary.getjumplabel(lab);
  1424. a_label(list, lab);
  1425. list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1));
  1426. list.concat(taicpu.op_reg_ref(A_LFDU, NR_F0, src));
  1427. list.concat(taicpu.op_reg_ref(A_STFDU, NR_F0, dst));
  1428. a_jmp(list, A_BC, C_NE, 0, lab);
  1429. a_reg_dealloc(list, NR_F0);
  1430. len := len mod 8;
  1431. end;
  1432. count := len div 8;
  1433. { unrolled loop }
  1434. if count > 0 then begin
  1435. a_reg_alloc(list, NR_F0);
  1436. for count2 := 1 to count do begin
  1437. a_loadfpu_ref_reg(list, OS_F64, src, NR_F0);
  1438. a_loadfpu_reg_ref(list, OS_F64, NR_F0, dst);
  1439. inc(src.offset, 8);
  1440. inc(dst.offset, 8);
  1441. end;
  1442. a_reg_dealloc(list, NR_F0);
  1443. len := len mod 8;
  1444. end;
  1445. if (len and 4) <> 0 then begin
  1446. a_reg_alloc(list, NR_R0);
  1447. a_load_ref_reg(list, OS_32, OS_32, src, NR_R0);
  1448. a_load_reg_ref(list, OS_32, OS_32, NR_R0, dst);
  1449. inc(src.offset, 4);
  1450. inc(dst.offset, 4);
  1451. a_reg_dealloc(list, NR_R0);
  1452. end;
  1453. { copy the leftovers }
  1454. if (len and 2) <> 0 then begin
  1455. a_reg_alloc(list, NR_R0);
  1456. a_load_ref_reg(list, OS_16, OS_16, src, NR_R0);
  1457. a_load_reg_ref(list, OS_16, OS_16, NR_R0, dst);
  1458. inc(src.offset, 2);
  1459. inc(dst.offset, 2);
  1460. a_reg_dealloc(list, NR_R0);
  1461. end;
  1462. if (len and 1) <> 0 then begin
  1463. a_reg_alloc(list, NR_R0);
  1464. a_load_ref_reg(list, OS_8, OS_8, src, NR_R0);
  1465. a_load_reg_ref(list, OS_8, OS_8, NR_R0, dst);
  1466. a_reg_dealloc(list, NR_R0);
  1467. end;
  1468. end;
  1469. procedure tcgppc.g_overflowcheck(list: taasmoutput; const l: tlocation; def:
  1470. tdef);
  1471. var
  1472. hl: tasmlabel;
  1473. flags : TResFlags;
  1474. begin
  1475. if not (cs_check_overflow in aktlocalswitches) then
  1476. exit;
  1477. objectlibrary.getjumplabel(hl);
  1478. if not ((def.deftype = pointerdef) or
  1479. ((def.deftype = orddef) and
  1480. (torddef(def).typ in [u64bit, u16bit, u32bit, u8bit, uchar,
  1481. bool8bit, bool16bit, bool32bit]))) then
  1482. begin
  1483. { ... instructions setting overflow flag ...
  1484. mfxerf R0
  1485. mtcrf 128, R0
  1486. ble cr0, label }
  1487. list.concat(taicpu.op_reg(A_MFXER, NR_R0));
  1488. list.concat(taicpu.op_const_reg(A_MTCRF, 128, NR_R0));
  1489. flags.cr := RS_CR0;
  1490. flags.flag := F_LE;
  1491. a_jmp_flags(list, flags, hl);
  1492. end else
  1493. a_jmp_cond(list, OC_AE, hl);
  1494. a_call_name(list, 'FPC_OVERFLOW');
  1495. a_label(list, hl);
  1496. end;
  1497. procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
  1498. labelname: string; ioffset: longint);
  1499. procedure loadvmttor11;
  1500. var
  1501. href: treference;
  1502. begin
  1503. reference_reset_base(href, NR_R3, 0);
  1504. cg.a_load_ref_reg(list, OS_ADDR, OS_ADDR, href, NR_R11);
  1505. end;
  1506. procedure op_onr11methodaddr;
  1507. var
  1508. href: treference;
  1509. begin
  1510. if (procdef.extnumber = $FFFF) then
  1511. Internalerror(200006139);
  1512. { call/jmp vmtoffs(%eax) ; method offs }
  1513. reference_reset_base(href, NR_R11,
  1514. procdef._class.vmtmethodoffset(procdef.extnumber));
  1515. if not (hasLargeOffset(href)) then begin
  1516. list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
  1517. smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
  1518. 0))));
  1519. href.offset := smallint(href.offset and $FFFF);
  1520. end else
  1521. { add support for offsets > 16 bit }
  1522. internalerror(200510201);
  1523. list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
  1524. { the loaded reference is a function descriptor reference, so deref again
  1525. (at ofs 0 there's the real pointer) }
  1526. {$warning ts:TODO: update GOT reference}
  1527. reference_reset_base(href, NR_R11, 0);
  1528. list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
  1529. list.concat(taicpu.op_reg(A_MTCTR, NR_R11));
  1530. list.concat(taicpu.op_none(A_BCTR));
  1531. { NOP needed for the linker...? }
  1532. list.concat(taicpu.op_none(A_NOP));
  1533. end;
  1534. var
  1535. make_global: boolean;
  1536. begin
  1537. if (not (procdef.proctypeoption in [potype_function, potype_procedure])) then
  1538. Internalerror(200006137);
  1539. if not assigned(procdef._class) or
  1540. (procdef.procoptions * [po_classmethod, po_staticmethod,
  1541. po_methodpointer, po_interrupt, po_iocheck] <> []) then
  1542. Internalerror(200006138);
  1543. if procdef.owner.symtabletype <> objectsymtable then
  1544. Internalerror(200109191);
  1545. make_global := false;
  1546. if (not current_module.is_unit) or
  1547. (cs_create_smart in aktmoduleswitches) or
  1548. (procdef.owner.defowner.owner.symtabletype = globalsymtable) then
  1549. make_global := true;
  1550. if make_global then
  1551. List.concat(Tai_symbol.Createname_global(labelname, AT_FUNCTION, 0))
  1552. else
  1553. List.concat(Tai_symbol.Createname(labelname, AT_FUNCTION, 0));
  1554. { set param1 interface to self }
  1555. g_adjust_self_value(list, procdef, ioffset);
  1556. if po_virtualmethod in procdef.procoptions then begin
  1557. loadvmttor11;
  1558. op_onr11methodaddr;
  1559. end else
  1560. {$note ts:todo add GOT change?? - think not needed :) }
  1561. list.concat(taicpu.op_sym(A_B,
  1562. objectlibrary.newasmsymbol('.' + procdef.mangledname, AB_EXTERNAL,
  1563. AT_FUNCTION)));
  1564. List.concat(Tai_symbol_end.Createname(labelname));
  1565. end;
  1566. {***************** This is private property, keep out! :) *****************}
  1567. function tcgppc.issimpleref(const ref: treference): boolean;
  1568. begin
  1569. if (ref.base = NR_NO) and
  1570. (ref.index <> NR_NO) then
  1571. internalerror(200208101);
  1572. result :=
  1573. not (assigned(ref.symbol)) and
  1574. (((ref.index = NR_NO) and
  1575. (ref.offset >= low(smallint)) and
  1576. (ref.offset <= high(smallint))) or
  1577. ((ref.index <> NR_NO) and
  1578. (ref.offset = 0)));
  1579. end;
  1580. function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
  1581. var
  1582. tmpreg: tregister;
  1583. needsAlign : boolean;
  1584. begin
  1585. result := false;
  1586. needsAlign := size in [OS_S32, OS_64, OS_S64];
  1587. if (ref.base = NR_NO) then begin
  1588. ref.base := ref.index;
  1589. ref.index := NR_NO;
  1590. end;
  1591. if (ref.base <> NR_NO) and (ref.index <> NR_NO) and
  1592. ((ref.offset <> 0) or assigned(ref.symbol)) then begin
  1593. result := true;
  1594. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1595. a_op_reg_reg_reg(list, OP_ADD, size, ref.base, ref.index, tmpreg);
  1596. ref.index := NR_NO;
  1597. ref.base := tmpreg;
  1598. end;
  1599. end;
  1600. procedure tcgppc.a_load_store(list: taasmoutput; op: tasmop; reg: tregister;
  1601. ref: treference);
  1602. var
  1603. tmpreg, tmpreg2: tregister;
  1604. tmpref: treference;
  1605. largeOffset: Boolean;
  1606. begin
  1607. { at this point there must not be a combination of values in the ref treference
  1608. which is not possible to directly map to instructions of the PowerPC architecture }
  1609. if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
  1610. internalerror(200310131);
  1611. { for some instructions we need to check that the offset is divisible by at
  1612. least four. If not, add the bytes which are "off" to the base register and
  1613. adjust the offset accordingly }
  1614. case op of
  1615. A_LD, A_LDU, A_STD, A_STDU, A_LWA :
  1616. if ((ref.offset mod 4) <> 0) then begin
  1617. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1618. if (ref.base <> NR_NO) then begin
  1619. a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
  1620. ref.base := tmpreg;
  1621. end else begin
  1622. list.concat(taicpu.op_reg_const(A_LI, tmpreg, ref.offset mod 4));
  1623. ref.base := tmpreg;
  1624. end;
  1625. ref.offset := (ref.offset div 4) * 4;
  1626. end;
  1627. end;
  1628. { if we have to load/store from a symbol or large addresses, use a temporary register
  1629. containing the address }
  1630. if assigned(ref.symbol) or (hasLargeOffset(ref)) then begin
  1631. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1632. if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
  1633. ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1634. a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
  1635. ref.offset := 0;
  1636. end;
  1637. reference_reset(tmpref);
  1638. tmpref.symbol := ref.symbol;
  1639. tmpref.relsymbol := ref.relsymbol;
  1640. tmpref.offset := ref.offset;
  1641. if (ref.base <> NR_NO) then begin
  1642. { As long as the TOC isn't working we try to achieve highest speed (in this
  1643. case by allowing instructions execute in parallel) as possible at the cost
  1644. of using another temporary register. So the code template when there is
  1645. a base register and an offset is the following:
  1646. lis rT1, SYM+offs@highest
  1647. ori rT1, rT1, SYM+offs@higher
  1648. lis rT2, SYM+offs@hi
  1649. ori rT2, SYM+offs@lo
  1650. rldimi rT2, rT1, 32
  1651. <op>X reg, base, rT2
  1652. }
  1653. tmpreg2 := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1654. tmpref.refaddr := addr_highest;
  1655. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1656. tmpref.refaddr := addr_higher;
  1657. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1658. tmpref.refaddr := addr_high;
  1659. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg2, tmpref));
  1660. tmpref.refaddr := addr_low;
  1661. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg2, tmpreg2, tmpref));
  1662. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, tmpreg2, tmpreg, 32, 0));
  1663. reference_reset(tmpref);
  1664. tmpref.base := ref.base;
  1665. tmpref.index := tmpreg2;
  1666. case op of
  1667. { the code generator doesn't generate update instructions anyway, so
  1668. error out on those instructions }
  1669. A_LBZ : op := A_LBZX;
  1670. A_LHZ : op := A_LHZX;
  1671. A_LWZ : op := A_LWZX;
  1672. A_LD : op := A_LDX;
  1673. A_LHA : op := A_LHAX;
  1674. A_LWA : op := A_LWAX;
  1675. A_LFS : op := A_LFSX;
  1676. A_LFD : op := A_LFDX;
  1677. A_STB : op := A_STBX;
  1678. A_STH : op := A_STHX;
  1679. A_STW : op := A_STWX;
  1680. A_STD : op := A_STDX;
  1681. A_STFS : op := A_STFSX;
  1682. A_STFD : op := A_STFDX;
  1683. else
  1684. { unknown load/store opcode }
  1685. internalerror(2005101302);
  1686. end;
  1687. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1688. end else begin
  1689. { when accessing value from a reference without a base register, use the
  1690. following code template:
  1691. lis rT,SYM+offs@highesta
  1692. ori rT,SYM+offs@highera
  1693. sldi rT,rT,32
  1694. oris rT,rT,SYM+offs@ha
  1695. ld rD,SYM+offs@l(rT)
  1696. }
  1697. tmpref.refaddr := addr_highesta;
  1698. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1699. tmpref.refaddr := addr_highera;
  1700. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1701. list.concat(taicpu.op_reg_reg_const(A_SLDI, tmpreg, tmpreg, 32));
  1702. tmpref.refaddr := addr_higha;
  1703. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tmpreg, tmpreg, tmpref));
  1704. tmpref.base := tmpreg;
  1705. tmpref.refaddr := addr_low;
  1706. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1707. end;
  1708. end else begin
  1709. list.concat(taicpu.op_reg_ref(op, reg, ref));
  1710. end;
  1711. end;
  1712. procedure tcgppc.a_jmp(list: taasmoutput; op: tasmop; c: tasmcondflag;
  1713. crval: longint; l: tasmlabel);
  1714. var
  1715. p: taicpu;
  1716. begin
  1717. p := taicpu.op_sym(op, objectlibrary.newasmsymbol(l.name, AB_EXTERNAL,
  1718. AT_LABEL));
  1719. if op <> A_B then
  1720. create_cond_norm(c, crval, p.condition);
  1721. p.is_jmp := true;
  1722. list.concat(p)
  1723. end;
  1724. function tcgppc.hasLargeOffset(const ref : TReference) : Boolean;
  1725. begin
  1726. { this rather strange calculation is required because offsets of TReferences are unsigned }
  1727. result := aword(ref.offset-low(smallint)) > high(smallint)-low(smallint);
  1728. end;
  1729. begin
  1730. cg := tcgppc.create;
  1731. end.