cgcpu.pas 208 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm: code generator base class shared between all arm architectures }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;

    { loading values into call parameters, and calls }
    procedure a_load_const_cgpara(list : TAsmList; size : tcgsize; a : tcgint;
      const paraloc : TCGPara); override;
    procedure a_load_ref_cgpara(list : TAsmList; size : tcgsize; const r : treference;
      const paraloc : TCGPara); override;
    procedure a_loadaddr_ref_cgpara(list : TAsmList; const r : treference;
      const paraloc : TCGPara); override;
    procedure a_call_name(list : TAsmList; const s : string; weak : boolean); override;
    procedure a_call_reg(list : TAsmList; reg : tregister); override;

    { move instructions }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize : tcgsize; reg : tregister;
      const ref : treference); override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;
      reg1, reg2 : tregister); override;
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize : tcgsize; reg : tregister;
      const ref : treference) : treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;
      const Ref : treference; reg : tregister) : treference;

    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;
      reg1, reg2 : tregister); override;
    procedure a_loadfpu_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;
      const ref : treference; reg : tregister); override;
    procedure a_loadfpu_reg_ref(list : TAsmList; fromsize, tosize : tcgsize; reg : tregister;
      const ref : treference); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList; size : tcgsize; const ref : treference;
      const paraloc : TCGPara); override;

    { comparison operations }
    procedure a_cmp_const_reg_label(list : TAsmList; size : tcgsize; cmp_op : topcmp; a : tcgint;
      reg : tregister; l : tasmlabel); override;
    procedure a_cmp_reg_reg_label(list : TAsmList; size : tcgsize; cmp_op : topcmp;
      reg1, reg2 : tregister; l : tasmlabel); override;

    { jumps and flag handling }
    procedure a_jmp_name(list : TAsmList; const s : string); override;
    procedure a_jmp_always(list : TAsmList; l : tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList; const f : TResFlags; l : tasmlabel); override;
    procedure g_flags2reg(list : TAsmList; size : TCgSize; const f : TResFlags;
      reg : TRegister); override;

    { procedure entry and exit code }
    procedure g_proc_entry(list : TAsmList; localsize : longint; nostackframe : boolean); override;
    procedure g_proc_exit(list : TAsmList; parasize : longint; nostackframe : boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;

    procedure a_loadaddr_ref_reg(list : TAsmList; const ref : treference; r : tregister); override;

    { block copies }
    procedure g_concatcopy(list : TAsmList; const source, dest : treference;
      len : tcgint); override;
    procedure g_concatcopy_unaligned(list : TAsmList; const source, dest : treference;
      len : tcgint); override;
    procedure g_concatcopy_move(list : TAsmList; const source, dest : treference; len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList; const source, dest : treference;
      len : tcgint; aligned : boolean);

    { overflow checking }
    procedure g_overflowcheck(list : TAsmList; const l : tlocation; def : tdef); override;
    procedure g_overflowCheck_loc(List : TAsmList; const Loc : TLocation; def : TDef;
      ovloc : tlocation); override;

    procedure g_save_registers(list : TAsmList); override;
    procedure g_restore_registers(list : TAsmList); override;

    procedure a_jmp_cond(list : TAsmList; cond : TOpCmp; l : tasmlabel);
    procedure fixref(list : TAsmList; var ref : treference);
    { virtual: tthumbcgarm and tthumb2cgarm declare their own overrides }
    function handle_load_store(list : TAsmList; op : tasmop; oppostfix : toppostfix;
      reg : tregister; ref : treference) : treference; virtual;

    { mm register moves and operations }
    procedure a_loadmm_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;
      reg1, reg2 : tregister; shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;
      const ref : treference; reg : tregister; shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list : TAsmList; fromsize, tosize : tcgsize; reg : tregister;
      const ref : treference; shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list : TAsmList; fromsize, tosize : tcgsize;
      intreg, mmreg : tregister; shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list : TAsmList; fromsize, tosize : tcgsize;
      mmreg, intreg : tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list : TAsmList; Op : TOpCG; size : tcgsize; src, dst : tregister;
      shuffle : pmmshuffle); override;

    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list : TAsmList; reverse : boolean; size : TCGSize;
      src, dst : TRegister); override;

    { try to generate an optimized 32 bit multiplication by a constant;
      returns true if code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list : TAsmList; a : tcgint;
      src, dst : tregister) : boolean;

    { clear out potential overflow bits from 8 or 16 bit operations,
      i.e. the upper 24/16 bits of a register after an operation }
    procedure maybeadjustresult(list : TAsmList; op : TOpCg; size : tcgsize; dst : tregister);

    { mla for thumb requires that none of the registers is equal to r13/r15,
      this method ensures this }
    procedure safe_mla(list : TAsmList; op1, op2, op3, op4 : TRegister);
  end;
  { tcgarm: the part of the code generator that normal arm and thumb-2 share }
  tcgarm = class(tbasecgarm)
    { integer operations }
    procedure a_op_const_reg(list : TAsmList; Op : TOpCG; size : TCGSize; a : tcgint;
      reg : TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op : TOpCG; size : TCGSize; a : tcgint;
      const ref : TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op : TOpCG; size : TCGSize;
      src, dst : TRegister); override;
    procedure a_op_const_reg_reg(list : TAsmList; op : TOpCg; size : tcgsize; a : tcgint;
      src, dst : tregister); override;
    procedure a_op_reg_reg_reg(list : TAsmList; op : TOpCg; size : tcgsize;
      src1, src2, dst : tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list : TAsmList; op : TOpCg; size : tcgsize;
      a : tcgint; src, dst : tregister; setflags : boolean; var ovloc : tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list : TAsmList; op : TOpCg; size : tcgsize;
      src1, src2, dst : tregister; setflags : boolean; var ovloc : tlocation); override;

    { loads }
    procedure a_load_const_reg(list : TAsmList; size : tcgsize; a : tcgint;
      reg : tregister); override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;
      const Ref : treference; reg : tregister); override;

    procedure g_adjust_self_value(list : TAsmList; procdef : tprocdef;
      ioffset : tcgint); override;

    { multiply two 32-bit registers into a lo and a hi 32-bit register }
    procedure a_mul_reg_reg_pair(list : tasmlist; size : tcgsize;
      src1, src2, dstlo, dsthi : tregister); override;
  end;
  { tarmcgarm: code generator for normal arm; it only supplies its own
    register allocator setup and teardown }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;
  end;
  { tbasecg64farm: 64 bit code generator base for all arm flavours;
    it adds nothing to tcg64f32 itself }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm: 64 bit code generator shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { 64 bit integer operations }
    procedure a_op64_reg_reg(list : TAsmList; op : TOpCG; size : tcgsize;
      regsrc, regdst : tregister64); override;
    procedure a_op64_const_reg(list : TAsmList; op : TOpCG; size : tcgsize; value : int64;
      reg : tregister64); override;
    procedure a_op64_const_reg_reg(list : TAsmList; op : TOpCG; size : tcgsize; value : int64;
      regsrc, regdst : tregister64); override;
    procedure a_op64_reg_reg_reg(list : TAsmList; op : TOpCG; size : tcgsize;
      regsrc1, regsrc2, regdst : tregister64); override;
    procedure a_op64_const_reg_reg_checkoverflow(list : TAsmList; op : TOpCG; size : tcgsize;
      value : int64; regsrc, regdst : tregister64; setflags : boolean;
      var ovloc : tlocation); override;
    procedure a_op64_reg_reg_reg_checkoverflow(list : TAsmList; op : TOpCG; size : tcgsize;
      regsrc1, regsrc2, regdst : tregister64; setflags : boolean;
      var ovloc : tlocation); override;

    { moves between a 64 bit integer register pair and an mm register }
    procedure a_loadmm_intreg64_reg(list : TAsmList; mmsize : tcgsize; intreg : tregister64;
      mmreg : tregister); override;
    procedure a_loadmm_reg_intreg64(list : TAsmList; mmsize : tcgsize; mmreg : tregister;
      intreg : tregister64); override;
  end;
  { tarmcg64farm: declared without any members of its own, everything is
    inherited from tcg64farm }
  tarmcg64farm = class(tcg64farm)
  end;
  { tthumbcgarm: derives directly from tbasecgarm (not from tcgarm) and
    overrides the routines listed below }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    procedure g_proc_entry(list : TAsmList; localsize : longint; nostackframe : boolean); override;
    procedure g_proc_exit(list : TAsmList; parasize : longint; nostackframe : boolean); override;

    procedure a_op_reg_reg(list : TAsmList; Op : TOpCG; size : TCGSize;
      src, dst : TRegister); override;
    procedure a_op_const_reg(list : TAsmList; op : TOpCg; size : tcgsize; a : tcgint;
      dst : tregister); override;
    procedure a_op_const_reg_reg(list : TAsmList; op : TOpCg; size : tcgsize; a : tcgint;
      src, dst : tregister); override;

    procedure g_flags2reg(list : TAsmList; size : TCgSize; const f : TResFlags;
      reg : TRegister); override;

    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;
      const Ref : treference; reg : tregister); override;
    procedure a_load_const_reg(list : TAsmList; size : tcgsize; a : tcgint;
      reg : tregister); override;

    procedure g_adjust_self_value(list : TAsmList; procdef : tprocdef;
      ioffset : tcgint); override;

    function handle_load_store(list : TAsmList; op : tasmop; oppostfix : toppostfix;
      reg : tregister; ref : treference) : treference; override;
  end;
  { tthumbcg64farm: 64 bit code generator declared next to tthumbcgarm;
    only the two operations below are overridden }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList; op : TOpCG; size : tcgsize;
      regsrc, regdst : tregister64); override;
    procedure a_op64_const_reg(list : TAsmList; op : TOpCG; size : tcgsize; value : int64;
      reg : tregister64); override;
  end;
  { tthumb2cgarm: thumb-2 code generator, built on the tcgarm code it shares
    with normal arm }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    procedure a_call_reg(list : TAsmList; reg : tregister); override;

    procedure a_load_const_reg(list : TAsmList; size : tcgsize; a : tcgint;
      reg : tregister); override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;
      const Ref : treference; reg : tregister); override;

    procedure a_op_reg_reg(list : TAsmList; Op : TOpCG; size : TCGSize;
      src, dst : TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list : TAsmList; op : TOpCg; size : tcgsize;
      a : tcgint; src, dst : tregister; setflags : boolean; var ovloc : tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list : TAsmList; op : TOpCg; size : tcgsize;
      src1, src2, dst : tregister; setflags : boolean; var ovloc : tlocation); override;

    procedure g_flags2reg(list : TAsmList; size : TCgSize; const f : TResFlags;
      reg : TRegister); override;

    procedure g_proc_entry(list : TAsmList; localsize : longint; nostackframe : boolean); override;
    procedure g_proc_exit(list : TAsmList; parasize : longint; nostackframe : boolean); override;

    function handle_load_store(list : TAsmList; op : tasmop; oppostfix : toppostfix;
      reg : tregister; ref : treference) : treference; override;

    { mm register moves }
    procedure a_loadmm_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;
      reg1, reg2 : tregister; shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;
      const ref : treference; reg : tregister; shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list : TAsmList; fromsize, tosize : tcgsize; reg : tregister;
      const ref : treference; shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list : TAsmList; fromsize, tosize : tcgsize;
      intreg, mmreg : tregister; shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list : TAsmList; fromsize, tosize : tcgsize;
      mmreg, intreg : tregister; shuffle : pmmshuffle); override;
  end;
  { tthumb2cg64farm: 64 bit code generator declared next to tthumb2cgarm;
    it overrides only a_op64_reg_reg }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList; op : TOpCG; size : tcgsize;
      regsrc, regdst : tregister64); override;
  end;
  const
    { assembler condition for every topcmp value; the entries are listed
      in the declaration order of topcmp }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (
      C_NONE,
      C_EQ,
      C_GT,
      C_LT,
      C_GE,
      C_LE,
      C_NE,
      C_LS,
      C_CC,
      C_CS,
      C_HI
    );

    winstackpagesize = 4096;

  { returns the instruction postfix that matches a floating point definition }
  function get_fpu_postfix(def : tdef) : toppostfix;

  procedure create_codegen;
  170. implementation
  171. uses
  172. globals,verbose,systems,cutils,
  173. aopt,aoptcpu,
  174. fmodule,
  175. symconst,symsym,symtable,
  176. tgobj,
  177. procinfo,cpupi,
  178. paramgr;
  { Maps a floating point type definition to an instruction postfix:
    s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    A def that is not a floatdef raises internal error 200401271, any other
    float type raises internal error 200401272. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            get_fpu_postfix:=PF_S;
          s64real:
            get_fpu_postfix:=PF_D;
          s80real:
            get_fpu_postfix:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators for the integer, fpu and mm register
    files. The order of the registers inside each list is passed to the
    allocator unchanged. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { integer registers; currently, we always save R14, so we can use it }
      if target_info.system=system_arm_darwin then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
           RS_R10,RS_R11,RS_R14],
          first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure, so it is
          handed to the allocator as well }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R11,RS_R14],
          first_int_imreg,[])
      else
        { same list, but without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R14],
          first_int_imreg,[]);

      { fpu registers }
      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],
        first_fpu_imreg,[]);

      { mm registers.
        The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if current_settings.fputype<>fpu_vfpv3 then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],
          first_mm_imreg,[])
      else
        { vfpv3: D16..D31 are made available too and are listed ahead of D8..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
           RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],
          first_mm_imreg,[]);
    end;
  { Releases the three allocators created by init_register_allocators
    and then runs the inherited cleanup. }
  procedure tarmcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;
      inherited;
    end;
  { Loads the constant a into reg. The cheapest form that can encode the
    value is chosen, tried in this order:
      1. a single MOV,
      2. a single MVN of the complement,
      3. MOV followed by ORR,
      4. MVN followed by BIC (on the complement),
      5. an LDR of a 32 bit word emitted into the local data of the
         current procedure.
    Sizes other than the 8/16/32 bit integer sizes raise internal error
    2002090902. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      poollabel : tasmlabel;
      poolref : treference;
      part1, part2 : DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_shifter_const(a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_shifter_const(not(a),rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { nothing above fits: place the value behind a fresh label in the
        local data of the procedure and load it through a PC based reference }
      reference_reset(poolref,4);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    If the source is at least as wide as the destination, the load is done
    with the destination size. When the alignment of the reference is 1 or 2
    and smaller than the size to load, 16 and 32 bit values are assembled
    from smaller loads that are merged with ORR and a left shift; all other
    cases go through handle_load_store with a single LDR. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      pieceref : treference;
      partreg, part2reg, addrreg : tregister;
      shift : tshifterop;
      step : integer;

    { Selects the reference used for the piecewise loads. Only complicated
      references (symbol, index register, offset outside -4095..maxoffset)
      or references that use reg itself need their address computed into a
      new register first; otherwise Ref is used as it is. }
    procedure select_piece_ref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-4095) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(pieceref,addrreg,0,ref.alignment);
          end
        else
          pieceref:=ref;
      end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      { pick the load postfix for the source size }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308297);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { direction in which the address moves from one piece to the next }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                select_piece_ref(4094);
                { big endian: start at the last byte }
                if target_info.endian=endian_big then
                  inc(pieceref.offset,1);
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                shift.shiftimm:=8;
                partreg:=getintregister(list,OS_INT);
                { first byte goes directly into the destination }
                a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                inc(pieceref.offset,step);
                { second byte: loaded signed for OS_S16, unsigned for OS_16 }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,pieceref,partreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
              end;

            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                select_piece_ref(4092);
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two 16 bit loads }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,reg);
                    inc(pieceref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,partreg);
                    shift.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
                  end
                else
                  begin
                    { four byte loads; partreg is used for the second and
                      the fourth byte, part2reg for the third }
                    part2reg:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                    inc(pieceref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,partreg);
                    inc(pieceref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,part2reg);
                    shift.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
                    inc(pieceref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,partreg);
                    shift.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,part2reg,shift));
                    shift.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
                  end;
              end

            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;

      { a signed byte that has to end up as an unsigned 16 bit value is
        adjusted with an extra register move }
      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Subtracts ioffset from the 'self' parameter of procdef, using the
    caller side parameter location. Each location in the chain is handled:
    a register location is adjusted in place, a reference location is
    adjusted in memory. If ioffset cannot be encoded as a shifter constant
    it is loaded into R12 first. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      paraloc : Pcgparalocation;
      rot : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and (selfsym.typ=paravarsym)) then
        internalerror(200305251);

      paraloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while paraloc<>nil do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,rot) then
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R12,paraloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,paraloc^.reference.index,
                  paraloc^.reference.offset+sizeof(aint),sizeof(pint));
                if is_shifter_const(ioffset,rot) then
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,selfref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R12,selfref);
                  end;
              end
            else
              internalerror(200309189);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  { Stores the constant a in the (simple) parameter location paraloc:
    either directly into the parameter register or into the referenced
    memory. Any other kind of location raises internal error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      target : pcgparalocation;
      memref : treference;
    begin
      paraloc.check_simple_location;
      target:=paraloc.location;
      paramanager.allocparaloc(list,target);

      if target^.loc in [LOC_REGISTER,LOC_CREGISTER] then
        a_load_const_reg(list,size,a,target^.register)
      else if target^.loc=LOC_REFERENCE then
        begin
          { the index register of the parameter location becomes the base
            of the reference that is written to }
          reference_reset(memref,paraloc.alignment);
          memref.base:=target^.reference.index;
          memref.offset:=target^.reference.offset;
          a_load_const_ref(list,size,a,memref);
        end
      else
        internalerror(2002081101);
    end;
  { Copies the value at reference r into the parameter described by paraloc.
    The parameter may consist of a chain of locations; each one is filled in
    turn while the source offset advances by the size of the location just
    handled. }
  procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
    var
      srcref, dstref : treference;
      piece : pcgparalocation;
      remaining : aint;
    begin
      srcref:=r;
      remaining:=paraloc.intsize;
      piece:=paraloc.location;
      while piece<>nil do
        begin
          paramanager.allocparaloc(list,piece);
          case piece^.loc of
            LOC_VOID:
              begin
                { nothing to do }
              end;
            LOC_REGISTER,LOC_CREGISTER:
              a_load_ref_reg(list,piece^.size,piece^.size,srcref,piece^.register);
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                if piece^.size in [OS_F32,OS_F64] then
                  a_loadfpu_ref_reg(list,piece^.size,piece^.size,srcref,piece^.register)
                else
                  internalerror(2002072801);
              end;
            LOC_REFERENCE:
              begin
                reference_reset_base(dstref,piece^.reference.index,piece^.reference.offset,paraloc.alignment);
                { doubles in softemu mode have a strange order of registers and references }
                if piece^.size<>OS_32 then
                  begin
                    { everything that is left is copied in one go, so this
                      has to be the last location of the chain }
                    g_concatcopy(list,srcref,dstref,remaining);
                    if assigned(piece^.next) then
                      internalerror(2005010710);
                  end
                else
                  g_concatcopy(list,srcref,dstref,4);
              end;
            else
              internalerror(2002081103);
          end;
          inc(srcref.offset,tcgsize2size[piece^.size]);
          dec(remaining,tcgsize2size[piece^.size]);
          piece:=piece^.next;
        end;
    end;
  { Passes the address of reference r in the (simple) parameter location
    paraloc. For a register location the address is computed directly into
    it; for a memory location it is computed into a temporary register and
    stored from there. Other locations raise internal error 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      target : pcgparalocation;
      memref : treference;
      addrreg : tregister;
    begin
      paraloc.check_simple_location;
      target:=paraloc.location;
      paramanager.allocparaloc(list,target);
      case target^.loc of
        LOC_REFERENCE:
          begin
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            reference_reset(memref,paraloc.alignment);
            memref.base:=target^.reference.index;
            memref.offset:=target^.reference.offset;
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,memref);
          end;
        LOC_REGISTER,LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,target^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call to the symbol named s. BLX is used when the selected cpu
    has the CPUARM_HAS_BLX_LABEL capability, BL otherwise. With weak set the
    symbol is referenced as a weak symbol. The current procedure is flagged
    with pi_do_call. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callop : tasmop;
      callref : treference;
      sym : TAsmSymbol;
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype] then
        callop:=A_BLX
      else
        callop:=A_BL;

      if weak then
        sym:=current_asmdata.WeakRefAsmSymbol(s)
      else
        sym:=current_asmdata.RefAsmSymbol(s);
      reference_reset_symbol(callref,sym,0,sizeof(pint));

      if not(tf_pic_uses_got in target_info.flags) or
         not(cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_full
      else
        begin
          { PIC code on a target that uses a GOT }
          include(current_procinfo.flags,pi_needs_got);
          callref.refaddr:=addr_pic;
        end;

      list.concat(taicpu.op_ref(callop,callref));

      { the compiler does not properly set pi_do_call anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); that is why the
        flag is set here instead of being checked with
        internalerror(2003060703) }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg: a single BLX when the selected
    cpu has the CPUARM_HAS_BLX capability, otherwise a MOV of PC into R14
    followed by a MOV of reg into PC. The current procedure is flagged with
    pi_do_call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX,reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set pi_do_call anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); that is why the
        flag is set here instead of being checked with
        internalerror(2003060703) }
      include(current_procinfo.flags,pi_do_call);
    end;
  { reg := reg op a; forwarded to the three operand variant with reg used
    both as source and as destination }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,
                         op,
                         size,
                         a,
                         reg,
                         reg);
    end;
  { Applies "op a" to the value stored at ref: the value is loaded into a
    temporary register, the result is computed into a second temporary and
    written back. The reference returned by the load is reused for the
    store. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg, resultreg : tregister;
      usedref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      usedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,usedref);
    end;
  { dst := op(src) for OP_NEG and OP_NOT, dst := dst op src for all other
    operations (those are forwarded to a_op_reg_reg_reg).

    OP_NEG is emitted as RSB dst,src,#0 followed by maybeadjustresult.
    OP_NOT on 8 and 16 bit sizes is emitted as MVN with the source shifted
    left by 24 resp. 16 bits, followed by a MOV that shifts the result right
    again by the same amount (arithmetic shift for the signed sizes, logical
    shift for the unsigned ones); 32 bit sizes use a plain MVN. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shift : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shift.shiftimm:=24
                else
                  shift.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
                { using a shift here allows this to be folded into another instruction }
                if size in [OS_S8, OS_S16] then
                  shift.shiftmode:=SM_ASR
                else
                  shift.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  const
    { three lookup tables indexed by TOpCG; in each table the entries are
      listed in the declaration order of TOpCG }

    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE, A_MOV,  A_ADD,  A_AND,  A_NONE, A_NONE,
      A_MUL,  A_MUL,  A_NONE, A_NONE, A_ORR,  A_NONE,
      A_NONE, A_NONE, A_SUB,  A_EOR,  A_NONE, A_NONE
    );

    { differs from the table above in four entries: A_ASR, A_LSL, A_LSR
      and the final A_ROR }
    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE, A_MOV,  A_ADD,  A_AND,  A_NONE, A_NONE,
      A_MUL,  A_MUL,  A_NONE, A_NONE, A_ORR,  A_ASR,
      A_LSL,  A_LSR,  A_SUB,  A_EOR,  A_NONE, A_ROR
    );

    { every operation currently maps to PF_None }
    op_reg_postfix: array[TOpCG] of TOpPostfix = (
      PF_None, PF_None, PF_None, PF_None, PF_None, PF_None,
      PF_None, PF_None, PF_None, PF_None, PF_None, PF_None,
      PF_None, PF_None, PF_None, PF_None, PF_None, PF_None
    );
  { dst := src op a; forwarded to the overflow checking variant with
    setflags=false. The location variable only serves as the mandatory var
    argument, it is not read afterwards. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      ignoredloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,
        false,ignoredloc);
    end;
  { Three register operation; forwarded to the overflow checking variant
    with setflags=false. The location variable only serves as the mandatory
    var argument, it is not read afterwards. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
    var
      ignoredloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,
        false,ignoredloc);
    end;
  { Translates a shift or rotate operation into the shifter operand mode.
    OP_ROL and OP_ROR both yield SM_ROR. Any operation that is not a
    shift/rotate raises internal error 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          opshift2shiftmode:=SM_LSL;
        OP_SHR:
          opshift2shiftmode:=SM_LSR;
        OP_SAR:
          opshift2shiftmode:=SM_ASR;
        OP_ROR,
        OP_ROL:
          opshift2shiftmode:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace "dst := src * a" by a short sequence of shifted
    mov/add/sub/rsb instructions.

    Returns true if a replacement sequence has been appended to list, false if
    nothing was emitted and the caller has to generate a real multiplication. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      remaining : dword;
      pow2 : longint;
      so : tshifterop;
      setbits : byte;
      isneg : boolean;
      isfirst : boolean;
      dummyshift,
      extracost : byte;
      budget : byte;
    begin
      result:=true;
      extracost:=0;
      isneg:=a<0;
      so.rs:=NR_NO;
      so.shiftmode:=SM_LSL;
      { a negative factor costs one extra instruction (the final rsb) }
      if isneg then
        inc(extracost);
      remaining:=dword(abs(a));
      { bit 0 is not counted, it is covered by the first instruction }
      setbits:=popcnt(remaining and $fffffffe);

      { Heuristic for how many instructions may replace the mul; according to
        the original author this is based on XScale timings: a mov to load the
        constant plus the mul need at least 1+4 cycles, and since the first
        shift immediate might stall, at most 3 instructions are generated. }
      budget:=3;

      { a constant that is no shifter operand needs a mov/mvn/bic/or sequence
        or a ldr anyway, so one more replacement instruction still pays off }
      if not(is_shifter_const(dword(a),dummyshift)) and not(is_shifter_const(not(dword(a)),dummyshift)) then
        inc(budget);
      { upper 5 bits all set or all clear: mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(budget);
      { upper 17 bits all set or all clear: mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(budget);

      { trivial factors first }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { Sum of shifted copies of src: one add per set bit, the least
        significant bit is hidden in the first instruction. Only taken when it
        is not more expensive than subtracting from the next power of two. }
      else if (setbits+extracost<=budget) and
         (setbits<=popcnt(dword(nextpowerof2(remaining,pow2)-remaining) and $fffffffe)) then
        begin
          isfirst:=true;
          while remaining<>0 do
            begin
              { always consume the highest bit that is still set }
              so.shiftimm:=BsrDWord(remaining);
              if odd(remaining) then
                begin
                  { dst := src + (src shl n), this also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
                  dec(remaining);
                end
              else
                if isfirst then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,so));
              isfirst:=false;
              dec(remaining,1 shl so.shiftimm);
            end;
          { dst := 0 - dst }
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { Otherwise: start from src shl pow2 (the next greater power of two) and
        subtract the difference. }
      else if popcnt(dword(nextpowerof2(remaining,pow2)-remaining) and $fffffffe)+extracost+1<=budget then
        begin
          isfirst:=true;
          while remaining<>0 do
            begin
              if isfirst then
                begin
                  { from now on, remaining is what has to be subtracted }
                  remaining:=(1 shl pow2)-remaining;
                  so.shiftimm:=pow2;
                end
              else
                so.shiftimm:=BsrDWord(remaining);
              if odd(remaining) then
                begin
                  { dst := (src shl n) - src, this also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
                  dec(remaining);
                end
              else
                if isfirst then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,so));
                    dec(remaining,1 shl so.shiftimm);
                  end;
              isfirst:=false;
            end;
          { dst := 0 - dst }
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits "dst := src <op> a" for a constant a.

    list     : instruction list the code is appended to
    op, size : operation and operand size
    a        : constant operand (first normalised by optimize_op_const)
    src, dst : source and destination register
    setflags : if true (or if cgsetflags is set) the flag-setting form of the
               instruction is used where one is emitted directly
    ovloc    : receives where an overflow can be tested: LOC_VOID if no
               overflow information is available, LOC_FLAGS otherwise

    ovloc.loc is initialised before anything else, so the caller sees a
    defined value on every path, including the early exits for
    OP_NONE/OP_MOVE. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
      imm1, imm2: DWord;
    begin
      { must happen before the early exits below, ovloc is a var parameter }
      ovloc.loc:=LOC_VOID;

      optimize_op_const(size, op, a);
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
      end;

      { if -a is a shifter constant, swap add <-> sub so the immediate form can
        be used; not done when the caller asked for flags }
      if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { rotate left by a = rotate right by 32-a }
              if op = OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            { plain "op dst,src,#a", with the S postfix if flags are wanted }
            begin
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
            end;
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                { unsigned sizes: report the carry based condition }
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
                  is_continuous_mask(a, lsb, width) and
                  ((lsb = 0) or ((lsb + width) = 32)) then
            begin
              shifterop_reset(so);
              if (width = 16) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width = 8) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb = 0 then
                begin
                  { mask of the low bits: shift the unwanted high bits out and back }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask of the high bits: shift the unwanted low bits out and back }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
          else
            begin
              { last resort: materialise the constant and use the register form }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src2 <op> src1".

    list       : instruction list the code is appended to
    op, size   : operation and operand size
    src1, src2 : source registers; for shifts/rotates src1 holds the amount
    dst        : destination register
    setflags   : if true (or if cgsetflags is set) flags are produced; for
                 multiplications a long multiply plus compare is emitted
    ovloc      : LOC_VOID unless a multiplication with flags was emitted, in
                 which case it is LOC_FLAGS with resflags=F_NE

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported here and raise an
    internalerror. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: the high word lands in overflowreg and is
                  compared below to detect overflow }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: the first multiplicand has to come
                          from a copy, so copy it into tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension of
                      the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply through a copy in tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { everything else maps directly to one instruction, with the S
              postfix if flags are wanted }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
                taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits a 32x32->64 bit multiplication: dsthi:dstlo := src1 * src2.
    size selects the unsigned (OS_32, umull) or signed (OS_S32, smull) form;
    any other size raises internalerror 2014060802.
    dstlo and/or dsthi may be NR_NO if the caller is not interested in that
    half; a scratch register is allocated for it in that case. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));

      if size=OS_32 then
        mulop:=A_UMULL
      else if size=OS_S32 then
        mulop:=A_SMULL
      else
        InternalError(2014060802);

      { The instruction needs valid registers for both halves, even if the
        caller omitted one. If dsthi is NR_NO, a plain 32x32=32 bit
        multiplication would be a possible fallback. }
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);

      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" after rewriting
    ref into a form the instruction can take.

    list      : instruction list the code is appended to
    op        : load/store opcode
    oppostfix : size/sign postfix of the instruction
    reg       : register to load into / store from
    ref       : memory reference; passed by value, so the rewriting done here
                does not affect the caller's copy

    Returns the reference that was actually used in the emitted instruction. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      { tmpreg1 tracks the most recently allocated address temporary }
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020701);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.

        The remaining conditions check whether the offset fits the addressing
        mode of the instruction that is about to be emitted. }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          { NOTE(review): fixref is defined elsewhere; presumably it rewrites
            ref into an encodable form - confirm }
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { The offset is always folded into the base and the result becomes
            the new base. Adding it to a previously allocated temporary without
            storing the sum back into ref would silently drop the offset. }
          tmpreg2:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg2);
          ref.base:=tmpreg2;
          tmpreg1:=tmpreg2;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              { reuse the temporary to hold base +/- index }
              if ref.base=tmpreg1 then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { no temporary yet: compute base +/- index into a new one,
                honouring the sign of the index like the branches above }
              tmpreg1:=getintregister(list,OS_ADDR);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores reg to the memory location ref.
    If the alignment of ref is 1 or 2 and smaller than the size to store, the
    value is written in several smaller stores; otherwise a single
    str/strh/strb is emitted through handle_load_store. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      storepf : toppostfix;
      partref : treference;
      shifted : tregister;
      step : integer;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { the postfix only depends on the width that is written }
      case ToSize of
        OS_8,
        OS_S8:
          storepf:=PF_B;
        OS_16,
        OS_S16:
          storepf:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          storepf:=PF_None;
        else
          InternalError(200308299);
      end;

      { sufficiently aligned: one store does the job }
      if not(ref.alignment in [1,2]) or (ref.alignment>=tcgsize2size[tosize]) then
        begin
          handle_load_store(list,A_STR,storepf,reg,ref);
          exit;
        end;

      { direction in which the address moves from the least significant part
        to the more significant ones }
      if target_info.endian=endian_big then
        step:=-1
      else
        step:=1;

      case FromSize of
        OS_16,OS_S16:
          begin
            { two byte stores: low byte first, then reg shr 8 }
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if target_info.endian=endian_big then
              inc(partref.offset,1);
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
            a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          end;
        OS_32,OS_S32:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores: low half first, then reg shr 16 }
                if target_info.endian=endian_big then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
              end
            else
              begin
                { four byte stores, shifting the value down by 8 bits each time }
                if target_info.endian=endian_big then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
              end;
          end
        else
          handle_load_store(list,A_STR,storepf,reg,ref);
      end;
    end;
  { Emits a single store of reg to ref with the width given by tosize and
    returns the reference handle_load_store actually used.
    Sizes other than 8, 16 or 32 bit integers raise internalerror 2003082910. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepf : toppostfix;
    begin
      if ToSize in [OS_8,OS_S8] then
        storepf:=PF_B
      else if ToSize in [OS_16,OS_S16] then
        storepf:=PF_H
      else if ToSize in [OS_32,OS_S32] then
        storepf:=PF_None
      else
        InternalError(2003082910);
      result:=handle_load_store(list,A_STR,storepf,reg,ref);
    end;
  { Emits a single load from Ref into reg and returns the reference
    handle_load_store actually used.
    The postfix is derived from fromsize, so signed 8/16 bit loads use the
    sign extending forms. Other sizes raise internalerror 200308291. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpf : toppostfix;
    begin
      case FromSize of
        { full word, no postfix needed }
        OS_32,
        OS_S32:
          loadpf:=PF_None;
        { halfwords }
        OS_16:
          loadpf:=PF_H;
        OS_S16:
          loadpf:=PF_SH;
        { bytes }
        OS_8:
          loadpf:=PF_B;
        OS_S8:
          loadpf:=PF_SB;
        else
          InternalError(200308291);
      end;
      result:=handle_load_store(list,A_LDR,loadpf,reg,ref);
    end;
  { Copies reg1 to reg2, converting from fromsize to tosize.
    If the sizes differ, the value is zero or sign extended according to the
    narrower of the two sizes; before ARMv6 this is done with shifts (or an
    and), from ARMv6 on with the extend instructions. If no conversion code was
    emitted and the registers differ, a plain mov is generated and reported to
    the register allocator. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { reg2 := reg shifted by shiftimm using shiftmode }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
            exit;
          end;
        { thumb: go through the generic shift operations }
        case shiftmode of
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    var
      movinstr : taicpu;
      converted : boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          converted:=true;
          { the narrower size decides how the value has to be extended }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      { sign extend to 16 bit, then clear the upper halfword }
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                converted:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize=OS_16 then
                    begin
                      so.shiftmode:=SM_ROR;
                      so.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                converted:=false;
            end
        end;

      if not converted and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Loads the floating point value at ref into the parameter location paraloc.

    paraloc may consist of several locations; they are processed one after
    another. hloc is the location currently being filled and href the part of
    the source value that belongs to it (href.offset is advanced by the size of
    each processed location).

    Every per-location action uses hloc/href: allocating the head of the list
    (paraloc.location) or reading from the unadvanced ref is only correct for
    the first location. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,hloc);
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,hloc);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,paraloc.alignment);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue with the next part of the source value }
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Floating point register move reg2 := reg1 using mvf; the precision postfix
    is looked up from tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads a floating point value from ref into reg with ldf.
    The precision postfix follows fromsize; if tosize differs, a register to
    register move converts the value afterwards. Unsupported sizes raise
    internalerror 200309021. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_F80:
          precision:=PF_E;
        OS_64,
        OS_F64:
          precision:=PF_D;
        OS_32,
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if fromsize<>tosize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref with stf.
    The precision postfix follows tosize; sizes other than OS_F32, OS_F64 and
    OS_F80 raise internalerror 200309022. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      case tosize of
        OS_F80:
          precision:=PF_E;
        OS_F64:
          precision:=PF_D;
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309022);
      end;
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  1517. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.
    The flags register is marked as allocated around the compare and jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      shift : byte;
      fitsimm : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);

      { can the constant be encoded directly in the cmp? }
      if GenerateThumbCode then
        fitsimm:=is_thumb_imm(a)
      else
        fitsimm:=is_shifter_const(a,shift);

      if fitsimm then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { Try cmn with the negated constant. According to the original author's
        note (referring to the ARM manual), CMN reg,0 and CMN reg,$80000000
        differ from CMP reg,$ffffffff and CMP reg,$7fffffff regarding the
        flags, hence the two excluded values. }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,shift) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { load the constant into a register and compare against that }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;

      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Bit scan of src into dst.
    reverse=true : dst := (31 - clz(src)) and 255
    reverse=false: dst := clz(rbit(src)), replaced by $ff if that count is 32 }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { thumb-2 needs an IT instruction in front of the conditional mov }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "cmp reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags register is marked as allocated from the compare until after the
    jump. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);

      list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
      a_jmp_cond(list,cmp_op,l);

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Unconditional jump to the symbol named s.
    A far jump is generated (bl when generating thumb code, b otherwise); it is
    left to the optimizer to get rid of it. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jmpop : tasmop;
      ai : taicpu;
    begin
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      ai:=taicpu.op_sym(jmpop,current_asmdata.RefAsmSymbol(s));
      ai.is_jmp:=true;
      list.concat(ai);
    end;
  { Emits an unconditional jump to label l.
    Thumb code uses BL, everything else uses B; the instruction is flagged
    as a jump in both cases. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jumpop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to l that is taken when the flags f are set.
    Outside Thumb code this is a single conditional B. In Thumb code a
    conditional B with the inverted condition skips over an unconditional
    jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      skipflags : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      skipflags:=f;
      inverse_flags(skipflags);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(skipflags));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materialises the flags f in reg: a conditional MOV reg,#1 under the
    condition belonging to f, followed by a conditional MOV reg,#0 under the
    inverse condition. The size parameter is not used. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Emits the entry code (prologue) of the current procedure into list. }
  { localsize    - size of the local area in bytes; rounded up to a multiple of 4 first }
  { nostackframe - when true no instructions are generated; only the padding register }
  {                recorded in the procinfo is reset }
  { Order of the generated code: store the integer registers that must be preserved }
  { with STM, set up the frame pointer where one is used, lower SP by the local size, }
  { then store the FPA or VFP registers that must be preserved. }
  1624. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1625. var
  1626. ref : treference;
  1627. shift : byte;
  1628. firstfloatreg,lastfloatreg,
  1629. r : byte;
  1630. mmregs,
  1631. regs, saveregs : tcpuregisterset;
  1632. registerarea,
  1633. r7offset,
  1634. stackmisalignment : pint;
  1635. postfix: toppostfix;
  1636. imm1, imm2: DWord;
  1637. stack_parameters : Boolean;
  1638. begin
  1639. LocalSize:=align(LocalSize,4);
  1640. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1641. { call instruction does not put anything on the stack }
  1642. registerarea:=0;
  { High(TSuperRegister) stands for "no padding register"; g_proc_exit only acts on values below RS_R4 }
  1643. tarmprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1644. lastfloatreg:=RS_NO;
  1645. if not(nostackframe) then
  1646. begin
  1647. firstfloatreg:=RS_NO;
  1648. mmregs:=[];
  { find the float registers to preserve: used in this procedure and not volatile }
  1649. case current_settings.fputype of
  1650. fpu_fpa,
  1651. fpu_fpa10,
  1652. fpu_fpa11:
  1653. begin
  1654. { save floating point registers? }
  1655. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  { remember the first and last FPA register in the set; each one adds 12 bytes to the register area }
  1656. for r:=RS_F0 to RS_F7 do
  1657. if r in regs then
  1658. begin
  1659. if firstfloatreg=RS_NO then
  1660. firstfloatreg:=r;
  1661. lastfloatreg:=r;
  1662. inc(registerarea,12);
  1663. end;
  1664. end;
  1665. fpu_vfpv2,
  1666. fpu_vfpv3,
  1667. fpu_vfpv3_d16:
  1668. begin;
  1669. mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
  1670. end;
  1671. end;
  1672. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1673. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1674. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1675. { save int registers }
  { ref describes a pre-indexed access through SP and is used by all STM instructions below }
  1676. reference_reset(ref,4);
  1677. ref.index:=NR_STACK_POINTER_REG;
  1678. ref.addressmode:=AM_PREINDEXED;
  1679. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1680. if not(target_info.system in systems_darwin) then
  1681. begin
  { NOTE(review): SP was already marked allocated a few statements above; this second a_reg_alloc looks redundant - confirm }
  1682. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1683. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1684. begin
  1685. a_reg_alloc(list,NR_R12);
  1686. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1687. end;
  1688. { the (old) ARM APCS requires saving both the stack pointer (to
  1689. crawl the stack) and the PC (to identify the function this
  1690. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1691. and R15 -- still needs updating for EABI and Darwin, they don't
  1692. need that }
  1693. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1694. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1695. else
  { without a frame pointer R14 is only stored when other registers are stored or the procedure makes calls }
  1696. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1697. include(regs,RS_R14);
  1698. if regs<>[] then
  1699. begin
  1700. for r:=RS_R0 to RS_R15 do
  1701. if r in regs then
  1702. inc(registerarea,4);
  1703. { if the stack is not 8 byte aligned, try to add an extra register,
  1704. so we can avoid the extra sub/add ...,#4 later (KB) }
  1705. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1706. for r:=RS_R3 downto RS_R0 do
  1707. if not(r in regs) then
  1708. begin
  1709. regs:=regs+[r];
  1710. inc(registerarea,4);
  { recorded in the procinfo so that g_proc_exit can add the same register again }
  1711. tarmprocinfo(current_procinfo).stackpaddingreg:=r;
  1712. break;
  1713. end;
  1714. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1715. end;
  1716. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1717. begin
  1718. { the framepointer now points to the saved R15, so the saved
  1719. framepointer is at R11-12 (for get_caller_frame) }
  1720. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1721. a_reg_dealloc(list,NR_R12);
  1722. end;
  1723. end
  1724. else
  1725. begin
  1726. { always save r14 if we use r7 as the framepointer, because
  1727. the parameter offsets are hardcoded in advance and always
  1728. assume that r14 sits on the stack right behind the saved r7
  1729. }
  1730. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1731. include(regs,RS_FRAME_POINTER_REG);
  1732. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1733. include(regs,RS_R14);
  1734. if regs<>[] then
  1735. begin
  1736. { on Darwin, you first have to save [r4-r7,lr], and then
  1737. [r8,r10,r11] and make r7 point to the previously saved
  1738. r7 so that you can perform a stack crawl based on it
  1739. ([r7] is previous stack frame, [r7+4] is return address
  1740. }
  1741. include(regs,RS_FRAME_POINTER_REG);
  1742. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset counts the bytes taken by registers below the frame pointer register in the first STM }
  1743. r7offset:=0;
  1744. for r:=RS_R0 to RS_R15 do
  1745. if r in saveregs then
  1746. begin
  1747. inc(registerarea,4);
  1748. if r<RS_FRAME_POINTER_REG then
  1749. inc(r7offset,4);
  1750. end;
  1751. { save the registers }
  1752. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1753. { make r7 point to the saved r7 (regardless of whether this
  1754. frame uses the framepointer, for backtrace purposes) }
  1755. if r7offset<>0 then
  1756. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1757. else
  1758. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1759. { now save the rest (if any) }
  1760. saveregs:=regs-saveregs;
  1761. if saveregs<>[] then
  1762. begin
  1763. for r:=RS_R8 to RS_R11 do
  1764. if r in saveregs then
  1765. inc(registerarea,4);
  1766. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1767. end;
  1768. end;
  1769. end;
  { number of bytes by which the register area misses the maximum local alignment }
  1770. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  { SP is lowered when there are locals, or when the area is misaligned and the procedure makes calls or is an assembler routine }
  1771. if (LocalSize<>0) or
  1772. ((stackmisalignment<>0) and
  1773. ((pi_do_call in current_procinfo.flags) or
  1774. (po_assembler in current_procinfo.procdef.procoptions))) then
  1775. begin
  { enlarge the local size so that register area plus locals is a multiple of the alignment }
  1776. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  1777. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1778. begin
  { with an estimated stack size the locals must fit into the recorded frame size; }
  { what is subtracted from SP is that frame size minus the register area }
  1779. if localsize>tarmprocinfo(current_procinfo).stackframesize then
  1780. internalerror(2014030901)
  1781. else
  1782. localsize:=tarmprocinfo(current_procinfo).stackframesize-registerarea;
  1783. end;
  { subtract with one immediate, with two immediates, or through R12 when neither encoding is possible }
  1784. if is_shifter_const(localsize,shift) then
  1785. begin
  1786. a_reg_dealloc(list,NR_R12);
  1787. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1788. end
  1789. else if split_into_shifter_const(localsize, imm1, imm2) then
  1790. begin
  1791. a_reg_dealloc(list,NR_R12);
  1792. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1793. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1794. end
  1795. else
  1796. begin
  1797. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1798. a_reg_alloc(list,NR_R12);
  1799. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1800. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1801. a_reg_dealloc(list,NR_R12);
  1802. end;
  1803. end;
  { store the float registers collected at the top, if any }
  1804. if (mmregs<>[]) or
  1805. (firstfloatreg<>RS_NO) then
  1806. begin
  1807. reference_reset(ref,4);
  { the address of the save area is computed into R12 when VFP is used or the offset reaches 1023; }
  { otherwise the area is addressed directly relative to the frame pointer }
  1808. if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
  1809. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
  1810. begin
  { NOTE(review): the test is made on floatregstart while the immediate used below is -floatregstart - confirm the sign }
  1811. if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
  1812. begin
  1813. a_reg_alloc(list,NR_R12);
  1814. a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
  1815. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  { NOTE(review): R12 is released here although it serves as ref.base for the store below - confirm this is intended }
  1816. a_reg_dealloc(list,NR_R12);
  1817. end
  1818. else
  1819. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tarmprocinfo(current_procinfo).floatregstart));
  1820. ref.base:=NR_R12;
  1821. end
  1822. else
  1823. begin
  1824. ref.base:=current_procinfo.framepointer;
  1825. ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
  1826. end;
  1827. case current_settings.fputype of
  1828. fpu_fpa,
  1829. fpu_fpa10,
  1830. fpu_fpa11:
  1831. begin
  { a single SFM stores the range firstfloatreg..lastfloatreg }
  1832. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1833. lastfloatreg-firstfloatreg+1,ref));
  1834. end;
  1835. fpu_vfpv2,
  1836. fpu_vfpv3,
  1837. fpu_vfpv3_d16:
  1838. begin
  { the register-set store takes its address register from ref.index, so move it there }
  1839. ref.index:=ref.base;
  1840. ref.base:=NR_NO;
  1841. { FSTMX is deprecated on ARMv6 and later }
  1842. if (current_settings.cputype<cpu_armv6) then
  1843. postfix:=PF_IAX
  1844. else
  1845. postfix:=PF_IAD;
  1846. list.concat(setoppostfix(taicpu.op_ref_regset(A_FSTM,ref,R_MMREGISTER,R_SUBFD,mmregs),postfix));
  1847. end;
  1848. end;
  1849. end;
  1850. end;
  1851. end;
  { Emits the exit code (epilogue) of the current procedure into list. }
  { parasize     - not used by this implementation }
  { nostackframe - when true only the return instruction is generated: BX R14 when the }
  {                CPU has BX, otherwise MOV PC,R14 }
  { With a stack frame the FPA/VFP registers are reloaded first. Then the set of integer }
  { registers to reload is computed (R15 takes the place of R14 so that the LDM returns), }
  { SP is raised past the locals where necessary, and the registers are reloaded with LDM. }
  1852. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1853. var
  1854. ref : treference;
  1855. LocalSize : longint;
  1856. firstfloatreg,lastfloatreg,
  1857. r,
  1858. shift : byte;
  1859. mmregs,
  1860. saveregs,
  1861. regs : tcpuregisterset;
  1862. registerarea,
  1863. stackmisalignment: pint;
  1864. paddingreg: TSuperRegister;
  1865. mmpostfix: toppostfix;
  1866. imm1, imm2: DWord;
  1867. begin
  1868. if not(nostackframe) then
  1869. begin
  1870. registerarea:=0;
  1871. firstfloatreg:=RS_NO;
  1872. lastfloatreg:=RS_NO;
  1873. mmregs:=[];
  1874. saveregs:=[];
  { find the float registers to reload: used in this procedure and not volatile }
  1875. case current_settings.fputype of
  1876. fpu_fpa,
  1877. fpu_fpa10,
  1878. fpu_fpa11:
  1879. begin
  1880. { restore floating point registers? }
  1881. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1882. for r:=RS_F0 to RS_F7 do
  1883. if r in regs then
  1884. begin
  1885. if firstfloatreg=RS_NO then
  1886. firstfloatreg:=r;
  1887. lastfloatreg:=r;
  1888. { floating point register space is already included in
  1889. localsize below by calc_stackframe_size
  1890. inc(registerarea,12);
  1891. }
  1892. end;
  1893. end;
  1894. fpu_vfpv2,
  1895. fpu_vfpv3,
  1896. fpu_vfpv3_d16:
  1897. begin;
  1898. { restore vfp registers? }
  1899. mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
  1900. end;
  1901. end;
  1902. if (firstfloatreg<>RS_NO) or
  1903. (mmregs<>[]) then
  1904. begin
  1905. reference_reset(ref,4);
  { the address of the save area is computed into R12 when VFP is used or the offset reaches 1023; }
  { otherwise the area is addressed directly relative to the frame pointer }
  1906. if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
  1907. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
  1908. begin
  { NOTE(review): the test is made on floatregstart while the immediate used below is -floatregstart - confirm the sign }
  1909. if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
  1910. begin
  1911. a_reg_alloc(list,NR_R12);
  1912. a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
  1913. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  { NOTE(review): R12 is released here although it serves as ref.base for the load below - confirm this is intended }
  1914. a_reg_dealloc(list,NR_R12);
  1915. end
  1916. else
  1917. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tarmprocinfo(current_procinfo).floatregstart));
  1918. ref.base:=NR_R12;
  1919. end
  1920. else
  1921. begin
  1922. ref.base:=current_procinfo.framepointer;
  1923. ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
  1924. end;
  1925. case current_settings.fputype of
  1926. fpu_fpa,
  1927. fpu_fpa10,
  1928. fpu_fpa11:
  1929. begin
  { a single LFM reloads the range firstfloatreg..lastfloatreg }
  1930. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1931. lastfloatreg-firstfloatreg+1,ref));
  1932. end;
  1933. fpu_vfpv2,
  1934. fpu_vfpv3,
  1935. fpu_vfpv3_d16:
  1936. begin
  { the register-set load takes its address register from ref.index, so move it there }
  1937. ref.index:=ref.base;
  1938. ref.base:=NR_NO;
  1939. { FLDMX is deprecated on ARMv6 and later }
  1940. if (current_settings.cputype<cpu_armv6) then
  1941. mmpostfix:=PF_IAX
  1942. else
  1943. mmpostfix:=PF_IAD;
  1944. list.concat(setoppostfix(taicpu.op_ref_regset(A_FLDM,ref,R_MMREGISTER,R_SUBFD,mmregs),mmpostfix));
  1945. end;
  1946. end;
  1947. end;
  1948. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1949. if (pi_do_call in current_procinfo.flags) or
  1950. (regs<>[]) or
  1951. ((target_info.system in systems_darwin) and
  1952. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  1953. begin
  { R14 is replaced by R15 in the set, so the LDM below loads PC and thereby returns }
  1954. exclude(regs,RS_R14);
  1955. include(regs,RS_R15);
  1956. if (target_info.system in systems_darwin) then
  1957. include(regs,RS_FRAME_POINTER_REG);
  1958. end;
  1959. if not(target_info.system in systems_darwin) then
  1960. begin
  1961. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  1962. The saved PC came after that but is discarded, since we restore
  1963. the stack pointer }
  1964. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  1965. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  1966. end
  1967. else
  1968. begin
  1969. { restore R8-R11 already if necessary (they've been stored
  1970. before the others) }
  1971. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  1972. if saveregs<>[] then
  1973. begin
  { only the reference is prepared and the size counted here; the LDM for saveregs is emitted further down, after SP has been raised }
  1974. reference_reset(ref,4);
  1975. ref.index:=NR_STACK_POINTER_REG;
  1976. ref.addressmode:=AM_PREINDEXED;
  1977. for r:=RS_R8 to RS_R11 do
  1978. if r in saveregs then
  1979. inc(registerarea,4);
  1980. regs:=regs-saveregs;
  1981. end;
  1982. end;
  1983. for r:=RS_R0 to RS_R15 do
  1984. if r in regs then
  1985. inc(registerarea,4);
  1986. { reapply the stack padding reg, in case there was one, see the complimentary
  1987. comment in g_proc_entry() (KB) }
  1988. paddingreg:=tarmprocinfo(current_procinfo).stackpaddingreg;
  { only values below RS_R4 denote a padding register; it must not already be part of the set }
  1989. if paddingreg < RS_R4 then
  1990. if paddingreg in regs then
  1991. internalerror(201306190)
  1992. else
  1993. begin
  1994. regs:=regs+[paddingreg];
  1995. inc(registerarea,4);
  1996. end;
  { number of bytes by which the register area misses the maximum local alignment }
  1997. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  { when SP is the frame pointer, or on Darwin, SP is raised explicitly before the registers are reloaded through SP }
  1998. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  1999. (target_info.system in systems_darwin) then
  2000. begin
  2001. LocalSize:=current_procinfo.calc_stackframe_size;
  2002. if (LocalSize<>0) or
  2003. ((stackmisalignment<>0) and
  2004. ((pi_do_call in current_procinfo.flags) or
  2005. (po_assembler in current_procinfo.procdef.procoptions))) then
  2006. begin
  { the amount added back to SP: the recorded frame size minus the register area when the stack size was estimated, }
  { otherwise the local size padded so that register area plus locals is a multiple of the alignment }
  2007. if pi_estimatestacksize in current_procinfo.flags then
  2008. LocalSize:=tarmprocinfo(current_procinfo).stackframesize-registerarea
  2009. else
  2010. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { add with one immediate, with two immediates, or through R12 when neither encoding is possible }
  2011. if is_shifter_const(LocalSize,shift) then
  2012. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2013. else if split_into_shifter_const(localsize, imm1, imm2) then
  2014. begin
  2015. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2016. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2017. end
  2018. else
  2019. begin
  2020. a_reg_alloc(list,NR_R12);
  2021. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2022. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2023. a_reg_dealloc(list,NR_R12);
  2024. end;
  2025. end;
  { Darwin: reload the registers of the second store (R8, R10, R11) first, using the reference prepared above }
  2026. if (target_info.system in systems_darwin) and
  2027. (saveregs<>[]) then
  2028. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  2029. if regs=[] then
  2030. begin
  { nothing to reload: return through R14 }
  2031. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2032. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2033. else
  2034. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2035. end
  2036. else
  2037. begin
  2038. reference_reset(ref,4);
  2039. ref.index:=NR_STACK_POINTER_REG;
  2040. ref.addressmode:=AM_PREINDEXED;
  2041. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2042. end;
  2043. end
  2044. else
  2045. begin
  2046. { restore int registers and return }
  { the set contains the frame pointer, R13 and R15 here, so this single LDM relative to the frame pointer also restores SP and returns }
  2047. reference_reset(ref,4);
  2048. ref.index:=NR_FRAME_POINTER_REG;
  2049. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2050. end;
  2051. end
  2052. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2053. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2054. else
  2055. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2056. end;
  { Emits the code that loads the GOT register of the current procedure.
    Nothing is generated unless PIC is enabled for the module, the procedure
    needs the GOT, and the target uses a GOT for PIC.

    Generated code: an LDR of a PC-relative constant pool entry into the GOT
    register, a label, and an ADD of PC to the GOT register. The pool entry
    holds the offset of _GLOBAL_OFFSET_TABLE_ relative to that label,
    adjusted by -8. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel, anchorlabel : TAsmLabel;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) then
        exit;
      if not(pi_needs_got in current_procinfo.flags) then
        exit;
      if not(tf_pic_uses_got in target_info.flags) then
        exit;

      { label marking the constant pool entry in the local data of the procedure }
      current_asmdata.getdatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);

      { PC-relative load of that entry into the GOT register }
      reference_reset(poolref,4);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,current_procinfo.got,poolref));

      { the entry itself: distance from the anchor label to the GOT symbol }
      current_asmdata.getaddrlabel(anchorlabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,anchorlabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_'),-8));
      cg.a_label(list,anchorlabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,current_procinfo.got,NR_PC,current_procinfo.got));
    end;
  { Computes the effective address described by ref into register r.

      ref - reference in plain offset addressing mode (AM_OFFSET); any other
            addressing mode raises an internal error
      r   - destination register

    A reference that carries a symbol, or whose offset passes is_shifter_const
    neither as is nor negated, is first rewritten by fixref. Index registers
    with a shift are not supported (internal error).

    Fix over the previous version: for a negative index sign the subtraction
    operands were passed as (base,index). Since a_op_reg_reg_reg computes
    dst := src2 op src1, that produced index-base; the operands are now passed
    as (index,base) so that r := base-index. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      { work on a copy, the caller's reference is const }
      tmpref:=ref;

      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;

      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);

      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);

      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          if tmpref.signindex<0 then
            { dst := src2 - src1, hence index is src1 and base is src2 }
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            { no register involved at all: the address is the plain offset }
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                { plain register copy; registered as a move instruction }
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                add_move_instruction(instr);
              end;
        end;
    end;
  { Rewrites ref so that it no longer carries a symbol or an offset.

    The symbol/offset part is either placed in a new constant pool entry in
    the local data of the current procedure and loaded PC-relative into a
    fresh register, or (arm-darwin, external-like symbols) loaded through
    g_indirect_sym_load. That register is then merged into ref as base or
    index. On return ref.offset is 0 and ref.symbol is nil. }
  procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
    var
      valuereg, sumreg : tregister;
      poolref : treference;
      poollabel, piclabel : tasmlabel;
      loaded_indirectly : boolean;
    begin
      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }

      { create consts entry }
      reference_reset(poolref,4);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      piclabel:=nil;
      valuereg:=NR_NO;
      loaded_indirectly:=false;

      if not assigned(ref.symbol) then
        { plain constant offset }
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset))
      else if (target_info.system=system_arm_darwin) and
         (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
        begin
          valuereg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
          if ref.offset<>0 then
            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,valuereg);
          loaded_indirectly:=true;
        end
      else if not(cs_create_pic in current_settings.moduleswitches) then
        current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
      else if (tf_pic_uses_got in target_info.flags) then
        current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym_offset(aitconst_got,ref.symbol,ref.offset))
      else
        begin
          { ideally, we would want to generate

              ldr       r1, LPICConstPool
            LPICLocal:
              ldr/str   r2,[pc,r1]

            ...
              LPICConstPool:
                .long _globsym-(LPICLocal+8)

            However, we cannot be sure that the ldr/str will follow
            right after the call to fixref, so we have to load the
            complete address already in a register.
          }
          current_asmdata.getaddrlabel(piclabel);
          current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
        end;

      { load consts entry }
      if not loaded_indirectly then
        begin
          valuereg:=getintregister(list,OS_INT);
          poolref.symbol:=poollabel;
          poolref.base:=NR_PC;
          list.concat(taicpu.op_reg_ref(A_LDR,valuereg,poolref));

          { with a GOT the pool entry is used as index relative to the GOT register }
          if (cs_create_pic in current_settings.moduleswitches) and
             (tf_pic_uses_got in target_info.flags) and
             assigned(ref.symbol) then
            begin
              reference_reset(poolref,4);
              poolref.base:=current_procinfo.got;
              poolref.index:=valuereg;
              list.concat(taicpu.op_reg_ref(A_LDR,valuereg,poolref));
            end;
        end;

      { PIC without GOT: add PC at the pic label to obtain the address }
      if assigned(piclabel) then
        begin
          cg.a_label(list,piclabel);
          sumreg:=getaddressregister(list);
          a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,valuereg,NR_PC,sumreg);
          valuereg:=sumreg
        end;

      { This routine can be called with PC as base/index in case the offset
        was too large to encode in a load/store. In that case, the entire
        absolute expression has been re-encoded in a new constpool entry, and
        we have to remove the use of PC from the original reference (the code
        above made everything relative to the value loaded from the new
        constpool entry) }
      if is_pc(ref.base) then
        ref.base:=NR_NO;
      if is_pc(ref.index) then
        ref.index:=NR_NO;

      { merge the loaded value into the reference }
      if ref.base=NR_NO then
        ref.base:=valuereg
      else if ref.index<>NR_NO then
        begin
          { base and index both taken: fold the old base into the new register }
          list.concat(taicpu.op_reg_reg_reg(A_ADD,valuereg,ref.base,valuereg));
          ref.base:=valuereg;
        end
      else if ref.base=NR_PC then
        ref.base:=valuereg
      else
        begin
          ref.index:=valuereg;
          ref.shiftimm:=0;
          ref.signindex:=1;
          ref.shiftmode:=SM_None;
        end;

      ref.offset:=0;
      ref.symbol:=nil;
    end;
  { Copies len bytes from source to dest by emitting a call to FPC_MOVE.
    The parameter locations are taken from the system procedure MOVE:
    parameter 1 receives the address of source, parameter 2 the address of
    dest and parameter 3 the length. The volatile integer and FPU registers
    are marked as allocated around the call. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      paralocs : array[1..3] of TCGPara;
      movedef : tprocdef;
      i : longint;
    begin
      movedef:=search_system_proc('MOVE');
      for i:=1 to 3 do
        paralocs[i].init;
      for i:=1 to 3 do
        paramanager.getintparaloc(movedef,i,paralocs[i]);

      { parameters are loaded from last to first }
      a_load_const_cgpara(list,OS_SINT,len,paralocs[3]);
      a_loadaddr_ref_cgpara(list,dest,paralocs[2]);
      a_loadaddr_ref_cgpara(list,source,paralocs[1]);
      for i:=3 downto 1 do
        paramanager.freecgpara(list,paralocs[i]);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      for i:=3 downto 1 do
        paralocs[i].done;
    end;
  2273. procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  2274. const
  2275. maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if might conflick with reserved ones,also +2 is used becouse of regs required for referencing}
  2276. maxtmpreg_thumb = 5;
  2277. var
  2278. srcref,dstref,usedtmpref,usedtmpref2:treference;
  2279. srcreg,destreg,countreg,r,tmpreg:tregister;
  2280. helpsize:aint;
  2281. copysize:byte;
  2282. cgsize:Tcgsize;
  2283. tmpregisters:array[1..maxtmpreg_arm] of tregister;
  2284. maxtmpreg,
  2285. tmpregi,tmpregi2:byte;
  2286. { will never be called with count<=4 }
  2287. procedure genloop(count : aword;size : byte);
  2288. const
  2289. size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  2290. var
  2291. l : tasmlabel;
  2292. begin
  2293. current_asmdata.getjumplabel(l);
  2294. if count<size then size:=1;
  2295. a_load_const_reg(list,OS_INT,count div size,countreg);
  2296. cg.a_label(list,l);
  2297. srcref.addressmode:=AM_POSTINDEXED;
  2298. dstref.addressmode:=AM_POSTINDEXED;
  2299. srcref.offset:=size;
  2300. dstref.offset:=size;
  2301. r:=getintregister(list,size2opsize[size]);
  2302. a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
  2303. a_reg_alloc(list,NR_DEFAULTFLAGS);
  2304. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
  2305. a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
  2306. a_jmp_flags(list,F_NE,l);
  2307. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2308. srcref.offset:=1;
  2309. dstref.offset:=1;
  2310. case count mod size of
  2311. 1:
  2312. begin
  2313. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2314. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2315. end;
  2316. 2:
  2317. if aligned then
  2318. begin
  2319. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2320. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2321. end
  2322. else
  2323. begin
  2324. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2325. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2326. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2327. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2328. end;
  2329. 3:
  2330. if aligned then
  2331. begin
  2332. srcref.offset:=2;
  2333. dstref.offset:=2;
  2334. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2335. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2336. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2337. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2338. end
  2339. else
  2340. begin
  2341. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2342. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2343. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2344. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2345. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2346. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2347. end;
  2348. end;
  2349. { keep the registers alive }
  2350. list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
  2351. list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
  2352. list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
  2353. end;
  { Emits a counted copy loop for Thumb code: "count div size" iterations that
    each move "size" bytes from srcref to dstref, followed by straight-line
    code copying the remaining "count mod size" bytes.
    Works on list, countreg, srcref, dstref, srcreg, destreg, r and aligned of
    the enclosing procedure.
    will never be called with count<=4 }
  procedure genloop_thumb(count : aword;size : byte);

    { advances the base register of ref by value bytes (default: 1) }
    procedure refincofs(const ref : treference;const value : longint = 1);
      begin
        a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
      end;

    const
      { maps a unit size in bytes to the matching integer operand size;
        3 has no matching size }
      size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
    var
      l : tasmlabel;
    begin
      current_asmdata.getjumplabel(l);
      if count<size then size:=1;
      a_load_const_reg(list,OS_INT,count div size,countreg);
      cg.a_label(list,l);
      r:=getintregister(list,size2opsize[size]);
      a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
      { step by the unit size that was just copied; stepping by 1 was only
        correct for size=1 }
      refincofs(srcref,size);
      a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
      refincofs(dstref,size);
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
      a_jmp_flags(list,F_NE,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      { copy the bytes that did not fill a whole unit }
      case count mod size of
        1:
          begin
            a_load_ref_reg(list,OS_8,OS_8,srcref,r);
            a_load_reg_ref(list,OS_8,OS_8,r,dstref);
          end;
        2:
          if aligned then
            begin
              a_load_ref_reg(list,OS_16,OS_16,srcref,r);
              a_load_reg_ref(list,OS_16,OS_16,r,dstref);
            end
          else
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              refincofs(srcref);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              refincofs(dstref);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
        3:
          if aligned then
            begin
              a_load_ref_reg(list,OS_16,OS_16,srcref,r);
              refincofs(srcref,2);
              a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              refincofs(dstref,2);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end
          else
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              refincofs(srcref);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              refincofs(dstref);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              refincofs(srcref);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              refincofs(dstref);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
      end;
      { keep the registers alive }
      list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
      list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
      list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
    end;
  2428. begin
  2429. if len=0 then
  2430. exit;
  2431. if GenerateThumbCode then
  2432. maxtmpreg:=maxtmpreg_thumb
  2433. else
  2434. maxtmpreg:=maxtmpreg_arm;
  2435. helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
  2436. dstref:=dest;
  2437. srcref:=source;
  2438. if cs_opt_size in current_settings.optimizerswitches then
  2439. helpsize:=8;
  2440. if aligned and (len=4) then
  2441. begin
  2442. tmpreg:=getintregister(list,OS_32);
  2443. a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
  2444. a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
  2445. end
  2446. else if aligned and (len=2) then
  2447. begin
  2448. tmpreg:=getintregister(list,OS_16);
  2449. a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
  2450. a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
  2451. end
  2452. else if (len<=helpsize) and aligned then
  2453. begin
  2454. tmpregi:=0;
  2455. srcreg:=getintregister(list,OS_ADDR);
  2456. { explicit pc relative addressing, could be
  2457. e.g. a floating point constant }
  2458. if source.base=NR_PC then
  2459. begin
  2460. { ... then we don't need a loadaddr }
  2461. srcref:=source;
  2462. end
  2463. else
  2464. begin
  2465. a_loadaddr_ref_reg(list,source,srcreg);
  2466. reference_reset_base(srcref,srcreg,0,source.alignment);
  2467. end;
  2468. while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
  2469. begin
  2470. inc(tmpregi);
  2471. tmpregisters[tmpregi]:=getintregister(list,OS_32);
  2472. a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
  2473. inc(srcref.offset,4);
  2474. dec(len,4);
  2475. end;
  2476. destreg:=getintregister(list,OS_ADDR);
  2477. a_loadaddr_ref_reg(list,dest,destreg);
  2478. reference_reset_base(dstref,destreg,0,dest.alignment);
  2479. tmpregi2:=1;
  2480. while (tmpregi2<=tmpregi) do
  2481. begin
  2482. a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
  2483. inc(dstref.offset,4);
  2484. inc(tmpregi2);
  2485. end;
  2486. copysize:=4;
  2487. cgsize:=OS_32;
  2488. while len<>0 do
  2489. begin
  2490. if len<2 then
  2491. begin
  2492. copysize:=1;
  2493. cgsize:=OS_8;
  2494. end
  2495. else if len<4 then
  2496. begin
  2497. copysize:=2;
  2498. cgsize:=OS_16;
  2499. end;
  2500. dec(len,copysize);
  2501. r:=getintregister(list,cgsize);
  2502. a_load_ref_reg(list,cgsize,cgsize,srcref,r);
  2503. a_load_reg_ref(list,cgsize,cgsize,r,dstref);
  2504. inc(srcref.offset,copysize);
  2505. inc(dstref.offset,copysize);
  2506. end;{end of while}
  2507. end
  2508. else
  2509. begin
  2510. cgsize:=OS_32;
  2511. if (len<=4) then{len<=4 and not aligned}
  2512. begin
  2513. r:=getintregister(list,cgsize);
  2514. usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2515. if Len=1 then
  2516. a_load_reg_ref(list,OS_8,OS_8,r,dstref)
  2517. else
  2518. begin
  2519. tmpreg:=getintregister(list,cgsize);
  2520. usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2521. inc(usedtmpref.offset,1);
  2522. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2523. inc(usedtmpref2.offset,1);
  2524. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2525. if len>2 then
  2526. begin
  2527. inc(usedtmpref.offset,1);
  2528. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2529. inc(usedtmpref2.offset,1);
  2530. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2531. if len>3 then
  2532. begin
  2533. inc(usedtmpref.offset,1);
  2534. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2535. inc(usedtmpref2.offset,1);
  2536. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2537. end;
  2538. end;
  2539. end;
  2540. end{end of if len<=4}
  2541. else
  2542. begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
  2543. destreg:=getintregister(list,OS_ADDR);
  2544. a_loadaddr_ref_reg(list,dest,destreg);
  2545. reference_reset_base(dstref,destreg,0,dest.alignment);
  2546. srcreg:=getintregister(list,OS_ADDR);
  2547. a_loadaddr_ref_reg(list,source,srcreg);
  2548. reference_reset_base(srcref,srcreg,0,source.alignment);
  2549. countreg:=getintregister(list,OS_32);
  2550. // if cs_opt_size in current_settings.optimizerswitches then
  2551. { roozbeh : it seems loading 1 byte is faster becouse of caching/fetching(?) }
  2552. {if aligned then
  2553. genloop(len,4)
  2554. else}
  2555. if GenerateThumbCode then
  2556. genloop_thumb(len,1)
  2557. else
  2558. genloop(len,1);
  2559. end;
  2560. end;
  2561. end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with its "aligned" argument false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      treat_as_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,treat_as_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    when either reference reports an alignment of 1 or 3, and as aligned
    otherwise. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      both_aligned : boolean;
    begin
      { NOTE(review): the set is [1,3], not [1..3], so an alignment of 2 is
        treated as aligned here - confirm this is intended }
      both_aligned:=not((source.alignment in [1,3]) or
                        (dest.alignment in [1,3]));
      g_concatcopy_internal(list,source,dest,len,both_aligned);
    end;
  { Overflow check without a known overflow location: passes a LOC_VOID
    location on to g_overflowCheck_loc. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      void_ovloc : tlocation;
    begin
      { only the loc field is initialised; g_overflowCheck_loc's LOC_VOID
        branch reads nothing else from it }
      void_ovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,void_ovloc);
    end;
  { Emits an overflow check: a conditional branch over a call to FPC_OVERFLOW.
    Does nothing unless cs_check_overflow is set in the local switches.

    ovloc.loc=LOC_VOID:  the branch condition is derived from def and, for
                         pointer/unsigned types, from the opcode of the last
                         instruction in the list.
    ovloc.loc=LOC_FLAGS: ovloc.resflags holds the overflow condition; the
                         branch uses its inverse and the flags register is
                         deallocated afterwards.
    Any other location raises internalerror 200409281. }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      no_overflow_lab : tasmlabel;
      skipjmp : TAiCpu;
      ovflags : tresflags;
      carry_based : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(no_overflow_lab);
      case ovloc.loc of
        LOC_VOID:
          begin
            { pointers and unsigned/boolean/char ordinals are checked through
              the carry flag, everything else through the overflow flag }
            carry_based:=(def.typ=pointerdef) or
                         ((def.typ=orddef) and
                          (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                                    pasbool8,pasbool16,pasbool32,pasbool64]));
            skipjmp:=taicpu.op_sym(A_B,no_overflow_lab);
            skipjmp.is_jmp:=true;
            if carry_based then
              begin
                { after a subtraction the "no overflow" condition is carry
                  set, otherwise carry clear }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  skipjmp.SetCondition(C_CS)
                else
                  skipjmp.SetCondition(C_CC);
              end
            else
              skipjmp.SetCondition(C_VC);
            list.concat(skipjmp);
          end;
        LOC_FLAGS:
          begin
            ovflags:=ovloc.resflags;
            inverse_flags(ovflags);
            cg.a_jmp_flags(list,ovflags,no_overflow_lab);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,no_overflow_lab);
    end;
  { Intentionally empty: emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { this work is done in g_proc_entry }
    end;
  { Intentionally empty: emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { this work is done in g_proc_exit }
    end;
  { Emits a jump to l that is taken when comparison condition cond holds.
    ARM code gets a single conditional branch. Thumb code gets a conditional
    branch with the inverted condition over an unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Looks up the scalar VFP copy/convert opcode in a table indexed by
    [fromsize,tosize]. Only the OS_F32/OS_F64 combinations have an entry;
    every other combination raises internalerror 200312205.
    NOTE(review): the only caller visible here (a_loadmm_reg_reg) passes
    (tosize,fromsize), so the table is effectively indexed [to,from] -
    confirm before changing either side. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_FCPYS,A_FCVTSD,A_NONE,A_NONE,A_NONE),
        (A_FCVTDS,A_FCPYD,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    var
      picked : tasmop;
    begin
      picked:=convertop[fromsize,tosize];
      if picked=A_NONE then
        internalerror(200312205);
      result:=picked;
    end;
  { Emits a VFP register-to-register move from reg1 to reg2.
    Without a shuffle only same-size OS_F32/OS_F64 copies are accepted.
    With a scalar shuffle the opcode comes from get_scalar_mm_op, which may be
    a conversion. Unsupported combinations raise an internalerror.
    Plain copies (FCPYS/FCPYD) are passed to add_move_instruction. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
    begin
      if assigned(shuffle) then
        begin
          if not shufflescalar(shuffle) then
            internalerror(2009112407);
          mov:=taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1);
        end
      else
        begin
          if fromsize<>tosize then
            internalerror(2009112406);
          { needs correct size in case of spilling }
          case fromsize of
            OS_F32:
              mov:=taicpu.op_reg_reg(A_FCPYS,reg2,reg1);
            OS_F64:
              mov:=taicpu.op_reg_reg(A_FCPYD,reg2,reg1);
            else
              internalerror(2009112405);
          end;
        end;
      list.concat(mov);
      case mov.opcode of
        A_FCPYS,
        A_FCPYD:
          add_move_instruction(mov);
      end;
    end;
  { Loads a value of size fromsize from ref into VFP register reg, converting
    it to tosize if the sizes differ.
    Integer sizes are treated as the float size of the same width and then
    must equal tosize. References with alignment 1 or 2 are loaded through
    integer registers and transferred into the VFP register; all others use
    FLDS/FLDD. Only scalar shuffles are accepted. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpreg,
      loadreg : tregister;
      gpreg64 : tregister64;
      loadop : tasmop;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      { since we are loading an integer, no conversion may be required }
      if fromsize in [OS_32,OS_S32] then
        begin
          if tosize<>OS_F32 then
            internalerror(2009112801);
          fromsize:=OS_F32;
        end
      else if fromsize in [OS_64,OS_S64] then
        begin
          if tosize<>OS_F64 then
            internalerror(2009112901);
          fromsize:=OS_F64;
        end;

      { load into a temporary when a size conversion has to follow }
      if fromsize=tosize then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        begin
          case fromsize of
            OS_F32:
              loadop:=A_FLDS;
            OS_F64:
              loadop:=A_FLDD;
            else
              internalerror(2009112415);
          end;
          handle_load_store(list,loadop,PF_None,loadreg,ref);
        end
      else
        case fromsize of
          OS_F32:
            begin
              gpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,ref,gpreg);
              a_loadmm_intreg_reg(list,OS_32,OS_F32,gpreg,loadreg,mms_movescalar);
            end;
          OS_F64:
            begin
              gpreg64.reglo:=getintregister(list,OS_32);
              gpreg64.reghi:=getintregister(list,OS_32);
              cg64.a_load64_ref_reg(list,ref,gpreg64);
              cg64.a_loadmm_intreg64_reg(list,OS_F64,gpreg64,loadreg);
            end;
          else
            internalerror(2009112412);
        end;

      if loadreg<>reg then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores VFP register reg (holding a value of size fromsize) to ref as a
    value of size tosize.
    Integer target sizes are treated as the float size of the same width and
    then must equal fromsize. If the sizes differ, the value is first
    converted into a temporary VFP register of size tosize. References with
    alignment 1 or 2 are stored through integer registers; all others use
    FSTS/FSTD. Only scalar shuffles are accepted. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      intreg,
      tmpmmreg : tregister;
      reg64 : tregister64;
      op : tasmop;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);
      case tosize of
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112901);
          end;
      end;
      if (fromsize<>tosize) then
        begin
          { from here on tmpmmreg holds a value of size tosize }
          tmpmmreg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpmmreg,shuffle);
        end
      else
        tmpmmreg:=reg;
      if (ref.alignment in [1,2]) then
        begin
          case tosize of
            OS_F32:
              begin
                intreg:=getintregister(list,OS_32);
                a_loadmm_reg_intreg(list,OS_F32,OS_32,tmpmmreg,intreg,shuffle);
                a_load_reg_ref(list,OS_32,OS_32,intreg,ref);
              end;
            OS_F64:
              begin
                reg64.reglo:=getintregister(list,OS_32);
                reg64.reghi:=getintregister(list,OS_32);
                cg64.a_loadmm_reg_intreg64(list,OS_F64,tmpmmreg,reg64);
                cg64.a_load64_reg_ref(list,reg64,ref);
              end;
            else
              internalerror(2009112417);
          end;
        end
      else
        begin
          { the store width must match the (possibly converted) value in
            tmpmmreg, i.e. tosize - selecting on fromsize stored the wrong
            width whenever a conversion took place }
          case tosize of
            OS_F32:
              op:=A_FSTS;
            OS_F64:
              op:=A_FSTD;
            else
              internalerror(2009112418);
          end;
          handle_load_store(list,op,PF_None,tmpmmreg,ref);
        end;
    end;
  { Transfers the raw 32 bits of integer register intreg into VFP register
    mmreg with FMSR. Accepts only fromsize OS_32/OS_S32, tosize OS_F32 and a
    nil or scalar shuffle; anything else raises an internalerror. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if tosize<>OS_F32 then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112516);
      list.concat(taicpu.op_reg_reg(A_FMSR,mmreg,intreg));
    end;
  { Transfers the raw 32 bits of VFP register mmreg into integer register
    intreg with FMRS. Accepts only fromsize OS_F32, tosize OS_32/OS_S32 and a
    nil or scalar shuffle; anything else raises an internalerror. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if fromsize<>OS_F32 then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112514);
      list.concat(taicpu.op_reg_reg(A_FMRS,intreg,mmreg));
    end;
  { Supports exactly one operation: OP_XOR of a register with itself, which
    is implemented by loading zero into dst through an integer register.
    Any other op, src<>dst, a size that differs from the register size, or a
    non-nil shuffle raises an internalerror. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op<>OP_XOR then
        internalerror(2009112906);
      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);
      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      case size of
        OS_F32:
          list.concat(taicpu.op_reg_reg(A_FMSR,dst,zeroreg));
        OS_F64:
          { both halves of the double come from the same zero register }
          list.concat(taicpu.op_reg_reg_reg(A_FMDRR,dst,zeroreg,zeroreg));
        else
          internalerror(2009112908);
      end;
    end;
  { After MUL/SHL/ADD/SUB/NEG on an 8 or 16 bit operand, re-loads dst from
    itself as OS_32 -> size. Does nothing for other operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16]) then
        exit;
      if op in [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG] then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits MLA op1,op2,op3,op4. Any operand that is R15, or R13 when
    generating Thumb or Thumb-2 code, is first copied into a fresh integer
    register and that copy is used instead. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new integer register if it is R15, or R13
      in Thumb/Thumb-2 mode }
    procedure substitute_restricted(var reg : TRegister);
      var
        copyreg : TRegister;
      begin
        if not(((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
               (getsupreg(reg)=RS_R15)) then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      substitute_restricted(op1);
      substitute_restricted(op2);
      substitute_restricted(op3);
      substitute_restricted(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { 64 bit operation with regsrc as operand and regdst as destination.
    OP_NEG: RSB (setting flags) on the low words, RSC on the high words,
            with the flags register allocated around the pair.
    OP_NOT: 32 bit NOT on each half.
    others: forwarded to a_op64_reg_reg_reg with regdst as second source. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { In-place 64 bit operation with a constant: forwards to
    a_op64_const_reg_reg using reg as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,
                           op,
                           size,
                           value,
                           reg,   { source }
                           reg);  { destination }
    end;
  { 64 bit operation with a constant without an explicit flags request:
    calls the checkoverflow variant with setflags=false and discards the
    overflow location it reports. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { Three-operand 64 bit operation without an explicit flags request:
    calls the checkoverflow variant with setflags=false and discards the
    overflow location it reports. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Transfers an integer register pair into VFP register mmreg with FMDRR
    (operands: mmreg, low word, high word). mmsize must be OS_F64, otherwise
    internalerror 2009112405 is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      list.concat(
        taicpu.op_reg_reg_reg(A_FMDRR,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers VFP register mmreg into an integer register pair with FMRRD
    (operands: low word, high word, mmreg). mmsize must be OS_F64, otherwise
    internalerror 2009112406 is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      list.concat(
        taicpu.op_reg_reg_reg(A_FMRRD,intreg.reglo,intreg.reghi,mmreg));
    end;
  { 64 bit operation regdst := regsrc <op> value, built from two 32 bit
    instructions.

    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR. OP_NEG/OP_NOT raise
               internalerror 2012022501, anything else 2003083101.
    setflags : if true (or if the code generator's cgsetflags is set) and op
               is ADD/SUB, the high-word instruction also sets the flags and
               the flags register is left allocated for the caller.
    ovloc    : set to LOC_VOID; for flag-setting ADD/SUB with size=OS_64 it
               becomes LOC_FLAGS with F_CS (ADD) or F_CC (SUB).

    Each 32 bit half of value is used as an immediate if is_shifter_const
    accepts it, otherwise it is loaded into a temporary register first. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag-setting variant: the flags register allocated below is
            deliberately not deallocated here, the resulting flags are the
            output of this path }
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the ARM has a weird opinion on how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed by ADC: release the flags, as
                  the register/register variant of this routine does }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the borrow has been consumed by SBC: release the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { 64 bit three-operand operation built from two 32 bit instructions.
    ADD computes regsrc1+regsrc2, SUB computes regsrc2-regsrc1, AND/OR/XOR
    are applied to each half separately.

    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR. OP_NEG/OP_NOT raise
               internalerror 2012022502, anything else 2003083101.
    setflags : if true (or if the code generator's cgsetflags is set) and op
               is ADD/SUB, the high-word instruction also sets the flags and
               the flags register is left allocated; otherwise the flags are
               deallocated after the high-word instruction.
    ovloc    : set to LOC_VOID; for flag-setting ADD/SUB with size=OS_64 it
               becomes LOC_FLAGS with F_CS (ADD) or F_CC (SUB). }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      keep_flags : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
          end;
        OP_ADD,OP_SUB:
          begin
            keep_flags:=setflags or tbasecgarm(cg).cgsetflags;
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            if op=OP_ADD then
              begin
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                if keep_flags then
                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S))
                else
                  list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
              end
            else
              begin
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                if keep_flags then
                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S))
                else
                  list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
              end;
            if not keep_flags then
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS)
            else if size=OS_64 then
              begin
                { the ARM has a weird opinion on how flags for SUB/ADD are handled }
                ovloc.loc:=LOC_FLAGS;
                if op=OP_ADD then
                  ovloc.resflags:=F_CS
                else
                  ovloc.resflags:=F_CC;
              end;
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Creates the Thumb integer register allocator after the inherited
    initialisation. R0..R7 are made available, except that R7 is left out
    when the current procedure uses it as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or (current_procinfo.framepointer<>NR_R7) then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 is the frame pointer and must not be handed out }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Frees the integer, FPU and MM register allocators, then runs the
    inherited cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits the Thumb procedure prologue.

    list         : instruction list the prologue is appended to
    localsize    : size of the local variables; rounded up to a multiple of 4
    nostackframe : if true, nothing is emitted

    Steps: push the used non-volatile integer registers plus R14 (and R7 when
    a frame pointer other than SP is used), lower SP by the frame size, and
    finally copy SP into the frame pointer if one is used.
    Raises internalerror 2013040601 if stack parameters are accessed and
    localsize exceeds the previously estimated stack frame size. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea : DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);
          { safely estimate stack size: R4 is the scratch register used below
            for large frames, so make sure it gets saved }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;
          { number of bytes the PUSH below puts on the stack }
          registerarea:=0;
          if regs<>[] then
            begin
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
            end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tarmprocinfo(current_procinfo).stackframesize then
                    begin
                      writeln(localsize);
                      writeln(tarmprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tarmprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              { 508 is the largest immediate used for a single SP adjustment;
                use the same inclusive bound as g_proc_exit so that a frame of
                exactly 508 bytes needs one instruction instead of
                "sub 508" followed by "sub 0" }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4 }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  { has no effect on the PUSH that was already emitted above }
                  include(regs,RS_R4);
                  //!!!! if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                  //!!!! a_reg_alloc(list,NR_R12);
                  //!!!! a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  //!!!! list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  //!!!! a_reg_dealloc(list,NR_R12);
                end;
            end;
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { frame pointer := SP + 0 }
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
            end;
        end;
    end;
  { Generates the Thumb procedure epilogue.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true only a plain return instruction is emitted

    With a stack frame the routine recomputes which registers the prologue
    saved, releases the local stack area and pops the saved registers; as
    R15 is always part of the popped set, the POP also performs the return.
    Nothing is emitted when a frame pointer other than the stack pointer is
    in use and the target is not a Darwin system. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      supreg : byte;
      savedregs : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      has_stack_params : Boolean;

    { appends the plain "return to caller" instruction: BX R14 if the
      selected cpu type has BX, MOV PC,R14 otherwise }
    procedure emit_plain_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      if nostackframe then
        begin
          emit_plain_return;
          exit;
        end;

      has_stack_params:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: all used non-volatile ones, the PC and, if
        there is one, the dedicated frame pointer }
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(savedregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(savedregs,getsupreg(current_procinfo.framepointer));

      { four bytes of stack per saved register }
      savedbytes:=0;
      for supreg:=RS_R0 to RS_R15 do
        if supreg in savedregs then
          inc(savedbytes,4);
      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if has_stack_params then
        framesize:=tarmprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the locals; a frame size of zero needs no instruction at all }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              { two steps of at most 508 bytes each }
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { larger frames: load the size into R3 and add it to SP }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      { R15 was included above, so the set is never empty here; the test is
        kept to mirror the structure of the prologue code }
      if savedregs=[] then
        emit_plain_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,savedregs));
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.
    When the reference is only 1 or 2 byte aligned and that alignment is
    smaller than the access size, the value is assembled from several byte
    or halfword loads combined with LSL/ORR; all other cases are passed to
    handle_load_store as a single LDR with the matching postfix. }
  3350. procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  3351. var
  3352. oppostfix:toppostfix;
  3353. usedtmpref: treference;
  3354. tmpreg,tmpreg2 : tregister;
  3355. dir : integer;
  3356. begin
  { never load more than the destination size asks for }
  3357. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  3358. FromSize := ToSize;
  { select the load postfix from the (possibly clamped) source size }
  3359. case FromSize of
  3360. { signed integer registers }
  3361. OS_8:
  3362. oppostfix:=PF_B;
  3363. OS_S8:
  3364. oppostfix:=PF_SB;
  3365. OS_16:
  3366. oppostfix:=PF_H;
  3367. OS_S16:
  3368. oppostfix:=PF_SH;
  3369. OS_32,
  3370. OS_S32:
  3371. oppostfix:=PF_None;
  3372. else
  3373. InternalError(200308298);
  3374. end;
  { under-aligned access: build the value piece by piece }
  3375. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  3376. begin
  { dir is the offset step from one piece to the next: the first load
    always fetches the least significant piece, so on big endian targets
    the offset starts at the end and walks backwards }
  3377. if target_info.endian=endian_big then
  3378. dir:=-1
  3379. else
  3380. dir:=1;
  3381. case FromSize of
  3382. OS_16,OS_S16:
  3383. begin
  3384. { only complicated references need an extra loadaddr }
  3385. if assigned(ref.symbol) or
  3386. (ref.index<>NR_NO) or
  3387. (ref.offset<-124) or
  3388. (ref.offset>124) or
  3389. { sometimes the compiler reused registers }
  3390. (reg=ref.index) or
  3391. (reg=ref.base) then
  3392. begin
  3393. tmpreg2:=getintregister(list,OS_INT);
  3394. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3395. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  3396. end
  3397. else
  3398. usedtmpref:=ref;
  3399. if target_info.endian=endian_big then
  3400. inc(usedtmpref.offset,1);
  3401. tmpreg:=getintregister(list,OS_INT);
  { low byte into reg, high byte (sign extended for OS_S16) into tmpreg }
  3402. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3403. inc(usedtmpref.offset,dir);
  3404. if FromSize=OS_16 then
  3405. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  3406. else
  3407. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  3408. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3409. list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
  3410. end;
  3411. OS_32,OS_S32:
  3412. begin
  3413. tmpreg:=getintregister(list,OS_INT);
  3414. { only complicated references need an extra loadaddr }
  3415. if assigned(ref.symbol) or
  3416. (ref.index<>NR_NO) or
  3417. (ref.offset<-124) or
  3418. (ref.offset>124) or
  3419. { sometimes the compiler reused registers }
  3420. (reg=ref.index) or
  3421. (reg=ref.base) then
  3422. begin
  3423. tmpreg2:=getintregister(list,OS_INT);
  3424. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3425. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  3426. end
  3427. else
  3428. usedtmpref:=ref;
  { 2 byte alignment: two halfword loads are enough }
  3429. if ref.alignment=2 then
  3430. begin
  3431. if target_info.endian=endian_big then
  3432. inc(usedtmpref.offset,2);
  3433. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  3434. inc(usedtmpref.offset,dir*2);
  3435. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  3436. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3437. list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
  3438. end
  3439. else
  { 1 byte alignment: four byte loads, shifted by 0, 8, 16 and 24 bits }
  3440. begin
  3441. if target_info.endian=endian_big then
  3442. inc(usedtmpref.offset,3);
  3443. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3444. inc(usedtmpref.offset,dir);
  3445. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3446. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3447. list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
  3448. inc(usedtmpref.offset,dir);
  3449. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3450. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3451. list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
  3452. inc(usedtmpref.offset,dir);
  3453. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3454. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,24));
  3455. list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
  3456. end;
  3457. end
  3458. else
  3459. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  3460. end;
  3461. end
  3462. else
  3463. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { a signed byte loaded for a 16 bit destination gets an additional
    OS_16 -> OS_32 register conversion }
  3464. if (fromsize=OS_S8) and (tosize = OS_16) then
  3465. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  3466. end;
  { Loads the constant a into reg.

    Constants accepted by is_thumb_imm are moved with a single MOV.  All
    other values are emitted as a 32 bit word into the local data of the
    current procedure and fetched with a pc-relative LDR.  Sizes other than
    the 8, 16 and 32 bit integer sizes raise an internal error. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { place the value behind a fresh label in the local data ... }
      reference_reset(poolref,4);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { ... remembering the label entry that was just appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      { and load it relative to the program counter }
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef, wherever the
    caller side parameter location places it (register or memory).

    If ioffset is not accepted by is_thumb_imm it is loaded into R4 from
    the local data of the current procedure; R4 is pushed before and popped
    after the subtraction.  Any other location kind, or a missing 'self'
    parameter symbol, raises an internal error. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      href : treference;
      paraloc : Pcgparalocation;

    { appends an aligned 32 bit word holding ioffset to the local data and
      emits the pc-relative LDR that loads it into R4 }
    procedure load_ioffset_into_r4;
      var
        litref : treference;
        litlabel : TAsmLabel;
      begin
        reference_reset(litref,4);
        current_asmdata.getjumplabel(litlabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,litlabel);
        litref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        litref.symbol:=litlabel;
        litref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,litref));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(200305251);

      { walk over every location the parameter is split into }
      paraloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while paraloc<>nil do
        with paraloc^ do
          begin
            case loc of
              LOC_REGISTER:
                begin
                  if is_thumb_imm(ioffset) then
                    a_op_const_reg(list,OP_SUB,size,ioffset,register)
                  else
                    begin
                      list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                      load_ioffset_into_r4;
                      a_op_reg_reg(list,OP_SUB,size,NR_R4,register);
                      list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    end;
                end;
              LOC_REFERENCE:
                begin
                  { offset in the wrapper needs to be adjusted for the stored
                    return address }
                  reference_reset_base(href,reference.index,reference.offset+sizeof(aint),sizeof(pint));
                  if is_thumb_imm(ioffset) then
                    a_op_const_ref(list,OP_SUB,size,ioffset,href)
                  else
                    begin
                      list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                      load_ioffset_into_r4;
                      a_op_reg_ref(list,OP_SUB,size,NR_R4,href);
                      list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    end;
                end
              else
                internalerror(200309189);
            end;
            paraloc:=next;
          end;
    end;
  { Thumb front end for the inherited handle_load_store.

    Depending on the instruction, its postfix and the reference offset, the
    address of ref is first computed into a fresh register and the access
    is then done through a plain [register] reference with offset 0;
    otherwise ref is passed on unchanged.  The result is whatever the
    inherited implementation returns. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      effref : treference;
      addrreg : TRegister;
      far_offset,
      need_address : boolean;
    begin
      effref:=ref;
      far_offset:=abs(ref.offset)>124;

      need_address:=
        { stores with a large offset }
        ((op in [A_STR,A_STRB,A_STRH]) and far_offset) or
        { word loads which are not relative to the stack pointer }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base<>NR_STACK_POINTER_REG) and
         far_offset) or
        { word loads relative to the stack pointer }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any remaining load with a large offset;
          note that this test also catches stack pointer relative word
          loads with an offset above 124 }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or far_offset));

      if need_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(effref,addrreg,0,ref.alignment);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, effref);
    end;
  { Emits the two operand Thumb instruction(s) for "dst := dst op src".

    list - instruction list to append to
    Op   - operation; OP_DIV and OP_IDIV raise an internal error
    size - operand size; OP_ROL accepts only OS_32 and OS_S32
    src  - source register
    dst  - destination register, also the first operand

    NEG and MVN are emitted directly.  ROL is simulated with ROR by
    (32 - src).  Every other operation is looked up in op_reg_opcg2asmop
    and emitted with the postfix from op_reg_postfix, after the flags
    register has been marked as allocated.  maybeadjustresult is called on
    dst at the end. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            { tmpreg := 32 - src }
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the complemented count; rotating by src itself
              would turn the rotate-left into a rotate-right }
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits Thumb code for "dst := dst op a" with a constant operand a.

    ADD/SUB whose negated constant is accepted by is_thumb_imm are first
    turned into the opposite operation.  ADD/SUB with a constant accepted
    by is_thumb_imm are emitted as one instruction; a few trivial
    multiplications/divisions and AND masks are special-cased; constant
    shifts use the three operand form; everything else loads the constant
    into a temporary register and defers to a_op_reg_reg.  The sections
    between $ifdef DUMMY and $endif are only compiled when DUMMY is
    defined. }
  3629. procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
  3630. var
  3631. tmpreg : tregister;
  3632. so : tshifterop;
  3633. l1 : longint;
  3634. imm1, imm2: DWord;
  3635. begin
  3636. //!!! ovloc.loc:=LOC_VOID;
  { swap ADD and SUB when -a is the encodable value; with range checking
    enabled the most negative 32 bit value is excluded from the negation }
  3637. if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
  3638. case op of
  3639. OP_ADD:
  3640. begin
  3641. op:=OP_SUB;
  3642. a:=aint(dword(-a));
  3643. end;
  3644. OP_SUB:
  3645. begin
  3646. op:=OP_ADD;
  3647. a:=aint(dword(-a));
  3648. end
  3649. end;
  { directly encodable ADD/SUB }
  3650. if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
  3651. begin
  3652. // if cgsetflags or setflags then
  3653. a_reg_alloc(list,NR_DEFAULTFLAGS);
  3654. list.concat(setoppostfix(
  3655. taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
  { the overflow location handling below is disabled (see the //!!!
    markers), so both case branches are empty statements }
  3656. if (cgsetflags {!!! or setflags }) and (size in [OS_8,OS_16,OS_32]) then
  3657. begin
  3658. //!!! ovloc.loc:=LOC_FLAGS;
  3659. case op of
  3660. OP_ADD:
  3661. //!!! ovloc.resflags:=F_CS;
  3662. ;
  3663. OP_SUB:
  3664. //!!! ovloc.resflags:=F_CC;
  3665. ;
  3666. end;
  3667. end;
  3668. end
  3669. else
  3670. begin
  3671. { there could be added some more sophisticated optimizations }
  3672. if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
  3673. a_load_reg_reg(list,size,size,dst,dst)
  3674. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  3675. a_load_const_reg(list,size,0,dst)
  3676. else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  3677. a_op_reg_reg(list,OP_NEG,size,dst,dst)
  3678. { we do this here instead in the peephole optimizer because
  3679. it saves us a register }
  3680. {$ifdef DUMMY}
  3681. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  3682. a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
  3683. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  3684. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  3685. begin
  3686. if l1>32 then{roozbeh does this ever happen?}
  3687. internalerror(200308296);
  3688. shifterop_reset(so);
  3689. so.shiftmode:=SM_LSL;
  3690. so.shiftimm:=l1;
  3691. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
  3692. end
  3693. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  3694. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  3695. begin
  3696. if l1>32 then{does this ever happen?}
  3697. internalerror(201205181);
  3698. shifterop_reset(so);
  3699. so.shiftmode:=SM_LSL;
  3700. so.shiftimm:=l1;
  3701. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
  3702. end
  3703. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
  3704. begin
  3705. { nothing to do on success }
  3706. end
  3707. {$endif DUMMY}
  3708. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  3709. Just using mov x, #0 might allow some easier optimizations down the line. }
  3710. else if (op = OP_AND) and (dword(a)=0) then
  3711. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  3712. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  { source and destination are the same register here, so the branch is
    an empty statement }
  3713. else if (op = OP_AND) and (not(dword(a))=0) then
  3714. // do nothing
  3715. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  3716. broader range of shifterconstants.}
  3717. {$ifdef DUMMY}
  3718. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  3719. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
  3720. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  3721. begin
  3722. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
  3723. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  3724. end
  3725. else if (op in [OP_ADD, OP_SUB, OP_OR]) and
  3726. not(cgsetflags or setflags) and
  3727. split_into_shifter_const(a, imm1, imm2) then
  3728. begin
  3729. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
  3730. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  3731. end
  3732. {$endif DUMMY}
  { shifts by a constant: dst := dst shifted by a }
  3733. else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
  3734. begin
  3735. list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
  3736. end
  3737. else
  { general case: materialise the constant and use the register form }
  3738. begin
  3739. tmpreg:=getintregister(list,size);
  3740. a_load_const_reg(list,size,a,tmpreg);
  3741. a_op_reg_reg(list,op,size,tmpreg,dst);
  3742. end;
  3743. end;
  3744. maybeadjustresult(list,op,size,dst);
  3745. end;
  { Emits "dst := src op a".

    Only one case is handled here: adding a positive multiple of 4 of at
    most 1020 to R13 with a destination other than R13, which is emitted
    as a single three operand ADD.  Everything else is delegated to the
    inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_relative_add : boolean;
    begin
      sp_relative_add:=
        (op=OP_ADD) and
        (src=NR_R13) and
        (dst<>NR_R13) and
        ((a mod 4)=0) and
        (a>0) and
        (a<=1020);

      if not sp_relative_add then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition described by f as 0 or 1 in reg.

    A conditional branch jumps to the code that loads 1 when the condition
    holds; the fall-through path loads 0 and branches over it.  The flags
    register is marked as released just before the final label. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      lbl_true,
      lbl_done : tasmlabel;
      condbranch : taicpu;
    begin
      current_asmdata.getjumplabel(lbl_true);
      current_asmdata.getjumplabel(lbl_done);

      { b<cond> lbl_true }
      condbranch:=setcondition(taicpu.op_sym(A_B,lbl_true),flags_to_cond(f));
      condbranch.is_jmp:=true;
      list.concat(condbranch);

      { condition false: reg := 0, then skip the "true" part }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,lbl_done));

      { condition true: reg := 1 }
      cg.a_label(list,lbl_true);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lbl_done);
    end;
  { Creates the integer, fpu and mm register allocators used for Thumb-2
    code after running the inherited initialisation. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if (target_info.system=system_arm_darwin) then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      if not(current_settings.fputype in [fpu_fpv4_s16,fpu_vfpv3_d16]) then
        { NOTE(review): RS_R2..RS_R4 in an mm register list look like a typo
          for RS_S2..RS_S4 - reproduced unchanged, confirm before touching }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[])
      else
        { these fpu types get the sixteen double registers D0..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Frees the integer, fpu and mm register allocators held in rg and then
    runs the inherited clean-up. }
  3794. procedure tthumb2cgarm.done_register_allocators;
  3795. begin
  3796. rg[R_INTREGISTER].free;
  3797. rg[R_FPUREGISTER].free;
  3798. rg[R_MMREGISTER].free;
  3799. inherited done_register_allocators;
  3800. end;
  { Emits an indirect call through reg (BLX reg) and records in the flags
    of the current procedure that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinstr);
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg using, in order of preference:
      - MOV  if is_thumb32_imm accepts a
      - MVN  if is_thumb32_imm accepts not(a)
      - MOVW if a fits into the lower 16 bits
      - a pc-relative LDR of a 32 bit word placed in the local data of the
        current procedure
    Sizes other than the 8, 16 and 32 bit integer sizes raise an internal
    error. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: go through the local data }
      reference_reset(poolref,4);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry that was just appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.
    When the reference is only 1 or 2 byte aligned and that alignment is
    smaller than the access size, the value is assembled from several byte
    or halfword loads which are merged with ORR and an LSL shifter
    operand; all other cases are passed to handle_load_store as a single
    LDR with the matching postfix. }
  3838. procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  3839. var
  3840. oppostfix:toppostfix;
  3841. usedtmpref: treference;
  3842. tmpreg,tmpreg2 : tregister;
  3843. so : tshifterop;
  3844. dir : integer;
  3845. begin
  { never load more than the destination size asks for }
  3846. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  3847. FromSize := ToSize;
  { select the load postfix from the (possibly clamped) source size }
  3848. case FromSize of
  3849. { signed integer registers }
  3850. OS_8:
  3851. oppostfix:=PF_B;
  3852. OS_S8:
  3853. oppostfix:=PF_SB;
  3854. OS_16:
  3855. oppostfix:=PF_H;
  3856. OS_S16:
  3857. oppostfix:=PF_SH;
  3858. OS_32,
  3859. OS_S32:
  3860. oppostfix:=PF_None;
  3861. else
  3862. InternalError(200308299);
  3863. end;
  { under-aligned access: build the value piece by piece }
  3864. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  3865. begin
  { dir is the offset step from one piece to the next: the first load
    always fetches the least significant piece, so on big endian targets
    the offset starts at the end and walks backwards }
  3866. if target_info.endian=endian_big then
  3867. dir:=-1
  3868. else
  3869. dir:=1;
  3870. case FromSize of
  3871. OS_16,OS_S16:
  3872. begin
  3873. { only complicated references need an extra loadaddr }
  3874. if assigned(ref.symbol) or
  3875. (ref.index<>NR_NO) or
  3876. (ref.offset<-255) or
  3877. (ref.offset>4094) or
  3878. { sometimes the compiler reused registers }
  3879. (reg=ref.index) or
  3880. (reg=ref.base) then
  3881. begin
  3882. tmpreg2:=getintregister(list,OS_INT);
  3883. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3884. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  3885. end
  3886. else
  3887. usedtmpref:=ref;
  3888. if target_info.endian=endian_big then
  3889. inc(usedtmpref.offset,1);
  3890. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  3891. tmpreg:=getintregister(list,OS_INT);
  { low byte into reg, high byte (sign extended for OS_S16) into tmpreg }
  3892. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3893. inc(usedtmpref.offset,dir);
  3894. if FromSize=OS_16 then
  3895. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  3896. else
  3897. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  3898. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  3899. end;
  3900. OS_32,OS_S32:
  3901. begin
  3902. tmpreg:=getintregister(list,OS_INT);
  3903. { only complicated references need an extra loadaddr }
  3904. if assigned(ref.symbol) or
  3905. (ref.index<>NR_NO) or
  3906. (ref.offset<-255) or
  3907. (ref.offset>4092) or
  3908. { sometimes the compiler reused registers }
  3909. (reg=ref.index) or
  3910. (reg=ref.base) then
  3911. begin
  3912. tmpreg2:=getintregister(list,OS_INT);
  3913. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3914. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  3915. end
  3916. else
  3917. usedtmpref:=ref;
  { so.shiftimm is filled in before each ORR below }
  3918. shifterop_reset(so);so.shiftmode:=SM_LSL;
  { 2 byte alignment: two halfword loads are enough }
  3919. if ref.alignment=2 then
  3920. begin
  3921. if target_info.endian=endian_big then
  3922. inc(usedtmpref.offset,2);
  3923. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  3924. inc(usedtmpref.offset,dir*2);
  3925. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  3926. so.shiftimm:=16;
  3927. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  3928. end
  3929. else
  { 1 byte alignment: four byte loads, shifted by 0, 8, 16 and 24 bits }
  3930. begin
  3931. if target_info.endian=endian_big then
  3932. inc(usedtmpref.offset,3);
  3933. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3934. inc(usedtmpref.offset,dir);
  3935. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3936. so.shiftimm:=8;
  3937. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  3938. inc(usedtmpref.offset,dir);
  3939. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3940. so.shiftimm:=16;
  3941. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  3942. inc(usedtmpref.offset,dir);
  3943. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3944. so.shiftimm:=24;
  3945. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  3946. end;
  3947. end
  3948. else
  3949. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  3950. end;
  3951. end
  3952. else
  3953. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { a signed byte loaded for a 16 bit destination gets an additional
    OS_16 -> OS_32 register conversion }
  3954. if (fromsize=OS_S8) and (tosize = OS_16) then
  3955. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  3956. end;
  { Emits "dst := op src" / "dst := dst op src".

    Only OP_NOT is handled here: the MVN result is narrowed again with the
    zero or sign extension instruction matching an 8 or 16 bit size.  All
    other operations go to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { sizes not listed below need no fix-up after the MVN }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
      end;
    end;
  { Emits Thumb-2 code for "dst := src op a" with a constant operand a.

    setflags - request a flag setting instruction (cgsetflags has the same
               effect)
    ovloc    - set to LOC_VOID on entry; set to LOC_FLAGS with F_CS (add)
               or F_CC (sub) when flags were requested for an ADD/SUB of
               size OS_8, OS_16 or OS_32 on the shifter constant path

    Constants accepted by is_shifter_const are used directly (shifts and
    rotates become a MOV with a shifter operand); otherwise a number of
    multiplication and AND special cases are tried before the constant is
    loaded into a temporary register and a_op_reg_reg_reg_checkoverflow is
    used.  maybeadjustresult is called on dst at the end. }
  3972. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  3973. var
  3974. shift, width : byte;
  3975. tmpreg : tregister;
  3976. so : tshifterop;
  3977. l1 : longint;
  3978. begin
  3979. ovloc.loc:=LOC_VOID;
  { swap ADD and SUB when -a is the encodable value; with range checking
    enabled the most negative 32 bit value is excluded from the negation }
  3980. if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
  3981. case op of
  3982. OP_ADD:
  3983. begin
  3984. op:=OP_SUB;
  3985. a:=aint(dword(-a));
  3986. end;
  3987. OP_SUB:
  3988. begin
  3989. op:=OP_ADD;
  3990. a:=aint(dword(-a));
  3991. end
  3992. end;
  { path 1: the constant can be used as an immediate operand }
  3993. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  3994. case op of
  3995. OP_NEG,OP_NOT,
  3996. OP_DIV,OP_IDIV:
  3997. internalerror(200308285);
  { shifts and rotates: MOV dst,src with a shifter operand, or a plain MOV
    for a count of zero; counts above 32 are internal errors }
  3998. OP_SHL:
  3999. begin
  4000. if a>32 then
  4001. internalerror(2014020703);
  4002. if a<>0 then
  4003. begin
  4004. shifterop_reset(so);
  4005. so.shiftmode:=SM_LSL;
  4006. so.shiftimm:=a;
  4007. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4008. end
  4009. else
  4010. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4011. end;
  4012. OP_ROL:
  4013. begin
  4014. if a>32 then
  4015. internalerror(2014020704);
  4016. if a<>0 then
  4017. begin
  4018. shifterop_reset(so);
  { rotate left by a is emitted as rotate right by 32-a }
  4019. so.shiftmode:=SM_ROR;
  4020. so.shiftimm:=32-a;
  4021. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4022. end
  4023. else
  4024. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4025. end;
  4026. OP_ROR:
  4027. begin
  4028. if a>32 then
  4029. internalerror(2014020705);
  4030. if a<>0 then
  4031. begin
  4032. shifterop_reset(so);
  4033. so.shiftmode:=SM_ROR;
  4034. so.shiftimm:=a;
  4035. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4036. end
  4037. else
  4038. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4039. end;
  4040. OP_SHR:
  4041. begin
  4042. if a>32 then
  4043. internalerror(200308292);
  4044. shifterop_reset(so);
  4045. if a<>0 then
  4046. begin
  4047. so.shiftmode:=SM_LSR;
  4048. so.shiftimm:=a;
  4049. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4050. end
  4051. else
  4052. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4053. end;
  4054. OP_SAR:
  4055. begin
  4056. if a>32 then
  4057. internalerror(200308295);
  4058. if a<>0 then
  4059. begin
  4060. shifterop_reset(so);
  4061. so.shiftmode:=SM_ASR;
  4062. so.shiftimm:=a;
  4063. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4064. end
  4065. else
  4066. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4067. end;
  4068. else
  { remaining operations; the "else" part of the case holds two
    statements: the instruction itself and the overflow location update }
  { ADD/SUB with a constant outside 0..4095 go through a temporary
    register instead of an immediate operand }
  4069. if (op in [OP_SUB, OP_ADD]) and
  4070. ((a < 0) or
  4071. (a > 4095)) then
  4072. begin
  4073. tmpreg:=getintregister(list,size);
  4074. a_load_const_reg(list, size, a, tmpreg);
  4075. if cgsetflags or setflags then
  4076. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression yields PF_S when flags are requested and the
    enum value with ordinal 0 otherwise }
  4077. list.concat(setoppostfix(
  4078. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4079. end
  4080. else
  4081. begin
  4082. if cgsetflags or setflags then
  4083. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4084. list.concat(setoppostfix(
  4085. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4086. end;
  { report where the overflow information can be found }
  4087. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4088. begin
  4089. ovloc.loc:=LOC_FLAGS;
  4090. case op of
  4091. OP_ADD:
  4092. ovloc.resflags:=F_CS;
  4093. OP_SUB:
  4094. ovloc.resflags:=F_CC;
  4095. end;
  4096. end;
  4097. end
  4098. else
  { path 2: multiplications and constants that are no shifter constant }
  4099. begin
  4100. { there could be added some more sophisticated optimizations }
  4101. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4102. a_load_reg_reg(list,size,size,src,dst)
  4103. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4104. a_load_const_reg(list,size,0,dst)
  4105. else if (op in [OP_IMUL]) and (a=-1) then
  4106. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4107. { we do this here instead in the peephole optimizer because
  4108. it saves us a register }
  4109. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4110. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4111. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4112. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4113. begin
  4114. if l1>32 then{roozbeh does this ever happen?}
  4115. internalerror(200308296);
  4116. shifterop_reset(so);
  4117. so.shiftmode:=SM_LSL;
  4118. so.shiftimm:=l1;
  4119. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4120. end
  4121. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4122. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4123. begin
  4124. if l1>32 then{does this ever happen?}
  4125. internalerror(201205181);
  4126. shifterop_reset(so);
  4127. so.shiftmode:=SM_LSL;
  4128. so.shiftimm:=l1;
  4129. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4130. end
  4131. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4132. begin
  4133. { nothing to do on success }
  4134. end
  4135. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4136. Just using mov x, #0 might allow some easier optimizations down the line. }
  4137. else if (op = OP_AND) and (dword(a)=0) then
  4138. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4139. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4140. else if (op = OP_AND) and (not(dword(a))=0) then
  4141. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4142. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4143. broader range of shifterconstants.}
  4144. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4145. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  { AND special cases: direct immediate, UXTH for $FFFF, BIC with the
    inverted mask, or BFC when the inverted mask is one continuous run of
    bits }
  4146. else if (op = OP_AND) and is_thumb32_imm(a) then
  4147. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  4148. else if (op = OP_AND) and (a = $FFFF) then
  4149. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  4150. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4151. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  4152. else if (op = OP_AND) and is_continuous_mask(not(a), shift, width) then
  4153. begin
  4154. a_load_reg_reg(list,size,size,src,dst);
  4155. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4156. end
  4157. else
  { general case: materialise the constant and use the register form }
  4158. begin
  4159. tmpreg:=getintregister(list,size);
  4160. a_load_const_reg(list,size,a,tmpreg);
  4161. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4162. end;
  4163. end;
  4164. maybeadjustresult(list,op,size,dst);
  4165. end;
  const
    { Translation table from a generic code generator operation (TOpCG) to
      the Thumb-2 opcode emitted for its register/register form.  The table
      is positional: the entries have to stay in the order in which TOpCG
      declares its values. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,
      A_UDIV,A_SDIV,A_MUL,A_MUL,
      A_NONE,A_MVN,A_ORR,
      A_ASR,A_LSL,A_LSR,
      A_SUB,A_EOR,A_NONE,A_ROR
    );
  { Emits Thumb-2 code computing "dst := src2 <op> src1".

    list      - instruction list the generated code is appended to
    op        - operation to perform; OP_NEG and OP_NOT are rejected with an
                internal error
    size      - operand size; OP_ROL and OP_ROR only accept OS_32/OS_S32
    src1,src2 - source registers; for OP_ROL/OP_ROR src1 holds the rotate
                count and src2 the value being rotated
    dst       - destination register
    setflags  - when true (or when the cgsetflags field is set) the flags are
                updated: a multiplication is done as a long multiply followed
                by a compare of the high word, every other operation simply
                gets the S postfix
    ovloc     - reset to LOC_VOID on entry; after a flag-setting
                multiplication it is LOC_FLAGS with F_NE meaning overflow }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ROR'ing by (32 - count); the adjusted count
              lives in a scratch register so that src1 keeps its value for
              any later use by the caller }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR,dst,src2,tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR,dst,src2,src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: dst receives the low word, overflowreg the
                  high word that is inspected below }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { all three registers coincide: multiply by a copy
                          of the operand held in a scratch register }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension
                      of the low word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { all three registers coincide: multiply by a copy
                          of the operand held in a scratch register }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { plain three register form; the postfix evaluates to PF_S when
              flags are requested and to ordinal 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Turns the condition described by f into a 0/1 value in reg.  An ITE
    instruction is emitted first, followed by "mov reg,1" carrying the
    condition itself and "mov reg,0" carrying the inverse condition. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      item: taicpu;
    begin
      item:=taicpu.op_cond(A_ITE,flags_to_cond(f));
      list.concat(item);

      item:=taicpu.op_reg_const(A_MOV,reg,1);
      list.concat(setcondition(item,flags_to_cond(f)));

      item:=taicpu.op_reg_const(A_MOV,reg,0);
      list.concat(setcondition(item,inverse_cond(flags_to_cond(f))));
    end;
  { Emits the procedure prologue.

    list         - instruction list receiving the prologue
    localsize    - size of the local stack area; rounded up to a multiple of
                   4 first and later padded to the configured
                   localalignmax
    nostackframe - when true nothing at all is emitted

    In order, the prologue: saves the entry stack pointer in R12 when a
    separate frame pointer is used, pushes the used non-volatile integer
    registers with STMFD, sets up the frame pointer, reserves the local
    area, and finally stores the used non-volatile FPU registers with
    SFM. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      memref : treference;
      dummyshift : byte;
      firstfpu,lastfpu,
      regidx : byte;
      saveregs : tcpuregisterset;
      misalign : pint;
    begin
      localsize:=align(localsize,4);
      { call instruction does not put anything on the stack }
      misalign:=0;
      if nostackframe then
        exit;

      { find the first and last used non-volatile FPU register; each one
        accounts for 12 bytes of stack }
      firstfpu:=RS_NO;
      lastfpu:=RS_NO;
      for regidx:=RS_F0 to RS_F7 do
        if regidx in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfpu=RS_NO then
              firstfpu:=regidx;
            lastfpu:=regidx;
            inc(misalign,12);
          end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          { keep the stack pointer value of procedure entry in R12 }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { push the integer registers with a pre-indexed store multiple
        through the stack pointer }
      reference_reset(memref,4);
      memref.index:=NR_STACK_POINTER_REG;
      memref.addressmode:=AM_PREINDEXED;
      saveregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        saveregs:=saveregs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (saveregs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(saveregs,RS_R14);
      if saveregs<>[] then
        begin
          { four bytes per pushed register }
          for regidx:=RS_R0 to RS_R15 do
            if regidx in saveregs then
              inc(misalign,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { frame pointer := stack pointer at entry (held in R12) - 4 }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      misalign:=misalign mod current_settings.alignment.localalignmax;
      if (localsize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { pad the local area so that pushed registers plus locals end on
            a localalignmax boundary }
          localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(localsize,dummyshift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize));
            end
          else
            begin
              { the size cannot be encoded as an immediate: go through R12 }
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,localsize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if firstfpu<>RS_NO then
        begin
          { store the FPU registers at floatregstart relative to the frame
            pointer; a large displacement is first computed into R12 }
          reference_reset(memref,4);
          if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tarmprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfpu,R_SUBWHOLE),
            lastfpu-firstfpu+1,memref));
        end;
    end;
  { Emits the procedure epilogue.

    list         - instruction list receiving the epilogue
    parasize     - not referenced by this implementation
    nostackframe - when true only "mov pc,r14" is emitted

    With a stack frame the epilogue reloads the used non-volatile FPU
    registers with LFM, releases the local stack area and pops the saved
    integer registers with LDMFD.  R15 takes the place of R14 in the popped
    set, so the pop also performs the return; if no register has to be
    popped the return is "mov r15,r14". }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      memref : treference;
      firstfpu,lastfpu,
      regidx : byte;
      dummyshift : byte;
      restoreregs : tcpuregisterset;
      framesize : longint;
      misalign : pint;
    begin
      if nostackframe then
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
          exit;
        end;

      misalign:=0;
      { find the first and last used non-volatile FPU register }
      firstfpu:=RS_NO;
      lastfpu:=RS_NO;
      for regidx:=RS_F0 to RS_F7 do
        if regidx in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfpu=RS_NO then
              firstfpu:=regidx;
            lastfpu:=regidx;
            { no inc(misalign,12) here: the floating point register space
              is already included in the size returned by
              calc_stackframe_size below }
          end;

      if firstfpu<>RS_NO then
        begin
          { reload the FPU registers from floatregstart relative to the
            frame pointer; a large displacement is first computed into R12 }
          reference_reset(memref,4);
          if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tarmprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfpu,R_SUBWHOLE),
            lastfpu-firstfpu+1,memref));
        end;

      { build the set of integer registers to pop; R15 replaces R14 so that
        the pop returns to the caller }
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (restoreregs<>[]) then
        begin
          exclude(restoreregs,RS_R14);
          include(restoreregs,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        restoreregs:=restoreregs+[RS_FRAME_POINTER_REG,RS_R15];

      { four bytes per popped register }
      for regidx:=RS_R0 to RS_R15 do
        if regidx in restoreregs then
          inc(misalign,4);
      misalign:=misalign mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if (framesize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { same padding computation as used when the frame was set up }
          framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(framesize,dummyshift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,framesize));
            end
          else
            begin
              { the size cannot be encoded as an immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if restoreregs=[] then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
      else
        begin
          reference_reset(memref,4);
          memref.index:=NR_STACK_POINTER_REG;
          memref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,memref,R_INTREGISTER,R_SUBWHOLE,restoreregs),PF_FD));
        end;
    end;
  { Emits the load or store "op reg,ref" after rewriting ref into a form
    the instruction accepts.

    list      - instruction list the code is appended to
    op        - load/store opcode
    oppostfix - postfix applied to the emitted instruction
    reg       - register that is loaded or stored
    ref       - memory reference; passed by value and modified locally

    Returns the reference that was finally used in the instruction.

    Rewriting steps, in order:
      1. a reference with only an index gets the index moved to the base;
      2. symbols that are not PC relative, references with neither base nor
         index, and offsets outside the accepted range are materialised in
         a scratch register (symbols through a literal appended to
         current_procinfo.aktlocaldata and loaded PC relative);
      3. base + index + offset is reduced to base + index;
      4. a PC base combined with a shifted index gets the PC copied into a
         scratch register;
      5. the floating point opcodes get base and index folded into a single
         base register. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      scratch : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      isfpuop : boolean;
    begin
      scratch:=NR_NO;
      isfpuop:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-255) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (isfpuop and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0) or
           { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
           assigned(ref.symbol)
          )
         ) then
        begin
          reference_reset(poolref,4);
          scratch:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              { emit "label: symbol+offset" into the local data and load
                that entry relative to the PC }
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;
              current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,scratch,poolref));
              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            a_load_const_reg(list,OS_ADDR,ref.offset,scratch);

          { merge the scratch register into the reference }
          if (ref.base=NR_NO) then
            ref.base:=scratch
          else if ref.index<>NR_NO then
            begin
              list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
              ref.base:=scratch;
            end
          else
            begin
              ref.index:=scratch;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base + index + offset: fold the offset into a register }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if scratch<>NR_NO then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,scratch,scratch)
          else
            begin
              scratch:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,scratch);
              ref.base:=scratch;
            end;
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          scratch:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV,scratch,NR_R15));
          ref.base:=scratch;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if isfpuop and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if scratch=NR_NO then
            begin
              scratch:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,ref.index));
              ref.base:=scratch;
              ref.index:=NR_NO;
            end
          else if ref.base=scratch then
            begin
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,scratch,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,scratch,ref.index));
              ref.index:=NR_NO;
            end
          else
            begin
              { the scratch register has to be the index in this case }
              if ref.index<>scratch then
                internalerror(200403161);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,ref.base,scratch))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
              ref.base:=scratch;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result:=ref;
    end;
  { Copies multimedia register reg1 into reg2.

    Only the single precision case (fromsize and tosize both OS_F32) emits
    code: a VMOV with the F32 postfix, which is also registered as a move
    instruction.  Every other size combination emits nothing; shuffle is
    not referenced.

    NOTE(review): OS_F64 -> OS_F64 and OS_F32 -> OS_F64 were explicit but
    empty branches (their VMOV/VCVT instructions were commented out), so a
    double precision move silently generates no code - confirm that this
    can never be requested for the supported FPUs. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;
      movinstr:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
      movinstr:=setoppostfix(movinstr,PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
    end;
  { Loads multimedia register reg from memory reference ref with VLDR.  The
    postfix is F32 when fromsize is OS_F32 and F64 for every other size;
    tosize and shuffle are not referenced.  The reference is legalised by
    handle_load_store. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      postfix : toppostfix;
    begin
      if fromsize=OS_F32 then
        postfix:=PF_F32
      else
        postfix:=PF_F64;
      handle_load_store(list,A_VLDR,postfix,reg,ref);
    end;
  { Stores multimedia register reg to memory reference ref with VSTR.  The
    postfix is F32 when fromsize is OS_F32 and F64 for every other size;
    tosize and shuffle are not referenced.  The reference is legalised by
    handle_load_store. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      postfix : toppostfix;
    begin
      if fromsize=OS_F32 then
        postfix:=PF_F32
      else
        postfix:=PF_F64;
      handle_load_store(list,A_VSTR,postfix,reg,ref);
    end;
  { Moves integer register intreg into multimedia register mmreg with
    VMOV.  Only a destination size of OS_F32 is supported, anything else
    raises internal error 2012100813.  fromsize and shuffle are not
    checked. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves multimedia register mmreg into integer register intreg with
    VMOV.  Only a source size of OS_F32 is supported, anything else raises
    internal error 2012100814.  tosize and shuffle are not checked. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { 64 bit register/register operation for Thumb-2.  Only OP_NEG is
    handled here; every other operation is forwarded to the inherited
    implementation.

    The negation is computed as 0 - regsrc: RSB with the S postfix on the
    low words, then SBC of the high word from a scratch register holding
    zero.  The flags are marked allocated for the duration of the
    sequence. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          { low word: regdst.reglo := 0 - regsrc.reglo, updating the flags }
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          { high word: regdst.reghi := 0 - regsrc.reghi with borrow }
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
    end;
  { 64 bit register/register operation for Thumb.

    list   - instruction list the code is appended to
    op     - OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any
             other operation raises internal error 2003083101
    size   - not referenced by this implementation
    regsrc - source register pair
    regdst - destination register pair; for OP_ADD/OP_SUB (two operand
             instructions) it is also the first operand

    OP_NEG, OP_ADD and OP_SUB are emitted as a low word instruction
    followed by a carry consuming high word instruction; the flags are
    marked allocated around each such pair and released afterwards. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { regdst := 0 - regsrc.
              NOTE(review): regdst is cleared before regsrc is read, so a
              destination sharing a register with the source would lose
              the operand - confirm callers never pass overlapping pairs }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            { each half is handled independently by the 32 bit generator }
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations carry nothing between the halves }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { release the flags again, as the OP_NEG sequence does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { release the flags again, as the OP_NEG sequence does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { 64 bit constant/register operation for Thumb: "reg := reg <op> value".

    list  - instruction list the code is appended to
    op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any other operation
            raises internal error 2003083101
    size  - not referenced by this implementation
    value - 64 bit constant operand, split into its low and high 32 bits
    reg   - register pair that is both operand and destination

    For OP_ADD/OP_SUB the low word instruction produces the carry that the
    ADC/SBC on the high word consumes.  The high word constant is loaded
    into its scratch register before that pair is emitted, so that no
    constant loading code sits between the carry producer and its consumer
    (a_load_const_reg is not visible here and may emit flag modifying
    instructions).  The flags are marked allocated around the pair and
    released afterwards. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg,
      hireg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations carry nothing between the halves }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),hireg);
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                { low word in 0..255 is used as an immediate }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            list.concat(taicpu.op_reg_reg(A_ADC,reg.reghi,hireg));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            hireg:=cg.getintregister(list,OS_32);
            { same aint() conversion of the high word as in OP_ADD }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),hireg);
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                { low word in 0..255 is used as an immediate }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            list.concat(taicpu.op_reg_reg(A_SBC,reg.reghi,hireg));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Creates the global code generator objects cg and cg64 for the selected
    instruction set.  GenerateThumb2Code is tested first, then
    GenerateThumbCode; plain ARM is the fallback.  The Thumb-2 and ARM
    variants also select their assembler optimizer class; the Thumb
    variant leaves casmoptimizer untouched. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          { Thumb-2 }
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end
      else if GenerateThumbCode then
        begin
          { Thumb: the assignment of TCpuThumbAsmOptimizer is disabled }
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
        end
      else
        begin
          { ARM }
          cg:=tarmcgarm.create;
          cg64:=tarmcg64farm.create;
          casmoptimizer:=TCpuAsmOptimizer;
        end;
    end;
  4798. end.