cgcpu.pas 221 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm: code generator class derived from tcg, shared between all arm
    architectures }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;

    { --- parameter passing --- }
    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  protected
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  public
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;

    { --- calls --- }
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { --- move instructions --- }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    { the a_internal_* variants additionally return a reference }
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;

    { --- fpu move instructions --- }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;

    { --- comparison operations and jumps --- }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure entry/exit and related helpers --- }
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;

    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

    { --- block copies --- }
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);

    { --- overflow checking --- }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;

    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;

    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    { virtual: overridden by tthumbcgarm and tthumb2cgarm below }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;

    { --- mm register moves and operations --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;

    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { try to generate an optimized 32 Bit multiplication, returns true if
      the code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;

    { clear out potential overflow bits from 8 or 16 bit operations, i.e.
      the upper 24/16 bits of a register after an operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla for thumb requires that none of the registers is equal to r13/r15,
      this method ensures this }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);

    procedure g_maybe_tls_init(list : TAsmList); override;
  end;
  { tcgarm: code generator shared between normal arm and thumb-2 }
  tcgarm = class(tbasecgarm)
    { --- integer operations with two operands --- }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;

    { --- integer operations with three operands --- }
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    { --- loads --- }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    { Multiply two 32-bit registers into lo and hi 32-bit registers }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { tarmcgarm: code generator for normal arm; it only provides its own
    register allocator setup and teardown }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { tbasecg64farm: 64 bit code generator base for all arm flavours; adds
    nothing to tcg64f32 itself and is the common ancestor of the 64 bit
    code generator classes declared in this unit }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm: 64 bit code generator shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { --- 64 bit integer operations --- }
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;

    { --- moves between a 64 bit integer register pair and an mm register --- }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { tarmcg64farm: adds nothing to tcg64farm }
  tarmcg64farm = class(tcg64farm)
  end;
  { tthumbcgarm: derives directly from tbasecgarm, i.e. it does not inherit
    the tcgarm implementations shared by normal arm and thumb-2 }
  tthumbcgarm = class(tbasecgarm)
    { --- register allocator setup and teardown --- }
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    { --- procedure entry and exit --- }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- loads --- }
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { tthumbcg64farm: 64 bit code generator derived directly from
    tbasecg64farm; only these two operations are overridden here }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { tthumb2cgarm: derives from tcgarm, the class shared between normal arm
    and thumb-2, and overrides the methods listed below }
  tthumb2cgarm = class(tcgarm)
    { --- register allocator setup and teardown --- }
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { --- loads --- }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure entry and exit --- }
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;

    { --- mm register moves --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { tthumb2cg64farm: derives from tcg64farm and only overrides a_op64_reg_reg }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { condition code to use for each topcmp value; the entries follow the
      declaration order of topcmp, which is declared outside this unit }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (
      C_NONE,
      C_EQ,
      C_GT,
      C_LT,
      C_GE,
      C_LE,
      C_NE,
      C_LS,
      C_CC,
      C_CS,
      C_HI
    );

    { NOTE(review): presumably the stack page size in bytes used on windows
      targets; its use is not visible in this part of the file }
    winstackpagesize = 4096;

  { returns the instruction postfix matching a floating point definition }
  function get_fpu_postfix(def : tdef) : toppostfix;

  procedure create_codegen;
  175. implementation
  176. uses
  177. globals,verbose,systems,cutils,
  178. aopt,aoptcpu,
  179. fmodule,
  180. symconst,symsym,symtable,
  181. tgobj,
  182. procinfo,cpupi,
  183. paramgr;
  184. { Range check must be disabled explicitly as conversions between signed and unsigned
  185. 32-bit values are done without explicit typecasts }
  186. {$R-}
  { Returns the instruction postfix for the float type described by def:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Any other float type triggers internal error 200401272, a definition
    which is not a floatdef triggers internal error 200401271. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators for the integer, fpu and mm register
    classes. The order of the registers inside each list is passed on
    unchanged to the allocator. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { --- integer registers ---
        currently, we always save R14, so we can use it }
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 does not serve as frame pointer of the current procedure, so
          it is handed to the allocator as well }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { no procinfo available or R11 is the frame pointer: leave R11 out }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { --- fpu registers, only created if the selected fpu type has FPA --- }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { --- mm registers ---
        The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
             RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[])
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[]);
    end;
  { Releases the integer, fpu and mm register allocators and then runs the
    inherited cleanup. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { init_register_allocators creates the fpu and mm allocators only
        conditionally }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg. The strategies are tried in this order:
      1. a single MOV, if a is a shifter constant
      2. a single MVN, if not(a) is a shifter constant
      3. MOV+ORR, if a can be split into two shifter constants
      4. MVN+BIC, if not(a) can be split into two shifter constants
      5. a pc relative LDR from a 32 bit constant added to the local data
         of the current procedure
    Sizes other than 8, 16 and 32 bit trigger internal error 2002090907. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      poollabel : tasmlabel;
      poolref : treference;
      first_imm, second_imm : DWord;
      inverted : tcgint;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      inverted:=not(a);

      if is_shifter_const(a,rot) then
        list.concat(taicpu.op_reg_const(A_MOV,reg,a))
      else if is_shifter_const(inverted,rot) then
        list.concat(taicpu.op_reg_const(A_MVN,reg,inverted))
      else if split_into_shifter_const(a,first_imm,second_imm) then
        begin
          { loading of constants with mov and orr }
          list.concat(taicpu.op_reg_const(A_MOV,reg,first_imm));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,second_imm));
        end
      else if split_into_shifter_const(inverted,first_imm,second_imm) then
        begin
          { loading of constants with mvn and bic }
          list.concat(taicpu.op_reg_const(A_MVN,reg,first_imm));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,second_imm));
        end
      else
        begin
          reference_reset(poolref,4,[]);

          { place label and constant into the local data of the procedure;
            symboldata is set to the list entry of the label, which is the
            last entry at that moment }
          current_asmdata.getjumplabel(poollabel);
          cg.a_label(current_procinfo.aktlocaldata,poollabel);
          poolref.symboldata:=current_procinfo.aktlocaldata.last;
          current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

          { load the constant relative to the program counter }
          poolref.symbol:=poollabel;
          poolref.base:=NR_PC;
          list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
        end;
    end;
  { Loads a value of size fromsize from Ref into reg.

    If fromsize is not smaller than tosize, the load is carried out with
    tosize. A single load instruction is emitted through handle_load_store
    unless
      - the reference has an alignment of 1 or 2 which is smaller than the
        size being loaded, or
      - a halfword load is required and the selected cpu does not have
        CPUARM_HAS_ALL_MEM.
    In those cases the value is read in smaller parts which are combined
    with ORR and shifted operands. The part read first always ends up in
    the low bits of reg; on big endian targets the offset therefore starts
    at the highest address of the value and is decreased.

    Sizes other than 8, 16 and 32 bit trigger internal error 200308297. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,auxreg : tregister;
      shiftop : tshifterop;
      step : integer;
      limited_mem,
      split_load : boolean;

    { Sets up partref for the partial loads. Only complicated references
      need an extra loadaddr: those with a symbol, an index register, an
      offset below -4095 or above maxoffset, or which use the destination
      register themselves. All other references are used as they are. }
    procedure prepare_partref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-4095) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            auxreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,auxreg);
            reference_reset_base(partref,auxreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    begin
      limited_mem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { postfix of the load instruction }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308297);
      end;

      { without CPUARM_HAS_ALL_MEM a signed byte is loaded as unsigned byte
        and sign extended at the end of this method }
      if (fromsize=OS_S8) and limited_mem then
        postfix:=PF_B;

      split_load:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
                  (limited_mem and (postfix in [PF_SH,PF_H]));

      if not split_load then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                prepare_partref(4094);

                if target_info.endian=endian_big then
                  inc(partref.offset,1);

                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                shiftop.shiftimm:=8;

                partreg:=getintregister(list,OS_INT);
                { first byte goes directly into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                { the second byte is loaded signed for OS_S16 }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);

                prepare_partref(4092);

                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;

                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end
                else
                  begin
                    { four byte loads; auxreg receives the third byte,
                      partreg the second and afterwards the fourth one }
                    auxreg:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,auxreg);
                    shiftop.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,auxreg,shiftop));
                    shiftop.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;

      { fix up the result of signed byte loads }
      if fromsize=OS_S8 then
        begin
          if limited_mem then
            a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
          else if tosize = OS_16 then
            a_load_reg_reg(list,OS_16,OS_32,reg,reg);
        end;
    end;
  { Subtracts ioffset from the self parameter of procdef at every caller
    side location of that parameter. If ioffset is not a shifter constant
    it is loaded into R12 first. Triggers internal error 2003052503 if
    procdef has no parameter symbol called self and internal error
    2003091803 if self is neither in a register nor in memory. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      dummyshift : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(2003052503);

      { walk the linked list of locations of the self parameter }
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,dummyshift) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,dummyshift) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as parameter: it is loaded either into the
    register of the parameter location or stored to its memory location.
    Any other kind of location triggers internal error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      pararef: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);

      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { build a reference from index register and offset of the
              parameter location }
            reference_reset(pararef,paraloc.alignment,[]);
            pararef.base:=paraloc.location^.reference.index;
            pararef.offset:=paraloc.location^.reference.offset;
            a_load_const_ref(list,size,a,pararef);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);
        else
          internalerror(2002081101);
      end;
    end;
  { Copies a parameter from ref to the memory location paralocref. A 64 bit
    float parameter whose current location is 32 bit wide is copied as a
    block of 4 bytes, everything else is left to the inherited method. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Passes the address of r as parameter: the address is loaded either
    directly into the register of the parameter location or, through a
    temporary register, stored to its memory location. Any other kind of
    location triggers internal error 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      pararef: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);

      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            reference_reset(pararef,paraloc.alignment,[]);
            pararef.base:=paraloc.location^.reference.index;
            pararef.offset:=paraloc.location^.reference.offset;
            { calculate the address in a temporary register and store it }
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,pararef);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a BL to the function symbol called s; if weak is set, the symbol
    is referenced as weak symbol. Marks the current procedure as doing a
    call. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callref : treference;
      target : TAsmSymbol;
    begin
      if weak then
        target:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        target:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);

      reference_reset_symbol(callref,target,0,sizeof(pint),[]);

      { pic address only if the target uses a got and pic code is requested }
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_pic
      else
        callref.refaddr:=addr_full;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,callref));

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); therefore the
        flag is set here instead of raising internal error 2003060703 when
        it is missing }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg: a BLX if the selected cpu has
    CPUARM_HAS_BLX, otherwise a MOV of PC into R14 followed by a MOV of reg
    into PC. Marks the current procedure as doing a call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); therefore the
        flag is set here instead of raising internal error 2003060703 when
        it is missing }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Two operand form of a_op_const_reg_reg: reg is used both as source and
    as destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  { Applies op with the constant a to the memory location ref: the value is
    loaded into a temporary register, the result is calculated into a
    second one and written back through the reference returned by the
    load. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      storeref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);

      storeref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,storeref);
    end;
  { Carries out op with the operands src and dst, the result is written to
    dst. OP_NEG and OP_NOT are handled here, every other operation is
    passed on to a_op_reg_reg_reg with dst as second source. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            { dst:=0-src }
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if size in [OS_8, OS_16, OS_S8, OS_S16] then
              begin
                { invert the value shifted to the top of the register ... }
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shiftop.shiftimm:=24
                else
                  shiftop.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
                { ... and shift it back by the same amount, arithmetically
                  for signed sizes and logically for unsigned ones.
                  Using a shift here allows this to be folded into another
                  instruction }
                if size in [OS_S8, OS_S16] then
                  shiftop.shiftmode:=SM_ASR
                else
                  shiftop.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
              end
            else
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  { Translation tables indexed by TOpCG; the entries follow the declaration
    order of TOpCG, which is declared outside this unit. A_NONE marks
    operations without an entry in the respective table. }
  const
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,A_NONE,
      A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE
    );

    { differs from the table above only in the shift and rotate entries
      A_ASR, A_LSL, A_LSR and A_ROR }
    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,A_ASR,
      A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
    );

    { no operation uses a postfix }
    op_reg_postfix: array[TOpCG] of TOpPostfix = (
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None
    );
  { dst:=src op a, implemented by the overflow checking variant with
    setflags disabled. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      { only needed to satisfy the var parameter of the callee; it is not
        read here afterwards }
      unused_ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,unused_ovloc);
    end;
  { Three register operation, implemented by the overflow checking variant
    with setflags disabled. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
    var
      { only needed to satisfy the var parameter of the callee; it is not
        read here afterwards }
      unused_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,unused_ovloc);
    end;
  { Returns the shift mode of a shifter operand for a shift or rotate
    operation. OP_ROL is mapped to SM_ROR just like OP_ROR.
    NOTE(review): presumably callers adjust the rotate count for OP_ROL;
    this is not visible here. Any other operation triggers internal error
    2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        OP_ROR,
        OP_ROL:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace "dst := src * a" by a short sequence of shifted
    mov/add/sub/rsb instructions.

    Returns true if a replacement sequence was appended to list, false if
    nothing was emitted and the caller has to generate a real multiply. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      factor : dword;
      power : longint;
      shiftop : tshifterop;
      addbits : byte;
      isneg : boolean;
      firstinstr : boolean;
      dummy,
      extracost : byte;
      budget : byte;
    begin
      result:=true;
      isneg:=a<0;
      { a negative factor needs one more instruction: the closing rsb }
      if isneg then
        extracost:=1
      else
        extracost:=0;
      shiftop.rs:=NR_NO;
      shiftop.shiftmode:=SM_LSL;
      factor:=dword(abs(a));
      { number of set bits, not counting bit 0 }
      addbits:=popcnt(factor and $fffffffe);

      { heuristics to estimate how many instructions are reasonable to replace
        the mul; this is currently based on XScale timings.

        In the simplest case a mov (load the constant) plus a mul is needed,
        which takes at least 1+4 cycles. Because the first shift imm. might
        cause a stall and because replacing the mul needs more instructions,
        at most 3 instructions are generated by default. }
      budget:=3;

      { if the constant is not a shifter operand, loading it needs some
        mov/mvn/bic/or sequence or a ldr, so one more replacement instruction
        is still beneficial }
      if not(is_shifter_const(dword(a),dummy)) and not(is_shifter_const(not(dword(a)),dummy)) then
        inc(budget);

      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(budget);

      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(budget);

      { trivial factors first }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { sum of shifted copies: one add per set bit of the factor; the least
        significant bit is free because it is folded into the first
        instruction }
      else if (addbits+extracost<=budget) and
         (addbits<=popcnt(dword(nextpowerof2(factor,power)-factor) and $fffffffe)) then
        begin
          firstinstr:=true;
          while factor<>0 do
            begin
              { handle the highest remaining bit }
              shiftop.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  { dst := src + (src shl n), this also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
                  dec(factor);
                end
              else if firstinstr then
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop))
              else
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shiftop));
              firstinstr:=false;
              dec(factor,1 shl shiftop.shiftimm);
            end;
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { otherwise: subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(factor,power)-factor) and $fffffffe)+extracost+1<=budget then
        begin
          firstinstr:=true;
          while factor<>0 do
            begin
              if firstinstr then
                begin
                  { continue with the distance to the power of two }
                  factor:=(1 shl power)-factor;
                  shiftop.shiftimm:=power;
                end
              else
                shiftop.shiftimm:=BsrDWord(factor);

              if odd(factor) then
                begin
                  { dst := (src shl n) - src, this also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
                  dec(factor);
                end
              else if firstinstr then
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop))
              else
                begin
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shiftop));
                  dec(factor,1 shl shiftop.shiftimm);
                end;
              firstinstr:=false;
            end;
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits dst := src <op> a for a constant a.

    setflags (or the cgsetflags field) requests a flag setting instruction.
    For add/sub with an immediate that fits a shifter operand, ovloc then
    reports the flag to test; otherwise ovloc is LOC_VOID unless the
    register/register fallback fills it in. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
      imm1, imm2: DWord;
    begin
      optimize_op_const(size, op, a);

      { operations that turned into a plain copy or constant load }
      if op=OP_NONE then
        begin
          if src<>dst then
            a_load_reg_reg(list,size,size,src,dst);
          exit;
        end;
      if op=OP_MOVE then
        begin
          a_load_const_reg(list,size,a,dst);
          exit;
        end;

      ovloc.loc:=LOC_VOID;

      { add <-> sub with the negated constant if the negated value is
        encodable as shifter operand }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { rotate left by a = rotate right by 32-a }
              if op=OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            begin
              { immediate form of the data processing instruction }
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),
                toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  if op=OP_ADD then
                    ovloc.resflags:=F_CS
                  else if op=OP_SUB then
                    ovloc.resflags:=F_CC
                  else
                    internalerror(2019050922);
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { done here rather than in the peephole optimizer because it
            saves a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { e.g. b=a*5 -> b=a*4+a, one add with a shifted operand }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { e.g. b=a*7 -> b=a*8-a, one rsb with a shifted operand }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { the helper already emitted the replacement sequence }
            end
          { BIC clears the given bits while AND keeps them, so BIC with the
            inverted mask widens the range of usable shifter constants }
          else if (op=OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { two shifts instead of two bics might allow the peephole optimizer
            to fold the second shift into the following instruction }
          else if (op=OP_AND) and
             is_continuous_mask(aword(a), lsb, width) and
             ((lsb=0) or ((lsb+width)=32)) then
            begin
              shifterop_reset(so);
              if (width=16) and
                 (lsb=0) and
                 (current_settings.cputype>=cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width=8) and
                 (lsb=0) and
                 (current_settings.cputype>=cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb=0 then
                begin
                  { mask at the low end: shift the upper bits out and back }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask at the high end: shift the lower bits out and back }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          else if (op=OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
             not(cgsetflags or setflags) and
             split_into_shifter_const(a, imm1, imm2) then
            begin
              { apply the constant in two encodable halves }
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
          else
            begin
              { general case: materialise the constant in a register }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src2 <op> src1.

    setflags (or the cgsetflags field) requests overflow information:
      - for OP_MUL/OP_IMUL on cpus with UMULL a long multiply is emitted and
        ovloc is set to LOC_FLAGS/F_NE (high word compared against the
        expected extension);
      - for the generic data processing operations the instruction gets the
        S postfix; ovloc is left at LOC_VOID here.
    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported (internalerror).
    Rotates are only supported for 32 bit sizes. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst, src2, <shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { long multiply; the high word lands in overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { all three registers coincide: multiply a copy of the
                          source. The copy has to go into tmpreg (copying
                          src2 into dst would be a no-op and leave tmpreg
                          undefined). }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension of
                      the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { see above: the copy must be written to tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { plain three register data processing instruction }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),
              toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 into the register pair dstlo/dsthi.

    With UMULL support a long multiply is emitted (UMULL for OS_32, SMULL
    for OS_S32, internalerror for any other size); missing destination
    registers are replaced by fresh ones. Without UMULL only the low word
    can be produced (plain MUL); asking for dsthi then is an internalerror. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype] then
        begin
          list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
          if size=OS_32 then
            mulop:=A_UMULL
          else if size=OS_S32 then
            mulop:=A_SMULL
          else
            InternalError(2014060802);
          { The caller might omit dstlo or dsthi when not interested in it,
            but the instruction still needs valid registers everywhere. In
            case of dsthi = NR_NO a 32x32=32 bit multiplication would be
            possible as well. }
          if dstlo=NR_NO then
            dstlo:=getintregister(list,size);
          if dsthi=NR_NO then
            dsthi:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
        end
      else if dsthi<>NR_NO then
        internalerror(2015083022)
      else
        begin
          if dstlo=NR_NO then
            dstlo:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
        end;
    end;
  { Emits the load/store instruction "op<oppostfix> reg, ref" after
    rewriting ref into a form the instruction can encode.

    ref is a value parameter; all adjustments are done on the local copy.
    Returns the reference that was finally used in the instruction, so a
    caller can derive neighbouring addresses from it. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1 : tregister;
      vfpmemop,
      fpumemop : boolean;
    begin
      tmpreg1:=NR_NO;
      vfpmemop:=(op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);
      fpumemop:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (fpumemop and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        fixref(list,ref);

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
             (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
             ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not(vfpmemop) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { The offset is always added to the base and the sum becomes the
            new base. Previously, when a temporary was already in use, the
            sum was written to a second temporary that the reference never
            picked up, so the offset got lost once it was cleared below.
            tmpreg1 tracks the new base so that the fpu folding further down
            recognises it. }
          tmpreg1:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
          ref.base:=tmpreg1;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if fpumemop and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { the base is a temporary already: fold the index in place }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { otherwise the temporary has to be the index }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { no temporary so far: compute base +/- index into a new one,
                honouring the sign of the index like the branches above }
              tmpreg1:=getintregister(list,OS_ADDR);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores reg to ref with the width given by tosize.

    If the reference is less aligned than the store needs (alignment 1 or
    2), or a halfword store is required on a cpu without
    CPUARM_HAS_ALL_MEM, the value is written in several smaller pieces,
    ordered according to the target endianness. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      oppostfix:toppostfix;
      partref: treference;
      shifted : tregister;
      step : integer;
      bigendian,
      piecewise : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          oppostfix:=PF_B;
        OS_16,
        OS_S16:
          oppostfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          oppostfix:=PF_None;
        else
          InternalError(2003082912);
      end;

      piecewise:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
         (oppostfix=PF_H));
      if not piecewise then
        begin
          handle_load_store(list,A_STR,oppostfix,reg,ref);
          exit;
        end;

      { the least significant piece is stored first; on big endian targets
        that is the highest address, so walk downwards from there }
      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;

      case FromSize of
        OS_16,OS_S16:
          begin
            { two byte stores }
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if bigendian then
              inc(partref.offset,1);
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
            a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          end;
        OS_32,OS_S32:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                if bigendian then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
              end
            else
              begin
                { four byte stores }
                if bigendian then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
              end;
          end
        else
          handle_load_store(list,A_STR,oppostfix,reg,ref);
      end;
    end;
  { Stores reg to ref with the width given by tosize and returns the
    reference handle_load_store actually used.

    On cpus without CPUARM_HAS_ALL_MEM a 16 bit store is split into two
    byte stores: the low byte at ref, the next byte at offset+1; the
    returned reference is the one of the first byte. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      oppostfix:toppostfix;
      secondref: treference;
      highbyte: TRegister;
    begin
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          oppostfix:=PF_B;
        OS_16,
        OS_S16:
          oppostfix:=PF_H;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      if not(tosize in [OS_S16,OS_16]) or
         (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        result:=handle_load_store(list,A_STR,oppostfix,reg,ref)
      else
        begin
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          highbyte:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,highbyte);
          secondref:=result;
          inc(secondref.offset);
          handle_load_store(list,A_STR,PF_B,highbyte,secondref);
        end;
    end;
  { Loads reg from ref and returns the reference handle_load_store actually
    used.

    The load postfix is derived from fromsize. On cpus without
    CPUARM_HAS_ALL_MEM two cases are emulated, selected by tosize:
      - OS_S8: unsigned byte load followed by a register sign extension;
      - OS_16/OS_S16: two byte loads combined with "orr reg,reg,tmp lsl 8". }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      oppostfix:toppostfix;
      so: tshifterop;
      highbyte: TRegister;
      secondref: treference;
      fullmem: boolean;
    begin
      case FromSize of
        { signed integer registers }
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308291);
      end;

      fullmem:=CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype];
      if (tosize=OS_S8) and not fullmem then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if (tosize in [OS_S16,OS_16]) and not fullmem then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          highbyte:=getintregister(list,OS_INT);
          secondref:=result;
          inc(secondref.offset);
          handle_load_store(list,A_LDR,PF_B,highbyte,secondref);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,highbyte,so));
        end
      else
        result:=handle_load_store(list,A_LDR,oppostfix,reg,ref);
    end;
  { Copies reg1 to reg2, zero or sign extending according to fromsize and
    tosize.

    Before armv6 the extension is built from shift pairs (or an and with
    $ff); from armv6 on the uxtb/sxtb/uxth/sxth instructions are used. If
    no conversion instruction was emitted and the registers differ, a plain
    mov is generated and reported to the register allocator. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { emits "reg2 := reg <shiftmode> shiftimm" }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
          end
        else
          case shiftmode of
            SM_ASR:
              a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
            SM_LSR:
              a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
            SM_LSL:
              a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
            else
              internalerror(2013090301);
          end;
      end;

    var
      movinstr: taicpu;
      converted: boolean;
    begin
      { NOTE(review): tcgsize2size appears to be a size in bytes elsewhere in
        this unit, so the limit of 32 looks unusually large - confirm }
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          converted:=true;
          { when narrowing, the destination size decides the extension }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      { sign extend to 16 bit, then clear the upper half }
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                converted:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                if tosize=OS_16 then
                  begin
                    so.shiftmode:=SM_ROR;
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                    do_shift(SM_LSR,16,reg2);
                  end
                else
                  list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                converted:=false;
            end
        end;

      if (reg1<>reg2) and not converted then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move
            instruction so it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.

    Every location of the parameter is served in turn: fpu registers are
    loaded with a_loadfpu_ref_reg, integer registers with an integer load
    (64 bit locations are delegated to cg64), memory locations are filled
    with g_concatcopy. The source offset advances by the size of each
    location. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      srcref,dstref : treference;
      curloc : pcgparalocation;
    begin
      srcref:=ref;
      curloc:=paraloc.location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { NOTE(review): this allocates paraloc.location and loads
                  from ref, not from the current location / advanced
                  reference - presumably fine for single location
                  parameters, confirm }
                paramanager.allocparaloc(list,paraloc.location);
                a_loadfpu_ref_reg(list,size,size,ref,curloc^.register);
              end;
            LOC_REGISTER :
              case curloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,srcref,curloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,srcref,paraloc);
                else
                  a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(dstref,curloc^.reference.index,curloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,srcref,dstref,tcgsize2size[curloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue behind the part that was just passed }
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Copies fpu register reg1 to reg2 with an MVF instruction whose postfix
    is looked up from tosize. fromsize is not used. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads fpu register reg from ref with an LDF instruction. The postfix
    (S, D or E) is chosen from fromsize; if tosize differs, a register to
    register move with the target size follows. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      ldfpostfix:toppostfix;
    begin
      if fromsize in [OS_32,OS_F32] then
        ldfpostfix:=PF_S
      else if fromsize in [OS_64,OS_F64] then
        ldfpostfix:=PF_D
      else if fromsize=OS_F80 then
        ldfpostfix:=PF_E
      else
        InternalError(200309021);
      handle_load_store(list,A_LDF,ldfpostfix,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores fpu register reg to ref with an STF instruction. The postfix
    (S, D or E) is chosen from tosize; any other size is an internalerror. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      stfpostfix:toppostfix;
    begin
      if tosize=OS_F32 then
        stfpostfix:=PF_S
      else if tosize=OS_F64 then
        stfpostfix:=PF_D
      else if tosize=OS_F80 then
        stfpostfix:=PF_E
      else
        InternalError(200309022);
      handle_load_store(list,A_STF,stfpostfix,reg,ref);
    end;
  { Emits a software check of the fpu status: FPSCR is read, masked with
    $9f and FPC_THROWFPUEXCEPTION is called unless the masked value is zero.

    Nothing is generated unless fpu exception checking is switched on, the
    fpu lacks FPUARM_HAS_EXCEPTION_TRAPPING, and either force is set or the
    current procedure is flagged as needing a check. With clear set, that
    per-procedure flag is reset afterwards. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipjump: taicpu;
      skiplabel: TAsmLabel;
    begin
      if not(cs_check_fpu_exceptions in current_settings.localswitches) then
        exit;
      if FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype] then
        exit;
      if not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { jump over the call if none of the masked bits is set }
      current_asmdata.getjumplabel(skiplabel);
      skipjump:=taicpu.op_sym(A_B,skiplabel);
      skipjump.is_jmp:=true;
      skipjump.condition:=C_EQ;
      list.concat(skipjump);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      a_label(list,skiplabel);

      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1577. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.

    The constant is used directly in a CMP when it can be encoded (shifter
    constant in ARM mode, Thumb immediate in Thumb mode). In ARM mode, a CMN
    with the negated constant is tried next; otherwise the constant is
    loaded into a temporary register first. The flags register is marked as
    allocated for the duration of compare and jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      fits_cmp : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      if GenerateThumbCode then
        fits_cmp:=is_thumb_imm(a)
      else
        fits_cmp:=is_shifter_const(a,rot);

      if fits_cmp then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if not(GenerateThumbCode) and (a<>$7fffffff) and (a<>-1) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { constant not encodable: materialise it in a scratch register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.

    reverse=false: RBIT followed by CLZ; if the CLZ result is 32 the result
    is replaced by $ff (conditional MOV, preceded by an IT block when Thumb-2
    code is generated).
    reverse=true: CLZ, then dst:=31-dst, then dst:=dst and 255.

    srcsize and dstsize are not consulted. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags register is marked as allocated around both instructions.
    size is not consulted. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s.
    In Thumb mode BL is used, otherwise B; the instruction is marked as a
    jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to label l.
    In Thumb mode BL is used, otherwise B; the instruction is marked as a
    jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to l that is taken when the flags f are set.

    In non-Thumb mode this is a single conditional B. In Thumb mode a
    conditional branch on the inverted flags skips over an unconditional
    jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skip : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skip);
      branch:=setcondition(taicpu.op_sym(A_B,skip),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skip);
    end;
  { Materialises the flags f as 0/1 in reg using two conditional moves:
    reg:=1 under the condition for f, reg:=0 under the inverse condition.
    size is not consulted. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Emits the profiling hook: pushes R14 and calls __gnu_mcount_nc.
    Only implemented for arm-linux; every other target raises internal
    error 2014091201. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Emits the procedure prologue into list.

    list         : instruction list that receives the prologue
    localsize    : size of the local stack area requested by the caller; it
                   is aligned and possibly adjusted before it is subtracted
                   from the stack pointer
    nostackframe : if true, no code at all is generated

    Steps (only when nostackframe is false):
      1. determine which FPU/VFP registers have to be preserved
      2. store the integer registers with STM (FD); on non-Darwin targets a
         frame pointer based frame additionally stores FP, R12 (copy of the
         entry SP), R14 and R15, on Darwin the registers are stored in two
         groups and R7 is made to point to the saved R7
      3. subtract the (aligned) local size from the stack pointer
      4. store the FPU (SFM) or VFP (VSTM) registers at floatregstart

    As a side effect tcpuprocinfo(current_procinfo).stackpaddingreg is set:
    to High(TSuperRegister) by default, or to the extra register that was
    added to the STM set to keep the register area aligned. }
  procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
       ref : treference;
       shift : byte;
       firstfloatreg,lastfloatreg,
       r : byte;
       mmregs,
       regs, saveregs : tcpuregisterset;
       registerarea,
       r7offset,
       stackmisalignment : pint;
       imm1, imm2: DWord;
       stack_parameters : Boolean;
    begin
      LocalSize:=align(LocalSize,4);
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { call instruction does not put anything on the stack }
      registerarea:=0;
      tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
      lastfloatreg:=RS_NO;
      if not(nostackframe) then
        begin
          firstfloatreg:=RS_NO;
          mmregs:=[];
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { save floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      inc(registerarea,12);
                    end;
              end;
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones but with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones but with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050924);
          end;
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if not(target_info.system in systems_darwin) then
            begin
              a_reg_alloc(list,NR_STACK_POINTER_REG);
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { keep a copy of the entry stack pointer in R12 }
                  a_reg_alloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
                end;
              { the (old) ARM APCS requires saving both the stack pointer (to
                crawl the stack) and the PC (to identify the function this
                stack frame belongs to) -> also save R12 (= copy of R13 on entry)
                and R15 -- still needs updating for EABI and Darwin, they don't
                need that }
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
              else
                if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                  include(regs,RS_R14);
              if regs<>[] then
                begin
                  for r:=RS_R0 to RS_R15 do
                    if r in regs then
                      inc(registerarea,4);

                  { if the stack is not 8 byte aligned, try to add an extra register,
                    so we can avoid the extra sub/add ...,#4 later (KB) }
                  if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
                    for r:=RS_R3 downto RS_R0 do
                      if not(r in regs) then
                        begin
                          regs:=regs+[r];
                          inc(registerarea,4);
                          { remembered so that g_proc_exit can pop the same register }
                          tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
                          break;
                        end;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;

              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { the framepointer now points to the saved R15, so the saved
                    framepointer is at R11-12 (for get_caller_frame) }
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                  a_reg_dealloc(list,NR_R12);
                end;
            end
          else
            begin
              { always save r14 if we use r7 as the framepointer, because
                the parameter offsets are hardcoded in advance and always
                assume that r14 sits on the stack right behind the saved r7
              }
              if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
                include(regs,RS_FRAME_POINTER_REG);
              if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                include(regs,RS_R14);
              if regs<>[] then
                begin
                  { on Darwin, you first have to save [r4-r7,lr], and then
                    [r8,r10,r11] and make r7 point to the previously saved
                    r7 so that you can perform a stack crawl based on it
                    ([r7] is previous stack frame, [r7+4] is return address)
                  }
                  include(regs,RS_FRAME_POINTER_REG);
                  saveregs:=regs-[RS_R8,RS_R10,RS_R11];
                  r7offset:=0;
                  for r:=RS_R0 to RS_R15 do
                    if r in saveregs then
                      begin
                        inc(registerarea,4);
                        { registers below the frame pointer end up at lower
                          addresses than the saved frame pointer }
                        if r<RS_FRAME_POINTER_REG then
                          inc(r7offset,4);
                      end;
                  { save the registers }
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                  { make r7 point to the saved r7 (regardless of whether this
                    frame uses the framepointer, for backtrace purposes) }
                  if r7offset<>0 then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                  else
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                  { now save the rest (if any) }
                  saveregs:=regs-saveregs;
                  if saveregs<>[] then
                    begin
                      for r:=RS_R8 to RS_R11 do
                        if r in saveregs then
                          inc(registerarea,4);
                      list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                    end;
                end;
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    internalerror(2014030901)
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end;
              if is_shifter_const(localsize,shift) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if split_into_shifter_const(localsize, imm1, imm2) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                end
              else
                begin
                  { size not encodable in one or two immediates: go through R12 }
                  if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                    a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end;
            end;

          if (mmregs<>[]) or
             (firstfloatreg<>RS_NO) then
            begin
              reference_reset(ref,4,[]);
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  { R12:=framepointer-(-floatregstart). The immediate that ends
                    up in the SUB below is -floatregstart, so this is the value
                    that has to be checked for being a valid shifter constant }
                  if not is_shifter_const(-tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;

              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VSTM takes the address register in the index field }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050923);
              end;
            end;
        end;
    end;
  { Emits the procedure epilogue into list.

    list         : instruction list that receives the epilogue
    parasize     : not consulted by this implementation
    nostackframe : if true, only the return instruction is generated
                   (BX R14 if the CPU has BX, otherwise MOV PC,R14)

    With a stack frame the code restores the FPU (LFM) or VFP (VLDM)
    registers from floatregstart, releases the local stack area when the
    stack pointer is the frame pointer or on Darwin, and finally reloads the
    integer registers with LDM. The return happens by loading R15 from the
    slot of the saved R14; if no integer register has to be restored, a
    plain BX R14 / MOV PC,R14 is emitted instead.

    The set of VFP registers restored here is computed exactly like the set
    saved in g_proc_entry. }
  procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
       ref : treference;
       LocalSize : longint;
       firstfloatreg,lastfloatreg,
       r,
       shift : byte;
       mmregs,
       saveregs,
       regs : tcpuregisterset;
       registerarea,
       stackmisalignment: pint;
       paddingreg: TSuperRegister;
       imm1, imm2: DWord;
    begin
      if not(nostackframe) then
        begin
          registerarea:=0;
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          mmregs:=[];
          saveregs:=[];
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { restore floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { floating point register space is already included in
                        localsize below by calc_stackframe_size
                      inc(registerarea,12);
                      }
                    end;
              end;
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { restore vfp registers? }
                { the *[0..31] / *[0..15] is a hack to prevent that the compiler tries to restore odd single-type
                  registers, they have numbers>$1f which is not really correct as they should simply have the same
                  numbers as the even ones but with a different subtype as it is done on x86 with al/ah.
                  The mask has to be the same one g_proc_entry uses, so that exactly the registers
                  stored there are loaded again }
                mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
                if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
                  mmregs:=mmregs*[0..31]
                else
                  mmregs:=mmregs*[0..15];
              end
            else
              internalerror(2019050908);
          end;

          if (firstfloatreg<>RS_NO) or
             (mmregs<>[]) then
            begin
              reference_reset(ref,4,[]);
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  { R12:=framepointer-(-floatregstart). The immediate that ends
                    up in the SUB below is -floatregstart, so this is the value
                    that has to be checked for being a valid shifter constant }
                  if not is_shifter_const(-tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VLDM takes the address register in the index field }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050921);
              end;
            end;

          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or
             (regs<>[]) or
             ((target_info.system in systems_darwin) and
              (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
            begin
              { the saved return address is loaded straight into the PC }
              exclude(regs,RS_R14);
              include(regs,RS_R15);
              if (target_info.system in systems_darwin) then
                include(regs,RS_FRAME_POINTER_REG);
            end;

          if not(target_info.system in systems_darwin) then
            begin
              { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                The saved PC came after that but is discarded, since we restore
                the stack pointer }
              if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
            end
          else
            begin
              { restore R8-R11 already if necessary (they've been stored
                before the others) }
              saveregs:=regs*[RS_R8,RS_R10,RS_R11];
              if saveregs<>[] then
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  for r:=RS_R8 to RS_R11 do
                    if r in saveregs then
                      inc(registerarea,4);
                  regs:=regs-saveregs;
                end;
            end;
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);

          { reapply the stack padding reg, in case there was one, see the complimentary
            comment in g_proc_entry() (KB) }
          paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
          if paddingreg < RS_R4 then
            begin
              if paddingreg in regs then
                internalerror(201306190)
              else
                begin
                  regs:=regs+[paddingreg];
                  inc(registerarea,4);
                end;
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin) then
            begin
              LocalSize:=current_procinfo.calc_stackframe_size;
              if (LocalSize<>0) or
                 ((stackmisalignment<>0) and
                  ((pi_do_call in current_procinfo.flags) or
                   (po_assembler in current_procinfo.procdef.procoptions))) then
                begin
                  if pi_estimatestacksize in current_procinfo.flags then
                    LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                  else
                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                  if is_shifter_const(LocalSize,shift) then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                  else if split_into_shifter_const(localsize, imm1, imm2) then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                    end
                  else
                    begin
                      { size not encodable in one or two immediates: go through R12 }
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end;
                end;

              { Darwin: the second register group was pushed last, so it is
                popped first }
              if (target_info.system in systems_darwin) and
                 (saveregs<>[]) then
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));

              if regs=[] then
                begin
                  { nothing on the stack: return through the link register }
                  if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                  else
                    list.concat(taicpu.op_reg(A_BX,NR_R14))
                end
              else
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;
            end
          else
            begin
              { restore int registers and return }
              reference_reset(ref,4,[]);
              ref.index:=NR_FRAME_POINTER_REG;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
            end;
        end
      else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14))
    end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.

    Nothing is generated unless PIC code is being created, the current
    procedure is flagged with pi_needs_got and the target uses a GOT for PIC.
    The GOT address is computed PC-relative in R12 from a constant placed
    in the procedure's local data and then moved to the GOT register. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      pool_ref : treference;
      pool_label,
      anchor_label : TAsmLabel;
      used_regs : tcpuregisterset;
      supreg : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before GOT initialization code, allocate registers used for procedure parameters
        to prevent usage of these registers for temp operations in later stages of code
        generation. }
      used_regs:=rg[R_INTREGISTER].used_in_proc;
      for supreg:=RS_R0 to RS_R3 do
        if supreg in used_regs then
          a_reg_alloc(list, newreg(R_INTREGISTER,supreg,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { R12:=constant pool entry, addressed relative to PC }
      reference_reset(pool_ref,4,[]);
      current_asmdata.getglobaldatalabel(pool_label);
      cg.a_label(current_procinfo.aktlocaldata,pool_label);
      pool_ref.symbol:=pool_label;
      pool_ref.base:=NR_PC;
      pool_ref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,pool_ref));

      { the pool entry holds _GLOBAL_OFFSET_TABLE_ relative to the label
        placed right before the ADD below, with an offset of -8 }
      current_asmdata.getaddrlabel(anchor_label);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,anchor_label,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,anchor_label);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for supreg:=RS_R3 downto RS_R0 do
        if supreg in used_regs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,supreg,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.

    Only references with address mode AM_OFFSET are accepted (internal
    error 200309071 otherwise). A working copy of the reference is
    normalised so that it has a base register; symbols and offsets that
    cannot be encoded as shifter constant (neither directly nor negated)
    are resolved through fixref. The address is then formed with an
    ADD/SUB of base and index plus an optional offset addition, with a
    constant load, or with a plain register move. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      rot : byte;
      addr : treference;
      mov : taicpu;
      index_op : topcg;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      addr:=ref;
      { Be sure to have a base register }
      if addr.base=NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(2014020702);
          if addr.signindex<0 then
            internalerror(200312023);
          addr.base:=addr.index;
          addr.index:=NR_NO;
        end;

      if assigned(addr.symbol) or
         (not(is_shifter_const(addr.offset,rot)) and
          not(is_shifter_const(-addr.offset,rot))) then
        fixref(list,addr);

      { expect a base here if there is an index }
      if (addr.base=NR_NO) and (addr.index<>NR_NO) then
        internalerror(200312022);

      if addr.index<>NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(200312021);
          { a negative index sign means base-index }
          if addr.signindex<0 then
            index_op:=OP_SUB
          else
            index_op:=OP_ADD;
          a_op_reg_reg_reg(list,index_op,OS_ADDR,addr.base,addr.index,r);
          if addr.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,r,r);
        end
      else if addr.base=NR_NO then
        a_load_const_reg(list,OS_ADDR,addr.offset,r)
      else if addr.offset<>0 then
        a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,addr.base,r)
      else
        begin
          mov:=taicpu.op_reg_reg(A_MOV,r,addr.base);
          list.concat(mov);
          add_move_instruction(mov);
        end;
    end;
  { Rewrites ref so that it no longer contains a symbol or an offset.

    The symbol/offset part is placed in a constant pool entry in the
    procedure's local data (or, for external symbols on iOS, fetched through
    an indirect symbol load) and loaded into a temporary register. That
    register is then merged into ref as base or index register. On return
    ref.offset is 0 and ref.symbol is nil. }
  procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
    var
      valuereg, picreg : tregister;
      poolref : treference;
      poollabel, piclabel : tasmlabel;
      loaded_indirectly : boolean;
    begin
      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      { create consts entry }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      piclabel:=nil;
      valuereg:=NR_NO;
      loaded_indirectly:=false;

      if not assigned(ref.symbol) then
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset))
      else if (target_info.system=system_arm_ios) and
              (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
        begin
          valuereg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
          if ref.offset<>0 then
            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,valuereg);
          loaded_indirectly:=true;
        end
      else
        case ref.refaddr of
          addr_gottpoff:
            current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset));
          addr_tlsgd:
            current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset));
          addr_tlsdesc:
            current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset));
          addr_tpoff:
            begin
              if assigned(ref.relsymbol) or (ref.offset<>0) then
                Internalerror(2019092804);
              current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
            end;
          else
            begin
              if not(cs_create_pic in current_settings.moduleswitches) then
                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
              else if tf_pic_uses_got in target_info.flags then
                current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
              else
                begin
                  { ideally, we would want to generate

                      ldr       r1, LPICConstPool
                    LPICLocal:
                      ldr/str   r2,[pc,r1]

                    ...
                      LPICConstPool:
                        .long _globsym-(LPICLocal+8)

                    However, we cannot be sure that the ldr/str will follow
                    right after the call to fixref, so we have to load the
                    complete address already in a register.
                  }
                  current_asmdata.getaddrlabel(piclabel);
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
                end;
            end;
        end;

      { load consts entry }
      if not loaded_indirectly then
        begin
          valuereg:=getintregister(list,OS_INT);
          poolref.symbol:=poollabel;
          poolref.base:=NR_PC;
          list.concat(taicpu.op_reg_ref(A_LDR,valuereg,poolref));

          if (cs_create_pic in current_settings.moduleswitches) and
             (tf_pic_uses_got in target_info.flags) and
             assigned(ref.symbol) then
            begin
              {$ifdef EXTDEBUG}
              if not (pi_needs_got in current_procinfo.flags) then
                Comment(V_warning,'pi_needs_got not included');
              {$endif EXTDEBUG}
              Include(current_procinfo.flags,pi_needs_got);
              { the pool entry is a GOT slot: load the address from [got+valuereg] }
              reference_reset(poolref,4,[]);
              poolref.base:=current_procinfo.got;
              poolref.index:=valuereg;
              list.concat(taicpu.op_reg_ref(A_LDR,valuereg,poolref));
              if ref.offset<>0 then
                a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,valuereg);
            end;
        end;

      if assigned(piclabel) then
        begin
          { GOT-less PIC: add PC at the label the pool entry is relative to }
          cg.a_label(list,piclabel);
          picreg:=getaddressregister(list);
          a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,valuereg,NR_PC,picreg);
          valuereg:=picreg
        end;

      { This routine can be called with PC as base/index in case the offset
        was too large to encode in a load/store. In that case, the entire
        absolute expression has been re-encoded in a new constpool entry, and
        we have to remove the use of PC from the original reference (the code
        above made everything relative to the value loaded from the new
        constpool entry) }
      if is_pc(ref.base) then
        ref.base:=NR_NO;
      if is_pc(ref.index) then
        ref.index:=NR_NO;

      if ref.base=NR_NO then
        ref.base:=valuereg
      else if ref.index<>NR_NO then
        begin
          { base and index both in use: fold the base into the loaded value }
          list.concat(taicpu.op_reg_reg_reg(A_ADD,valuereg,ref.base,valuereg));
          ref.base:=valuereg;
        end
      else if ref.base=NR_PC then
        ref.base:=valuereg
      else
        begin
          ref.index:=valuereg;
          ref.shiftimm:=0;
          ref.signindex:=1;
          ref.shiftmode:=SM_None;
        end;
      ref.offset:=0;
      ref.symbol:=nil;
    end;
  { Copies len bytes from source to dest by emitting a call to the RTL
    routine FPC_MOVE.

    list   - assembler list receiving the generated code
    source - reference to the memory to copy from (1st parameter of MOVE)
    dest   - reference to the memory to copy to (2nd parameter of MOVE)
    len    - number of bytes to copy (3rd parameter of MOVE) }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,lenpara : TCGPara;
      moveproc : tprocdef;
    begin
      moveproc:=search_system_proc('MOVE');

      srcpara.init;
      dstpara.init;
      lenpara.init;

      { obtain the locations of the three parameters of MOVE }
      paramanager.getcgtempparaloc(list,moveproc,1,srcpara);
      paramanager.getcgtempparaloc(list,moveproc,2,dstpara);
      paramanager.getcgtempparaloc(list,moveproc,3,lenpara);

      { the parameters are loaded from last to first }
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);

      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);

      { the volatile registers are reserved around the call }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list    - assembler list receiving the generated code
    source  - reference to the memory to copy from
    dest    - reference to the memory to copy to
    len     - number of bytes to copy; nothing is generated for len=0
    aligned - true if word/halfword accesses may be used on both references

    Strategy, in the order tested below:
      * aligned and len=4 or len=2: a single load/store pair
      * aligned and len<=helpsize: up to maxtmpreg words are loaded into
        temporary registers and then stored, the remainder is copied with
        word/halfword/byte moves
      * len<=4 (not aligned): byte by byte copy
      * everything else: a byte copy loop (genloop/genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if it might conflict with reserved ones; also +2 is used because of the regs required for referencing}
      maxtmpreg_thumb = 5;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop using post-indexed addressing: count div size iterations of
      size bytes each, followed by a tail for the remaining count mod size
      bytes.

      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { the flags are live from the SUBS to the conditional branch }
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { tail: the remaining count mod size bytes }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether creating a separate ref is needed or
      whether we can keep the original reference while copying }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Thumb variant of the copy loop: the base registers are advanced with
      explicit ADD instructions instead of post-indexed addressing.

      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { advances the base register of ref by value bytes }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { every iteration moves size bytes, so the pointers have to advance
          by size as well (identical to the former +1 for size=1) }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { tail: the remaining count mod size bytes }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest size that is still copied without a loop }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { first load as many words as possible into temporary registers ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;

          { ... then store them in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy what is left with the largest move that still fits }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference inherits the properties of source
                (formerly dest.temppos was used here by mistake) }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

//            if cs_opt_size in current_settings.optimizerswitches  then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
              genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment of
    the two references. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    begin
      { same generator as g_concatcopy, with the "aligned" flag forced off }
      g_concatcopy_internal(list,source,dest,len,false);
    end;
  { Copies len bytes from source to dest. The alignment recorded in the two
    references decides whether the aligned or the unaligned copy code is
    generated. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    begin
      { NOTE(review): only the alignments 1 and 3 select the unaligned path,
        an alignment of 2 is passed on as "aligned" -- confirm that [1,3] and
        not [1..3] is intended }
      g_concatcopy_internal(list,source,dest,len,
        not((source.alignment in [1,3]) or
            (dest.alignment in [1,3])));
    end;
  { Emits an overflow check for the result in l of type def when no separate
    overflow location is available: g_overflowCheck_loc is called with an
    overflow location of LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      noovloc : tlocation;
    begin
      noovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,noovloc);
    end;
  { Emits an overflow check: a conditional branch that skips a call to
    FPC_OVERFLOW when no overflow happened. Nothing is generated unless
    overflow checking is enabled in the local switches.

    List  - assembler list receiving the generated code
    Loc   - location of the checked result (not inspected here)
    def   - type of the checked result, selects the signed/unsigned test
    ovloc - LOC_VOID: test the flags left by the last instruction in List;
            LOC_FLAGS: ovloc.resflags holds the flag signalling the overflow;
            everything else is an internal error }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      okLabel : tasmlabel;
      skip : TAiCpu;
      noOverflow : tresflags;
      unsignedType : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(okLabel);
      case ovloc.loc of
        LOC_FLAGS:
          begin
            { branch around the call if the overflow flag is NOT set }
            noOverflow:=ovloc.resflags;
            inverse_flags(noOverflow);
            cg.a_jmp_flags(list,noOverflow,okLabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        LOC_VOID:
          begin
            skip:=taicpu.op_sym(A_B,okLabel);
            skip.is_jmp:=true;
            unsignedType:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if unsignedType then
              begin
                { unsigned: the carry flag is tested, its meaning depends on
                  whether the last instruction was a subtraction }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  skip.SetCondition(C_CS)
                else
                  skip.SetCondition(C_CC);
              end
            else
              { signed: the overflow flag is tested }
              skip.SetCondition(C_VC);
            list.concat(skip);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,okLabel);
    end;
  { Intentionally generates no code. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { nothing to do here: g_proc_entry takes care of saving the registers }
    end;
  { Intentionally generates no code. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { nothing to do here: g_proc_exit takes care of restoring the registers }
    end;
  { Emits a jump to l that is taken if the comparison condition cond holds.

    For Thumb code an inverted conditional branch over an unconditional jump
    is generated; otherwise a single conditional branch is emitted. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skipLabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          { plain conditional branch to the target }
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skipLabel);
      branch:=Taicpu.Op_sym(A_B,skipLabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skipLabel);
    end;
  { Returns the opcode needed to move/convert a scalar floating point value
    between the two given sizes: A_VMOV if both sizes are equal (OS_F32 or
    OS_F64), A_VCVT for a conversion between OS_F32 and OS_F64.

    Every other combination raises internalerror 200312205. This now also
    covers sizes outside OS_F32..OS_F128, which would otherwise index
    outside of the table. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      { integer or vector sizes must never be used as a table index }
      if not(fromsize in [OS_F32..OS_F128]) or
         not(tosize in [OS_F32..OS_F128]) then
        internalerror(200312205);
      result:=convertop[fromsize,tosize];
      if result=A_NONE then
        internalerror(200312205);
    end;
  { Returns the opcode postfix belonging to a scalar floating point
    move/conversion between the two given sizes: PF_F32/PF_F64 for equal
    sizes, PF_F32F64/PF_F64F32 for conversions between OS_F32 and OS_F64.

    Every other combination yields PF_None. This now also covers sizes
    outside OS_F32..OS_F128, which would otherwise index outside of the
    table. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      { integer or vector sizes must never be used as a table index }
      if (fromsize in [OS_F32..OS_F128]) and
         (tosize in [OS_F32..OS_F128]) then
        result:=convertop[fromsize,tosize]
      else
        result:=PF_None;
    end;
  { Moves (equal sizes) or converts (different sizes) a scalar floating
    point value from mm register reg1 into mm register reg2. Only scalar
    shuffles are accepted, everything else is an internal error.

    Note that the lookup helpers are called with (tosize,fromsize), i.e.
    with the destination size first. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
    begin
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112407)
      else
        mov:=setoppostfix(
          taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),
          get_scalar_mm_prefix(tosize,fromsize));
      list.concat(mov);
      if mov.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so no check is needed;
          it is only registered as a move instruction }
        add_move_instruction(mov)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar value of size fromsize from memory (ref) into the mm
    register reg, converting it to tosize if the sizes differ.

    Integer source sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as raw
    OS_F32/OS_F64 data and must match tosize. References with an alignment
    of 1 or 2 are loaded via integer registers, all others with VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      wordreg,
      loadreg : tregister;
      pair : tregister64;
    begin
      { only scalar accesses are supported }
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      { normalise the source size to a floating point size }
      case fromsize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112901);
          end;
        else
          internalerror(2019050920);
      end;

      { a conversion needs an intermediate register of the source size }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        begin
          { insufficiently aligned: go through integer registers }
          case fromsize of
            OS_F64:
              begin
                pair.reglo:=getintregister(list,OS_32);
                pair.reghi:=getintregister(list,OS_32);
                cg64.a_load64_ref_reg(list,ref,pair);
                cg64.a_loadmm_intreg64_reg(list,OS_F64,pair,loadreg);
              end;
            OS_F32:
              begin
                wordreg:=getintregister(list,OS_32);
                a_load_ref_reg(list,OS_32,OS_32,ref,wordreg);
                a_loadmm_intreg_reg(list,OS_32,OS_F32,wordreg,loadreg,mms_movescalar);
              end;
            else
              internalerror(2009112412);
          end;
        end;

      { convert into the real destination if an intermediate was used }
      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar value of size fromsize held in the mm register reg to
    memory (ref) with size tosize, converting it first if the sizes differ.

    Integer destination sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as
    raw OS_F32/OS_F64 data and must match fromsize. References with an
    alignment of 1 or 2 are stored via integer registers, all others with
    VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      wordreg,
      storereg : tregister;
      pair : tregister64;
    begin
      { only scalar accesses are supported }
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      { normalise the destination size to a floating point size }
      case tosize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112802);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112902);
          end;
        else
          internalerror(2019050919);
      end;

      { convert into an intermediate register of the destination size first }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        begin
          { insufficiently aligned: go through integer registers }
          case tosize of
            OS_F64:
              begin
                pair.reglo:=getintregister(list,OS_32);
                pair.reghi:=getintregister(list,OS_32);
                cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,pair);
                cg64.a_load64_reg_ref(list,pair,ref);
              end;
            OS_F32:
              begin
                wordreg:=getintregister(list,OS_32);
                a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,wordreg,shuffle);
                a_load_reg_ref(list,OS_32,OS_32,wordreg,ref);
              end;
            else
              internalerror(2009112417);
          end;
        end;
      { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
    end;
  { Transfers the raw 32 bit contents of the integer register intreg into
    the mm register mmreg with a VMOV. fromsize has to be OS_32/OS_S32,
    tosize OS_F32 and the shuffle (if any) scalar; everything else is an
    internal error. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { raw data transfer only, conversions are not possible here }
      if (tosize<>OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112516);
      { no FPU exception check needed: VMOV cannot raise one }
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of the mm register mmreg into the
    integer register intreg with a VMOV. fromsize has to be OS_F32, tosize
    OS_32/OS_S32 and the shuffle (if any) scalar; everything else is an
    internal error. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { raw data transfer only, conversions are not possible here }
      if (fromsize<>OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112409);
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112514);
      { no FPU exception check needed: VMOV cannot raise one }
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Performs the operation op on the mm registers src and dst. Only OP_XOR
    is implemented, every other operation is an internal error.

    With NEON and size=OS_F64 a VEOR dst,dst,src is emitted. Otherwise src
    and dst have to be the same register, which is then cleared by moving
    a zeroed integer register into it. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op=OP_XOR then
        begin
          if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size=OS_F64) then
            begin
              if (reg_cgsize(src)<>size) or
                 assigned(shuffle) then
                internalerror(2019081301);
              list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
            end
          else
            begin
              { without VEOR only "x xor x" (= 0) can be handled }
              if (src<>dst) or
                 (reg_cgsize(src)<>size) or
                 assigned(shuffle) then
                internalerror(2009112907);
              zeroreg:=getintregister(list,OS_32);
              a_load_const_reg(list,OS_32,0,zeroreg);
              if size=OS_F32 then
                list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
              else if size=OS_F64 then
                list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
              else
                internalerror(2009112908);
            end;
        end
      else
        internalerror(2009112906);
    end;
  { Re-extends dst from 32 bit to size after an operation that may have
    produced a result exceeding the range of an 8 or 16 bit size. Nothing
    is generated for other operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    begin
      { only these operations can overflow into the upper bits }
      if not(op in [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG]) then
        exit;
      if size in [OS_8,OS_S8,OS_16,OS_S16] then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits MLA op1,op2,op3,op4. Operands that are R15, or R13 when Thumb or
    Thumb-2 code is generated, are first copied into a fresh integer
    register which is then used instead. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new register if reg is one of the
      registers described above }
    procedure checkreg(var reg : TRegister);
      var
        needcopy : boolean;
        copyreg : TRegister;
      begin
        if getsupreg(reg)=RS_R15 then
          needcopy:=true
        else if getsupreg(reg)=RS_R13 then
          needcopy:=GenerateThumbCode or GenerateThumb2Code
        else
          needcopy:=false;
        if not needcopy then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      checkreg(op1);
      checkreg(op2);
      checkreg(op3);
      checkreg(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { If the current procedure is flagged with pi_needs_tls, emits a call to
    fpc_read_tp and copies R0 into current_procinfo.tlsoffset. Otherwise
    nothing is generated. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;
      { R0 is reserved around the call and the copy of its value }
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation "regdst := regdst op regsrc"; for the unary operations
    OP_NEG and OP_NOT "regdst := op regsrc".

    OP_NEG is done with RSBS/RSC, OP_NOT with two 32 bit NOTs, everything
    else is forwarded to a_op64_reg_reg_reg. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { 0-src: RSBS produces the borrow that RSC consumes }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          { both halves are independent of each other }
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation "reg := reg op value". }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      { two operand form: reg is source and destination at the same time }
      a_op64_const_reg_reg(list,op,size,value,reg,reg);
    end;
  { 64 bit operation "regdst := regsrc op value" without requesting the
    flags for an overflow check. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unusedovloc : tlocation;
    begin
      { setflags=false; the returned overflow location is discarded }
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unusedovloc);
    end;
  { 64 bit three register operation on regsrc1, regsrc2 and regdst without
    requesting the flags for an overflow check. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unusedovloc : tlocation;
    begin
      { setflags=false; the returned overflow location is discarded }
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unusedovloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into the
    mm register mmreg with a single VMOV. mmsize has to be OS_F64. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    var
      mov : taicpu;
    begin
      { raw data transfer only, conversions are not possible here }
      if (mmsize<>OS_F64) then
        internalerror(2009112405);
      { no FPU exception check needed: VMOV cannot raise one }
      mov:=taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi);
      list.concat(mov);
    end;
  { Transfers the raw contents of the mm register mmreg into the integer
    register pair intreg with a single VMOV. mmsize has to be OS_F64. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    var
      mov : taicpu;
    begin
      { raw data transfer only, conversions are not possible here }
      if (mmsize<>OS_F64) then
        internalerror(2009112406);
      { no FPU exception check needed: VMOV cannot raise one }
      mov:=taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg);
      list.concat(mov);
    end;
  { 64 bit operation "regdst := regsrc op value" with optional overflow
    information.

    list     - assembler list receiving the generated code
    op       - OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT and
               everything else are internal errors
    size     - OS_64 selects the carry based overflow result (see below)
    value    - the 64 bit constant operand
    setflags - if true (or if cgsetflags of the code generator is set) and
               op is OP_ADD/OP_SUB, both halves are computed with flag
               setting instructions and the flags stay allocated for the
               caller
    ovloc    - receives LOC_FLAGS with F_CS (add) or F_CC (sub) in the flag
               setting case for size=OS_64, LOC_VOID in all other cases

    In the non flag setting ADD/SUB case the flags only carry from the low
    to the high word and are released again after the ADC/SBC. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
        else
          ;
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag setting variant: the flags remain allocated on purpose so
            that the caller can test them }
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the arm has a weird opinion how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { the two halves are independent of each other }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed by the ADC, release the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the borrow has been consumed by the SBC, release the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Emits a 64 bit register/register operation on 32 bit register pairs.

    For OP_AND/OP_OR/OP_XOR both halves are handled independently through
    cg.a_op_reg_reg_reg.  OP_ADD is emitted as ADDS/ADC on regsrc1,regsrc2 and
    OP_SUB as SUBS/SBC computing regsrc2-regsrc1.  OP_NEG and OP_NOT as well as
    any other operation raise an internal error.

    If setflags or the code generator's cgsetflags is set, the high word
    instruction of an ADD/SUB also updates the flags and the flags register
    stays allocated; for size=OS_64 ovloc is then set to LOC_FLAGS with F_CS
    (ADD) or F_CC (SUB).  In every other case ovloc.loc is LOC_VOID and the
    flags are released again after the high word instruction. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      keepflags : boolean;
      lowop,highop : tasmop;
      lhs,rhs : tregister64;
      carrycond : tresflags;
      highinstr : taicpu;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);
      keepflags:=setflags or tbasecgarm(cg).cgsetflags;
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations never need a carry between the halves }
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
          end;
        OP_ADD,OP_SUB:
          begin
            if op=OP_ADD then
              begin
                lowop:=A_ADD;
                highop:=A_ADC;
                lhs:=regsrc1;
                rhs:=regsrc2;
                carrycond:=F_CS;
              end
            else
              begin
                { subtraction computes regsrc2-regsrc1 }
                lowop:=A_SUB;
                highop:=A_SBC;
                lhs:=regsrc2;
                rhs:=regsrc1;
                { the ARM handles the carry of SUB differently from ADD,
                  so the opposite condition is reported here }
                carrycond:=F_CC;
              end;
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            { the low word always sets the flags, the high word consumes the carry }
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(lowop,regdst.reglo,lhs.reglo,rhs.reglo),PF_S));
            highinstr:=taicpu.op_reg_reg_reg(highop,regdst.reghi,lhs.reghi,rhs.reghi);
            if keepflags then
              begin
                list.concat(setoppostfix(highinstr,PF_S));
                if size=OS_64 then
                  begin
                    ovloc.loc:=LOC_FLAGS;
                    ovloc.resflags:=carrycond;
                  end;
              end
            else
              begin
                list.concat(highinstr);
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
          end;
        else
          internalerror(2003083104);
      end;
    end;
  { Creates the integer register allocator for Thumb code.  R0..R6 are always
    offered to the allocator; R7 is only added when it is not the frame
    pointer of the current procedure. }
  procedure tthumbcgarm.init_register_allocators;
    var
      r7_is_framepointer : boolean;
    begin
      inherited init_register_allocators;
      r7_is_framepointer:=assigned(current_procinfo) and
        (current_procinfo.framepointer=NR_R7);
      if r7_is_framepointer then
        { R7 is reserved, keep it away from the allocator }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocator objects and then lets
    the inherited implementation finish the clean up. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      { Free tolerates allocators that were never created }
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;

      inherited done_register_allocators;
    end;
  { Generates the Thumb procedure prologue.

    list         - assembler list the prologue is appended to
    localsize    - size of the locals in bytes, rounded up to a multiple of 4
    nostackframe - if true, no prologue code is generated at all

    The prologue pushes the used non-volatile integer registers together with
    LR (and R7 when a frame pointer other than SP is used), lowers SP by the
    size of the local area and finally copies SP into the frame pointer if one
    is used. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);

          { save int registers: everything used in the procedure that is not
            volatile according to the calling convention }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

          { LR is always saved; with a separate frame pointer R7 is saved too }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            include(regs,RS_R14);

          { safely estimate stack size: large frames need R4 as scratch
            register for the stack pointer adjustment below, so make sure it
            is saved here and restored by the epilogue }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;

          registerarea:=0;
          if regs<>[] then
            begin
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      { print both sizes to ease tracking down the mismatch }
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

              { a single SUB handles everything up to and including 508, the
                same limit g_proc_exit uses for the matching ADD; previously
                exactly 508 took the two instruction path and produced a
                useless "sub sp,sp,#0" }
              if localsize<=508 then
                list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4,
                    which has been pushed above }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                end;
            end;

          { set up the frame pointer as a copy of the final stack pointer }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
        end;
    end;
  { Generates the Thumb procedure epilogue.

    Without a stack frame only the return is emitted: BX LR if the selected
    CPU supports BX, MOV PC,LR otherwise.

    With a stack frame the size of the local area is recomputed the same way
    the prologue did.  If SP is the frame pointer, or the target is a Darwin
    system, SP is raised by that size and the saved registers are popped with
    PC in place of LR, which performs the return.  For any other frame pointer
    setup no code is emitted by this routine. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      sreg : byte;
      restoreset : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      has_stack_paras : Boolean;
    begin
      if nostackframe then
        begin
          { plain return through the link register }
          if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
            list.concat(taicpu.op_reg(A_BX,NR_R14))
          else
            list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
          exit;
        end;

      has_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore; PC is popped to return to the caller }
      restoreset:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(restoreset,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(restoreset,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for sreg:=RS_R0 to RS_R15 do
        if sreg in restoreset then
          inc(savedbytes,4);
      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if has_stack_paras then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; a size of zero never needs an instruction }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates, go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if restoreset<>[] then
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,restoreset))
      else if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BX,NR_R14))
      else
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg.

    If the source is at least as large as the destination, the load is done
    with the destination size.  References whose alignment (1 or 2) is smaller
    than the access size are read piecewise: the first byte or halfword goes
    directly into reg, the following pieces are loaded into a temporary
    register, shifted into position and ORed into reg.  The order in which the
    pieces are visited depends on the endianness of the target.  All other
    loads are passed to handle_load_store.

    An S8 to 16 bit load is followed by an OS_16 to OS_32 register move on
    reg. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg,tmpreg2 : tregister;
      dir : integer;

    { makes usedtmpref a reference that can be accessed piecewise: simple
      references are used as they are, anything else is first turned into
      an address held in a fresh register }
    procedure prepare_piecewise_ref;
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            tmpreg2:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,tmpreg2);
            reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          usedtmpref:=ref;
      end;

    { advances usedtmpref by step bytes, loads the next piece into tmpreg,
      shifts it left by shift bits and merges it into reg }
    procedure merge_next_piece(piecesize : tcgsize; step,shift : longint);
      begin
        inc(usedtmpref.offset,step);
        a_internal_load_ref_reg(list,piecesize,piecesize,usedtmpref,tmpreg);
        list.concat(taicpu.op_reg_const(A_LSL,tmpreg,shift));
        list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        { sufficiently aligned: a regular load does the job }
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { walk from the least significant piece upwards }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                prepare_piecewise_ref;
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                tmpreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                { the upper byte carries the sign of a signed halfword }
                if FromSize=OS_16 then
                  merge_next_piece(OS_8,dir,8)
                else
                  merge_next_piece(OS_S8,dir,8);
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                prepare_piecewise_ref;
                if ref.alignment=2 then
                  begin
                    { two halfword accesses }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    merge_next_piece(OS_16,dir*2,16);
                  end
                else
                  begin
                    { four byte accesses }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                    merge_next_piece(OS_8,dir,8);
                    merge_next_piece(OS_8,dir,16);
                    merge_next_piece(OS_8,dir,24);
                  end;
              end;
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg.  Values accepted by is_thumb_imm are loaded
    with a MOV, everything else is placed as a 32 bit constant in the local
    data of the current procedure and fetched with a PC relative LDR.
    Sizes other than 8, 16 or 32 bit raise an internal error. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { emit label and constant into the local data of the procedure }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry that was just appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      { and read it back relative to the program counter }
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the self parameter of procdef, wherever the caller
    side parameter location places it.

    Offsets accepted by is_thumb_imm are subtracted directly.  Other offsets
    are stored as a constant in the local data of the current procedure and
    loaded into R4, which is pushed before and popped after its use.

    Raises an internal error if procdef has no 'self' parameter symbol or if a
    location is neither a register nor a memory reference. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      paraloc : Pcgparalocation;

    { saves R4 on the stack and loads ioffset into it from a freshly
      emitted, 4 byte aligned constant }
    procedure push_r4_and_load_offset;
      var
        constref : treference;
        constlabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(constref,4,[]);
        current_asmdata.getjumplabel(constlabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,constlabel);
        constref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        constref.symbol:=constlabel;
        constref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,constref));
      end;

    { restores the R4 saved by push_r4_and_load_offset }
    procedure pop_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(2003052504);

      { adjust every location the self parameter is made of }
      paraloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while paraloc<>nil do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register)
                else
                  begin
                    push_r4_and_load_offset;
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R4,paraloc^.register);
                    pop_r4;
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,paraloc^.reference.index,paraloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,selfref)
                else
                  begin
                    push_r4_and_load_offset;
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R4,selfref);
                    pop_r4;
                  end;
              end
            else
              internalerror(2003091804);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  { Emits the load or store op/oppostfix of reg at ref via the inherited
    implementation and returns its result.

    Before doing so, references that the checks below consider unusable for
    the requested access are replaced: their address is computed into a new
    register with a_loadaddr_ref_reg and the access is done relative to that
    register with offset 0. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      href : treference;
      addrreg : TRegister;
      is_word_access,
      is_byte_access,
      is_half_access,
      touches_sp,
      need_address : boolean;
    begin
      href:=ref;

      { classify the access by opcode and postfix }
      is_word_access:=
        (oppostfix=PF_None) and ((op=A_LDR) or (op=A_STR));
      is_byte_access:=
        ((oppostfix=PF_B) and ((op=A_LDR) or (op=A_STR))) or
        ((oppostfix=PF_None) and ((op=A_LDRB) or (op=A_STRB)));
      is_half_access:=
        ((oppostfix=PF_H) and ((op=A_LDR) or (op=A_STR))) or
        ((oppostfix=PF_None) and ((op=A_LDRH) or (op=A_STRH)));
      touches_sp:=
        (ref.base=NR_STACK_POINTER_REG) or
        (ref.index=NR_STACK_POINTER_REG);

      need_address:=
        { LDR/STR limitations }
        (is_word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (is_byte_access and
         (touches_sp or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (is_half_access and
         (touches_sp or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word loads }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any remaining load with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if need_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(href,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  { Emits the two operand Thumb instruction for "dst := dst op src".

    list - assembler list the code is appended to
    op   - operation to perform
    size - operand size; passed to maybeadjustresult once the instruction has
           been emitted
    src  - source register
    dst  - destination register, also the first operand for binary operations

    OP_NEG and OP_NOT are unary and compute dst from src.  OP_DIV/OP_IDIV are
    not supported here and raise an internal error.  OP_ROL is only supported
    for 32 bit sizes and is implemented with ROR, see below. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { simulate ROL by ror'ing 32-value: tmpreg:=32-src }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the computed amount; rotating by src itself would
              turn the rotate left into a rotate right }
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            { the generic two operand form may change the flags }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits code for "dst := dst op a" with a constant operand.

    An ADD or SUB whose negated constant is accepted by is_thumb_imm is first
    turned into the opposite operation.  ADD/SUB with a constant accepted by
    is_thumb_imm use the immediate form of the instruction.  A few trivial
    multiplications, divisions and ANDs are simplified, shifts use the
    constant directly, and everything else loads the constant into a
    temporary register and defers to a_op_reg_reg.  In all cases
    maybeadjustresult is called for dst at the end.

    The code between ifdef DUMMY and endif is not compiled unless DUMMY is
    defined. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      { prefer the operation whose constant fits into an immediate }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          { immediate form; reporting an overflow location (F_CS for ADD,
            F_CC for SUB) is not implemented here yet }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
        end
      { there could be added some more sophisticated optimizations }
      else if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
        a_load_reg_reg(list,size,size,dst,dst)
      else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
        a_load_const_reg(list,size,0,dst)
      else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
        a_op_reg_reg(list,OP_NEG,size,dst,dst)
      { we do this here instead in the peephole optimizer because
        it saves us a register }
{$ifdef DUMMY}
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
        a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
      { for example : b=a*5 -> b=a*4+a with add instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{roozbeh does this ever happen?}
            internalerror(2003082903);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
        end
      { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{does this ever happen?}
            internalerror(2012051802);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
        end
      else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
        begin
          { nothing to do on success }
        end
{$endif DUMMY}
      { an AND with 0 just clears the register, this sometimes gets generated
        on 64bit ops; a plain mov might allow easier optimizations later }
      else if (op = OP_AND) and (dword(a)=0) then
        list.concat(taicpu.op_reg_const(A_MOV,dst,0))
      { an AND with $FFFFFFFF leaves the register unchanged }
      else if (op = OP_AND) and (not(dword(a))=0) then
        begin
          { nothing to emit }
        end
      { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
        broader range of shifterconstants.}
{$ifdef DUMMY}
      else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
        list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
      else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
        end
      else if (op in [OP_ADD, OP_SUB, OP_OR]) and
        not(cgsetflags or setflags) and
        split_into_shifter_const(a, imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
        end
{$endif DUMMY}
      else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
        list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a))
      else
        begin
          { general case: materialize the constant in a register first }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg(list,op,size,constreg,dst);
        end;

      maybeadjustresult(list,op,size,dst);
    end;
  { Emits code for "dst := src op a".  Adding a positive multiple of 4 of at
    most 1020 to R13 with a destination other than R13 is emitted as a single
    three operand ADD; all other cases use the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      is_sp_relative_add : boolean;
    begin
      is_sp_relative_add:=
        (op=OP_ADD) and
        (src=NR_R13) and (dst<>NR_R13) and
        ((a mod 4)=0) and (a>0) and (a<=1020);
      if not(is_sp_relative_add) then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Converts the flags condition f into a 0/1 value in reg using branches:
    a conditional jump selects between "mov reg,#0" and "mov reg,#1".  The
    default flags register is released as part of the emitted sequence. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      settrue,finished : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(settrue);
      current_asmdata.getjumplabel(finished);

      { condition holds -> continue at settrue }
      condjump:=taicpu.op_sym(A_B,settrue);
      condjump:=setcondition(condjump,flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition does not hold }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,finished));

      { condition holds }
      cg.a_label(list,settrue);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));
      a_reg_dealloc(list,NR_DEFAULTFLAGS);

      cg.a_label(list,finished);
    end;
  { Creates the register allocators for Thumb-2 code.

    Integer registers: R0..R10, R12 and R14, with R9 left out on iOS.
    FPU registers: F0..F7, only if the selected FPU has FPA capabilities.
    MM registers: chosen from the capabilities of the selected FPU, either
    32 double registers, 32 single registers or 16 double registers; no MM
    allocator is created if none of the tested capabilities is present. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if (target_info.system=system_arm_ios) then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            { 32 double precision registers }
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
              [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
               RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
              ],first_mm_imreg,[])
          else
            { 32 single precision registers }
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
              [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
               RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
               RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
              ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        { 16 double precision registers }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
          ],first_mm_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocator objects and then lets
    the inherited implementation finish the clean up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { Free tolerates allocators that were never created }
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg using BLX and marks the current
    procedure as one that performs calls. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);

      list.concat(taicpu.op_reg(A_BLX, reg));
    end;
  { Loads the constant a into reg, trying in this order:
      - MOV, if a is accepted by is_thumb32_imm
      - MVN of not(a), if not(a) is accepted by is_thumb32_imm
      - MOVW, if a fits into the lower 16 bits
      - a PC relative LDR of a 32 bit constant placed in the local data of
        the current procedure
    Sizes other than 8, 16 or 32 bit raise an internal error. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: go through the local data of the procedure }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry that was just appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads an integer of size fromsize from the memory reference Ref into
    register reg.

    The load postfix (B/SB/H/SH/none) is derived from the source size after
    it has been clamped to the target size. If the reference is only 1- or
    2-byte aligned and that alignment is smaller than the access size, the
    value is assembled from several narrower loads combined with
    ORR ...,LSL #n; otherwise a single LDR is emitted via handle_load_store.
    Sizes other than 8/16/32 bit raise internal error 2003082913. }
  4031. procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  4032. var
  4033. oppostfix:toppostfix;
  4034. usedtmpref: treference;
  4035. tmpreg,tmpreg2 : tregister;
  4036. so : tshifterop;
  4037. dir : integer;
  4038. begin
        { when the source is at least as wide as the target, load using the
          target size }
  4039. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  4040. FromSize := ToSize;
        { pick the load postfix matching the (possibly clamped) source size }
  4041. case FromSize of
  4042. { signed integer registers }
  4043. OS_8:
  4044. oppostfix:=PF_B;
  4045. OS_S8:
  4046. oppostfix:=PF_SB;
  4047. OS_16:
  4048. oppostfix:=PF_H;
  4049. OS_S16:
  4050. oppostfix:=PF_SH;
  4051. OS_32,
  4052. OS_S32:
  4053. oppostfix:=PF_None;
  4054. else
  4055. InternalError(2003082913);
  4056. end;
        { under-aligned access: the value has to be read piecewise }
  4057. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  4058. begin
        { dir is the offset step from one piece to the next more significant
          one: pieces are loaded least significant first, so on big endian
          targets the offset walks downwards }
  4059. if target_info.endian=endian_big then
  4060. dir:=-1
  4061. else
  4062. dir:=1;
  4063. case FromSize of
  4064. OS_16,OS_S16:
  4065. begin
  4066. { only complicated references need an extra loadaddr }
  4067. if assigned(ref.symbol) or
  4068. (ref.index<>NR_NO) or
  4069. (ref.offset<-255) or
  4070. (ref.offset>4094) or
  4071. { sometimes the compiler reused registers }
  4072. (reg=ref.index) or
  4073. (reg=ref.base) then
  4074. begin
  4075. tmpreg2:=getintregister(list,OS_INT);
  4076. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4077. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4078. end
  4079. else
  4080. usedtmpref:=ref;
        { big endian: the least significant byte is the second one }
  4081. if target_info.endian=endian_big then
  4082. inc(usedtmpref.offset,1);
  4083. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  4084. tmpreg:=getintregister(list,OS_INT);
        { low byte goes straight into reg }
  4085. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4086. inc(usedtmpref.offset,dir);
        { high byte: loaded unsigned for OS_16, signed for OS_S16 }
  4087. if FromSize=OS_16 then
  4088. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  4089. else
  4090. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
        { reg := reg or (tmpreg shl 8) }
  4091. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4092. end;
  4093. OS_32,OS_S32:
  4094. begin
  4095. tmpreg:=getintregister(list,OS_INT);
  4096. { only complicated references need an extra loadaddr }
  4097. if assigned(ref.symbol) or
  4098. (ref.index<>NR_NO) or
  4099. (ref.offset<-255) or
  4100. (ref.offset>4092) or
  4101. { sometimes the compiler reused registers }
  4102. (reg=ref.index) or
  4103. (reg=ref.base) then
  4104. begin
  4105. tmpreg2:=getintregister(list,OS_INT);
  4106. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4107. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4108. end
  4109. else
  4110. usedtmpref:=ref;
  4111. shifterop_reset(so);so.shiftmode:=SM_LSL;
        { 2-byte alignment: two halfword loads; otherwise four byte loads }
  4112. if ref.alignment=2 then
  4113. begin
  4114. if target_info.endian=endian_big then
  4115. inc(usedtmpref.offset,2);
  4116. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  4117. inc(usedtmpref.offset,dir*2);
  4118. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  4119. so.shiftimm:=16;
  4120. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4121. end
  4122. else
  4123. begin
        { bytes are merged at bit positions 0, 8, 16 and 24 in turn }
  4124. if target_info.endian=endian_big then
  4125. inc(usedtmpref.offset,3);
  4126. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4127. inc(usedtmpref.offset,dir);
  4128. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4129. so.shiftimm:=8;
  4130. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4131. inc(usedtmpref.offset,dir);
  4132. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4133. so.shiftimm:=16;
  4134. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4135. inc(usedtmpref.offset,dir);
  4136. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4137. so.shiftimm:=24;
  4138. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4139. end;
  4140. end
        { remaining sizes are loaded with one instruction }
  4141. else
  4142. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  4143. end;
  4144. end
        { sufficiently aligned: one load instruction }
  4145. else
  4146. handle_load_store(list,A_LDR,oppostfix,reg,ref);
        { NOTE(review): a signed byte loaded for a 16 bit unsigned target is
          passed through an OS_16 -> OS_32 register move afterwards;
          presumably this clears the upper halfword produced by the sign
          extension - confirm }
  4147. if (fromsize=OS_S8) and (tosize = OS_16) then
  4148. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  4149. end;
  { Two-operand register operation "dst := op(src[,dst])".

    Only OP_NOT is handled here: an MVN dst,src is emitted, followed for
    8 and 16 bit sizes by the matching zero/sign extension of dst
    (UXTB/SXTB/UXTH/SXTH). 32 bit sizes need no extension; any other size
    raises internal error 2019050916. Every other operation is delegated to
    the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      extendop : tasmop;
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      { the complement itself is emitted before the size is inspected }
      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { A_NONE means: no extension instruction required }
      extendop:=A_NONE;
      case size of
        OS_8:
          extendop:=A_UXTB;
        OS_S8:
          extendop:=A_SXTB;
        OS_16:
          extendop:=A_UXTH;
        OS_S16:
          extendop:=A_SXTH;
        OS_32,
        OS_S32:
          ;
        else
          internalerror(2019050916);
      end;
      if extendop<>A_NONE then
        list.concat(taicpu.op_reg_reg(extendop,dst,dst));
    end;
  { Emits code for "dst := src op a" with a constant operand a.

    list     : instruction list the code is appended to
    op       : operation to perform; OP_NEG, OP_NOT, OP_DIV and OP_IDIV
               raise internal error 200308285 when the constant is
               encodable as a shifter constant
    size     : operand size, forwarded to helper loads and to
               maybeadjustresult
    a        : the constant operand
    src, dst : source and destination registers
    setflags : request a flag-setting instruction (combined with the
               cgsetflags field)
    ovloc    : receives LOC_VOID, or LOC_FLAGS when flags were requested on
               the immediate ADD/SUB path for unsigned sizes

    The routine first tries to turn ADD/SUB of a negative constant into the
    opposite operation, then uses a shifter-operand/immediate form when a is
    encodable, and otherwise falls back to a list of special cases before
    loading a into a temporary register. }
  4170. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4171. var
  4172. shift, width : byte;
  4173. tmpreg : tregister;
  4174. so : tshifterop;
  4175. l1 : longint;
  4176. begin
  4177. ovloc.loc:=LOC_VOID;
        { if -a is a shifter constant, swap ADD and SUB and negate a;
          -2147483648 is excluded explicitly }
  4178. if (a<>-2147483648) and is_shifter_const(-a,shift) then
  4179. case op of
  4180. OP_ADD:
  4181. begin
  4182. op:=OP_SUB;
  4183. a:=aint(dword(-a));
  4184. end;
  4185. OP_SUB:
  4186. begin
  4187. op:=OP_ADD;
  4188. a:=aint(dword(-a));
  4189. end
  4190. else
  4191. ;
  4192. end;
        { a is encodable and this is not a multiplication: shifts and rotates
          become a MOV with shifter operand, everything else an immediate or
          register form }
  4193. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4194. case op of
  4195. OP_NEG,OP_NOT,
  4196. OP_DIV,OP_IDIV:
  4197. internalerror(200308285);
  4198. OP_SHL:
  4199. begin
  4200. if a>32 then
  4201. internalerror(2014020703);
  4202. if a<>0 then
  4203. begin
  4204. shifterop_reset(so);
  4205. so.shiftmode:=SM_LSL;
  4206. so.shiftimm:=a;
  4207. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4208. end
  4209. else
  4210. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4211. end;
  4212. OP_ROL:
  4213. begin
  4214. if a>32 then
  4215. internalerror(2014020704);
  4216. if a<>0 then
  4217. begin
        { rotate left by a is emitted as rotate right by 32-a }
  4218. shifterop_reset(so);
  4219. so.shiftmode:=SM_ROR;
  4220. so.shiftimm:=32-a;
  4221. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4222. end
  4223. else
  4224. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4225. end;
  4226. OP_ROR:
  4227. begin
  4228. if a>32 then
  4229. internalerror(2014020705);
  4230. if a<>0 then
  4231. begin
  4232. shifterop_reset(so);
  4233. so.shiftmode:=SM_ROR;
  4234. so.shiftimm:=a;
  4235. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4236. end
  4237. else
  4238. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4239. end;
  4240. OP_SHR:
  4241. begin
  4242. if a>32 then
  4243. internalerror(200308292);
  4244. shifterop_reset(so);
  4245. if a<>0 then
  4246. begin
  4247. so.shiftmode:=SM_LSR;
  4248. so.shiftimm:=a;
  4249. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4250. end
  4251. else
  4252. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4253. end;
  4254. OP_SAR:
  4255. begin
  4256. if a>32 then
  4257. internalerror(200308295);
  4258. if a<>0 then
  4259. begin
  4260. shifterop_reset(so);
  4261. so.shiftmode:=SM_ASR;
  4262. so.shiftimm:=a;
  4263. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4264. end
  4265. else
  4266. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4267. end;
  4268. else
        { ADD/SUB with a constant outside 0..4095: load it into a temporary
          and use the three-register form }
  4269. if (op in [OP_SUB, OP_ADD]) and
  4270. ((a < 0) or
  4271. (a > 4095)) then
  4272. begin
  4273. tmpreg:=getintregister(list,size);
  4274. a_load_const_reg(list, size, a, tmpreg);
  4275. if cgsetflags or setflags then
  4276. a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the postfix expression evaluates to PF_S when flags are requested
          and to the postfix with ordinal 0 otherwise }
  4277. list.concat(setoppostfix(
  4278. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4279. end
  4280. else
  4281. begin
  4282. if cgsetflags or setflags then
  4283. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4284. list.concat(setoppostfix(
  4285. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4286. end;
        { report the overflow condition for unsigned sizes: carry set after
          ADD, carry clear after SUB.
          NOTE(review): for any other op ovloc.loc becomes LOC_FLAGS while
          ovloc.resflags is left unassigned here - confirm callers never
          rely on it }
  4287. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4288. begin
  4289. ovloc.loc:=LOC_FLAGS;
  4290. case op of
  4291. OP_ADD:
  4292. ovloc.resflags:=F_CS;
  4293. OP_SUB:
  4294. ovloc.resflags:=F_CC;
  4295. else
  4296. ;
  4297. end;
  4298. end;
  4299. end
  4300. else
  4301. begin
        { a is not encodable, or op is a multiplication: try cheaper
          equivalents before falling back to a temporary register }
  4302. { there could be added some more sophisticated optimizations }
  4303. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4304. a_load_reg_reg(list,size,size,src,dst)
  4305. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4306. a_load_const_reg(list,size,0,dst)
  4307. else if (op in [OP_IMUL]) and (a=-1) then
  4308. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4309. { we do this here instead in the peephole optimizer because
  4310. it saves us a register }
  4311. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4312. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4313. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4314. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4315. begin
  4316. if l1>32 then{roozbeh does this ever happen?}
  4317. internalerror(2003082911);
  4318. shifterop_reset(so);
  4319. so.shiftmode:=SM_LSL;
  4320. so.shiftimm:=l1;
  4321. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4322. end
  4323. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4324. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4325. begin
  4326. if l1>32 then{does this ever happen?}
  4327. internalerror(2012051803);
  4328. shifterop_reset(so);
  4329. so.shiftmode:=SM_LSL;
  4330. so.shiftimm:=l1;
  4331. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4332. end
  4333. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4334. begin
  4335. { nothing to do on success }
  4336. end
  4337. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4338. Just using mov x, #0 might allow some easier optimizations down the line. }
  4339. else if (op = OP_AND) and (dword(a)=0) then
  4340. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4341. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4342. else if (op = OP_AND) and (not(dword(a))=0) then
  4343. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4344. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4345. broader range of shifterconstants.}
  4346. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4347. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  4348. else if (op = OP_AND) and is_thumb32_imm(a) then
  4349. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
        { AND with $FFFF is a halfword zero extension }
  4350. else if (op = OP_AND) and (a = $FFFF) then
  4351. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
        { the complement is an immediate: clear those bits with BIC }
  4352. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4353. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
        { the bits to clear form one continuous run: copy src and clear the
          run with BFC }
  4354. else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
  4355. begin
  4356. a_load_reg_reg(list,size,size,src,dst);
  4357. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4358. end
  4359. else
  4360. begin
        { generic fallback: constant into a temporary, then the
          register/register variant (which also fills in ovloc) }
  4361. tmpreg:=getintregister(list,size);
  4362. a_load_const_reg(list,size,a,tmpreg);
  4363. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4364. end;
  4365. end;
        { NOTE(review): presumably fixes up dst for sub-32-bit sizes - see
          maybeadjustresult for the exact contract }
  4366. maybeadjustresult(list,op,size,dst);
  4367. end;
  { Opcode table indexed by the generic operation (TOpCG). It is consulted by
    tthumb2cgarm.a_op_reg_reg_reg_checkoverflow for the plain three-register
    form; A_NONE marks operations for which this table provides no opcode. }
  4368. const
  4369. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4370. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4371. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Emits code for the three-register operation "dst := src2 op src1".

    list       : instruction list the code is appended to
    op         : operation; OP_NEG and OP_NOT are not valid here and raise
                 internal error 200308286
    size       : operand size; OP_ROL and OP_ROR accept 32 bit sizes only
    src1, src2 : source registers (src1 is the right-hand operand); neither
                 is modified
    dst        : destination register
    setflags   : request flag-setting code (combined with the cgsetflags
                 field)
    ovloc      : set to LOC_VOID, or for multiplications with flags
                 requested to LOC_FLAGS/F_NE, which is true when the upper
                 half of the 64 bit product shows an overflow

    Multiplications avoid using the same register as destination and first
    multiplicand; when dst, src1 and src2 are all the same register the
    operand is first copied into a temporary register. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the adjusted count is built
              in a scratch register so that the caller's src1 is left
              untouched }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: the upper 32 bits land in overflowreg and
                  are inspected afterwards }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: copy the operand into the temporary
                          that is used as first multiplicand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper word must equal the sign extension
                      of the lower word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: copy the operand into the temporary
                          that is used as first multiplicand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { the postfix expression evaluates to PF_S when flags are
              requested and to the postfix with ordinal 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the flag condition f as a boolean in reg: an ITE block is
    emitted whose "then" instruction moves 1 into reg when the condition
    holds and whose "else" instruction moves 0 into reg otherwise.
    The size parameter is not consulted. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      { IT block header covering the two conditional moves }
      list.concat(taicpu.op_cond(A_ITE, truecond));
      { condition holds }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      { condition does not hold }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Emits the procedure prologue.

    list         : instruction list the prologue is appended to
    localsize    : size of the local stack area; rounded up to a multiple
                   of 4 first
    nostackframe : when true nothing is emitted

    The prologue stores the non-volatile integer registers in use (plus
    frame pointer/R14 where needed) with STM, sets up the frame pointer if
    one is used, reserves the local stack area and finally stores the
    non-volatile FPU registers F0..F7 in use with SFM. }
  4487. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4488. var
  4489. ref : treference;
  4490. shift : byte;
  4491. firstfloatreg,lastfloatreg,
  4492. r : byte;
  4493. regs : tcpuregisterset;
  4494. stackmisalignment: pint;
  4495. begin
  4496. LocalSize:=align(LocalSize,4);
  4497. { call instruction does not put anything on the stack }
  4498. stackmisalignment:=0;
  4499. if not(nostackframe) then
  4500. begin
  4501. firstfloatreg:=RS_NO;
  4502. lastfloatreg:=RS_NO;
  4503. { save floating point registers? }
        { find the first and last used non-volatile FPU register; each one
          found adds 12 bytes to the running misalignment }
  4504. for r:=RS_F0 to RS_F7 do
  4505. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4506. begin
  4507. if firstfloatreg=RS_NO then
  4508. firstfloatreg:=r;
  4509. lastfloatreg:=r;
  4510. inc(stackmisalignment,12);
  4511. end;
  4512. a_reg_alloc(list,NR_STACK_POINTER_REG);
        { with a separate frame pointer the entry value of SP is kept in R12 }
  4513. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4514. begin
  4515. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4516. a_reg_alloc(list,NR_R12);
  4517. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4518. end;
  4519. { save int registers }
        { the store-multiple addresses memory through SP, pre-indexed }
  4520. reference_reset(ref,4,[]);
  4521. ref.index:=NR_STACK_POINTER_REG;
  4522. ref.addressmode:=AM_PREINDEXED;
  4523. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
        { with a frame pointer both it and R14 are always saved; without one
          R14 is saved when other registers are saved or calls are made }
  4524. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4525. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4526. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4527. include(regs,RS_R14);
  4528. if regs<>[] then
  4529. begin
        { every stored register occupies 4 bytes }
  4530. for r:=RS_R0 to RS_R15 do
  4531. if (r in regs) then
  4532. inc(stackmisalignment,4);
  4533. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4534. end;
  4535. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4536. begin
  4537. { the framepointer now points to the saved R15, so the saved
  4538. framepointer is at R11-12 (for get_caller_frame) }
  4539. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4540. a_reg_dealloc(list,NR_R12);
  4541. end;
        { only the remainder relative to the maximum local alignment matters }
  4542. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4543. if (LocalSize<>0) or
  4544. ((stackmisalignment<>0) and
  4545. ((pi_do_call in current_procinfo.flags) or
  4546. (po_assembler in current_procinfo.procdef.procoptions))) then
  4547. begin
        { pad the local area so that saved registers plus locals end on an
          aligned boundary }
  4548. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
        { size not encodable as a shifter constant: go through R12 }
  4549. if not(is_shifter_const(localsize,shift)) then
  4550. begin
  4551. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4552. a_reg_alloc(list,NR_R12);
  4553. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4554. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4555. a_reg_dealloc(list,NR_R12);
  4556. end
  4557. else
  4558. begin
        { NOTE(review): R12 is deallocated here although this branch never
          allocates it - confirm this is intended }
  4559. a_reg_dealloc(list,NR_R12);
  4560. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4561. end;
  4562. end;
        { store the used non-volatile FPU registers at floatregstart relative
          to the frame pointer; when the offset is 1023 or more (after
          applying tg.direction) the address is computed in R12 first }
  4563. if firstfloatreg<>RS_NO then
  4564. begin
  4565. reference_reset(ref,4,[]);
  4566. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4567. begin
  4568. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4569. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4570. ref.base:=NR_R12;
  4571. end
  4572. else
  4573. begin
  4574. ref.base:=current_procinfo.framepointer;
  4575. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4576. end;
  4577. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4578. lastfloatreg-firstfloatreg+1,ref));
  4579. end;
  4580. end;
  4581. end;
  { Emits the procedure epilogue.

    list         : instruction list the epilogue is appended to
    parasize     : not consulted by this implementation
    nostackframe : when true only "BX R14" is emitted

    With a stack frame the routine reloads the non-volatile FPU registers
    in use (LFM), releases the local stack area and then either returns
    with "BX R14" (no registers to restore) or restores the saved integer
    registers with LDM, loading the return address directly into R15. }
  4582. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4583. var
  4584. ref : treference;
  4585. firstfloatreg,lastfloatreg,
  4586. r : byte;
  4587. shift : byte;
  4588. regs : tcpuregisterset;
  4589. LocalSize : longint;
  4590. stackmisalignment: pint;
  4591. begin
  4592. if not(nostackframe) then
  4593. begin
  4594. stackmisalignment:=0;
  4595. { restore floating point register }
  4596. firstfloatreg:=RS_NO;
  4597. lastfloatreg:=RS_NO;
  4598. { save floating point registers? }
        { find the first and last used non-volatile FPU register }
  4599. for r:=RS_F0 to RS_F7 do
  4600. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4601. begin
  4602. if firstfloatreg=RS_NO then
  4603. firstfloatreg:=r;
  4604. lastfloatreg:=r;
  4605. { floating point register space is already included in
  4606. localsize below by calc_stackframe_size
  4607. inc(stackmisalignment,12);
  4608. }
  4609. end;
        { reload the FPU registers from floatregstart relative to the frame
          pointer; when the offset is 1023 or more (after applying
          tg.direction) the address is computed in R12 first }
  4610. if firstfloatreg<>RS_NO then
  4611. begin
  4612. reference_reset(ref,4,[]);
  4613. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4614. begin
  4615. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4616. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4617. ref.base:=NR_R12;
  4618. end
  4619. else
  4620. begin
  4621. ref.base:=current_procinfo.framepointer;
  4622. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4623. end;
  4624. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4625. lastfloatreg-firstfloatreg+1,ref));
  4626. end;
  4627. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
        { when registers are restored, R15 takes the place of R14 in the set
          so that the load-multiple also performs the return }
  4628. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4629. begin
  4630. exclude(regs,RS_R14);
  4631. include(regs,RS_R15);
  4632. end;
  4633. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4634. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
        { every restored register occupies 4 bytes }
  4635. for r:=RS_R0 to RS_R15 do
  4636. if (r in regs) then
  4637. inc(stackmisalignment,4);
  4638. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4639. LocalSize:=current_procinfo.calc_stackframe_size;
  4640. if (LocalSize<>0) or
  4641. ((stackmisalignment<>0) and
  4642. ((pi_do_call in current_procinfo.flags) or
  4643. (po_assembler in current_procinfo.procdef.procoptions))) then
  4644. begin
        { same padding formula as used when the frame was set up in
          g_proc_entry }
  4645. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
        { size not encodable as a shifter constant: go through R12 }
  4646. if not(is_shifter_const(LocalSize,shift)) then
  4647. begin
  4648. a_reg_alloc(list,NR_R12);
  4649. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4650. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4651. a_reg_dealloc(list,NR_R12);
  4652. end
  4653. else
  4654. begin
        { NOTE(review): R12 is deallocated here although this branch never
          allocates it - confirm this is intended }
  4655. a_reg_dealloc(list,NR_R12);
  4656. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4657. end;
  4658. end;
        { nothing was saved: plain return through R14 }
  4659. if regs=[] then
  4660. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4661. else
  4662. begin
        { restore the saved registers through SP, pre-indexed; the set
          contains R15, so this also returns }
  4663. reference_reset(ref,4,[]);
  4664. ref.index:=NR_STACK_POINTER_REG;
  4665. ref.addressmode:=AM_PREINDEXED;
  4666. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4667. end;
  4668. end
  4669. else
  4670. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4671. end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" after rewriting
    ref into a form the instruction can address.

    list      : instruction list the code is appended to
    op        : load/store opcode
    oppostfix : postfix applied to the emitted instruction
    reg       : register that is loaded or stored
    ref       : memory reference; passed by value and modified locally

    Returns the reference as it was finally used by the instruction.

    Helper instructions are emitted when the reference carries a symbol that
    is not pc-relative, has neither base nor index, has an offset outside
    the accepted range for the instruction, combines base, index and offset,
    or uses an index with one of the floating point/VFP opcodes. }
  4672. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4673. var
  4674. tmpreg : tregister;
  4675. tmpref : treference;
  4676. l : tasmlabel;
  4677. begin
        { tmpreg=NR_NO records that no scratch register has been taken yet }
  4678. tmpreg:=NR_NO;
  4679. { Be sure to have a base register }
  4680. if (ref.base=NR_NO) then
  4681. begin
  4682. if ref.shiftmode<>SM_None then
  4683. internalerror(2014020706);
  4684. ref.base:=ref.index;
  4685. ref.index:=NR_NO;
  4686. end;
  4687. { absolute symbols can't be handled directly, we've to store the symbol reference
  4688. in the text segment and access it pc relative
  4689. For now, we assume that references where base or index equals to PC are already
  4690. relative, all other references are assumed to be absolute and thus they need
  4691. to be handled extra.
  4692. A proper solution would be to change refoptions to a set and store the information
  4693. if the symbol is absolute or relative there.
  4694. }
        { NOTE(review): the (ref.offset<-255) test inside the PF_SB/PF_H/PF_SH
          clause below repeats the unconditional test two lines above it }
  4695. if (assigned(ref.symbol) and
  4696. not(is_pc(ref.base)) and
  4697. not(is_pc(ref.index))
  4698. ) or
  4699. { [#xxx] isn't a valid address operand }
  4700. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4701. //(ref.offset<-4095) or
  4702. (ref.offset<-255) or
  4703. (ref.offset>4095) or
  4704. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4705. ((ref.offset<-255) or
  4706. (ref.offset>255)
  4707. )
  4708. ) or
  4709. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4710. ((ref.offset<-1020) or
  4711. (ref.offset>1020) or
  4712. ((abs(ref.offset) mod 4)<>0) or
  4713. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4714. assigned(ref.symbol)
  4715. )
  4716. ) then
  4717. begin
  4718. reference_reset(tmpref,4,[]);
  4719. { load symbol }
  4720. tmpreg:=getintregister(list,OS_INT);
  4721. if assigned(ref.symbol) then
  4722. begin
        { place an entry for the symbol (in the form required by
          ref.refaddr) in the procedure's local data and load it
          pc-relative into tmpreg }
  4723. current_asmdata.getjumplabel(l);
  4724. cg.a_label(current_procinfo.aktlocaldata,l);
  4725. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4726. if ref.refaddr=addr_gottpoff then
  4727. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  4728. else if ref.refaddr=addr_tlsgd then
  4729. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  4730. else if ref.refaddr=addr_tlsdesc then
  4731. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  4732. else if ref.refaddr=addr_tpoff then
  4733. begin
  4734. if assigned(ref.relsymbol) or (ref.offset<>0) then
  4735. Internalerror(2019092807);
  4736. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  4737. end
  4738. else
  4739. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4740. { load consts entry }
  4741. tmpref.symbol:=l;
  4742. tmpref.base:=NR_R15;
  4743. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4744. { in case of LDF/STF, we got rid of the NR_R15 }
  4745. if is_pc(ref.base) then
  4746. ref.base:=NR_NO;
  4747. if is_pc(ref.index) then
  4748. ref.index:=NR_NO;
  4749. end
  4750. else
        { no symbol: only the offset is out of range, load it as a constant }
  4751. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
        { fold tmpreg into the reference: add it to the base when an index
          is already present, otherwise use it as index (or as base when
          there is no base at all) }
  4752. if (ref.base<>NR_NO) then
  4753. begin
  4754. if ref.index<>NR_NO then
  4755. begin
  4756. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4757. ref.base:=tmpreg;
  4758. end
  4759. else
  4760. begin
  4761. ref.index:=tmpreg;
  4762. ref.shiftimm:=0;
  4763. ref.signindex:=1;
  4764. ref.shiftmode:=SM_None;
  4765. end;
  4766. end
  4767. else
  4768. ref.base:=tmpreg;
        { offset and symbol are now contained in tmpreg }
  4769. ref.offset:=0;
  4770. ref.symbol:=nil;
  4771. end;
        { base, index and offset cannot be combined: fold the offset into a
          register }
  4772. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4773. begin
  4774. if tmpreg<>NR_NO then
  4775. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4776. else
  4777. begin
  4778. tmpreg:=getintregister(list,OS_ADDR);
  4779. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4780. ref.base:=tmpreg;
  4781. end;
  4782. ref.offset:=0;
  4783. end;
  4784. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  4785. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4786. begin
  4787. tmpreg:=getintregister(list,OS_ADDR);
  4788. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4789. ref.base := tmpreg;
  4790. end;
  4791. { floating point operations have only limited references
  4792. we expect here, that a base is already set }
        { for these opcodes any index register is folded into the base by an
          explicit ADD/SUB (SUB when signindex is negative) }
  4793. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4794. begin
  4795. if ref.shiftmode<>SM_none then
  4796. internalerror(2003091202);
  4797. if tmpreg<>NR_NO then
  4798. begin
        { a scratch register already exists: reuse it as the new base }
  4799. if ref.base=tmpreg then
  4800. begin
  4801. if ref.signindex<0 then
  4802. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4803. else
  4804. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4805. ref.index:=NR_NO;
  4806. end
  4807. else
  4808. begin
  4809. if ref.index<>tmpreg then
  4810. internalerror(2004031602);
  4811. if ref.signindex<0 then
  4812. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4813. else
  4814. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4815. ref.base:=tmpreg;
  4816. ref.index:=NR_NO;
  4817. end;
  4818. end
  4819. else
  4820. begin
        { no scratch register yet: take one for base+index }
  4821. tmpreg:=getintregister(list,OS_ADDR);
  4822. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4823. ref.base:=tmpreg;
  4824. ref.index:=NR_NO;
  4825. end;
  4826. end;
        { finally emit the access itself and hand back the reference used }
  4827. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4828. Result := ref;
  4829. end;
  { Register-to-register move between MM registers.

    Only the single precision case (fromsize=tosize=OS_F32) generates code:
    a VMOV.F32 reg2,reg1 that is also registered as a move instruction via
    add_move_instruction. For every other size combination - including
    OS_F64 -> OS_F64 and OS_F32 -> OS_F64, whose code is disabled in this
    implementation - nothing is emitted. The shuffle parameter is not
    consulted. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      { anything but single -> single emits no code }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads MM register reg from memory reference ref with a VLDR; making the
    reference addressable is left to handle_load_store. fromsize, tosize
    and shuffle are not consulted. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      { the reference returned by handle_load_store is not needed here }
      handle_load_store(list,A_VLDR,PF_None,reg,ref);
    end;
  { Stores MM register reg to memory reference ref with a VSTR; making the
    reference addressable is left to handle_load_store. fromsize, tosize
    and shuffle are not consulted. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      { VSTR cannot generate an FPU exception, so we do not need a check here;
        the reference returned by handle_load_store is not needed either }
      handle_load_store(list,A_VSTR,PF_None,reg,ref);
    end;
  { Moves integer register intreg into MM register mmreg with a VMOV.
    Only a target size of OS_F32 is supported; anything else raises
    internal error 2012100813. fromsize and shuffle are not consulted. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { a shuffle=nil requirement is deliberately not enforced }
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves the contents of the MM register mmreg into the integer register
    intreg using a plain VMOV.

    Only fromsize = OS_F32 is supported; any other source size triggers
    internalerror 2012100814.  tosize and shuffle are not consulted
    (the shuffle=nil test is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          { VMOV cannot generate an FPU exception, so we do not need a check here }
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    list   - instruction list that receives the generated code
    op     - operation to perform
    size   - operand size, only forwarded to the inherited implementation
    regsrc - source register pair
    regdst - destination register pair

    Only OP_NEG is handled here; every other operation is passed on to the
    inherited a_op64_reg_reg.  The negation is emitted as
      RSBS dst.lo, src.lo, #0
      MOV  tmp, #0
      SBC  dst.hi, tmp, src.hi
    with the default flags register marked as allocated around the whole
    sequence. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          { low word: 0 - src.lo, the S postfix makes the result update the flags }
          list.concat(
            setoppostfix(
              taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),
              PF_S));
          { high word: 0 - src.hi with the zero held in a scratch register }
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
    end;
  { 64 bit register/register operation for Thumb, built from two-operand
    32 bit instructions (regdst := regdst op regsrc, or regdst := op regsrc
    for the unary operations).

    list   - instruction list that receives the generated code
    op     - operation to perform; OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR,
             OP_ADD and OP_SUB are supported, anything else raises
             internalerror 2003083105
    size   - not consulted by this implementation
    regsrc - source register pair
    regdst - destination register pair

    For OP_NEG, OP_ADD and OP_SUB the low and the high word are combined by
    an instruction pair (SUB/SBC or ADD/ADC); the default flags register is
    marked as allocated for the duration of that pair and released again
    afterwards.
    NOTE(review): OP_NEG zeroes regdst before it reads regsrc, so it appears
    to require that regdst and regsrc do not share registers - confirm
    against the callers. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { dst := 0 - src }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { release the flags again, as the OP_NEG branch does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { release the flags again, as the OP_NEG branch does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb (reg := reg op value),
    built from two-operand 32 bit instructions.

    list  - instruction list that receives the generated code
    op    - operation to perform; OP_AND, OP_OR, OP_XOR, OP_ADD and OP_SUB
            are supported, anything else raises internalerror 2003083106
    size  - not consulted by this implementation
    value - 64 bit constant operand
    reg   - register pair that is both operand and destination

    For OP_ADD and OP_SUB the low word of the constant is used directly as an
    immediate when it lies in 0..255, otherwise it is loaded into a scratch
    register first.  The high word is always loaded into a scratch register
    and combined with ADC/SBC.  The default flags register is marked as
    allocated right before the low word instruction and released after the
    high word instruction.
    NOTE(review): the high word constant is loaded between the two arithmetic
    instructions; this relies on a_load_const_reg not disturbing the carry
    flag - confirm. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(taicpu.op_reg_reg(A_ADC,reg.reghi,tmpreg));
            { the carry has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            { aint cast added so the high word is passed the same way as in OP_ADD }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg));
            { the borrow has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the code generator objects for the selected instruction set.

    GenerateThumb2Code is tested first, then GenerateThumbCode; if neither is
    set the plain ARM generators are created.  Each variant assigns cg and
    cg64; casmoptimizer is assigned for Thumb-2 and ARM only, the Thumb
    variant leaves it untouched (its assignment is disabled). }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { default: classic ARM code }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5009. end.