2
0

cgcpu.pas 217 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  28. { tbasecgarm is shared between all arm architectures: tcgarm and tthumbcgarm,
  declared further down in this unit, both derive from it }
  29. tbasecgarm = class(tcg)
  30. { true, if the next arithmetic operation should modify the flags }
  31. cgsetflags : boolean;
  { loads the constant a into the simple parameter location paraloc, which is
    either a register or a memory reference }
  32. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  33. protected
  { copies 4 bytes with g_concatcopy when an OS_F64 parameter is passed in an
    OS_32 sized location, every other case is left to the inherited method }
  34. procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  35. public
  { passes the address of r in paraloc; for a memory location the address is
    computed into a temporary register first and then stored }
  36. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
  { emits a BL to the symbol s (a weak reference if weak is true) and sets
    pi_do_call in the flags of the current procedure }
  37. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  { indirect call through reg: BLX if the cpu has CPUARM_HAS_BLX, otherwise
    mov r14,pc followed by mov pc,reg; also sets pi_do_call }
  38. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  39. { move instructions }
  40. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  41. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
  { the a_internal_* variants return a treference; tcgarm.a_op_const_ref reuses
    the reference returned by the load for the following store }
  42. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  43. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  44. { fpu move instructions }
  45. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  46. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  47. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  48. procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
  49. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  50. { comparison operations }
  51. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  52. l : tasmlabel);override;
  53. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  54. procedure a_jmp_name(list : TAsmList;const s : string); override;
  55. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  56. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  57. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  58. procedure g_profilecode(list : TAsmList); override;
  59. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  60. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  61. procedure g_maybe_got_init(list : TAsmList); override;
  62. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
  63. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  64. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
  65. procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  66. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  67. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  68. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  69. procedure g_save_registers(list : TAsmList);override;
  70. procedure g_restore_registers(list : TAsmList);override;
  71. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  72. procedure fixref(list : TAsmList;var ref : treference);
  { virtual: overridden by tthumbcgarm and tthumb2cgarm below }
  73. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
  74. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  75. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  76. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  77. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  78. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  79. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  80. { Transform unsupported methods into Internal errors }
  81. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
  82. { try to generate an optimized 32 bit multiplication, returns true if the code was generated successfully }
  83. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  84. { clear out potential overflow bits from 8 or 16 bit operations, i.e.
  85. the upper 24/16 bits of a register after an operation }
  86. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  87. { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
  88. procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  89. procedure g_maybe_tls_init(list : TAsmList); override;
  90. end;
  91. { tcgarm is shared between normal arm and thumb-2: tarmcgarm and tthumb2cgarm
  below derive from it }
  92. tcgarm = class(tbasecgarm)
  { two operand form, forwards to a_op_const_reg_reg with reg as source and destination }
  93. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  { loads ref into a temporary register, applies the operation and stores the result back }
  94. procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
  { OP_NEG and OP_NOT are handled directly, every other operation is forwarded
    to a_op_reg_reg_reg with dst as second source }
  95. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  { the next two forward to their *_checkoverflow counterparts with setflags=false }
  96. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  97. size: tcgsize; a: tcgint; src, dst: tregister); override;
  98. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  99. size: tcgsize; src1, src2, dst: tregister); override;
  100. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  101. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  { loads a constant of at most 32 bit with mov, mvn, mov+orr, mvn+bic or, as
    last resort, a pc relative ldr from the local data of the procedure }
  102. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  { loads from memory, falling back to byte/halfword wise loads for references
    with an alignment of 1 or 2 that is below the access size }
  103. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  { subtracts ioffset from every location of the self parameter of procdef }
  104. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  105. {Multiply two 32-bit registers into lo and hi 32-bit registers}
  106. procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  107. end;
  108. { normal arm cg: adds only the creation and release of the register
  allocators (integer, fpu and mm) on top of tcgarm }
  109. tarmcgarm = class(tcgarm)
  110. procedure init_register_allocators;override;
  111. procedure done_register_allocators;override;
  112. end;
  113. { 64 bit cg for all arm flavours: declares no members of its own, it is the
  common ancestor of tcg64farm and tthumbcg64farm }
  114. tbasecg64farm = class(tcg64f32)
  115. end;
  116. { tcg64farm is shared between normal arm and thumb-2: tarmcg64farm and
  tthumb2cg64farm below derive from it }
  117. tcg64farm = class(tbasecg64farm)
  { 64 bit operations on register pairs (tregister64) }
  118. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  119. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  120. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  121. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
  122. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  123. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  { moves between a 64 bit integer register pair and an mm register }
  124. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  125. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  126. end;
  { 64 bit code generator type of the tarmcgarm family; it adds nothing to tcg64farm }
  127. tarmcg64farm = class(tcg64farm)
  128. end;
  { thumb code generator: derives directly from tbasecgarm, not from tcgarm, and
    therefore brings its own a_op_*, a_load_* and handle_load_store overrides }
  129. tthumbcgarm = class(tbasecgarm)
  130. procedure init_register_allocators;override;
  131. procedure done_register_allocators;override;
  132. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  133. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  134. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
  135. procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
  136. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
  137. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  138. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
  139. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  140. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  141. function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  142. end;
  { 64 bit code generator for thumb: derives from tbasecg64farm and overrides
    only the two operand 64 bit operations }
  143. tthumbcg64farm = class(tbasecg64farm)
  144. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  145. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  146. end;
  { thumb-2 code generator: derives from tcgarm and overrides the methods listed below }
  147. tthumb2cgarm = class(tcgarm)
  148. procedure init_register_allocators;override;
  149. procedure done_register_allocators;override;
  150. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  151. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  152. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  153. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  154. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  155. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  156. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  157. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  158. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  159. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
  { moves from, to and between mm registers }
  160. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  161. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  162. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  163. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  164. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  165. end;
  { 64 bit code generator for thumb-2: tcg64farm with its own a_op64_reg_reg }
  166. tthumb2cg64farm = class(tcg64farm)
  167. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  168. end;
  169. const
  { condition code to use for each generic comparison operator; the table is
    indexed by topcmp, so its entries have to follow the declaration order of
    that type }
  170. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  171. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
  { NOTE(review): presumably the stack page size in bytes assumed for windows
    targets; it is not used in the visible part of this unit - confirm }
  172. winstackpagesize = 4096;
  { returns the instruction postfix (PF_S, PF_D or PF_E) for a float type definition }
  173. function get_fpu_postfix(def : tdef) : toppostfix;
  174. procedure create_codegen;
  175. implementation
  176. uses
  177. globals,verbose,systems,cutils,
  178. aopt,aoptcpu,
  179. fmodule,
  180. symconst,symsym,symtable,
  181. tgobj,
  182. procinfo,cpupi,
  183. paramgr;
  184. { Range check must be disabled explicitly as conversions between signed and unsigned
  185. 32-bit values are done without explicit typecasts }
  186. {$R-}
  { Returns the instruction postfix that belongs to a floating point type:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    A def that is not a floatdef raises internal error 200401271, any other
    float type raises internal error 200401272. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real: result:=PF_S;
          s64real: result:=PF_D;
          s80real: result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the integer, fpu and mm register allocators of the normal arm code
    generator. The order of the registers in each list is kept exactly as it
    was, only the selection logic is laid out as one if/else-if chain. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { integer registers; currently, we always save R14, so we can use it }
      if target_info.system=system_arm_darwin then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
           RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure, so it is
          handed to the allocator as well }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { same list without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { fpu registers }
      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { mm registers.
        The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if not (current_settings.fputype in [fpu_vfpv3,fpu_vfpv4]) then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[])
      else
        { vfpv3/vfpv4: D16..D31 are available too and are listed before D8..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
           RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
          ],first_mm_imreg,[]);
    end;
  { Counterpart of init_register_allocators: releases the three allocators
    created there and then lets the ancestor finish the clean up. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { release in the order of creation: integer, fpu, mm }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a (at most 32 bit wide) into reg using the first
    strategy that applies:
      1. mov  with a                      (a is a shifter constant)
      2. mvn  with not(a)                 (not(a) is a shifter constant)
      3. mov + orr                        (a splits into two shifter constants)
      4. mvn + bic                        (not(a) splits into two shifter constants)
      5. pc relative ldr from a 32 bit literal appended to the local data
         of the current procedure
    Sizes other than 8/16/32 bit raise internal error 2002090902. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      literal_label : tasmlabel;
      literal_ref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      { single instruction: mov }
      if is_shifter_const(a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { single instruction: mvn of the inverted value }
      if is_shifter_const(not(a),rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: place the value behind a fresh label in the local data of
        the procedure and load it relative to the pc }
      reference_reset(literal_ref,4,[]);
      current_asmdata.getjumplabel(literal_label);
      cg.a_label(current_procinfo.aktlocaldata,literal_label);
      literal_ref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      literal_ref.symbol:=literal_label;
      literal_ref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,literal_ref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    The plain case is a single load emitted by handle_load_store. A multi part
    load is generated instead when
      - the alignment of the reference is 1 or 2 and smaller than the size of
        the access, or
      - the cpu lacks CPUARM_HAS_ALL_MEM and a halfword load would be needed.
    In that case the value is read byte wise (or halfword wise for 32 bit
    values with an alignment of 2) and put together with ORR and LSL shifts.
    The order of the emitted instructions matters, so the code below is left
    untouched and only annotated. }
  287. procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  288. var
  289. oppostfix:toppostfix;
  290. usedtmpref: treference;
  291. tmpreg,tmpreg2 : tregister;
  292. so : tshifterop;
  293. dir : integer;
  294. begin
  { never load more than the destination size asks for }
  295. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  296. FromSize := ToSize;
  297. case FromSize of
  298. { select the load postfix for the (possibly narrowed) source size, signed and unsigned }
  299. OS_8:
  300. oppostfix:=PF_B;
  301. OS_S8:
  302. oppostfix:=PF_SB;
  303. OS_16:
  304. oppostfix:=PF_H;
  305. OS_S16:
  306. oppostfix:=PF_SH;
  307. OS_32,
  308. OS_S32:
  309. oppostfix:=PF_None;
  310. else
  311. InternalError(200308297);
  312. end;
  { without CPUARM_HAS_ALL_MEM a signed byte is loaded as unsigned byte here and
    sign extended in the register at the end of this method }
  313. if (fromsize=OS_S8) and
  314. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  315. oppostfix:=PF_B;
  316. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
  317. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  318. (oppostfix in [PF_SH,PF_H])) then
  319. begin
  { dir is the step from one part to the next more significant one: the first
    part loaded is always the least significant one, which sits at the highest
    address on big endian targets }
  320. if target_info.endian=endian_big then
  321. dir:=-1
  322. else
  323. dir:=1;
  324. case FromSize of
  325. OS_16,OS_S16:
  326. begin
  327. { only complicated references need an extra loadaddr }
  { NOTE(review): the upper bound 4094 presumably keeps offset+1 within the
    range of a byte load offset - confirm }
  328. if assigned(ref.symbol) or
  329. (ref.index<>NR_NO) or
  330. (ref.offset<-4095) or
  331. (ref.offset>4094) or
  332. { sometimes the compiler reused registers }
  333. (reg=ref.index) or
  334. (reg=ref.base) then
  335. begin
  336. tmpreg2:=getintregister(list,OS_INT);
  337. a_loadaddr_ref_reg(list,ref,tmpreg2);
  338. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  339. end
  340. else
  341. usedtmpref:=ref;
  342. if target_info.endian=endian_big then
  343. inc(usedtmpref.offset,1);
  344. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  345. tmpreg:=getintregister(list,OS_INT);
  { low byte into reg, high byte into tmpreg (signed for OS_S16),
    then reg:=reg or (tmpreg shl 8) }
  346. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  347. inc(usedtmpref.offset,dir);
  348. if FromSize=OS_16 then
  349. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  350. else
  351. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  352. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  353. end;
  354. OS_32,OS_S32:
  355. begin
  356. tmpreg:=getintregister(list,OS_INT);
  357. { only complicated references need an extra loadaddr }
  { NOTE(review): the upper bound 4092 presumably keeps offset+3 within the
    range of a byte load offset - confirm }
  358. if assigned(ref.symbol) or
  359. (ref.index<>NR_NO) or
  360. (ref.offset<-4095) or
  361. (ref.offset>4092) or
  362. { sometimes the compiler reused registers }
  363. (reg=ref.index) or
  364. (reg=ref.base) then
  365. begin
  366. tmpreg2:=getintregister(list,OS_INT);
  367. a_loadaddr_ref_reg(list,ref,tmpreg2);
  368. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  369. end
  370. else
  371. usedtmpref:=ref;
  372. shifterop_reset(so);so.shiftmode:=SM_LSL;
  373. if ref.alignment=2 then
  374. begin
  { two halfword loads: reg:=low half or (high half shl 16) }
  375. if target_info.endian=endian_big then
  376. inc(usedtmpref.offset,2);
  377. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  378. inc(usedtmpref.offset,dir*2);
  379. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  380. so.shiftimm:=16;
  381. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  382. end
  383. else
  384. begin
  { four byte loads; tmpreg is used for the second and, after it has been
    merged, again for the fourth byte, tmpreg2 holds the third byte }
  385. tmpreg2:=getintregister(list,OS_INT);
  386. if target_info.endian=endian_big then
  387. inc(usedtmpref.offset,3);
  388. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  389. inc(usedtmpref.offset,dir);
  390. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  391. inc(usedtmpref.offset,dir);
  392. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  393. so.shiftimm:=8;
  394. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  395. inc(usedtmpref.offset,dir);
  396. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  397. so.shiftimm:=16;
  398. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  399. so.shiftimm:=24;
  400. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  401. end;
  402. end
  403. else
  { byte sized accesses need no splitting }
  404. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  405. end;
  406. end
  407. else
  408. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { fix-ups for signed bytes: sign extend in the register if the byte had to be
    loaded unsigned (see above); otherwise, for a 16 bit destination, convert
    the result from OS_16 to OS_32 in place
    (NOTE(review): presumably to clear the bits above bit 15 - confirm) }
  409. if (fromsize=OS_S8) and
  410. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  411. a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
  412. else if (fromsize=OS_S8) and (tosize = OS_16) then
  413. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  414. end;
  { Subtracts ioffset from the self parameter of procdef, visiting every
    location of the caller side parameter info of self.
    If ioffset is a shifter constant it is subtracted directly, otherwise it is
    loaded into R12 first. A missing or unexpected 'self' symbol raises internal
    error 200305251, a location that is neither a register nor a reference
    raises internal error 200309189. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      rot : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(200305251);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              if is_shifter_const(ioffset,rot) then
                a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
              else
                begin
                  a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                  a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,rot) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end;
            else
              internalerror(200309189);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as parameter: paraloc has to be a simple location and
    is allocated first. A register location is loaded directly; for a memory
    location a reference is built from the index register and offset stored in
    the location and the constant is written through it. Any other kind of
    location raises internal error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackref: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);

      if paraloc.location^.loc in [LOC_REGISTER,LOC_CREGISTER] then
        a_load_const_reg(list,size,a,paraloc.location^.register)
      else if paraloc.location^.loc=LOC_REFERENCE then
        begin
          reference_reset(stackref,paraloc.alignment,[]);
          stackref.base:=paraloc.location^.reference.index;
          stackref.offset:=paraloc.location^.reference.offset;
          a_load_const_ref(list,size,a,stackref);
        end
      else
        internalerror(2002081101);
    end;
  { Copies (a part of) a parameter from ref to the parameter reference
    paralocref. Only one case is special: a 32 bit sized location of an OS_F64
    parameter is copied as 4 bytes with g_concatcopy. Everything else is
    handled by the inherited method. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Passes the address of r as parameter: paraloc has to be a simple location
    and is allocated first. For a register location the address is computed
    right into that register; for a memory location it is computed into a
    temporary address register and stored from there. Any other kind of
    location raises internal error 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      stackref: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);

      if paraloc.location^.loc in [LOC_REGISTER,LOC_CREGISTER] then
        a_loadaddr_ref_reg(list,r,paraloc.location^.register)
      else if paraloc.location^.loc=LOC_REFERENCE then
        begin
          { destination: index register and offset of the parameter location }
          reference_reset(stackref,paraloc.alignment,[]);
          stackref.base:=paraloc.location^.reference.index;
          stackref.offset:=paraloc.location^.reference.offset;
          { address -> temporary register -> parameter location }
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,r,addrreg);
          a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,stackref);
        end
      else
        internalerror(2002080701);
    end;
  { Emits a call to the function symbol s. The symbol is referenced weakly if
    weak is set. The reference is marked addr_pic when the target uses a GOT
    for PIC and the module is compiled as PIC, and addr_full otherwise.
    pi_do_call is added to the flags of the current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);

      reference_reset_symbol(target,callee,0,sizeof(pint),[]);
      target.refaddr:=addr_full;
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        target.refaddr:=addr_pic;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,target));

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set here
        instead of being checked with
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703); }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits an indirect call through reg: a single BLX on cpus that have
    CPUARM_HAS_BLX, otherwise the pair mov r14,pc / mov pc,reg.
    pi_do_call is added to the flags of the current procedure. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          { set up the return address by hand, then jump }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set here
        instead of being checked with
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703); }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Two operand form reg:=reg op a; it is expressed through the three operand
    form with reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,
                         op,size,a,
                         reg,  { source }
                         reg); { destination }
    end;
  { Applies op with the constant a to the value stored at ref: the value is
    loaded into a temporary register, the operation is carried out into a
    second temporary register and the result is stored back. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,resultreg : tregister;
      storeref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      { the reference returned by the load is the one used for the store }
      storeref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,storeref);
    end;
  { Two operand operation on registers.
      OP_NEG: rsb dst,src,#0 followed by maybeadjustresult.
      OP_NOT: plain mvn for 32 bit sizes. For 8 and 16 bit sizes the source is
              shifted left by 24/16 inside the mvn and the result is shifted
              back right by the same amount (asr for signed, lsr for unsigned
              sizes).
      other : forwarded to a_op_reg_reg_reg with dst as second source. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shift : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          if not (size in [OS_8, OS_16, OS_S8, OS_S16]) then
            list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
          else
            begin
              shifterop_reset(shift);
              { move the value to the top of the register while inverting it }
              shift.shiftmode:=SM_LSL;
              shift.shiftimm:=16;
              if size in [OS_8, OS_S8] then
                shift.shiftimm:=24;
              list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
              {Using a shift here allows this to be folded into another instruction}
              shift.shiftmode:=SM_LSR;
              if size in [OS_S8, OS_S16] then
                shift.shiftmode:=SM_ASR;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
            end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  { Lookup tables indexed by the generic operation TOpCG. Each table has 18
    entries which have to follow the declaration order of TOpCG; that
    declaration is not part of this unit, so the entries are not labelled here.
    A_NONE marks an operation without an entry in the respective table. }
  612. const
  { NOTE(review): by its name the table for the register/register form; it
    differs from op_reg_opcg2asmop only in having A_NONE where that table has
    A_ASR, A_LSL, A_LSR and A_ROR - confirm the intended use at the call sites }
  613. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  614. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  615. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
  616. op_reg_opcg2asmop: array[TOpCG] of tasmop =
  617. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  618. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { instruction postfix per operation; every entry is PF_None }
  619. op_reg_postfix: array[TOpCG] of TOpPostfix =
  620. (PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
  621. PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None);
  { dst:=src op a without overflow checking: forwards to the checkoverflow
    variant with setflags=false. The overflow location that variant takes as
    var parameter is a local that is not looked at afterwards. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unused_ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,
        false,unused_ovloc);
    end;
  { Three register operation without overflow checking: forwards to the
    checkoverflow variant with setflags=false. The overflow location that
    variant takes as var parameter is a local that is not looked at afterwards. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      unused_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,
        false,unused_ovloc);
    end;
  { Translates a generic shift or rotate operation into the shift mode of a
    shifter operand. OP_ROL yields SM_ROR just like OP_ROR; the caller is
    presumably expected to adjust the rotate count accordingly (to be verified
    at the call sites). Any other operation raises internal error 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        OP_ROR,
        OP_ROL:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace the 32 bit multiplication "dst := src * a" by a short
    sequence of MOV/ADD/RSB/SUB instructions that use shifted operands.

    list     : instruction list the replacement sequence is appended to
    a        : constant factor
    src, dst : source and destination register

    Returns true if a replacement sequence was appended to list; returns
    false (without having appended anything) when no sequence within the
    estimated effort budget exists, so the caller has to emit a real multiply.

    NOTE(review): every instruction after the first one reads src after dst
    has already been written, so this looks like it relies on src<>dst --
    confirm against the callers. }
  647. function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  648. var
  649. multiplier : dword;
  650. power : longint;
  651. shifterop : tshifterop;
  652. bitsset : byte;
  653. negative : boolean;
  654. first : boolean;
  655. b,
  656. cycles : byte;
  657. maxeffort : byte;
  658. begin
  659. result:=true;
  660. cycles:=0;
  661. negative:=a<0;
  662. shifterop.rs:=NR_NO;
  663. shifterop.shiftmode:=SM_LSL;
  { a negative factor costs one extra instruction: the final RSB dst,dst,#0 }
  664. if negative then
  665. inc(cycles);
  666. multiplier:=dword(abs(a));
  { number of set bits of the factor, not counting bit 0 }
  667. bitsset:=popcnt(multiplier and $fffffffe);
  668. { heuristics to estimate how much instructions are reasonable to replace the mul,
  669. this is currently based on XScale timings }
  670. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  671. actual multiplication, this requires min. 1+4 cycles
  672. because the first shift imm. might cause a stall and because we need more instructions
  673. when replacing the mul we generate max. 3 instructions to replace this mul }
  674. maxeffort:=3;
  675. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  676. a ldr, so generating one more operation to replace this is beneficial }
  677. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  678. inc(maxeffort);
  679. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  680. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  681. dec(maxeffort);
  682. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  683. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  684. dec(maxeffort);
  685. { most simple cases }
  686. if a=1 then
  687. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  688. else if a=0 then
  689. a_load_const_reg(list,OS_32,0,dst)
  690. else if a=-1 then
  691. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  692. { add up ?
  693. basically, one add is needed for each bit being set in the constant factor
  694. however, the least significant bit is for free, it can be hidden in the initial
  695. instruction
  696. }
  697. else if (bitsset+cycles<=maxeffort) and
  698. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  699. begin
  700. first:=true;
  { process the factor from its most significant set bit downwards }
  701. while multiplier<>0 do
  702. begin
  703. shifterop.shiftimm:=BsrDWord(multiplier);
  { odd factor: bit 0 is folded into the same instruction as the top bit
    (dst := src + src shl msb) }
  704. if odd(multiplier) then
  705. begin
  706. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  707. dec(multiplier);
  708. end
  709. else
  710. if first then
  711. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  712. else
  713. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  714. first:=false;
  { clear the bit that has just been handled }
  715. dec(multiplier,1 shl shifterop.shiftimm);
  716. end;
  717. if negative then
  718. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  719. end
  720. { subtract from the next greater power of two? }
  721. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  722. begin
  723. first:=true;
  724. while multiplier<>0 do
  725. begin
  { first iteration: switch to the difference (1 shl power)-multiplier and
    start from src shl power; the remaining bits are subtracted afterwards }
  726. if first then
  727. begin
  728. multiplier:=(1 shl power)-multiplier;
  729. shifterop.shiftimm:=power;
  730. end
  731. else
  732. shifterop.shiftimm:=BsrDWord(multiplier);
  { odd difference: bit 0 is folded into the RSB (dst := src shl n - src) }
  733. if odd(multiplier) then
  734. begin
  735. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  736. dec(multiplier);
  737. end
  738. else
  739. if first then
  740. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  741. else
  742. begin
  743. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  744. dec(multiplier,1 shl shifterop.shiftimm);
  745. end;
  746. first:=false;
  747. end;
  748. if negative then
  749. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  750. end
  751. else
  752. result:=false;
  753. end;
  { Emits code for "dst := src <op> a" with a constant operand a and reports
    in ovloc where overflow information can be found (LOC_VOID if none).

    list     : instruction list the code is appended to
    setflags : request a flag-setting instruction; the field cgsetflags is
               or-ed with it wherever the flags are considered
    ovloc    : set to LOC_FLAGS plus the condition to test when flags were
               requested for an 8/16/32 bit ADD or SUB

    Strategies, in the order they are tried below:
      - OP_NONE / OP_MOVE become a register move / constant load
      - ADD and SUB are exchanged when the negated constant is encodable
      - a encodable as shifter constant (and op is not a multiplication):
        one instruction, shifts being emitted as MOV with an immediate shift
      - multiplications by special constants, AND via BIC, shift pairs or
        UXTB/UXTH, splitting the constant over two instructions
      - fallback: load the constant into a temporary register and use the
        register/register variant }
  754. procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  755. var
  756. shift, lsb, width : byte;
  757. tmpreg : tregister;
  758. so : tshifterop;
  759. l1 : longint;
  760. imm1, imm2: DWord;
  761. begin
  { optimize_op_const receives op and a and may rewrite both -- presumably
    strength reduction / no-op detection; its body is not visible here }
  762. optimize_op_const(size, op, a);
  763. case op of
  764. OP_NONE:
  765. begin
  766. if src <> dst then
  767. a_load_reg_reg(list, size, size, src, dst);
  768. exit;
  769. end;
  770. OP_MOVE:
  771. begin
  772. a_load_const_reg(list, size, a, dst);
  773. exit;
  774. end;
  775. else
  776. ;
  777. end;
  778. ovloc.loc:=LOC_VOID;
  { ADD <-> SUB with the negated constant when that one is encodable; only
    done when setflags is false.  -2147483648 is excluded (presumably because
    its negation is not representable in 32 bits). }
  779. if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
  780. case op of
  781. OP_ADD:
  782. begin
  783. op:=OP_SUB;
  784. a:=aint(dword(-a));
  785. end;
  786. OP_SUB:
  787. begin
  788. op:=OP_ADD;
  789. a:=aint(dword(-a));
  790. end
  791. else
  792. ;
  793. end;
  794. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  795. case op of
  796. OP_NEG,OP_NOT:
  797. internalerror(200308281);
  798. OP_SHL,
  799. OP_SHR,
  800. OP_ROL,
  801. OP_ROR,
  802. OP_SAR:
  803. begin
  804. if a>32 then
  805. internalerror(200308294);
  806. shifterop_reset(so);
  807. so.shiftmode:=opshift2shiftmode(op);
  { opshift2shiftmode maps OP_ROL to SM_ROR, so rotate right by 32-a }
  808. if op = OP_ROL then
  809. so.shiftimm:=32-a
  810. else
  811. so.shiftimm:=a;
  812. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  813. end;
  814. else
  815. {if (op in [OP_SUB, OP_ADD]) and
  816. ((a < 0) or
  817. (a > 4095)) then
  818. begin
  819. tmpreg:=getintregister(list,size);
  820. list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
  821. list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
  822. list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
  823. ));
  824. end
  825. else}
  826. begin
  827. if cgsetflags or setflags then
  828. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression yields PF_S when flags are wanted and the enum
    value with ordinal 0 otherwise }
  829. list.concat(setoppostfix(
  830. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  831. end;
  { overflow condition reported to the caller: F_CS after ADD, F_CC after
    SUB; any other operation with flags requested is an internal error }
  832. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  833. begin
  834. ovloc.loc:=LOC_FLAGS;
  835. case op of
  836. OP_ADD:
  837. ovloc.resflags:=F_CS;
  838. OP_SUB:
  839. ovloc.resflags:=F_CC;
  840. else
  841. internalerror(2019050922);
  842. end;
  843. end;
  844. end
  845. else
  846. begin
  847. { there could be added some more sophisticated optimizations }
  848. if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  849. a_op_reg_reg(list,OP_NEG,size,src,dst)
  850. { we do this here instead in the peephole optimizer because
  851. it saves us a register }
  852. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  853. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  854. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  855. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  856. begin
  857. if l1>32 then{roozbeh does this ever happen?}
  858. internalerror(200308296);
  859. shifterop_reset(so);
  860. so.shiftmode:=SM_LSL;
  861. so.shiftimm:=l1;
  862. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  863. end
  864. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  865. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  866. begin
  867. if l1>32 then{does this ever happen?}
  868. internalerror(201205181);
  869. shifterop_reset(so);
  870. so.shiftmode:=SM_LSL;
  871. so.shiftimm:=l1;
  872. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  873. end
  874. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  875. begin
  876. { nothing to do on success }
  877. end
  878. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  879. broader range of shifterconstants.}
  880. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  881. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  882. { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
  883. into the following instruction}
  884. else if (op = OP_AND) and
  885. is_continuous_mask(aword(a), lsb, width) and
  886. ((lsb = 0) or ((lsb + width) = 32)) then
  887. begin
  888. shifterop_reset(so);
  889. if (width = 16) and
  890. (lsb = 0) and
  891. (current_settings.cputype >= cpu_armv6) then
  892. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  893. else if (width = 8) and
  894. (lsb = 0) and
  895. (current_settings.cputype >= cpu_armv6) then
  896. list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
  { mask of the low "width" bits: shift the unwanted upper bits out to the
    left, then shift back with zero fill }
  897. else if lsb = 0 then
  898. begin
  899. so.shiftmode:=SM_LSL;
  900. so.shiftimm:=32-width;
  901. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  902. so.shiftmode:=SM_LSR;
  903. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  904. end
  { mask reaching up to bit 31: shift the unwanted low bits out to the
    right, then shift back }
  905. else
  906. begin
  907. so.shiftmode:=SM_LSR;
  908. so.shiftimm:=lsb;
  909. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  910. so.shiftmode:=SM_LSL;
  911. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  912. end;
  913. end
  { the inverted mask split over two BIC instructions }
  914. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  915. begin
  916. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
  917. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  918. end
  { the constant split over two instructions of the same kind; only used
    when no flags are requested }
  919. else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
  920. not(cgsetflags or setflags) and
  921. split_into_shifter_const(a, imm1, imm2) then
  922. begin
  923. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
  924. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  925. end
  { fallback: materialise the constant and use the register variant }
  926. else
  927. begin
  928. tmpreg:=getintregister(list,size);
  929. a_load_const_reg(list,size,a,tmpreg);
  930. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  931. end;
  932. end;
  933. maybeadjustresult(list,op,size,dst);
  934. end;
  { Emits code for "dst := src2 <op> src1" with register operands and reports
    in ovloc where overflow information can be found (LOC_VOID if none).

    list     : instruction list the code is appended to
    setflags : request flag/overflow information; the field cgsetflags is
               or-ed with it
    ovloc    : set to LOC_FLAGS/F_NE for a multiplication whose overflow was
               checked via UMULL/SMULL, left at LOC_VOID otherwise

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not handled here and raise
    internalerror 200308283. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { shift by register: MOV dst,src2,<mode> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { long multiply: the high word ends up in overflowreg and is
                  compared against the expected extension of the low word }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: copy
                          the operand into a fresh register and use that copy
                          as rm (previously the copy went into dst, leaving
                          tmpreg undefined) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word has to equal dst asr 31 }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word has to be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { all three registers identical: multiply via a copy
                          of the operand, see the long multiply case above }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { the postfix expression yields PF_S when flags are wanted and
              the enum value with ordinal 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the product in the register pair
    dstlo/dsthi.  Either destination may be passed as NR_NO by a caller that
    does not need it.

    With UMULL support a long multiply is emitted (OS_32 -> UMULL,
    OS_S32 -> SMULL, anything else is internalerror 2014060802).  Without it
    only the low word can be produced with MUL; requesting dsthi then raises
    internalerror 2015083022. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      longmul: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022);
          if dstlo=NR_NO then
            dstlo:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg(A_MUL,dstlo,src1,src2));
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_32:
          longmul:=A_UMULL;
        OS_S32:
          longmul:=A_SMULL;
        else
          InternalError(2014060802);
      end;
      { The instruction needs two valid destination registers even if the
        caller is not interested in one of the halves.  If dsthi is NR_NO a
        plain 32x32=32 bit multiplication would be sufficient as well. }
      if dstlo=NR_NO then
        dstlo:=getintregister(list,size);
      if dsthi=NR_NO then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(longmul,dstlo,dsthi,src1,src2));
    end;
  { Emits the load/store instruction op (with postfix oppostfix) for register
    reg at reference ref, after rewriting the reference into a form the
    instruction can address.

    ref is a value parameter: all rewriting happens on the local copy, and
    that final copy is returned so callers can derive follow-up references
    (e.g. the next byte) from it.

    Steps, in order:
      1. make sure a base register is used if only an index is given
      2. pass references that are out of range for the instruction to fixref
      3. Thumb: satisfy base/index requirements and avoid "hi" registers
      4. fold base+index+offset into base+index
      5. FPU/VFP accesses: fold the index into the base
      6. emit the instruction }
  1069. function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  1070. var
  1071. tmpreg1,tmpreg2 : tregister;
  1072. begin
  { tmpreg1 remembers the last temporary register introduced by the steps
    below (NR_NO = none so far) }
  1073. tmpreg1:=NR_NO;
  1074. { Be sure to have a base register }
  1075. if (ref.base=NR_NO) then
  1076. begin
  1077. if ref.shiftmode<>SM_None then
  1078. internalerror(2014020701);
  1079. ref.base:=ref.index;
  1080. ref.index:=NR_NO;
  1081. end;
  1082. { absolute symbols can't be handled directly, we've to store the symbol reference
  1083. in the text segment and access it pc relative
  1084. For now, we assume that references where base or index equals to PC are already
  1085. relative, all other references are assumed to be absolute and thus they need
  1086. to be handled extra.
  1087. A proper solution would be to change refoptions to a set and store the information
  1088. if the symbol is absolute or relative there.
  1089. }
  { The remaining terms of the condition are per-instruction offset limits:
    +-4095 in general, +-255 for PF_SB/PF_H/PF_SH, +-1020 and a multiple of 4
    for the FPU/VFP opcodes, plus the tighter limits tested for Thumb code. }
  1090. if (assigned(ref.symbol) and
  1091. not(is_pc(ref.base)) and
  1092. not(is_pc(ref.index))
  1093. ) or
  1094. { [#xxx] isn't a valid address operand }
  1095. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  1096. (ref.offset<-4095) or
  1097. (ref.offset>4095) or
  1098. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  1099. ((ref.offset<-255) or
  1100. (ref.offset>255)
  1101. )
  1102. ) or
  1103. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1104. ((ref.offset<-1020) or
  1105. (ref.offset>1020) or
  1106. ((abs(ref.offset) mod 4)<>0)
  1107. )
  1108. ) or
  1109. ((GenerateThumbCode) and
  1110. (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
  1111. ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
  1112. ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
  1113. ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
  1114. ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
  1115. )
  1116. ) then
  1117. begin
  1118. fixref(list,ref);
  1119. end;
  1120. if GenerateThumbCode then
  1121. begin
  1122. { certain thumb load require base and index }
  1123. if (oppostfix in [PF_SB,PF_SH]) and
  1124. (ref.base<>NR_NO) and (ref.index=NR_NO) then
  1125. begin
  { supply a zero index register }
  1126. tmpreg1:=getintregister(list,OS_ADDR);
  1127. a_load_const_reg(list,OS_ADDR,0,tmpreg1);
  1128. ref.index:=tmpreg1;
  1129. end;
  1130. { "hi" registers cannot be used as base or index }
  1131. if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
  1132. ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
  1133. begin
  1134. tmpreg1:=getintregister(list,OS_ADDR);
  1135. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
  1136. ref.base:=tmpreg1;
  1137. end;
  1138. if getsupreg(ref.index) in [RS_R8..RS_R14] then
  1139. begin
  1140. tmpreg1:=getintregister(list,OS_ADDR);
  1141. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
  1142. ref.index:=tmpreg1;
  1143. end;
  1144. end;
  1145. { fold if there is base, index and offset, however, don't fold
  1146. for vfp memory instructions because we later fold the index }
  1147. if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1148. (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  1149. begin
  { NOTE(review): in the first branch the sum goes into tmpreg2 and tmpreg1
    is redirected to it, but neither ref.base nor ref.index is updated before
    ref.offset is cleared -- confirm the folded offset is not lost here. }
  1150. if tmpreg1<>NR_NO then
  1151. begin
  1152. tmpreg2:=getintregister(list,OS_ADDR);
  1153. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
  1154. tmpreg1:=tmpreg2;
  1155. end
  1156. else
  1157. begin
  1158. tmpreg1:=getintregister(list,OS_ADDR);
  1159. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
  1160. ref.base:=tmpreg1;
  1161. end;
  1162. ref.offset:=0;
  1163. end;
  1164. { floating point operations have only limited references
  1165. we expect here, that a base is already set }
  1166. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  1167. begin
  1168. if ref.shiftmode<>SM_none then
  1169. internalerror(200309121);
  { reuse the temporary from the steps above when there is one; it is either
    the current base or the current index }
  1170. if tmpreg1<>NR_NO then
  1171. begin
  1172. if ref.base=tmpreg1 then
  1173. begin
  1174. if ref.signindex<0 then
  1175. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
  1176. else
  1177. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
  1178. ref.index:=NR_NO;
  1179. end
  1180. else
  1181. begin
  1182. if ref.index<>tmpreg1 then
  1183. internalerror(200403161);
  1184. if ref.signindex<0 then
  1185. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
  1186. else
  1187. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
  1188. ref.base:=tmpreg1;
  1189. ref.index:=NR_NO;
  1190. end;
  1191. end
  { NOTE(review): unlike the two branches above, this one always adds the
    index and does not look at ref.signindex -- confirm that a negative index
    cannot reach this point. }
  1192. else
  1193. begin
  1194. tmpreg1:=getintregister(list,OS_ADDR);
  1195. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
  1196. ref.base:=tmpreg1;
  1197. ref.index:=NR_NO;
  1198. end;
  1199. end;
  1200. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  1201. Result := ref;
  1202. end;
  { Stores register reg to memory at ref.

    A single STR/STRH/STRB (chosen from tosize) is emitted via
    handle_load_store unless the store has to be split, which is the case
    when the reference alignment (1 or 2) is smaller than the store size, or
    when a halfword store is requested on a cpu without CPUARM_HAS_ALL_MEM.
    Split stores write the value in byte or halfword pieces, lowest part
    first, walking through memory in the direction given by the target
    endianness. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      storepostfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step,i : integer;
      bigendian,mustsplit : boolean;
    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;
      case ToSize of
        OS_8,OS_S8:
          storepostfix:=PF_B;
        OS_16,OS_S16:
          storepostfix:=PF_H;
        { OS_F32: vfp value stored in an integer register }
        OS_32,OS_S32,OS_F32:
          storepostfix:=PF_None;
        else
          InternalError(200308299);
      end;

      mustsplit:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        (not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) and
         (storepostfix=PF_H));

      { the common case, and every size the split code does not cover }
      if not(mustsplit) or not(FromSize in [OS_16,OS_S16,OS_32,OS_S32]) then
        begin
          handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;

      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;
      shifted:=getintregister(list,OS_INT);
      partref:=ref;

      if FromSize in [OS_16,OS_S16] then
        begin
          { two single bytes }
          if bigendian then
            inc(partref.offset,1);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
        end
      else if ref.alignment=2 then
        begin
          { two halfwords }
          if bigendian then
            inc(partref.offset,2);
          partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
          inc(partref.offset,step*2);
          a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
        end
      else
        begin
          { four single bytes }
          if bigendian then
            inc(partref.offset,3);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          for i:=1 to 2 do
            begin
              a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
              inc(partref.offset,step);
              a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
            end;
        end;
    end;
  { Stores reg to ref with the width given by tosize and returns the
    reference as rewritten by handle_load_store.

    On cpus without CPUARM_HAS_ALL_MEM a 16 bit store is emitted as two byte
    stores: the low byte at ref, the value shifted right by 8 at the
    following address.  The returned reference is that of the first byte.
    Sizes other than 8/16/32 bit raise internalerror 2003082910. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepostfix : toppostfix;
      highref : treference;
      highbyte : TRegister;
    begin
      case ToSize of
        OS_8,OS_S8:
          storepostfix:=PF_B;
        OS_16,OS_S16:
          storepostfix:=PF_H;
        OS_32,OS_S32:
          storepostfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      if not(tosize in [OS_S16,OS_16]) or
         (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        begin
          result:=handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;

      { no halfword store available: write the two bytes separately }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      highbyte:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,highbyte);
      highref:=result;
      inc(highref.offset);
      handle_load_store(list,A_STR,PF_B,highbyte,highref);
    end;
  { Loads reg from Ref and returns the reference as rewritten by
    handle_load_store.  The load postfix is selected from fromsize; sizes
    other than 8/16/32 bit raise internalerror 200308291.

    On cpus without CPUARM_HAS_ALL_MEM two cases are emulated, selected by
    tosize:
      - OS_S8: unsigned byte load followed by a sign extension in the register
      - OS_16/OS_S16: two byte loads combined with ORR reg,reg,tmp lsl 8;
        the returned reference is that of the first byte }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpostfix : toppostfix;
      combine : tshifterop;
      highbyte : TRegister;
      highref : treference;
      limited : boolean;
    begin
      case FromSize of
        OS_8:
          loadpostfix:=PF_B;
        OS_S8:
          loadpostfix:=PF_SB;
        OS_16:
          loadpostfix:=PF_H;
        OS_S16:
          loadpostfix:=PF_SH;
        OS_32,OS_S32:
          loadpostfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limited:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      if limited and (tosize=OS_S8) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limited and (tosize in [OS_S16,OS_16]) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          highbyte:=getintregister(list,OS_INT);
          highref:=result;
          inc(highref.offset);
          handle_load_store(list,A_LDR,PF_B,highbyte,highref);
          shifterop_reset(combine);
          combine.shiftmode:=SM_LSL;
          combine.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,highbyte,combine));
        end
      else
        result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref);
    end;
  { Copies reg1 into reg2, converting from fromsize to tosize.

    When the sizes differ, the value is zero or sign extended according to
    the smaller of the two sizes: with shift pairs (or AND #$ff) before
    ARMv6, with the UXTB/UXTH/SXTB/SXTH family from ARMv6 on.  If no
    extension is needed and the registers differ, a plain MOV is emitted and
    announced to the register allocator through add_move_instruction. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { reg2 := reg shifted by shiftimm using shiftmode; Thumb code goes
      through the generic constant shift, otherwise MOV with shifter operand }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not(GenerateThumbCode) then
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
            exit;
          end;
        case shiftmode of
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    { extension for cpus older than ARMv6; false if fromsize needs none }
    function extend_pre_armv6 : boolean;
      begin
        result:=true;
        case fromsize of
          OS_8:
            if GenerateThumbCode then
              a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
            else
              list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
          OS_S8:
            begin
              do_shift(SM_LSL,24,reg1);
              if tosize=OS_16 then
                begin
                  { sign extend to 16 bit, then clear the upper halfword }
                  do_shift(SM_ASR,8,reg2);
                  do_shift(SM_LSR,16,reg2);
                end
              else
                do_shift(SM_ASR,24,reg2);
            end;
          OS_16:
            begin
              do_shift(SM_LSL,16,reg1);
              do_shift(SM_LSR,16,reg2);
            end;
          OS_S16:
            begin
              do_shift(SM_LSL,16,reg1);
              do_shift(SM_ASR,16,reg2);
            end;
          else
            result:=false;
        end;
      end;

    { extension for ARMv6 and later; false if fromsize needs none }
    function extend_armv6 : boolean;
      begin
        result:=true;
        case fromsize of
          OS_8:
            if GenerateThumbCode then
              list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
            else
              list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
          OS_S8:
            if tosize=OS_16 then
              begin
                so.shiftmode:=SM_ROR;
                so.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                do_shift(SM_LSR,16,reg2);
              end
            else
              list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
          OS_16:
            list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
          OS_S16:
            list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
          else
            result:=false;
        end;
      end;

    var
      movinstr : taicpu;
      extended : boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      extended:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          { the narrower of the two sizes decides how to extend }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            extended:=extend_pre_armv6
          else
            extended:=extend_armv6;
        end;

      if (reg1<>reg2) and not(extended) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as the parameter described
    by paraloc.

    The parameter's location chain is walked; href is the running source
    reference and is advanced by the size of each location after it has been
    handled:
      - FPU register : loaded with a_loadfpu_ref_reg
      - int register : 32 bit pieces are loaded directly, 64 bit locations
                       are handed to cg64.a_load64_ref_cgpara
      - memory       : copied with g_concatcopy
    Any other location kind raises internalerror 200408241. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { NOTE(review): this allocates the head of the location chain
                  on every iteration rather than hloc -- confirm whether that
                  is intended for multi-location parameters }
                paramanager.allocparaloc(list,paraloc.location);
                { read from the running reference like all other cases do;
                  for the first location href equals ref }
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { FPU register move reg1 -> reg2: emits MVF with the postfix that
    cgsize2fpuoppostfix assigns to tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      move : taicpu;
    begin
      move:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(move,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register reg from ref with LDF; the precision postfix is taken
    from fromsize (internalerror 200309021 for unsupported sizes).  If tosize
    differs from fromsize, a register-to-register move with the target size
    follows. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_F80:
          precision:=PF_E;
        OS_64,OS_F64:
          precision:=PF_D;
        OS_32,OS_F32:
          precision:=PF_S;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores FPU register reg to ref with STF; the precision postfix is taken
    from tosize (internalerror 200309022 for anything but OS_F32, OS_F64 and
    OS_F80). }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      case tosize of
        OS_F80:
          precision:=PF_E;
        OS_F64:
          precision:=PF_D;
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309022);
      end;
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  { Emits a check of the floating point status register that calls
    FPC_THROWFPUEXCEPTION when one of the tested status bits is set.

    Nothing is emitted unless cs_check_fpu_exceptions is active and either
    force is true or the current procedure has a check pending.

    list  : instruction list the check is appended to
    force : emit the check even if none is pending
    clear : reset the pending flag of the current procedure afterwards }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      r : TRegister;
      ai: taicpu;
      l: TAsmLabel;
    begin
      if ((cs_check_fpu_exceptions in current_settings.localswitches) and
          (force or current_procinfo.FPUExceptionCheckNeeded)) then
        begin
          r:=getintregister(list,OS_INT);
          list.concat(taicpu.op_reg_reg(A_FMRX,r,NR_FPSCR));
          { the mask $9f keeps bits 0..4 and 7 -- presumably the cumulative
            exception flags, confirm against the FPSCR layout; the S postfix
            makes the AND update the condition flags for the branch below }
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,r,r,$9f),PF_S));
          { skip the call if none of the tested bits is set }
          current_asmdata.getjumplabel(l);
          ai:=taicpu.op_sym(A_B,l);
          ai.is_jmp:=true;
          ai.condition:=C_EQ;
          list.concat(ai);
          alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
          { the call has to go into the same list as the branch and the label
            around it, not into current_asmdata.CurrAsmList, which may be a
            different list }
          cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
          dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
          a_label(list,l);
          if clear then
            current_procinfo.FPUExceptionCheckNeeded:=false;
        end;
    end;
  1575. { comparison operations }
  { Compares reg with the constant a and jumps to l when cmp_op holds.
    The flags register is allocated around the compare and the jump.
    The compare is emitted as CMP with an immediate when a can be encoded,
    as CMN with -a when only the negated value can be encoded (ARM mode
    only), and otherwise as CMP against a temporary register loaded with a. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      encodable : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { can a be used directly as immediate operand of CMP? }
      if GenerateThumbCode then
        encodable:=is_thumb_imm(a)
      else
        encodable:=is_shifter_const(a,rot);
      if encodable then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if not(GenerateThumbCode) and (a<>-1) and (a<>$7fffffff) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { no immediate form available: materialise the constant first }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.
    reverse=true : CLZ, then dst:=31-dst, then dst:=dst and 255.
    reverse=false: RBIT followed by CLZ; if the result equals 32 it is
                   replaced by $ff (with an IT block in Thumb-2 mode).
    srcsize and dstsize are not used by this implementation. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          { a count of 32 is mapped to $ff; this needs the flags }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
          exit;
        end;
      { reverse scan: index = 31 - number of leading zeros, cut to 8 bits }
      list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
      list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
      list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags register is marked as allocated for the duration of the
    compare/jump pair.  The size parameter is not used here. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s.
    BL is used as the opcode when generating Thumb code, B otherwise;
    in both cases the instruction is flagged as a jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jmp : taicpu;
      target : tasmsymbol;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      target:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      jmp:=taicpu.op_sym(opcode,target);
      jmp.is_jmp:=true;
      list.concat(jmp);
    end;
  { Emits an unconditional jump to the label l.
    BL is used as the opcode when generating Thumb code, B otherwise;
    in both cases the instruction is flagged as a jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jmp : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jmp:=taicpu.op_sym(opcode,l);
      jmp.is_jmp:=true;
      list.concat(jmp);
    end;
  { Emits a jump to l that is taken when the flags state f holds.
    Outside Thumb mode this is a single conditional B.  In Thumb mode a
    conditional B with the inverted condition skips over an unconditional
    jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jmp : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          jmp:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          jmp.is_jmp:=true;
          list.concat(jmp);
          exit;
        end;
      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      jmp:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      jmp.is_jmp:=true;
      list.concat(jmp);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materialises the flags state f in reg: a conditional MOV writes 1 when
    the condition for f holds, a second MOV with the inverse condition
    writes 0 otherwise.  The size parameter is not used here. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Emits the profiling hook: R14 is pushed and __gnu_mcount_nc is called.
    Only system_arm_linux is supported; every other target raises an
    internal error. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Emits the procedure prologue into list.

    localsize    : size of the local area in bytes; rounded up to a multiple of 4
    nostackframe : when true no prologue code is generated at all

    With a stack frame the steps are, in order:
      1. determine the FPA (F0..F7) or VFP registers that are used in the
         procedure and are not volatile
      2. push the integer registers (one STM on non-Darwin targets, up to two
         on Darwin) and set up the frame pointer
      3. subtract the (alignment corrected) local size from the stack pointer
      4. store the floating point registers found in step 1
    The padding register possibly added in step 2 is recorded in
    tcpuprocinfo(current_procinfo).stackpaddingreg. }
  1692. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1693. var
  1694. ref : treference;
  1695. shift : byte;
  1696. firstfloatreg,lastfloatreg,
  1697. r : byte;
  1698. mmregs,
  1699. regs, saveregs : tcpuregisterset;
  1700. registerarea,
  1701. r7offset,
  1702. stackmisalignment : pint;
  1703. imm1, imm2: DWord;
  1704. stack_parameters : Boolean;
  1705. begin
  1706. LocalSize:=align(LocalSize,4);
  1707. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1708. { call instruction does not put anything on the stack }
  1709. registerarea:=0;
  { High(TSuperRegister) stands for "no padding register"; g_proc_exit only
    re-adds the padding register when its number is below RS_R4 }
  1710. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1711. lastfloatreg:=RS_NO;
  1712. if not(nostackframe) then
  1713. begin
  1714. firstfloatreg:=RS_NO;
  1715. mmregs:=[];
  1716. case current_settings.fputype of
  1717. fpu_none,
  1718. fpu_soft,
  1719. fpu_libgcc:
  1720. ;
  1721. fpu_fpa,
  1722. fpu_fpa10,
  1723. fpu_fpa11:
  1724. begin
  1725. { save floating point registers? }
  { used, non-volatile FPA registers; first/last delimit the range stored
    later with SFM, each register in regs adds 12 bytes to registerarea }
  1726. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1727. for r:=RS_F0 to RS_F7 do
  1728. if r in regs then
  1729. begin
  1730. if firstfloatreg=RS_NO then
  1731. firstfloatreg:=r;
  1732. lastfloatreg:=r;
  1733. inc(registerarea,12);
  1734. end;
  1735. end;
  1736. fpu_vfpv2,
  1737. fpu_vfpv3,
  1738. fpu_vfpv4,
  1739. fpu_vfpv3_d16:
  1740. begin;
  1741. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1742. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1743. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1744. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1745. end;
  1746. else
  1747. internalerror(2019050924);
  1748. end;
  1749. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1750. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1751. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1752. { save int registers }
  { ref describes a pre-indexed access through SP and is shared by all STM
    instructions below }
  1753. reference_reset(ref,4,[]);
  1754. ref.index:=NR_STACK_POINTER_REG;
  1755. ref.addressmode:=AM_PREINDEXED;
  1756. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { ---- non-Darwin targets: a single STMFD ---- }
  1757. if not(target_info.system in systems_darwin) then
  1758. begin
  1759. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1760. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1761. begin
  { keep the entry value of SP in R12; it is pushed and later used to
    compute the frame pointer }
  1762. a_reg_alloc(list,NR_R12);
  1763. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1764. end;
  1765. { the (old) ARM APCS requires saving both the stack pointer (to
  1766. crawl the stack) and the PC (to identify the function this
  1767. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1768. and R15 -- still needs updating for EABI and Darwin, they don't
  1769. need that }
  1770. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1771. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1772. else
  1773. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1774. include(regs,RS_R14);
  1775. if regs<>[] then
  1776. begin
  1777. for r:=RS_R0 to RS_R15 do
  1778. if r in regs then
  1779. inc(registerarea,4);
  1780. { if the stack is not 8 byte aligned, try to add an extra register,
  1781. so we can avoid the extra sub/add ...,#4 later (KB) }
  1782. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1783. for r:=RS_R3 downto RS_R0 do
  1784. if not(r in regs) then
  1785. begin
  1786. regs:=regs+[r];
  1787. inc(registerarea,4);
  1788. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1789. break;
  1790. end;
  1791. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1792. end;
  1793. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1794. begin
  1795. { the framepointer now points to the saved R15, so the saved
  1796. framepointer is at R11-12 (for get_caller_frame) }
  1797. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1798. a_reg_dealloc(list,NR_R12);
  1799. end;
  1800. end
  { ---- Darwin: two STMFD instructions with the frame pointer set in between ---- }
  1801. else
  1802. begin
  1803. { always save r14 if we use r7 as the framepointer, because
  1804. the parameter offsets are hardcoded in advance and always
  1805. assume that r14 sits on the stack right behind the saved r7
  1806. }
  1807. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1808. include(regs,RS_FRAME_POINTER_REG);
  1809. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1810. include(regs,RS_R14);
  1811. if regs<>[] then
  1812. begin
  1813. { on Darwin, you first have to save [r4-r7,lr], and then
  1814. [r8,r10,r11] and make r7 point to the previously saved
  1815. r7 so that you can perform a stack crawl based on it
  1816. ([r7] is previous stack frame, [r7+4] is return address)
  1817. }
  1818. include(regs,RS_FRAME_POINTER_REG);
  1819. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset counts the bytes occupied by the saved registers numbered
    below the frame pointer register }
  1820. r7offset:=0;
  1821. for r:=RS_R0 to RS_R15 do
  1822. if r in saveregs then
  1823. begin
  1824. inc(registerarea,4);
  1825. if r<RS_FRAME_POINTER_REG then
  1826. inc(r7offset,4);
  1827. end;
  1828. { save the registers }
  1829. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1830. { make r7 point to the saved r7 (regardless of whether this
  1831. frame uses the framepointer, for backtrace purposes) }
  1832. if r7offset<>0 then
  1833. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1834. else
  1835. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1836. { now save the rest (if any) }
  1837. saveregs:=regs-saveregs;
  1838. if saveregs<>[] then
  1839. begin
  1840. for r:=RS_R8 to RS_R11 do
  1841. if r in saveregs then
  1842. inc(registerarea,4);
  1843. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1844. end;
  1845. end;
  1846. end;
  { ---- reserve the local area ----
    stackmisalignment is the part of the register save area that exceeds a
    multiple of the maximum local alignment; localsize is padded so that
    registerarea+localsize is aligned again }
  1847. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1848. if (LocalSize<>0) or
  1849. ((stackmisalignment<>0) and
  1850. ((pi_do_call in current_procinfo.flags) or
  1851. (po_assembler in current_procinfo.procdef.procoptions))) then
  1852. begin
  1853. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { with an estimated stack size the frame must have exactly the estimated
    size; a larger actual size is an internal error }
  1854. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1855. begin
  1856. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1857. internalerror(2014030901)
  1858. else
  1859. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1860. end;
  { adjust SP with one immediate, two immediates, or via R12 as scratch }
  { NOTE(review): the two immediate variants deallocate R12 even when the
    frame pointer is SP and R12 was not allocated above -- confirm that
    this is harmless }
  1861. if is_shifter_const(localsize,shift) then
  1862. begin
  1863. a_reg_dealloc(list,NR_R12);
  1864. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1865. end
  1866. else if split_into_shifter_const(localsize, imm1, imm2) then
  1867. begin
  1868. a_reg_dealloc(list,NR_R12);
  1869. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1870. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1871. end
  1872. else
  1873. begin
  1874. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1875. a_reg_alloc(list,NR_R12);
  1876. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1877. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1878. a_reg_dealloc(list,NR_R12);
  1879. end;
  1880. end;
  { ---- store the floating point registers at floatregstart ---- }
  1881. if (mmregs<>[]) or
  1882. (firstfloatreg<>RS_NO) then
  1883. begin
  1884. reference_reset(ref,4,[]);
  { for VFP, or when the offset is too large, the address
    framepointer+floatregstart is computed into R12 first }
  { NOTE(review): the encodability test is done on floatregstart while the
    emitted immediate is -floatregstart -- confirm this is intended }
  1885. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1886. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1887. begin
  1888. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1889. begin
  1890. a_reg_alloc(list,NR_R12);
  1891. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1892. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1893. a_reg_dealloc(list,NR_R12);
  1894. end
  1895. else
  1896. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1897. ref.base:=NR_R12;
  1898. end
  1899. else
  1900. begin
  1901. ref.base:=current_procinfo.framepointer;
  1902. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1903. end;
  1904. case current_settings.fputype of
  1905. fpu_fpa,
  1906. fpu_fpa10,
  1907. fpu_fpa11:
  1908. begin
  { one SFM stores the whole range firstfloatreg..lastfloatreg }
  1909. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1910. lastfloatreg-firstfloatreg+1,ref));
  1911. end;
  1912. fpu_vfpv2,
  1913. fpu_vfpv3,
  1914. fpu_vfpv4,
  1915. fpu_vfpv3_d16:
  1916. begin
  { VSTM takes the address register in the index field of the reference }
  1917. ref.index:=ref.base;
  1918. ref.base:=NR_NO;
  1919. { FSTMX is deprecated on ARMv6 and later }
  1920. {if (current_settings.cputype<cpu_armv6) then
  1921. postfix:=PF_IAX
  1922. else
  1923. postfix:=PF_IAD;}
  1924. if mmregs<>[] then
  1925. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1926. end;
  1927. else
  1928. internalerror(2019050923);
  1929. end;
  1930. end;
  1931. end;
  1932. end;
  { Emits the procedure epilogue into list.

    parasize     : not used by this implementation
    nostackframe : when true only the return instruction is emitted
                   (BX R14 if the CPU has BX, MOV PC,R14 otherwise)

    With a stack frame the steps are, in order:
      1. reload the non-volatile FPA (LFM) or VFP (VLDM) registers used in
         the procedure from floatregstart
      2. compute the set of integer registers to restore, including the
         padding register recorded by g_proc_entry
      3. if SP is the frame pointer or the target is Darwin: add the local
         size back to SP and pop the registers with LDMFD (or return with
         BX/MOV when there is nothing to pop); otherwise restore everything
         with a single LDMEA through the frame pointer }
  1933. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1934. var
  1935. ref : treference;
  1936. LocalSize : longint;
  1937. firstfloatreg,lastfloatreg,
  1938. r,
  1939. shift : byte;
  1940. mmregs,
  1941. saveregs,
  1942. regs : tcpuregisterset;
  1943. registerarea,
  1944. stackmisalignment: pint;
  1945. paddingreg: TSuperRegister;
  1946. imm1, imm2: DWord;
  1947. begin
  1948. if not(nostackframe) then
  1949. begin
  1950. registerarea:=0;
  1951. firstfloatreg:=RS_NO;
  1952. lastfloatreg:=RS_NO;
  1953. mmregs:=[];
  1954. saveregs:=[];
  1955. case current_settings.fputype of
  1956. fpu_none,
  1957. fpu_soft,
  1958. fpu_libgcc:
  1959. ;
  1960. fpu_fpa,
  1961. fpu_fpa10,
  1962. fpu_fpa11:
  1963. begin
  1964. { restore floating point registers? }
  1965. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1966. for r:=RS_F0 to RS_F7 do
  1967. if r in regs then
  1968. begin
  1969. if firstfloatreg=RS_NO then
  1970. firstfloatreg:=r;
  1971. lastfloatreg:=r;
  1972. { floating point register space is already included in
  1973. localsize below by calc_stackframe_size
  1974. inc(registerarea,12);
  1975. }
  1976. end;
  1977. end;
  1978. fpu_vfpv2,
  1979. fpu_vfpv3,
  1980. fpu_vfpv4,
  1981. fpu_vfpv3_d16:
  1982. begin;
  1983. { restore vfp registers? }
  1984. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1985. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1986. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1987. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1988. end;
  1989. else
  1990. internalerror(2019050926);
  1991. end;
  { ---- reload the floating point registers from floatregstart ---- }
  1992. if (firstfloatreg<>RS_NO) or
  1993. (mmregs<>[]) then
  1994. begin
  1995. reference_reset(ref,4,[]);
  { for VFP, or when the offset is too large, the address
    framepointer+floatregstart is computed into R12 first }
  { NOTE(review): the encodability test is done on floatregstart while the
    emitted immediate is -floatregstart -- confirm this is intended }
  1996. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1997. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1998. begin
  1999. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  2000. begin
  2001. a_reg_alloc(list,NR_R12);
  2002. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  2003. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  2004. a_reg_dealloc(list,NR_R12);
  2005. end
  2006. else
  2007. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  2008. ref.base:=NR_R12;
  2009. end
  2010. else
  2011. begin
  2012. ref.base:=current_procinfo.framepointer;
  2013. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  2014. end;
  2015. case current_settings.fputype of
  2016. fpu_fpa,
  2017. fpu_fpa10,
  2018. fpu_fpa11:
  2019. begin
  { one LFM reloads the whole range firstfloatreg..lastfloatreg }
  2020. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2021. lastfloatreg-firstfloatreg+1,ref));
  2022. end;
  2023. fpu_vfpv2,
  2024. fpu_vfpv3,
  2025. fpu_vfpv4,
  2026. fpu_vfpv3_d16:
  2027. begin
  { VLDM takes the address register in the index field of the reference }
  2028. ref.index:=ref.base;
  2029. ref.base:=NR_NO;
  2030. { FLDMX is deprecated on ARMv6 and later }
  2031. {if (current_settings.cputype<cpu_armv6) then
  2032. mmpostfix:=PF_IAX
  2033. else
  2034. mmpostfix:=PF_IAD;}
  2035. if mmregs<>[] then
  2036. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2037. end;
  2038. else
  2039. internalerror(2019050921);
  2040. end;
  2041. end;
  { ---- integer registers to restore ----
    R14 is replaced by R15 in the set, so the slot that holds the saved
    link register is loaded directly into the program counter }
  2042. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2043. if (pi_do_call in current_procinfo.flags) or
  2044. (regs<>[]) or
  2045. ((target_info.system in systems_darwin) and
  2046. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2047. begin
  2048. exclude(regs,RS_R14);
  2049. include(regs,RS_R15);
  2050. if (target_info.system in systems_darwin) then
  2051. include(regs,RS_FRAME_POINTER_REG);
  2052. end;
  2053. if not(target_info.system in systems_darwin) then
  2054. begin
  2055. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2056. The saved PC came after that but is discarded, since we restore
  2057. the stack pointer }
  2058. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2059. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2060. end
  2061. else
  2062. begin
  2063. { restore R8-R11 already if necessary (they've been stored
  2064. before the others) }
  { only the reference and the set are prepared here; the LDM for saveregs
    is emitted further down, after SP has been adjusted }
  2065. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2066. if saveregs<>[] then
  2067. begin
  2068. reference_reset(ref,4,[]);
  2069. ref.index:=NR_STACK_POINTER_REG;
  2070. ref.addressmode:=AM_PREINDEXED;
  2071. for r:=RS_R8 to RS_R11 do
  2072. if r in saveregs then
  2073. inc(registerarea,4);
  2074. regs:=regs-saveregs;
  2075. end;
  2076. end;
  2077. for r:=RS_R0 to RS_R15 do
  2078. if r in regs then
  2079. inc(registerarea,4);
  2080. { reapply the stack padding reg, in case there was one, see the complementary
  2081. comment in g_proc_entry() (KB) }
  { g_proc_entry initialises stackpaddingreg with High(TSuperRegister), so
    a value below RS_R4 means a padding register R0..R3 was pushed }
  2082. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
  2083. if paddingreg < RS_R4 then
  2084. if paddingreg in regs then
  2085. internalerror(201306190)
  2086. else
  2087. begin
  2088. regs:=regs+[paddingreg];
  2089. inc(registerarea,4);
  2090. end;
  2091. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  { ---- SP based frame or Darwin: release the locals, then pop ---- }
  2092. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2093. (target_info.system in systems_darwin) then
  2094. begin
  2095. LocalSize:=current_procinfo.calc_stackframe_size;
  2096. if (LocalSize<>0) or
  2097. ((stackmisalignment<>0) and
  2098. ((pi_do_call in current_procinfo.flags) or
  2099. (po_assembler in current_procinfo.procdef.procoptions))) then
  2100. begin
  { NOTE(review): g_proc_entry additionally requires stack_parameters
    before using the estimated frame size -- confirm both always agree }
  2101. if pi_estimatestacksize in current_procinfo.flags then
  2102. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2103. else
  2104. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { adjust SP with one immediate, two immediates, or via R12 as scratch }
  2105. if is_shifter_const(LocalSize,shift) then
  2106. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2107. else if split_into_shifter_const(localsize, imm1, imm2) then
  2108. begin
  2109. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2110. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2111. end
  2112. else
  2113. begin
  2114. a_reg_alloc(list,NR_R12);
  2115. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2116. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2117. a_reg_dealloc(list,NR_R12);
  2118. end;
  2119. end;
  { Darwin: pop the separately stored R8/R10/R11 first; ref was set up
    above under the same saveregs<>[] condition }
  2120. if (target_info.system in systems_darwin) and
  2121. (saveregs<>[]) then
  2122. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  { nothing on the stack: plain return through the link register }
  2123. if regs=[] then
  2124. begin
  2125. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2126. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2127. else
  2128. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2129. end
  2130. else
  2131. begin
  2132. reference_reset(ref,4,[]);
  2133. ref.index:=NR_STACK_POINTER_REG;
  2134. ref.addressmode:=AM_PREINDEXED;
  2135. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2136. end;
  2137. end
  2138. else
  2139. begin
  2140. { restore int registers and return }
  { LDMEA through the frame pointer; regs contains R13 and R15 here, so
    the stack pointer is restored and the procedure returns in one go }
  2141. reference_reset(ref,4,[]);
  2142. ref.index:=NR_FRAME_POINTER_REG;
  2143. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2144. end;
  2145. end
  { no stack frame: just return }
  2146. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2147. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2148. else
  2149. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2150. end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.  Nothing is generated unless PIC code is being
    created, the procedure is flagged with pi_needs_got and the target
    uses a GOT for PIC.

    The GOT displacement is placed in the procedure's local data
    (aktlocaldata), loaded PC-relative into R12, made absolute by adding
    PC and finally moved to current_procinfo.got. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel,
      pclabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sr : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;
      { Procedure parameters are not initialized at this stage.
        Before GOT initialization code, allocate registers used for procedure parameters
        to prevent usage of these registers for temp operations in later stages of code
        generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sr:=RS_R0 to RS_R3 do
        if sr in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));
      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);
      { pool entry: label first, the constant itself is appended below }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));
      { the constant is relative to the label placed right before the ADD }
      current_asmdata.getaddrlabel(pclabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,pclabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,pclabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));
      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sr:=RS_R3 downto RS_R0 do
        if sr in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.
    Only references with address mode AM_OFFSET are accepted.  A symbol, or
    an offset that cannot be encoded as shifter constant (neither itself
    nor negated), is resolved through fixref first.  The address is then
    built from base, index and offset with ADD/SUB, a constant load or a
    plain MOV (which is registered as move instruction). }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      rot : byte;
      addr : treference;
      mov : taicpu;
      indexop : topcg;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      addr:=ref;
      { Be sure to have a base register }
      if (addr.base=NR_NO) then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(2014020702);
          if addr.signindex<0 then
            internalerror(200312023);
          addr.base:=addr.index;
          addr.index:=NR_NO;
        end;
      if assigned(addr.symbol) or
         (not(is_shifter_const(addr.offset,rot)) and
          not(is_shifter_const(-addr.offset,rot))) then
        fixref(list,addr);
      { expect a base here if there is an index }
      if (addr.base=NR_NO) and (addr.index<>NR_NO) then
        internalerror(200312022);
      if addr.index<>NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(200312021);
          { NOTE(review): for OP_SUB the operand order base,index is passed on
            unchanged -- confirm a_op_reg_reg_reg yields base-index here }
          if addr.signindex<0 then
            indexop:=OP_SUB
          else
            indexop:=OP_ADD;
          a_op_reg_reg_reg(list,indexop,OS_ADDR,addr.base,addr.index,r);
          if addr.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,r,r);
        end
      else if addr.base=NR_NO then
        { neither base nor index: the address is just the offset }
        a_load_const_reg(list,OS_ADDR,addr.offset,r)
      else if addr.offset<>0 then
        a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,addr.base,r)
      else
        begin
          mov:=taicpu.op_reg_reg(A_MOV,r,addr.base);
          list.concat(mov);
          add_move_instruction(mov);
        end;
    end;
  { Rewrites ref so that it no longer contains a symbol or an offset.

    The symbol/offset part is either loaded indirectly (Darwin, external
    symbols) or emitted as a constant into the procedure's local data
    (aktlocaldata) and loaded PC-relative into a temporary register.  That
    register is then merged into ref as base or index; ref.offset is set to
    0 and ref.symbol to nil on return.  A PC base/index of the incoming
    reference is dropped.

    list : assembler list receiving the load instructions
    ref  : reference to rewrite in place }
  2245. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2246. var
  2247. tmpreg, tmpreg2 : tregister;
  2248. tmpref : treference;
  2249. l, piclabel : tasmlabel;
  2250. indirection_done : boolean;
  2251. begin
  2252. { absolute symbols can't be handled directly, we've to store the symbol reference
  2253. in the text segment and access it pc relative
  2254. For now, we assume that references where base or index equals to PC are already
  2255. relative, all other references are assumed to be absolute and thus they need
  2256. to be handled extra.
  2257. A proper solution would be to change refoptions to a set and store the information
  2258. if the symbol is absolute or relative there.
  2259. }
  2260. { create consts entry }
  { the label is emitted first; the constant that belongs to it is appended
    to aktlocaldata by one of the branches below }
  2261. reference_reset(tmpref,4,[]);
  2262. current_asmdata.getjumplabel(l);
  2263. cg.a_label(current_procinfo.aktlocaldata,l);
  2264. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2265. piclabel:=nil;
  2266. tmpreg:=NR_NO;
  2267. indirection_done:=false;
  2268. if assigned(ref.symbol) then
  2269. begin
  { Darwin, externally bound symbol: load through an indirect symbol; no
    constant is added to the pool in this case }
  2270. if (target_info.system=system_arm_darwin) and
  2271. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2272. begin
  2273. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2274. if ref.offset<>0 then
  2275. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2276. indirection_done:=true;
  2277. end
  2278. else if ref.refaddr=addr_gottpoff then
  2279. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2280. else if (cs_create_pic in current_settings.moduleswitches) then
  { PIC with GOT: the pool entry is a GOT constant for the symbol and
    ref.offset is added after the load from the GOT further down }
  2281. if (tf_pic_uses_got in target_info.flags) then
  2282. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2283. else
  2284. begin
  2285. { ideally, we would want to generate
  2286. ldr r1, LPICConstPool
  2287. LPICLocal:
  2288. ldr/str r2,[pc,r1]
  2289. ...
  2290. LPICConstPool:
  2291. .long _globsym-(LPICLocal+8)
  2292. However, we cannot be sure that the ldr/str will follow
  2293. right after the call to fixref, so we have to load the
  2294. complete address already in a register.
  2295. }
  2296. current_asmdata.getaddrlabel(piclabel);
  2297. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2298. end
  2299. else
  { non-PIC: absolute address of symbol+offset }
  2300. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2301. end
  2302. else
  { no symbol: the pool entry is just the offset }
  2303. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2304. { load consts entry }
  2305. if not indirection_done then
  2306. begin
  2307. tmpreg:=getintregister(list,OS_INT);
  2308. tmpref.symbol:=l;
  2309. tmpref.base:=NR_PC;
  2310. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  { PIC with GOT: the loaded value is used as index relative to the GOT
    register to fetch the symbol address, then the offset is added }
  2311. if (cs_create_pic in current_settings.moduleswitches) and
  2312. (tf_pic_uses_got in target_info.flags) and
  2313. assigned(ref.symbol) then
  2314. begin
  2315. {$ifdef EXTDEBUG}
  2316. if not (pi_needs_got in current_procinfo.flags) then
  2317. Comment(V_warning,'pi_needs_got not included');
  2318. {$endif EXTDEBUG}
  2319. Include(current_procinfo.flags,pi_needs_got);
  2320. reference_reset(tmpref,4,[]);
  2321. tmpref.base:=current_procinfo.got;
  2322. tmpref.index:=tmpreg;
  2323. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2324. if ref.offset<>0 then
  2325. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2326. end;
  2327. end;
  { PIC without GOT: the pool entry is relative to piclabel, so PC is added
    at exactly that label to obtain the absolute address }
  2328. if assigned(piclabel) then
  2329. begin
  2330. cg.a_label(list,piclabel);
  2331. tmpreg2:=getaddressregister(list);
  2332. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2333. tmpreg:=tmpreg2
  2334. end;
  2335. { This routine can be called with PC as base/index in case the offset
  2336. was too large to encode in a load/store. In that case, the entire
  2337. absolute expression has been re-encoded in a new constpool entry, and
  2338. we have to remove the use of PC from the original reference (the code
  2339. above made everything relative to the value loaded from the new
  2340. constpool entry) }
  2341. if is_pc(ref.base) then
  2342. ref.base:=NR_NO;
  2343. if is_pc(ref.index) then
  2344. ref.index:=NR_NO;
  { merge tmpreg into the reference:
      base and index present -> tmpreg:=base+tmpreg becomes the new base
      only a base present    -> tmpreg becomes the (unshifted, positive) index
      no base                -> tmpreg becomes the base }
  2345. if (ref.base<>NR_NO) then
  2346. begin
  2347. if ref.index<>NR_NO then
  2348. begin
  2349. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2350. ref.base:=tmpreg;
  2351. end
  2352. else
  2353. if ref.base<>NR_PC then
  2354. begin
  2355. ref.index:=tmpreg;
  2356. ref.shiftimm:=0;
  2357. ref.signindex:=1;
  2358. ref.shiftmode:=SM_None;
  2359. end
  2360. else
  2361. ref.base:=tmpreg;
  2362. end
  2363. else
  2364. ref.base:=tmpreg;
  2365. ref.offset:=0;
  2366. ref.symbol:=nil;
  2367. end;
  { Copies len bytes from source to dest by calling the run-time routine
    FPC_MOVE.  The three parameters of the system procedure MOVE are set up
    in the order length, destination address, source address; the volatile
    integer and FPU registers are marked as allocated around the call. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,lenpara : TCGPara;
      movedef : tprocdef;
      volint,volfpu : tcpuregisterset;
    begin
      movedef:=search_system_proc('MOVE');
      srcpara.init;
      dstpara.init;
      lenpara.init;
      { parameter locations: 1 = source, 2 = destination, 3 = length }
      paramanager.getintparaloc(list,movedef,1,srcpara);
      paramanager.getintparaloc(list,movedef,2,dstpara);
      paramanager.getintparaloc(list,movedef,3,lenpara);
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);
      { the callee may clobber every volatile register }
      volint:=paramanager.get_volatile_registers_int(pocall_default);
      volfpu:=paramanager.get_volatile_registers_fpu(pocall_default);
      alloccpuregisters(list,R_INTREGISTER,volint);
      alloccpuregisters(list,R_FPUREGISTER,volfpu);
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,volfpu);
      dealloccpuregisters(list,R_INTREGISTER,volint);
      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits code that copies len bytes from source to dest.

    list    : assembler list the generated code is appended to
    source  : reference to the first byte to read
    dest    : reference to the first byte to write
    len     : number of bytes to copy; nothing is emitted for len=0
    aligned : true if 16/32 bit wide accesses may be used

    Strategy, as implemented below:
      - aligned copies of exactly 4 or 2 bytes use a single load/store pair
      - other aligned copies of at most helpsize bytes are unrolled
      - copies of at most 4 bytes that are not aligned are done byte by byte
      - everything else uses a byte-wise copy loop (genloop/genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with reserved
        registers; the +2 is used because of the registers required for referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Emits a loop copying count div size elements of size bytes each using
      post-indexed addressing on srcref/dstref, followed by the code copying
      the remaining count mod size bytes.

      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the decrement sets the flags tested by the jump below }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remainder }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether creating a separate reference is needed or
      whether we can keep the original reference while copying }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Thumb variant of genloop: instead of post-indexed addressing, the base
      registers of srcref/dstref are advanced with explicit additions.

      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        refincofs(srcref);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the jump below tests the flags, so the decrement has to be marked as
          flag setting, exactly as in genloop }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remainder }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest size which is still copied by unrolled code }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;
          { first load as many 32 bit words as temp registers are available ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;
          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;
          { ... and then store them }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;
          { copy whatever is left with the widest access fitting the remaining length }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the new source reference inherits the properties of source, not of dest }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
              genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is generated as an
    aligned copy unless the alignment of source or dest is 1 or 3. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      oddalignment : boolean;
    begin
      oddalignment:=(source.alignment in [1,3]) or
                    (dest.alignment in [1,3]);
      g_concatcopy_internal(list,source,dest,len,not(oddalignment));
    end;
  { Emits an overflow check for l of type def without a dedicated overflow
    location: g_overflowCheck_loc is called with a LOC_VOID location. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      voidloc : tlocation;
    begin
      voidloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,voidloc);
    end;
  { Emits a conditional jump around a call to FPC_OVERFLOW.

    Nothing is emitted unless cs_check_overflow is set in the local switches.

    ovloc.loc=LOC_VOID  : the condition is derived from def and, for pointers
                          and the unsigned/boolean/char ordinals listed below,
                          from the opcode of the last instruction in List
    ovloc.loc=LOC_FLAGS : the call is skipped on the inverse of ovloc.resflags
                          and the flags are released afterwards
    anything else       : internal error }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      oklabel : tasmlabel;
      skipjmp : TAiCpu;
      okflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(oklabel);
      case ovloc.loc of
        LOC_VOID:
          begin
            skipjmp:=taicpu.op_sym(A_B,oklabel);
            skipjmp.is_jmp:=true;
            { these types are checked through the carry flag, all others
              through the overflow flag }
            carrybased:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if carrybased then
              begin
                { the carry condition is chosen depending on whether the
                  last instruction is a subtraction }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  skipjmp.SetCondition(C_CS)
                else
                  skipjmp.SetCondition(C_CC);
              end
            else
              skipjmp.SetCondition(C_VC);
            list.concat(skipjmp);
          end;
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,oklabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,oklabel);
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { this work is done in g_proc_entry }
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { this work is done in g_proc_exit }
    end;
  { Emits a jump to l which is taken if the comparison condition cond holds.

    For Thumb code, a conditional branch with the inverted condition around
    an unconditional jump to l is generated; otherwise a single conditional
    branch to l is emitted. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not(GenerateThumbCode) then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
        end
      else
        begin
          { the optimizer has to fix this if the jump range is sufficiently short }
          current_asmdata.getjumplabel(skiplabel);
          branch:=Taicpu.Op_sym(A_B,skiplabel);
          branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
          branch.is_jmp:=true;
          list.concat(branch);
          a_jmp_always(list,l);
          a_label(list,skiplabel);
        end;
    end;
  { Returns the opcode stored in the table below for the size pair
    (fromsize,tosize): A_VMOV if both are OS_F32 or both are OS_F64, A_VCVT
    for the two mixed OS_F32/OS_F64 pairs. Any other pair raises an
    internal error. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      { first index: fromsize, second index: tosize }
      mmop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=mmop[fromsize,tosize];
      if result=A_NONE then
        internalerror(200312205);
    end;
  { Returns the instruction postfix stored in the table below for the size
    pair (fromsize,tosize); PF_None for every pair other than the four
    OS_F32/OS_F64 combinations. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      { first index: fromsize, second index: tosize }
      mmpostfix : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      result:=mmpostfix[fromsize,tosize];
    end;
  { Emits a scalar move/conversion from mm register reg1 (fromsize) to mm
    register reg2 (tosize). Only scalar shuffles (or no shuffle at all) are
    supported, anything else raises an internal error.

    Opcode and postfix are looked up with (tosize,fromsize), i.e. the
    destination size is passed first. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
    begin
      if (shuffle<>nil) and not(shufflescalar(shuffle)) then
        internalerror(2009112407)
      else
        mov:=setoppostfix(
          taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),
          get_scalar_mm_prefix(tosize,fromsize));
      list.concat(mov);
      { plain moves are registered as move instructions }
      if mov.opcode=A_VMOV then
        add_move_instruction(mov);
      maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar value from memory location ref into mm register reg.

    fromsize : size of the value in memory; the integer sizes OS_32/OS_S32
               and OS_64/OS_S64 are treated as OS_F32 and OS_F64 and must
               then match tosize
    tosize   : size of the value in reg; if it differs from fromsize, the
               value is loaded into a temporary mm register and converted
    shuffle  : must be nil or a scalar shuffle

    References with an alignment of 1 or 2 are loaded through integer
    registers, all others with VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      bitsreg,
      loadreg : tregister;
      bits64 : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      case fromsize of
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { an integer is loaded bit by bit, so no conversion is possible }
            if (tosize<>fromsize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { an integer is loaded bit by bit, so no conversion is possible }
            if (tosize<>fromsize) then
              internalerror(2009112901);
          end;
        OS_F32,OS_F64:
          ;
        else
          internalerror(2019050920);
      end;

      { load directly into reg if no conversion is needed }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        case fromsize of
          OS_F32:
            begin
              bitsreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,ref,bitsreg);
              a_loadmm_intreg_reg(list,OS_32,OS_F32,bitsreg,loadreg,mms_movescalar);
            end;
          OS_F64:
            begin
              bits64.reglo:=getintregister(list,OS_32);
              bits64.reghi:=getintregister(list,OS_32);
              cg64.a_load64_ref_reg(list,ref,bits64);
              cg64.a_loadmm_intreg64_reg(list,OS_F64,bits64,loadreg);
            end;
          else
            internalerror(2009112412);
        end;

      { convert into the final register if a temporary one was used }
      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
      maybe_check_for_fpu_exception(list);
    end;
  { Stores the scalar value held in mm register reg to memory location ref.

    fromsize : size of the value in reg
    tosize   : size of the value in memory; the integer sizes OS_32/OS_S32
               and OS_64/OS_S64 are treated as OS_F32 and OS_F64 and must
               then match fromsize; if the sizes differ, the value is first
               converted into a temporary mm register
    shuffle  : must be nil or a scalar shuffle

    References with an alignment of 1 or 2 are stored through integer
    registers, all others with VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      bitsreg,
      storereg : tregister;
      bits64 : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      case tosize of
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { an integer is stored bit by bit, so no conversion is possible }
            if (tosize<>fromsize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { an integer is stored bit by bit, so no conversion is possible }
            if (tosize<>fromsize) then
              internalerror(2009112901);
          end;
        OS_F32,OS_F64:
          ;
        else
          internalerror(2019050919);
      end;

      { store reg directly if no conversion is needed }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        case tosize of
          OS_F32:
            begin
              bitsreg:=getintregister(list,OS_32);
              a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,bitsreg,shuffle);
              a_load_reg_ref(list,OS_32,OS_32,bitsreg,ref);
            end;
          OS_F64:
            begin
              bits64.reglo:=getintregister(list,OS_32);
              bits64.reghi:=getintregister(list,OS_32);
              cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,bits64);
              cg64.a_load64_reg_ref(list,bits64,ref);
            end;
          else
            internalerror(2009112417);
        end;
      maybe_check_for_fpu_exception(list);
    end;
  { Transfers the raw contents of integer register intreg into mm register
    mmreg with a VMOV; no conversion is performed.

    tosize has to be OS_F32, fromsize OS_32 or OS_S32, and shuffle nil or a
    scalar shuffle; otherwise an internal error is raised. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { only raw data transfers are supported here }
      if not(tosize=OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112516);
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
      maybe_check_for_fpu_exception(list);
    end;
  { Transfers the raw contents of mm register mmreg into integer register
    intreg with a VMOV; no conversion is performed.

    fromsize has to be OS_F32, tosize OS_32 or OS_S32, and shuffle nil or a
    scalar shuffle; otherwise an internal error is raised. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { only raw data transfers are supported here }
      if not(fromsize=OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112514);
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
      maybe_check_for_fpu_exception(list);
    end;
  { Supports exactly one operation: OP_XOR of an mm register with itself,
    which is implemented by moving an integer zero into the register.

    The vfp supports neither xor nor any other logical operation, but this
    routine is used to initialise global mm regvars, and initialising an mm
    register with 0 is easy.

    Internal errors are raised for any other op, if src and dst differ, if
    the size of src is not size, if a shuffle is passed or if size is
    neither OS_F32 nor OS_F64. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op=OP_XOR then
        begin
          if (src<>dst) or
             (reg_cgsize(src)<>size) or
             (shuffle<>nil) then
            internalerror(2009112907);
          zeroreg:=getintregister(list,OS_32);
          a_load_const_reg(list,OS_32,0,zeroreg);
          if size=OS_F32 then
            list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
          else if size=OS_F64 then
            { the same zero register supplies both integer operands }
            list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
          else
            internalerror(2009112908);
        end
      else
        internalerror(2009112906);
    end;
  { Emits a load of dst from itself converting from OS_32 to size, but only
    if op is one of the operations listed below and size is an 8 or 16 bit
    integer size; otherwise nothing is emitted. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      adjustops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      subsizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in adjustops) then
        exit;
      if size in subsizes then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits "MLA op1,op2,op3,op4" after replacing every operand which is R15,
    or R13 when generating Thumb or Thumb-2 code, by a fresh integer
    register loaded with a copy of it. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new register if it is one of the
      registers described above }
    procedure checkreg(var reg : TRegister);
      var
        copyreg : TRegister;
        needscopy : boolean;
      begin
        needscopy:=((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
                   (getsupreg(reg)=RS_R15);
        if not(needscopy) then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      checkreg(op1);
      checkreg(op2);
      checkreg(op3);
      checkreg(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { Emits a call to fpc_read_tp and copies R0 afterwards into the register
    current_procinfo.tlsoffset. R0 is marked as allocated for the duration
    of this sequence.

    NOTE(review): despite the "maybe" in the name, the sequence is emitted
    unconditionally here; presumably the caller decides whether it is
    needed - confirm. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      { R0 is read right after the call and copied to the tlsoffset register }
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation with two register operands.

    OP_NEG : regdst:=0-regsrc, emitted as RSBS on the low and RSC on the
             high halves with the flags allocated in between
    OP_NOT : both halves are inverted separately
    others : forwarded to a_op64_reg_reg_reg with regdst being used as
             second source and as destination }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation with a constant operand working in place on reg:
    forwards to a_op64_const_reg_reg with reg as source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,op,size,value,reg,reg);
    end;
  { 64 bit operation regdst:=regsrc op value: forwards to the
    overflow-checking variant with setflags=false and ignores the overflow
    location it returns. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    const
      wantflags = false;
    var
      ignoredloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,wantflags,ignoredloc);
    end;
  { 64 bit operation with two source registers and a destination register:
    forwards to the overflow-checking variant with setflags=false and
    ignores the overflow location it returns. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    const
      wantflags = false;
    var
      ignoredloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,wantflags,ignoredloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into mm
    register mmreg with a single VMOV (operands: mmreg, low half, high
    half); no conversion is performed. mmsize has to be OS_F64. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    var
      mov : taicpu;
    begin
      { only raw data transfers are supported here }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      mov:=taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi);
      list.concat(mov);
      cg.maybe_check_for_fpu_exception(list);
    end;
  { Transfers the raw contents of mm register mmreg into the integer
    register pair intreg with a single VMOV (operands: low half, high half,
    mmreg); no conversion is performed. mmsize has to be OS_F64. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    var
      mov : taicpu;
    begin
      { only raw data transfers are supported here }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      mov:=taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg);
      list.concat(mov);
      cg.maybe_check_for_fpu_exception(list);
    end;
  { 64 bit operation regdst:=regsrc op value with optional overflow
    information.

    list     : assembler list the generated code is appended to
    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG and OP_NOT
               raise an internal error
    size     : if OS_64 and the flag-setting path is taken, ovloc describes
               the carry based overflow condition
    value    : the constant operand, split into its low and high 32 bits
    setflags : requests the flag-setting path for OP_ADD/OP_SUB; it is also
               taken if cgsetflags of the code generator is set
    ovloc    : LOC_VOID, or LOC_FLAGS with F_CS (add) / F_CC (sub)

    In the flag-setting path, the flags stay allocated on exit. In the
    other path, the flags are only needed to pass the carry from the low to
    the high half and are released again.

    lo(value)/hi(value) are always converted explicitly to aint before
    being passed on. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
        else
          ;
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          case op of
            OP_ADD:
              begin
                { constants which cannot be encoded as shifter operand are
                  loaded into a register before the flags are allocated }
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the arm has a weird opinion on how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed, the flags are not needed anymore }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed, the flags are not needed anymore }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { 64 bit operation with two source registers and optional overflow
    information.

    list     : assembler list the generated code is appended to
    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG and OP_NOT
               raise an internal error
    size     : if OS_64 and the flag-setting path is taken, ovloc describes
               the carry based overflow condition
    setflags : requests the flag-setting path for OP_ADD/OP_SUB; it is also
               taken if cgsetflags of the code generator is set
    ovloc    : LOC_VOID, or LOC_FLAGS with F_CS (add) / F_CC (sub)

    For OP_SUB, regsrc1 is subtracted from regsrc2. In the flag-setting
    path, the flags stay allocated on exit; otherwise they are released
    after the high half has been calculated. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      wantflags : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);
      wantflags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);
      if not(wantflags) then
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { both halves are independent of each other }
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                { the flags only pass the carry from the low to the high half }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end
      else
        begin
          case op of
            OP_ADD:
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
              end;
            OP_SUB:
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
              end;
            else
              internalerror(2003083101);
          end;
          if size=OS_64 then
            begin
              { the carry flag has opposite meanings after ADD and SUB }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050917);
              end;
            end;
        end;
    end;
  { Creates the integer register allocator used for Thumb code.
    R7 is only offered to the allocator when it is not the frame pointer of
    the current procedure. }
  procedure tthumbcgarm.init_register_allocators;
    var
      r7_is_framepointer : boolean;
    begin
      inherited init_register_allocators;
      r7_is_framepointer:=assigned(current_procinfo) and
        (current_procinfo.framepointer=NR_R7);
      if not r7_is_framepointer then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the register allocator objects of the three register classes
    and then lets the inherited implementation finish the cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      { release order is kept as int, fpu, mm }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Generates the Thumb procedure prologue.

    list         - instruction list the prologue is appended to
    localsize    - size of the local variables in bytes (rounded up to 4 here)
    nostackframe - when true no prologue code is generated at all

    The prologue pushes the non-volatile registers used by the procedure
    (plus R14, and R7 when a frame pointer is used), lowers the stack pointer
    by the (aligned) frame size and finally copies SP into the frame pointer
    if one is in use. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      localsize:=align(localsize,4);

      if nostackframe then
        exit;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { save int registers }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

      { the (old) ARM APCS requires saving both the stack pointer (to
        crawl the stack) and the PC (to identify the function this
        stack frame belongs to) -> also save R12 (= copy of R13 on entry)
        and R15 -- still needs updating for EABI and Darwin, they don't
        need that.
        (The copy of SP into R12 is disabled in this Thumb variant.) }
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        regs:=regs+[RS_R7,RS_R14]
      else
        // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(regs,RS_R14);

      { safely estimate stack size: a large frame needs R4 as scratch
        register below, so make sure it gets saved }
      if localsize+current_settings.alignment.localalignmax+4>508 then
        begin
          include(rg[R_INTREGISTER].used_in_proc,RS_R4);
          include(regs,RS_R4);
        end;

      registerarea:=0;
      if regs<>[] then
        begin
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
        end;

      { the call instruction does not put anything on the stack, so only
        the pushed registers can misalign it }
      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;

      if stack_parameters or (localsize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { do we access stack parameters?
            if yes, the previously estimated stacksize must be used }
          if stack_parameters then
            begin
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                begin
                  writeln(localsize);
                  writeln(tcpuprocinfo(current_procinfo).stackframesize);
                  internalerror(2013040601);
                end
              else
                localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
            end
          else
            localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

          { 508 itself still fits a single SUB; this matches the limit used by
            g_proc_exit and avoids a second "sub sp,sp,#0" for a 508 byte frame }
          if localsize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize))
          else if localsize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize-508));
            end
          else
            begin
              { frame too large for immediates: add the negated size via R4.
                NOTE(review): R4 is only pushed above when the conservative
                estimate fired on the incoming localsize - confirm this also
                covers the stack_parameters case. }
              a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
            end;
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
    end;
  { Generates the Thumb procedure epilogue.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true only a plain return (BX LR or MOV PC,LR) is
                   emitted

    With a stack frame the frame size is recomputed the same way as in the
    prologue, SP is raised again and the saved registers are popped with PC
    in the register list, which performs the return. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      supreg : byte;
      saved : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      has_stack_paras : Boolean;

    { return through the link register, using BX when the CPU has it }
    procedure emit_plain_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      if nostackframe then
        begin
          emit_plain_return;
          exit;
        end;

      has_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: PC is always part of the list }
      saved:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(saved,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(saved,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for supreg:=RS_R0 to RS_R15 do
        if supreg in saved then
          inc(savedbytes,4);

      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if has_stack_paras then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      { nothing is emitted when a separate frame pointer is used on a
        non-Darwin target }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; a zero sized frame needs no adjustment }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      { since R15 was included above the set is never empty here, so the
        plain return is effectively not taken }
      if saved=[] then
        emit_plain_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,saved));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg,
    extending it to tosize.

    When the reference is less aligned than the access size (alignment 1 or
    2), the value is assembled from byte or halfword loads which are shifted
    and OR-ed together; otherwise a single load is emitted through
    handle_load_store. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,
      addrreg : tregister;
      step : integer;
      bigendian : boolean;

    { Makes partref usable for piecewise access: only complicated references
      (symbol, index register, offset outside -124..124, or a reference
      using the destination register itself) need an extra loadaddr. }
    procedure setup_partref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { Advances partref, loads the next piece into partreg and merges it
      into reg at bit position shift. }
    procedure merge_next(partsize : tcgsize;advance : integer;shift : longint);
      begin
        inc(partref.offset,advance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        list.concat(taicpu.op_reg_const(A_LSL,partreg,shift));
        list.concat(taicpu.op_reg_reg(A_ORR,reg,partreg));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { map the source size onto the load postfix }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { walk from the least significant piece upwards }
          bigendian:=target_info.endian=endian_big;
          if bigendian then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                setup_partref;
                if bigendian then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                { low byte is always zero extended, the high byte carries the sign }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                if FromSize=OS_16 then
                  merge_next(OS_8,step,8)
                else
                  merge_next(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                setup_partref;
                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if bigendian then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_next(OS_16,step*2,16);
                  end
                else
                  begin
                    { four byte loads }
                    if bigendian then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    merge_next(OS_8,step,8);
                    merge_next(OS_8,step,16);
                    merge_next(OS_8,step,24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg.

    Constants accepted by is_thumb_imm are moved directly; every other value
    is stored as a 32 bit entry in the procedure's local data and fetched
    with a PC relative LDR. Only 8, 16 and 32 bit sizes are accepted. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if not is_thumb_imm(a) then
        begin
          reference_reset(poolref,4,[]);

          { emit "label: .long a" into the local data of the procedure }
          current_asmdata.getjumplabel(poollabel);
          cg.a_label(current_procinfo.aktlocaldata,poollabel);
          poolref.symboldata:=current_procinfo.aktlocaldata.last;
          current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

          { and load it relative to PC }
          poolref.symbol:=poollabel;
          poolref.base:=NR_PC;
          list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
        end
      else
        list.concat(taicpu.op_reg_const(A_MOV,reg,a));
    end;
  { Subtracts ioffset from the "self" parameter of procdef, wherever the
    caller-side parameter location places it (register or memory).

    Offsets accepted by is_thumb_imm are subtracted directly. Other offsets
    are stored in the local data, loaded into R4 (which is pushed before and
    popped afterwards) and subtracted from there. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref,
      poolref : treference;
      curloc : Pcgparalocation;
      poollabel : TAsmLabel;

    { saves R4 and loads ioffset into it from a new local data entry }
    procedure load_offset_into_r4;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    { restores the R4 saved by load_offset_into_r4 }
    procedure restore_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
        (selfsym.typ=paravarsym)) then
        internalerror(200305251);

      { visit every location the parameter is split over }
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    load_offset_into_r4;
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R4,curloc^.register);
                    restore_r4;
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    { NOTE(review): selfref is used after R4 has been pushed;
                      if its base is the stack pointer the offset may need to
                      account for that push - confirm }
                    load_offset_into_r4;
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R4,selfref);
                    restore_r4;
                  end;
              end
            else
              internalerror(200309189);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Emits the load/store op+oppostfix of reg from/to ref, taking the
    addressing restrictions checked below into account.

    If ref cannot be used directly, its address is first computed into a
    temporary register and a plain [register] reference is used instead.
    The actual instruction is emitted by the inherited implementation, whose
    result is returned. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      effref : treference;
      addrreg : TRegister;
      word_access,
      byte_access,
      half_access,
      sp_involved,
      need_address : boolean;
    begin
      effref:=ref;

      { classify the access by its width }
      word_access:=((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      byte_access:=((op=A_LDR) and (oppostfix=PF_B)) or
                   ((op=A_LDRB) and (oppostfix=PF_None)) or
                   ((op=A_STR) and (oppostfix=PF_B)) or
                   ((op=A_STRB) and (oppostfix=PF_None));
      half_access:=((op=A_LDR) and (oppostfix=PF_H)) or
                   ((op=A_LDRH) and (oppostfix=PF_None)) or
                   ((op=A_STR) and (oppostfix=PF_H)) or
                   ((op=A_STRH) and (oppostfix=PF_None));
      sp_involved:=(ref.base=NR_STACK_POINTER_REG) or
                   (ref.index=NR_STACK_POINTER_REG);

      need_address:=
        { LDR/STR limitations }
        (word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         (sp_involved or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (sp_involved or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word loads with a large offset }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any other load with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if need_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(effref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, effref);
    end;
  { Emits the two operand operation "dst := dst op src" for Thumb.

    list - instruction list the code is appended to
    op   - operation; OP_DIV/OP_IDIV raise an internal error
    size - operand size; OP_ROL is only supported for 32 bit operands
    src  - source register (for OP_ROL: the rotate count)
    dst  - destination register, also used as first operand

    After the operation maybeadjustresult is called for the destination. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing by (32 - count): tmpreg := 32 - src }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the computed complement; rotating by src itself
              would perform a ROR instead of the requested ROL }
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            { the remaining operations are emitted with the postfix from
              op_reg_postfix, so reserve the flags register first }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := dst op a" for a constant a.

    ADD/SUB with a constant whose negation passes is_thumb_imm is turned into
    the opposite operation with the negated constant. ADD/SUB with a constant
    accepted by is_thumb_imm is emitted directly; a few trivial
    multiplications, divisions and ANDs are simplified; shifts use the
    immediate form; everything else loads the constant into a temporary
    register and falls back to a_op_reg_reg.

    The code inside DUMMY conditionals is disabled and kept unchanged. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { prefer the encodable form: x+(-c) -> x-c and x-(-c) -> x+c }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if (op in [OP_ADD,OP_SUB]) and is_thumb_imm(a) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
          { reporting an overflow location (ovloc with F_CS for ADD and F_CC
            for SUB) is still disabled here, so nothing else is done even
            when cgsetflags is set }
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := x AND $FFFFFFFF leaves the register unchanged }
          else if (op = OP_AND) and (not(dword(a))=0) then
            begin
              { do nothing }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a))
          else
            begin
              { generic case: materialize the constant and use the
                register/register form }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg(list,op,size,constreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src op a".

    Only one case is handled here: adding a positive multiple of 4 of at
    most 1020 to R13 with a destination other than R13, which is emitted as
    a single three operand ADD. Everything else is delegated to the
    inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    begin
      if (op<>OP_ADD) or
         (src<>NR_R13) or
         (dst=NR_R13) or
         ((a mod 4)<>0) or
         (a<=0) or
         (a>1020) then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materializes the flag condition f in reg: reg becomes 1 when the
    condition holds and 0 otherwise. Implemented with a conditional branch
    around two constant moves; the flags register is released afterwards. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truelabel,
      donelabel : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(truelabel);
      current_asmdata.getjumplabel(donelabel);

      { branch to truelabel when the condition is met }
      condjump:=taicpu.op_sym(A_B,truelabel);
      condjump:=setcondition(condjump,flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition false: reg := 0 and skip the "true" part }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,donelabel));

      { condition true: reg := 1 }
      cg.a_label(list,truelabel);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,donelabel);
    end;
  { Creates the integer, FPU and MM register allocators for Thumb-2 code.
    The integer register list depends on the target system (no R9 on
    Darwin), the MM register list on the selected FPU type. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if (target_info.system=system_arm_darwin) then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      case current_settings.fputype of
        fpu_vfpv3,
        fpu_vfpv4:
          { D0..D31; D8..D15 are listed last }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        fpu_fpv4_s16,
        fpu_vfpv3_d16:
          { only D0..D15 }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        else
          { NOTE(review): this list mixes RS_S* and RS_R* constants; RS_R2,
            RS_R3 and RS_R4 look like integer register names - confirm that
            this is intended }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
      end;
    end;
  { Releases the integer, FPU and MM register allocators created by
    init_register_allocators and then lets the inherited implementation
    finish the cleanup. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { release order is kept as int, fpu, mm }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX reg) and records in the current
    procedure's flags that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinsn : taicpu;
    begin
      callinsn:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinsn);
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
        Therefore the former check that raised internalerror(2003060703)
        when pi_do_call was missing stays disabled and the flag is simply
        set here.
      }
      current_procinfo.flags:=current_procinfo.flags+[pi_do_call];
    end;
  { Loads the constant a into reg using the cheapest form that applies:
    MOV for constants accepted by is_thumb32_imm, MVN when the inverted
    constant is accepted, MOVW for values that fit into 16 bits, and
    otherwise a PC relative LDR from a 32 bit entry placed in the
    procedure's local data. Only 8, 16 and 32 bit sizes are accepted. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;

    { stores a in the local data and loads it relative to PC }
    procedure load_from_local_data;
      begin
        reference_reset(poolref,4,[]);

        current_asmdata.getjumplabel(poollabel);
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
      end;

    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        list.concat(taicpu.op_reg_const(A_MOVW,reg,a))
      else
        load_from_local_data;
    end;
  { Thumb-2: loads a value of size fromsize from the memory reference Ref into the
    integer register reg.
    - If the source size is at least as large as the destination size, the load is
      performed at the destination size.
    - Sizes other than 8, 16 or 32 bit raise internalerror 200308299.
    - If Ref.alignment is 1 or 2 and smaller than the access size, the value is assembled
      from several narrower loads that are combined with ORR and a left shift.
    - Everything else is emitted through handle_load_store using LDR plus the postfix
      selected for the size. }
  4006. procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  4007. var
  4008. oppostfix:toppostfix;
  4009. usedtmpref: treference;
  4010. tmpreg,tmpreg2 : tregister;
  4011. so : tshifterop;
  4012. dir : integer;
  4013. begin
  4014. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  4015. FromSize := ToSize;
  4016. case FromSize of
  4017. { integer sizes: choose the load postfix (B, SB, H, SH or none) }
  4018. OS_8:
  4019. oppostfix:=PF_B;
  4020. OS_S8:
  4021. oppostfix:=PF_SB;
  4022. OS_16:
  4023. oppostfix:=PF_H;
  4024. OS_S16:
  4025. oppostfix:=PF_SH;
  4026. OS_32,
  4027. OS_S32:
  4028. oppostfix:=PF_None;
  4029. else
  4030. InternalError(200308299);
  4031. end;
  { under-aligned reference: split the access into smaller loads }
  4032. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  4033. begin
  { dir is the address step from the least significant part of the value towards
    the most significant one: -1 on big endian targets, +1 otherwise }
  4034. if target_info.endian=endian_big then
  4035. dir:=-1
  4036. else
  4037. dir:=1;
  4038. case FromSize of
  4039. OS_16,OS_S16:
  4040. begin
  4041. { only complicated references need an extra loadaddr }
  4042. if assigned(ref.symbol) or
  4043. (ref.index<>NR_NO) or
  4044. (ref.offset<-255) or
  4045. (ref.offset>4094) or
  4046. { sometimes the compiler reused registers }
  4047. (reg=ref.index) or
  4048. (reg=ref.base) then
  4049. begin
  4050. tmpreg2:=getintregister(list,OS_INT);
  4051. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4052. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4053. end
  4054. else
  4055. usedtmpref:=ref;
  { on big endian the least significant byte sits at the higher address, start there }
  4056. if target_info.endian=endian_big then
  4057. inc(usedtmpref.offset,1);
  4058. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  4059. tmpreg:=getintregister(list,OS_INT);
  { low byte goes into reg unshifted }
  4060. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4061. inc(usedtmpref.offset,dir);
  { high byte: loaded sign extended when the 16 bit value is signed }
  4062. if FromSize=OS_16 then
  4063. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  4064. else
  4065. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  4066. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4067. end;
  4068. OS_32,OS_S32:
  4069. begin
  4070. tmpreg:=getintregister(list,OS_INT);
  4071. { only complicated references need an extra loadaddr }
  4072. if assigned(ref.symbol) or
  4073. (ref.index<>NR_NO) or
  4074. (ref.offset<-255) or
  4075. (ref.offset>4092) or
  4076. { sometimes the compiler reused registers }
  4077. (reg=ref.index) or
  4078. (reg=ref.base) then
  4079. begin
  4080. tmpreg2:=getintregister(list,OS_INT);
  4081. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4082. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4083. end
  4084. else
  4085. usedtmpref:=ref;
  4086. shifterop_reset(so);so.shiftmode:=SM_LSL;
  { halfword aligned: two 16 bit loads, low half first }
  4087. if ref.alignment=2 then
  4088. begin
  4089. if target_info.endian=endian_big then
  4090. inc(usedtmpref.offset,2);
  4091. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  4092. inc(usedtmpref.offset,dir*2);
  4093. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  4094. so.shiftimm:=16;
  4095. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4096. end
  4097. else
  { byte aligned: four 8 bit loads, merged at shift 8, 16 and 24 }
  4098. begin
  4099. if target_info.endian=endian_big then
  4100. inc(usedtmpref.offset,3);
  4101. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4102. inc(usedtmpref.offset,dir);
  4103. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4104. so.shiftimm:=8;
  4105. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4106. inc(usedtmpref.offset,dir);
  4107. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4108. so.shiftimm:=16;
  4109. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4110. inc(usedtmpref.offset,dir);
  4111. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4112. so.shiftimm:=24;
  4113. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4114. end;
  4115. end
  4116. else
  4117. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  4118. end;
  4119. end
  4120. else
  4121. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { NOTE(review): a signed byte loaded for an unsigned 16 bit destination is passed
    through a 16->32 bit register move, presumably to clear the bits above bit 15
    left by the sign extension - confirm }
  4122. if (fromsize=OS_S8) and (tosize = OS_16) then
  4123. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  4124. end;
  { Thumb-2 variant of dst := op(src, dst).
    Only OP_NOT is handled here: the bitwise complement is produced with MVN and, for
    sizes below 32 bit, the result is brought back to the operand size with the matching
    zero/sign extension instruction. Every other operation is forwarded to the inherited
    implementation. An unsupported size for OP_NOT raises internalerror 2019050916. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      extendop : tasmop;
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { pick the instruction that normalizes the result to the operand size;
        A_NONE means that the full 32 bit result is already correct }
      extendop:=A_NONE;
      case size of
        OS_8:
          extendop:=A_UXTB;
        OS_S8:
          extendop:=A_SXTB;
        OS_16:
          extendop:=A_UXTH;
        OS_S16:
          extendop:=A_SXTH;
        OS_32,
        OS_S32:
          ;
        else
          internalerror(2019050916);
      end;
      if extendop<>A_NONE then
        list.concat(taicpu.op_reg_reg(extendop,dst,dst));
    end;
  { Thumb-2: emits code for dst := src op a, where a is a constant.
    ovloc is reset to LOC_VOID on entry. It is switched to LOC_FLAGS by the immediate
    path when flags were requested (cgsetflags or setflags) and size is OS_8, OS_16 or
    OS_32; resflags is then only filled in for OP_ADD (F_CS) and OP_SUB (F_CC). The
    register fallback at the end delegates the ovloc handling to
    a_op_reg_reg_reg_checkoverflow.
    OP_NEG, OP_NOT, OP_DIV and OP_IDIV with an encodable constant raise internalerror
    200308285. maybeadjustresult is always called on dst at the end. }
  4145. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4146. var
  4147. shift, width : byte;
  4148. tmpreg : tregister;
  4149. so : tshifterop;
  4150. l1 : longint;
  4151. begin
  4152. ovloc.loc:=LOC_VOID;
  { if the negated constant is a shifter constant, swap ADD and SUB and use -a instead;
    -2147483648 is excluded, presumably because its negation is not representable in
    32 bit }
  4153. if (a<>-2147483648) and is_shifter_const(-a,shift) then
  4154. case op of
  4155. OP_ADD:
  4156. begin
  4157. op:=OP_SUB;
  4158. a:=aint(dword(-a));
  4159. end;
  4160. OP_SUB:
  4161. begin
  4162. op:=OP_ADD;
  4163. a:=aint(dword(-a));
  4164. end
  4165. else
  4166. ;
  4167. end;
  { first main path: the constant is a shifter constant and op is not a multiplication }
  4168. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4169. case op of
  4170. OP_NEG,OP_NOT,
  4171. OP_DIV,OP_IDIV:
  4172. internalerror(200308285);
  { shifts and rotates: a MOV with a shifter operand, or a plain MOV for a=0 }
  4173. OP_SHL:
  4174. begin
  4175. if a>32 then
  4176. internalerror(2014020703);
  4177. if a<>0 then
  4178. begin
  4179. shifterop_reset(so);
  4180. so.shiftmode:=SM_LSL;
  4181. so.shiftimm:=a;
  4182. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4183. end
  4184. else
  4185. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4186. end;
  { rotate left by a is emitted as rotate right by 32-a }
  4187. OP_ROL:
  4188. begin
  4189. if a>32 then
  4190. internalerror(2014020704);
  4191. if a<>0 then
  4192. begin
  4193. shifterop_reset(so);
  4194. so.shiftmode:=SM_ROR;
  4195. so.shiftimm:=32-a;
  4196. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4197. end
  4198. else
  4199. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4200. end;
  4201. OP_ROR:
  4202. begin
  4203. if a>32 then
  4204. internalerror(2014020705);
  4205. if a<>0 then
  4206. begin
  4207. shifterop_reset(so);
  4208. so.shiftmode:=SM_ROR;
  4209. so.shiftimm:=a;
  4210. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4211. end
  4212. else
  4213. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4214. end;
  4215. OP_SHR:
  4216. begin
  4217. if a>32 then
  4218. internalerror(200308292);
  4219. shifterop_reset(so);
  4220. if a<>0 then
  4221. begin
  4222. so.shiftmode:=SM_LSR;
  4223. so.shiftimm:=a;
  4224. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4225. end
  4226. else
  4227. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4228. end;
  4229. OP_SAR:
  4230. begin
  4231. if a>32 then
  4232. internalerror(200308295);
  4233. if a<>0 then
  4234. begin
  4235. shifterop_reset(so);
  4236. so.shiftmode:=SM_ASR;
  4237. so.shiftimm:=a;
  4238. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4239. end
  4240. else
  4241. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4242. end;
  4243. else
  { remaining operations: ADD/SUB with a constant outside 0..4095 go through a
    temporary register, everything else uses the immediate form directly }
  4244. if (op in [OP_SUB, OP_ADD]) and
  4245. ((a < 0) or
  4246. (a > 4095)) then
  4247. begin
  4248. tmpreg:=getintregister(list,size);
  4249. a_load_const_reg(list, size, a, tmpreg);
  4250. if cgsetflags or setflags then
  4251. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix evaluates to PF_S when flags are requested and to the postfix with
    ordinal 0 otherwise }
  4252. list.concat(setoppostfix(
  4253. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4254. end
  4255. else
  4256. begin
  4257. if cgsetflags or setflags then
  4258. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4259. list.concat(setoppostfix(
  4260. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4261. end;
  { report the flag to test for overflow; only done for the unsigned sizes }
  4262. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4263. begin
  4264. ovloc.loc:=LOC_FLAGS;
  4265. case op of
  4266. OP_ADD:
  4267. ovloc.resflags:=F_CS;
  4268. OP_SUB:
  4269. ovloc.resflags:=F_CC;
  4270. else
  4271. ;
  4272. end;
  4273. end;
  4274. end
  4275. else
  { second main path: multiplication, or a constant that is no shifter constant;
    cheaper special cases are tried before the constant is loaded into a register }
  4276. begin
  4277. { there could be added some more sophisticated optimizations }
  4278. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4279. a_load_reg_reg(list,size,size,src,dst)
  4280. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4281. a_load_const_reg(list,size,0,dst)
  4282. else if (op in [OP_IMUL]) and (a=-1) then
  4283. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4284. { we do this here instead in the peephole optimizer because
  4285. it saves us a register }
  4286. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4287. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4288. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4289. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4290. begin
  4291. if l1>32 then{roozbeh does this ever happen?}
  4292. internalerror(200308296);
  4293. shifterop_reset(so);
  4294. so.shiftmode:=SM_LSL;
  4295. so.shiftimm:=l1;
  4296. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4297. end
  4298. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4299. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4300. begin
  4301. if l1>32 then{does this ever happen?}
  4302. internalerror(201205181);
  4303. shifterop_reset(so);
  4304. so.shiftmode:=SM_LSL;
  4305. so.shiftimm:=l1;
  4306. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4307. end
  4308. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4309. begin
  4310. { nothing to do on success }
  4311. end
  4312. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4313. Just using mov x, #0 might allow some easier optimizations down the line. }
  4314. else if (op = OP_AND) and (dword(a)=0) then
  4315. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4316. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4317. else if (op = OP_AND) and (not(dword(a))=0) then
  4318. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4319. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4320. broader range of shifterconstants.}
  4321. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4322. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  4323. else if (op = OP_AND) and is_thumb32_imm(a) then
  4324. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  { AND with $FFFF is a zero extension of the low halfword }
  4325. else if (op = OP_AND) and (a = $FFFF) then
  4326. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  4327. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4328. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  { the bits to clear form one continuous run: copy src and clear that bit field }
  4329. else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
  4330. begin
  4331. a_load_reg_reg(list,size,size,src,dst);
  4332. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4333. end
  4334. else
  { generic fallback: load the constant into a register and use the register variant }
  4335. begin
  4336. tmpreg:=getintregister(list,size);
  4337. a_load_const_reg(list,size,a,tmpreg);
  4338. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4339. end;
  4340. end;
  4341. maybeadjustresult(list,op,size,dst);
  4342. end;
  { Table indexed by TOpCG that gives the assembler opcode emitted by
    tthumb2cgarm.a_op_reg_reg_reg_checkoverflow for the operations it does not handle
    specially. The entries follow the declaration order of TOpCG. }
  4343. const
  4344. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4345. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4346. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Thumb-2: emits code for dst := src2 op src1.

    list     - assembler list the instructions are appended to
    op       - operation to perform; OP_NEG and OP_NOT are not accepted here and raise
               internalerror 200308286
    size     - operand size; OP_ROL and OP_ROR accept OS_32/OS_S32 only
               (internalerror 2008072801 / 2008072802 otherwise)
    src1     - second operand (for shifts and rotates: the count)
    src2     - first operand
    dst      - result register
    setflags - request that the operation updates the flags (combined with cgsetflags)
    ovloc    - reset to LOC_VOID; for a multiplication with flags requested it is set to
               LOC_FLAGS/F_NE, describing the overflow test on the upper result half

    The source registers are never modified unless they are identical to dst.
    maybeadjustresult is called on dst at the end. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value; the adjusted count is computed in a
              temporary register so that the caller's count register src1 stays intact }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR,dst,src2,tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR,dst,src2,src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { overflow detection needs the full 64 bit product: the low half goes
                  to dst, the high half to overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: multiply a copy
                          of the operand held in a temporary register }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow if the high half equals the sign extension of
                      the low half }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow if the high half is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: multiply a copy
                          of the operand held in a temporary register }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { the postfix evaluates to PF_S when flags are requested and to the postfix
              with ordinal 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Thumb-2: materializes the flag condition f as 1 or 0 in reg.
    An ITE instruction is emitted for the condition, followed by a MOV reg,#1 that is
    executed under the condition and a MOV reg,#0 executed under the inverse condition.
    The size parameter is not used. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : TAsmCond;
      movtrue,movfalse : taicpu;
    begin
      truecond:=flags_to_cond(f);

      { if-then-else block header }
      list.concat(taicpu.op_cond(A_ITE,truecond));

      { "then" slot }
      movtrue:=taicpu.op_reg_const(A_MOV,reg,1);
      list.concat(setcondition(movtrue,truecond));

      { "else" slot }
      movfalse:=taicpu.op_reg_const(A_MOV,reg,0);
      list.concat(setcondition(movfalse,inverse_cond(truecond)));
    end;
  { Thumb-2: emits the procedure entry code.
    localsize is first rounded up to a multiple of 4. Nothing is emitted when
    nostackframe is set. Otherwise the code
    - determines which of the FPU registers F0..F7 are used and not volatile,
    - stores the used non-volatile integer registers (plus frame pointer and/or R14
      where required) with STM and the FD postfix,
    - sets up the frame pointer from the entry stack pointer when a separate frame
      pointer is used,
    - reserves the local stack area, padded for the configured maximum local alignment,
    - stores the collected FPU registers with SFM. }
  4462. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4463. var
  4464. ref : treference;
  4465. shift : byte;
  4466. firstfloatreg,lastfloatreg,
  4467. r : byte;
  4468. regs : tcpuregisterset;
  4469. stackmisalignment: pint;
  4470. begin
  4471. LocalSize:=align(LocalSize,4);
  4472. { call instruction does not put anything on the stack }
  4473. stackmisalignment:=0;
  4474. if not(nostackframe) then
  4475. begin
  4476. firstfloatreg:=RS_NO;
  4477. lastfloatreg:=RS_NO;
  4478. { save floating point registers? }
  4479. for r:=RS_F0 to RS_F7 do
  4480. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4481. begin
  4482. if firstfloatreg=RS_NO then
  4483. firstfloatreg:=r;
  4484. lastfloatreg:=r;
  { each FPU register to save counts as 12 bytes of stack usage }
  4485. inc(stackmisalignment,12);
  4486. end;
  4487. a_reg_alloc(list,NR_STACK_POINTER_REG);
  4488. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4489. begin
  4490. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4491. a_reg_alloc(list,NR_R12);
  { keep the entry value of the stack pointer in R12; the frame pointer is derived
    from it after the registers have been stored }
  4492. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4493. end;
  4494. { save int registers }
  4495. reference_reset(ref,4,[]);
  4496. ref.index:=NR_STACK_POINTER_REG;
  4497. ref.addressmode:=AM_PREINDEXED;
  4498. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { R14 is stored whenever a frame pointer is used, any register is saved or the
    procedure performs calls }
  4499. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4500. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4501. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4502. include(regs,RS_R14);
  4503. if regs<>[] then
  4504. begin
  { 4 bytes of stack per stored integer register }
  4505. for r:=RS_R0 to RS_R15 do
  4506. if (r in regs) then
  4507. inc(stackmisalignment,4);
  4508. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4509. end;
  4510. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4511. begin
  4512. { the framepointer now points to the saved R15, so the saved
  4513. framepointer is at R11-12 (for get_caller_frame) }
  4514. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4515. a_reg_dealloc(list,NR_R12);
  4516. end;
  { from here on only the remainder with respect to the maximum local alignment matters }
  4517. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4518. if (LocalSize<>0) or
  4519. ((stackmisalignment<>0) and
  4520. ((pi_do_call in current_procinfo.flags) or
  4521. (po_assembler in current_procinfo.procdef.procoptions))) then
  4522. begin
  { pad localsize so that saved registers plus locals are a multiple of localalignmax }
  4523. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4524. if not(is_shifter_const(localsize,shift)) then
  4525. begin
  { size not encodable as immediate: load it into R12 and subtract the register }
  4526. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4527. a_reg_alloc(list,NR_R12);
  4528. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4529. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4530. a_reg_dealloc(list,NR_R12);
  4531. end
  4532. else
  4533. begin
  { NOTE(review): R12 is released here although nothing on this path allocates it
    (and it was already released above when a frame pointer is used) - confirm
    that the extra dealloc marker is intended }
  4534. a_reg_dealloc(list,NR_R12);
  4535. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4536. end;
  4537. end;
  4538. if firstfloatreg<>RS_NO then
  4539. begin
  4540. reference_reset(ref,4,[]);
  { save area too far away for an offset: compute its address in R12 instead }
  4541. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4542. begin
  4543. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4544. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4545. ref.base:=NR_R12;
  4546. end
  4547. else
  4548. begin
  4549. ref.base:=current_procinfo.framepointer;
  4550. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4551. end;
  { store the registers firstfloatreg..lastfloatreg with a single SFM }
  4552. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4553. lastfloatreg-firstfloatreg+1,ref));
  4554. end;
  4555. end;
  4556. end;
  { Thumb-2: emits the procedure exit code.
    With nostackframe set only BX R14 is emitted. Otherwise the code
    - reloads the used non-volatile FPU registers F0..F7 with LFM,
    - releases the local stack area, using the same padding computation as the entry code,
    - returns with BX R14 when no integer register has to be restored, or with LDM and
      the FD postfix otherwise; in the latter case R15 is loaded in place of R14.
    The parasize parameter is not used. }
  4557. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4558. var
  4559. ref : treference;
  4560. firstfloatreg,lastfloatreg,
  4561. r : byte;
  4562. shift : byte;
  4563. regs : tcpuregisterset;
  4564. LocalSize : longint;
  4565. stackmisalignment: pint;
  4566. begin
  4567. if not(nostackframe) then
  4568. begin
  4569. stackmisalignment:=0;
  4570. { restore floating point register }
  4571. firstfloatreg:=RS_NO;
  4572. lastfloatreg:=RS_NO;
  4573. { any floating point registers to restore? }
  4574. for r:=RS_F0 to RS_F7 do
  4575. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4576. begin
  4577. if firstfloatreg=RS_NO then
  4578. firstfloatreg:=r;
  4579. lastfloatreg:=r;
  4580. { floating point register space is already included in
  4581. localsize below by calc_stackframe_size
  4582. inc(stackmisalignment,12);
  4583. }
  4584. end;
  4585. if firstfloatreg<>RS_NO then
  4586. begin
  4587. reference_reset(ref,4,[]);
  { save area too far away for an offset: compute its address in R12 instead }
  4588. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4589. begin
  4590. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4591. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4592. ref.base:=NR_R12;
  4593. end
  4594. else
  4595. begin
  4596. ref.base:=current_procinfo.framepointer;
  4597. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4598. end;
  { reload the registers firstfloatreg..lastfloatreg with a single LFM }
  4599. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4600. lastfloatreg-firstfloatreg+1,ref));
  4601. end;
  4602. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { load the program counter instead of the link register, so that the register
    restore also performs the return }
  4603. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4604. begin
  4605. exclude(regs,RS_R14);
  4606. include(regs,RS_R15);
  4607. end;
  4608. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4609. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
  { 4 bytes of stack per register that is restored }
  4610. for r:=RS_R0 to RS_R15 do
  4611. if (r in regs) then
  4612. inc(stackmisalignment,4);
  4613. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4614. LocalSize:=current_procinfo.calc_stackframe_size;
  4615. if (LocalSize<>0) or
  4616. ((stackmisalignment<>0) and
  4617. ((pi_do_call in current_procinfo.flags) or
  4618. (po_assembler in current_procinfo.procdef.procoptions))) then
  4619. begin
  { same padding as applied by the entry code }
  4620. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4621. if not(is_shifter_const(LocalSize,shift)) then
  4622. begin
  { size not encodable as immediate: load it into R12 and add the register }
  4623. a_reg_alloc(list,NR_R12);
  4624. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4625. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4626. a_reg_dealloc(list,NR_R12);
  4627. end
  4628. else
  4629. begin
  { NOTE(review): R12 is released here without a matching allocation on this
    path - confirm that the extra dealloc marker is intended }
  4630. a_reg_dealloc(list,NR_R12);
  4631. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4632. end;
  4633. end;
  { nothing was saved: plain return through the link register }
  4634. if regs=[] then
  4635. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4636. else
  4637. begin
  4638. reference_reset(ref,4,[]);
  4639. ref.index:=NR_STACK_POINTER_REG;
  4640. ref.addressmode:=AM_PREINDEXED;
  4641. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4642. end;
  4643. end
  4644. else
  4645. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4646. end;
  { Thumb-2: emits the load/store instruction op (with postfix oppostfix) that transfers
    register reg from/to the memory location described by ref.
    ref is received by value and rewritten as needed before the instruction is emitted:
    symbols are loaded through a constant placed in the procedure's local data, offsets
    outside the accepted range are moved into a register, and for the FPU/VFP opcodes an
    index register is folded into the base. The reference that was finally used is
    returned. }
  4647. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4648. var
  4649. tmpreg : tregister;
  4650. tmpref : treference;
  4651. l : tasmlabel;
  4652. begin
  { tmpreg=NR_NO means that no temporary register has been set up so far }
  4653. tmpreg:=NR_NO;
  4654. { Be sure to have a base register }
  4655. if (ref.base=NR_NO) then
  4656. begin
  4657. if ref.shiftmode<>SM_None then
  4658. internalerror(2014020706);
  4659. ref.base:=ref.index;
  4660. ref.index:=NR_NO;
  4661. end;
  4662. { absolute symbols can't be handled directly, we've to store the symbol reference
  4663. in the text segment and access it pc relative
  4664. For now, we assume that references where base or index equals to PC are already
  4665. relative, all other references are assumed to be absolute and thus they need
  4666. to be handled extra.
  4667. A proper solution would be to change refoptions to a set and store the information
  4668. if the symbol is absolute or relative there.
  4669. }
  4670. if (assigned(ref.symbol) and
  4671. not(is_pc(ref.base)) and
  4672. not(is_pc(ref.index))
  4673. ) or
  4674. { [#xxx] isn't a valid address operand }
  4675. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4676. //(ref.offset<-4095) or
  4677. (ref.offset<-255) or
  4678. (ref.offset>4095) or
  4679. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4680. ((ref.offset<-255) or
  4681. (ref.offset>255)
  4682. )
  4683. ) or
  4684. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4685. ((ref.offset<-1020) or
  4686. (ref.offset>1020) or
  4687. ((abs(ref.offset) mod 4)<>0) or
  4688. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4689. assigned(ref.symbol)
  4690. )
  4691. ) then
  4692. begin
  4693. reference_reset(tmpref,4,[]);
  4694. { load symbol }
  4695. tmpreg:=getintregister(list,OS_INT);
  4696. if assigned(ref.symbol) then
  4697. begin
  { emit a labelled constant holding symbol+offset into the local data of the
    procedure and load it into tmpreg relative to the program counter }
  4698. current_asmdata.getjumplabel(l);
  4699. cg.a_label(current_procinfo.aktlocaldata,l);
  4700. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4701. if ref.refaddr=addr_gottpoff then
  4702. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  4703. else
  4704. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4705. { load consts entry }
  4706. tmpref.symbol:=l;
  4707. tmpref.base:=NR_R15;
  4708. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4709. { in case of LDF/STF, we got rid of the NR_R15 }
  4710. if is_pc(ref.base) then
  4711. ref.base:=NR_NO;
  4712. if is_pc(ref.index) then
  4713. ref.index:=NR_NO;
  4714. end
  4715. else
  { no symbol involved: only the offset has to be placed in tmpreg }
  4716. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
  { combine tmpreg with what is left of the reference: added to the base when an index
    is present, used as index when only a base is present, used as base otherwise }
  4717. if (ref.base<>NR_NO) then
  4718. begin
  4719. if ref.index<>NR_NO then
  4720. begin
  4721. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4722. ref.base:=tmpreg;
  4723. end
  4724. else
  4725. begin
  4726. ref.index:=tmpreg;
  4727. ref.shiftimm:=0;
  4728. ref.signindex:=1;
  4729. ref.shiftmode:=SM_None;
  4730. end;
  4731. end
  4732. else
  4733. ref.base:=tmpreg;
  { symbol and offset are now contained in tmpreg }
  4734. ref.offset:=0;
  4735. ref.symbol:=nil;
  4736. end;
  { base, index and a non-zero offset at the same time: add the offset into a register }
  4737. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4738. begin
  4739. if tmpreg<>NR_NO then
  4740. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4741. else
  4742. begin
  4743. tmpreg:=getintregister(list,OS_ADDR);
  4744. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4745. ref.base:=tmpreg;
  4746. end;
  4747. ref.offset:=0;
  4748. end;
  4749. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  4750. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4751. begin
  4752. tmpreg:=getintregister(list,OS_ADDR);
  4753. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4754. ref.base := tmpreg;
  4755. end;
  4756. { floating point operations have only limited references
  4757. we expect here, that a base is already set }
  4758. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4759. begin
  4760. if ref.shiftmode<>SM_none then
  4761. internalerror(200309121);
  { fold the index into the base (subtracting it for a negative signindex), reusing
    tmpreg when one was set up above }
  4762. if tmpreg<>NR_NO then
  4763. begin
  4764. if ref.base=tmpreg then
  4765. begin
  4766. if ref.signindex<0 then
  4767. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4768. else
  4769. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4770. ref.index:=NR_NO;
  4771. end
  4772. else
  4773. begin
  4774. if ref.index<>tmpreg then
  4775. internalerror(200403161);
  4776. if ref.signindex<0 then
  4777. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4778. else
  4779. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4780. ref.base:=tmpreg;
  4781. ref.index:=NR_NO;
  4782. end;
  4783. end
  4784. else
  4785. begin
  { NOTE(review): this path always adds the index and does not look at
    ref.signindex - confirm that a negative index cannot reach it }
  4786. tmpreg:=getintregister(list,OS_ADDR);
  4787. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4788. ref.base:=tmpreg;
  4789. ref.index:=NR_NO;
  4790. end;
  4791. end;
  { finally emit the instruction itself with the rewritten reference }
  4792. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4793. Result := ref;
  4794. end;
  { Thumb-2: register to register move between multimedia/VFP registers.
    Code is only generated for a single precision source and destination (OS_F32 to
    OS_F32): a VMOV.F32 reg2,reg1 that is also registered as move instruction, followed
    by the optional FPU exception check.
    All other size combinations emit nothing. This includes OS_F64 to OS_F64 and OS_F32
    to OS_F64, for which the original implementation only contained disabled VMOV/VCVT
    statements. The shuffle parameter is not used. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      { anything but single -> single is silently ignored }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
      movinstr:=setoppostfix(movinstr,PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
      maybe_check_for_fpu_exception(list);
    end;
  { Thumb-2: loads the multimedia/VFP register reg from the memory reference ref.
    The work is done by handle_load_store with the VLDR opcode and no postfix;
    fromsize, tosize and shuffle are not used and the returned reference is dropped. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VLDR,
        PF_None,
        reg,
        ref);
    end;
  { Thumb-2: stores the multimedia/VFP register reg to the memory reference ref.
    The store is emitted by handle_load_store with the VSTR opcode and no postfix and is
    followed by the optional FPU exception check. fromsize, tosize and shuffle are not
    used and the returned reference is dropped. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VSTR,
        PF_None,
        reg,
        ref);
      maybe_check_for_fpu_exception(list);
    end;
  { Thumb-2: moves the integer register intreg into the multimedia/VFP register mmreg
    with a VMOV. Only a destination size of OS_F32 is supported, any other tosize raises
    internalerror 2012100813. fromsize and shuffle are not checked. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Thumb-2: moves the multimedia/VFP register mmreg into the integer register intreg
    with a VMOV, followed by the optional FPU exception check. Only a source size of
    OS_F32 is supported, any other fromsize raises internalerror 2012100814. tosize and
    shuffle are not checked. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
          maybe_check_for_fpu_exception(list);
        end;
    end;
  { Thumb-2: 64 bit operation regdst := op(regsrc, regdst) on register pairs.
    Only OP_NEG is implemented here, as 0 - regsrc over both halves: the low word is
    negated with a flag setting RSB, then the high word is computed with SBC from a
    zeroed temporary register. The flags are allocated for the duration of the sequence.
    All other operations are passed on to the inherited implementation. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: dst.lo := 0 - src.lo, updating the flags }
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { high word: dst.hi := 0 - src.hi with the borrow taken from the flags }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Thumb: 64 bit operation regdst := regdst op regsrc on register pairs, built from the
    two-operand instruction forms.
    - OP_NEG clears both halves of regdst and then subtracts regsrc with borrow.
    - OP_NOT and OP_AND/OP_OR/OP_XOR are applied to the low and high halves separately
      through cg.a_op_reg_reg.
    - OP_ADD and OP_SUB use ADD/ADC and SUB/SBC respectively.
    Any other operation raises internalerror 2003083101. The size parameter is not used.
    NOTE(review): OP_NEG overwrites regdst before regsrc is read, so it presumably expects
    the two register pairs not to overlap - confirm against the callers.
    NOTE(review): OP_ADD and OP_SUB allocate the flags without releasing them, unlike
    OP_NEG - confirm whether that is intended. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);

    { allocates the flags and emits "lowop dst.lo,src.lo" followed by
      "highop dst.hi,src.hi" }
    procedure emit_carry_pair(lowop,highop : tasmop);
      begin
        cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg(lowop,regdst.reglo,regsrc.reglo));
        list.concat(taicpu.op_reg_reg(highop,regdst.reghi,regsrc.reghi));
      end;

    { applies the 32 bit operation bitop to the low and then to the high half }
    procedure emit_halves(bitop : TOpCG;halfsize : tcgsize);
      begin
        cg.a_op_reg_reg(list,bitop,halfsize,regsrc.reglo,regdst.reglo);
        cg.a_op_reg_reg(list,bitop,halfsize,regsrc.reghi,regdst.reghi);
      end;

    begin
      case op of
        OP_NEG:
          begin
            { regdst := 0, then regdst := regdst - regsrc }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            emit_carry_pair(A_SUB,A_SBC);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          emit_halves(OP_NOT,OS_INT);
        OP_AND,OP_OR,OP_XOR:
          emit_halves(op,OS_32);
        OP_ADD:
          emit_carry_pair(A_ADD,A_ADC);
        OP_SUB:
          emit_carry_pair(A_SUB,A_SBC);
        else
          internalerror(2003083101);
      end;
    end;
  { Thumb: 64 bit operation reg := reg op value with a constant operand.

    list  - assembler list the instructions are appended to
    op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; anything else raises
            internalerror 2003083101
    size  - not used
    value - 64 bit constant; its low and high 32 bit halves are applied to reg.reglo and
            reg.reghi
    reg   - register pair that is modified in place

    The bitwise operations are forwarded per half to cg.a_op_const_reg.
    For OP_ADD/OP_SUB the low half is added/subtracted as immediate when it lies in
    0..255 and from a temporary register otherwise; the high half is always applied from
    a temporary register with ADC/SBC. Both constants are loaded before the flags are
    allocated, so that nothing is emitted between the low word instruction and the
    ADC/SBC that consumes its carry.
    The flags stay allocated on return, as in the register variant of this operation. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      lowconst,highconst : aint;
      lowreg,highreg : tregister;
      lowop,highop : tasmop;
      lowisimm : boolean;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD,OP_SUB:
          begin
            if op=OP_ADD then
              begin
                lowop:=A_ADD;
                highop:=A_ADC;
              end
            else
              begin
                lowop:=A_SUB;
                highop:=A_SBC;
              end;

            { both halves are handled as signed 32 bit values, for ADD and SUB alike }
            lowconst:=aint(lo(value));
            highconst:=aint(hi(value));
            lowisimm:=(lowconst>=0) and (lowconst<=255);

            { materialize all constants first: the loads must not end up between the
              low word operation and the carry consuming high word operation }
            lowreg:=NR_NO;
            if not lowisimm then
              begin
                lowreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,lowconst,lowreg);
              end;
            highreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,highconst,highreg);

            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            if lowisimm then
              list.concat(taicpu.op_reg_const(lowop,reg.reglo,lowconst))
            else
              list.concat(taicpu.op_reg_reg(lowop,reg.reglo,lowreg));
            list.concat(taicpu.op_reg_reg(highop,reg.reghi,highreg));
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Instantiates the code generator objects for the selected instruction set and stores
    them in the globals cg and cg64:
    - Thumb-2: tthumb2cgarm / tthumb2cg64farm with TCpuThumb2AsmOptimizer,
    - Thumb:   tthumbcgarm / tthumbcg64farm; casmoptimizer is left unchanged here
               (the assignment of a Thumb optimizer is disabled),
    - ARM:     tarmcgarm / tarmcg64farm with TCpuAsmOptimizer.
    Thumb-2 is checked before Thumb. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { plain ARM code }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  4974. end.