cgcpu.pas 223 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  28. { tbasecgarm is shared between all arm architectures }
      { Base ARM code generator, derived from tcg. In this unit tcgarm and
        tthumbcgarm descend directly from it, and tthumb2cgarm descends from
        tcgarm, so every member declared here is inherited by all three. }
  29. tbasecgarm = class(tcg)
  30. { true, if the next arithmetic operation should modify the flags }
  31. cgsetflags : boolean;
      { --- passing values to parameter locations --- }
  32. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  33. protected
  34. procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
      { creates rg[R_MMREGISTER] according to the capabilities of the selected FPU type }
  35. procedure init_mmregister_allocator;
  36. public
  37. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
      { --- calls; both implementations set pi_do_call in current_procinfo.flags --- }
  38. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  39. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  40. { move instructions }
  41. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  42. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
      { the a_internal_* variants are not overrides; they return a treference
        (a_op_const_ref reuses that result for the store back) }
  43. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  44. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  45. { fpu move instructions }
  46. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  47. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  48. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  49. procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
  50. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  51. { comparison operations }
  52. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  53. l : tasmlabel);override;
  54. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  55. procedure a_jmp_name(list : TAsmList;const s : string); override;
  56. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  57. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  58. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  59. procedure g_profilecode(list : TAsmList); override;
  60. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  61. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  62. procedure g_maybe_got_init(list : TAsmList); override;
  63. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
  64. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  65. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
      { NOTE(review): not an override and takes an extra aligned flag; presumably the
        common implementation behind the two g_concatcopy* overrides above - confirm }
  66. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  67. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  68. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  69. procedure g_save_registers(list : TAsmList);override;
  70. procedure g_restore_registers(list : TAsmList);override;
  71. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  72. procedure fixref(list : TAsmList;var ref : treference);
      { virtual: tthumbcgarm and tthumb2cgarm each override this method; the
        returned reference is ignored by some callers (e.g. a_load_ref_reg) }
  73. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
  74. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  75. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  76. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  77. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  78. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  79. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  80. { Transform unsupported methods into Internal errors }
  81. procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
  82. { try to generate optimized 32 Bit multiplication, returns true if successful generated }
  83. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  84. { clear out potential overflow bits from 8 or 16 bit operations
  85. the upper 24/16 bits of a register after an operation }
  86. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  87. { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
  88. procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  89. procedure g_maybe_tls_init(list : TAsmList); override;
  90. end;
  91. { tcgarm is shared between normal arm and thumb-2 }
      { Adds the integer ALU operations and the constant/memory loads on top of
        tbasecgarm. tarmcgarm and tthumb2cgarm derive from this class. }
  92. tcgarm = class(tbasecgarm)
      { two-operand forms; a_op_const_reg forwards to a_op_const_reg_reg with
        the same register as source and destination }
  93. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  94. procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
  95. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
      { three-operand forms; both forward to the *_checkoverflow variants with
        setflags=false and a throw-away ovloc }
  96. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  97. size: tcgsize; a: tcgint; src, dst: tregister); override;
  98. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  99. size: tcgsize; src1, src2, dst: tregister); override;
  100. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  101. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
      { loads }
  102. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  103. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
      { subtracts ioffset from the self parameter at its caller-side location }
  104. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  105. {Multiply two 32-bit registers into lo and hi 32-bit registers}
  106. procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  107. end;
  108. { normal arm cg }
      { Only the creation and destruction of the register allocators is
        specialised here; everything else is inherited from tcgarm. }
  109. tarmcgarm = class(tcgarm)
  110. procedure init_register_allocators;override;
  111. procedure done_register_allocators;override;
  112. end;
  113. { 64 bit cg for all arm flavours }
      { Empty descendant of tcg64f32: it adds no members and only serves as the
        common ancestor of tcg64farm and tthumbcg64farm. }
  114. tbasecg64farm = class(tcg64f32)
  115. end;
  116. { tcg64farm is shared between normal arm and thumb-2 }
      { 64-bit operations on tregister64 values, plus transfers between a
        64-bit integer register value and an mm register. }
  117. tcg64farm = class(tbasecg64farm)
  118. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  119. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  120. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  121. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
      { variants taking a setflags switch and a var ovloc location }
  122. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  123. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
      { integer register pair <-> mm register }
  124. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  125. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  126. end;
  { 64-bit code generator class for the normal ARM target; it adds nothing to tcg64farm }
  127. tarmcg64farm = class(tcg64farm)
  128. end;
  { Code generator class deriving directly from tbasecgarm (not from tcgarm),
    so it declares its own ALU operations, loads and procedure entry/exit code.
    NOTE(review): going by the name this is the 16-bit Thumb generator - confirm. }
  129. tthumbcgarm = class(tbasecgarm)
  130. procedure init_register_allocators;override;
  131. procedure done_register_allocators;override;
  132. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  133. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  134. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
  135. procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
  136. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
  137. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  138. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
  139. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  140. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
      { replaces the virtual handle_load_store declared in tbasecgarm }
  141. function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  142. end;
  { 64-bit code generator class paired by name with tthumbcgarm; it overrides
    only the two-operand register and constant forms of the 64-bit operations }
  143. tthumbcg64farm = class(tbasecg64farm)
  144. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  145. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  146. end;
  { Code generator class derived from tcgarm; it re-implements the methods
    listed below and inherits the remaining ones.
    NOTE(review): going by the name this is the Thumb-2 generator - confirm. }
  147. tthumb2cgarm = class(tcgarm)
  148. procedure init_register_allocators;override;
  149. procedure done_register_allocators;override;
  150. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  151. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  152. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  153. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  154. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  155. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  156. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  157. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  158. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
      { replaces the virtual handle_load_store declared in tbasecgarm }
  159. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
      { mm register moves }
  160. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  161. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  162. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  163. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  164. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  165. end;
  { 64-bit code generator class paired by name with tthumb2cgarm; only
    a_op64_reg_reg differs from tcg64farm }
  166. tthumb2cg64farm = class(tcg64farm)
  167. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  168. end;
  169. const
      { Lookup table from a generic comparison (topcmp) to an ARM condition code.
        The entries must stay in the declaration order of topcmp, which is
        defined outside this unit. }
  170. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  171. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
      { NOTE(review): presumably the stack page size in bytes on Windows targets,
        used for stack probing - confirm against the users of this constant }
  172. winstackpagesize = 4096;
      { returns the instruction postfix (PF_S, PF_D or PF_E) for a float definition }
  173. function get_fpu_postfix(def : tdef) : toppostfix;
  174. procedure create_codegen;
  175. implementation
  176. uses
  177. globals,verbose,systems,cutils,
  178. aopt,aoptcpu,
  179. fmodule,
  180. symconst,symsym,symtable,
  181. tgobj,
  182. procinfo,cpupi,
  183. paramgr;
  184. { Range check must be disabled explicitly as conversions between signed and unsigned
  185. 32-bit values are done without explicit typecasts }
  186. {$R-}
  { Maps a floating point type definition to the matching instruction postfix:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Any other float type raises internal error 200401272; a definition that is
    not a floatdef at all raises internal error 200401271. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the integer, FPA and mm register allocators for the normal ARM
    code generator. The integer register set depends on the target system
    and on which register serves as frame pointer. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      { currently, we always save R14, so we can use it }
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer, so it can be handed out as well }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { same set as above, but without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { the FPA register file only exists if the selected FPU type has it }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the three register allocators set up by init_register_allocators
    and then lets the inherited implementation finish the clean-up. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { integer, FPA and mm allocator, in that order }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg, trying the cheapest encodings first:
      1. a single MOV if a is a shifter constant,
      2. a single MVN if not(a) is a shifter constant,
      3. MOV+ORR if a can be split into two shifter constants,
      4. MVN+BIC if not(a) can be split into two shifter constants,
      5. otherwise a PC-relative LDR from a 32 bit literal that is appended to
         the local data of the current procedure.
    Sizes other than 8, 16 or 32 bit raise internal error 2002090907. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      poollabel : tasmlabel;
      poolref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      if is_shifter_const(a,rot) then
        list.concat(taicpu.op_reg_const(A_MOV,reg,a))
      else if is_shifter_const(not(a),rot) then
        list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
      else if split_into_shifter_const(a,part1,part2) then
        begin
          { mov + orr }
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
        end
      else if split_into_shifter_const(not(a),part1,part2) then
        begin
          { mvn + bic }
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
        end
      else
        begin
          { literal load: emit a label followed by the value into the
            procedure's local data and reference it relative to PC }
          reference_reset(poolref,4,[]);
          poolref.base:=NR_PC;
          current_asmdata.getjumplabel(poollabel);
          cg.a_label(current_procinfo.aktlocaldata,poollabel);
          { symboldata must point at the label entry, so take it before the
            constant itself is appended }
          poolref.symboldata:=current_procinfo.aktlocaldata.last;
          current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
          poolref.symbol:=poollabel;
          list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
        end;
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.
    Accesses whose reference alignment (1 or 2) is smaller than the access
    size, and halfword accesses on CPUs without CPUARM_HAS_ALL_MEM, are
    assembled from several narrower loads combined with ORR and a left shift;
    everything else is a single load emitted through handle_load_store.
    A source size outside 8/16/32 bit raises internal error 200308297. }
  276. procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  277. var
  278. oppostfix:toppostfix;
  279. usedtmpref: treference;
  280. tmpreg,tmpreg2 : tregister;
  281. so : tshifterop;
  282. dir : integer;
  283. begin
      { if the source is at least as wide as the destination, only the
        destination size is loaded }
  284. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  285. FromSize := ToSize;
  286. case FromSize of
  287. { load postfix for each supported source size, signed and unsigned }
  288. OS_8:
  289. oppostfix:=PF_B;
  290. OS_S8:
  291. oppostfix:=PF_SB;
  292. OS_16:
  293. oppostfix:=PF_H;
  294. OS_S16:
  295. oppostfix:=PF_SH;
  296. OS_32,
  297. OS_S32:
  298. oppostfix:=PF_None;
  299. else
  300. InternalError(200308297);
  301. end;
      { without CPUARM_HAS_ALL_MEM a signed byte is loaded as a plain byte here;
        the sign extension is done by the a_load_reg_reg call at the end }
  302. if (fromsize=OS_S8) and
  303. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  304. oppostfix:=PF_B;
      { piecewise path: under-aligned reference, or a halfword access on a CPU
        without CPUARM_HAS_ALL_MEM }
  305. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
  306. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  307. (oppostfix in [PF_SH,PF_H])) then
  308. begin
      { dir is the offset step from one piece to the next; on big endian the
        start offset is moved to the last piece below and the walk goes
        downwards, so the first piece loaded is the least significant one
        for either endianness }
  309. if target_info.endian=endian_big then
  310. dir:=-1
  311. else
  312. dir:=1;
  313. case FromSize of
  314. OS_16,OS_S16:
  315. begin
  316. { only complicated references need an extra loadaddr }
  317. if assigned(ref.symbol) or
  318. (ref.index<>NR_NO) or
  319. (ref.offset<-4095) or
  320. (ref.offset>4094) or
  321. { sometimes the compiler reused registers }
  322. (reg=ref.index) or
  323. (reg=ref.base) then
  324. begin
      { compute the address once into a fresh register and use it as a
        plain base with offset 0 }
  325. tmpreg2:=getintregister(list,OS_INT);
  326. a_loadaddr_ref_reg(list,ref,tmpreg2);
  327. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  328. end
  329. else
  330. usedtmpref:=ref;
  331. if target_info.endian=endian_big then
  332. inc(usedtmpref.offset,1);
  333. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  334. tmpreg:=getintregister(list,OS_INT);
      { first byte goes directly into reg }
  335. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  336. inc(usedtmpref.offset,dir);
      { second byte: loaded as signed byte for OS_S16, as unsigned byte otherwise }
  337. if FromSize=OS_16 then
  338. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  339. else
  340. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
      { reg := reg or (tmpreg shl 8) }
  341. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  342. end;
  343. OS_32,OS_S32:
  344. begin
  345. tmpreg:=getintregister(list,OS_INT);
  346. { only complicated references need an extra loadaddr }
  347. if assigned(ref.symbol) or
  348. (ref.index<>NR_NO) or
  349. (ref.offset<-4095) or
  350. (ref.offset>4092) or
  351. { sometimes the compiler reused registers }
  352. (reg=ref.index) or
  353. (reg=ref.base) then
  354. begin
  355. tmpreg2:=getintregister(list,OS_INT);
  356. a_loadaddr_ref_reg(list,ref,tmpreg2);
  357. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  358. end
  359. else
  360. usedtmpref:=ref;
  361. shifterop_reset(so);so.shiftmode:=SM_LSL;
  362. if ref.alignment=2 then
  363. begin
      { halfword aligned: two 16 bit loads, second one merged with shl 16 }
  364. if target_info.endian=endian_big then
  365. inc(usedtmpref.offset,2);
  366. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  367. inc(usedtmpref.offset,dir*2);
  368. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  369. so.shiftimm:=16;
  370. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  371. end
  372. else
  373. begin
      { four single byte loads; tmpreg2 is newly allocated here, so the
        address register possibly allocated above is only referenced
        through usedtmpref from now on }
  374. tmpreg2:=getintregister(list,OS_INT);
  375. if target_info.endian=endian_big then
  376. inc(usedtmpref.offset,3);
      { byte 0 -> reg, byte 1 -> tmpreg, byte 2 -> tmpreg2 }
  377. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  378. inc(usedtmpref.offset,dir);
  379. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  380. inc(usedtmpref.offset,dir);
  381. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  382. so.shiftimm:=8;
  383. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
      { tmpreg has been merged, so it is reused for byte 3 }
  384. inc(usedtmpref.offset,dir);
  385. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  386. so.shiftimm:=16;
  387. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  388. so.shiftimm:=24;
  389. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  390. end;
  391. end
  392. else
      { remaining sizes (bytes) need no splitting }
  393. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  394. end;
  395. end
  396. else
      { regular case: one load instruction }
  397. handle_load_store(list,A_LDR,oppostfix,reg,ref);
      { post-processing of signed byte loads: sign extend by register move when
        the byte was loaded with PF_B above.
        NOTE(review): the OS_16 -> OS_32 move for an OS_16 destination presumably
        clears the upper 16 bits left by the sign extending load - confirm }
  398. if (fromsize=OS_S8) and
  399. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  400. a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
  401. else if (fromsize=OS_S8) and (tosize = OS_16) then
  402. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  403. end;
  { Subtracts ioffset from the 'self' parameter of procdef, at every
    caller-side location the parameter occupies (register or memory).
    If ioffset is not a shifter constant it is first loaded into R12.
    Raises internal error 2003052503 if no 'self' parameter symbol is found
    and 2003091803 for a location that is neither register nor reference. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
      rot : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(2003052503);

      { walk the linked list of locations making up the parameter }
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              if is_shifter_const(ioffset,rot) then
                a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
              else
                begin
                  a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                  a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,rot) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,selfref);
                  end;
              end
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Stores the constant a into the (single) location of the parameter
    paraloc: straight into the register for register locations, or into
    memory at index register + offset for reference locations. Any other
    location kind raises internal error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      pararef: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { build a base+offset reference from the parameter location }
            reference_reset(pararef,paraloc.alignment,[]);
            pararef.base:=paraloc.location^.reference.index;
            pararef.offset:=paraloc.location^.reference.offset;
            a_load_const_ref(list,size,a,pararef);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);
        else
          internalerror(2002081101);
      end;
    end;
  { Copies (part of) a parameter from ref to the memory location paralocref.
    A 64 bit float parameter whose current location is 32 bit wide is
    handled here by copying 4 bytes; every other case is delegated to the
    inherited implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      { doubles in softemu mode have a strange order of registers and references }
      if (cgpara.size=OS_F64) and (location^.size=OS_32) then
        g_concatcopy(list,ref,paralocref,4)
      else
        inherited;
    end;
  { Creates the mm register allocator that matches the capabilities of the
    selected FPU type:
      - 32 registers with double support : D0..D31 as R_SUBFD
      - 32 registers without it          : S0..S31 as R_SUBFS
      - only the VFP extension           : D0..D15 as R_SUBFD
    If none of these capabilities is present, no allocator is created. }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
                [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                 RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
                 RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                 RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                ],first_mm_imreg,[])
          else
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
                [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
                 RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,
                 RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
                 RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
                ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Passes the address of r as parameter paraloc. For a register location
    the address is computed directly into that register; for a memory
    location it is computed into a temporary register and stored from there.
    Any other location kind raises internal error 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      pararef: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { destination slot: index register + offset of the location }
            reference_reset(pararef,paraloc.alignment,[]);
            pararef.base:=paraloc.location^.reference.index;
            pararef.offset:=paraloc.location^.reference.offset;
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,pararef);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a BL to the function symbol named s (referenced weakly if weak is
    set) and records in current_procinfo.flags that the current procedure
    performs a call. The reference is marked addr_pic when PIC code is
    generated for a target whose PIC uses a GOT, addr_full otherwise. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(target,callee,0,sizeof(pint),[]);

      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        target.refaddr:=addr_pic
      else
        target.refaddr:=addr_full;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,target));
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits an indirect call through reg: a single BLX if the CPU has it,
    otherwise the pair "mov r14,pc / mov pc,reg". Also records in
    current_procinfo.flags that the current procedure performs a call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Two-operand form "reg := reg op a": forwards to the three-operand
    variant with reg used as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  { Applies "op a" to a value in memory: the value is loaded into a
    temporary register, the result is computed into a second temporary and
    then stored back through the reference returned by the load. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      storeref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      { keep the reference handed back by the load so the store reuses it }
      storeref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,storeref);
    end;
  { Two-operand register operation "dst := dst op src".
    OP_NEG is emitted as "rsb dst,src,#0" followed by maybeadjustresult.
    OP_NOT is a plain MVN for 32 bit sizes; for 8/16 bit sizes the source is
    inverted while shifted to the top of the register and then shifted back
    down (arithmetically for signed sizes, logically for unsigned ones).
    All other operations are forwarded to a_op_reg_reg_reg. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                { shift distance that moves the value to the top of the register }
                if size in [OS_8, OS_S8] then
                  shiftop.shiftimm:=24
                else
                  shiftop.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8, OS_S16] then
                  shiftop.shiftmode:=SM_ASR
                else
                  shiftop.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  626. const
      { Lookup tables indexed by the generic operation TOpCG. The entries must
        stay in the declaration order of TOpCG, which is defined outside this
        unit. }
      { NOTE(review): A_NONE presumably marks operations that have no single
        matching instruction in the respective context - confirm at the users }
  627. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  628. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  629. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
      { same as the table above except that the three shift slots and the last
        slot are filled with A_ASR, A_LSL, A_LSR and A_ROR }
  630. op_reg_opcg2asmop: array[TOpCG] of tasmop =
  631. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  632. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
      { per-operation instruction postfix, PF_S or PF_None.
        NOTE(review): going by the name this is used by the Thumb code
        generator - confirm }
  633. op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
  634. (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
  635. PF_None,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { "dst := src op a" without overflow tracking: forwards to the
    checkoverflow variant with setflags=false; the overflow location that
    variant receives is a local that is never read afterwards. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,unusedovloc);
    end;
  { Three-register operation without overflow tracking: forwards to the
    checkoverflow variant with setflags=false; the overflow location that
    variant receives is a local that is never read afterwards. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,unusedovloc);
    end;
  { Translates a generic shift/rotate operation into the matching ARM shifter mode.
    OP_ROL maps to SM_ROR as well: the callers have to adjust the rotate amount
    themselves. Any non-shift operation triggers internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      if op=OP_SHL then
        Result:=SM_LSL
      else if op=OP_SHR then
        Result:=SM_LSR
      else if op=OP_SAR then
        Result:=SM_ASR
      else if op in [OP_ROR,OP_ROL] then
        Result:=SM_ROR
      else
        internalerror(2012070501);
    end;
  { Tries to replace the 32 bit multiplication dst := src * a by a short sequence of
    MOV/ADD/SUB/RSB instructions using shifted operands.

    list : assembler list the replacement sequence is appended to
    a    : constant factor
    src  : register holding the variable factor
    dst  : register receiving the product

    Returns true if a replacement sequence was emitted. Returns false without emitting
    anything if the estimated effort exceeds the budget (maxeffort); the caller then
    has to generate a real multiplication. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      multiplier : dword;
      power : longint;
      shifterop : tshifterop;
      bitsset : byte;
      negative : boolean;
      first, doshiftadd: boolean;
      b,
      cycles : byte;
      maxeffort : byte;
      leftmostbit,i,shiftvalue: DWord;
    begin
      result:=true;
      cycles:=0;
      negative:=a<0;
      shifterop.rs:=NR_NO;
      shifterop.shiftmode:=SM_LSL;
      { a negative factor costs one extra instruction: the final RSB negating dst }
      if negative then
        inc(cycles);
      { the sequences below are built for the magnitude of a }
      multiplier:=dword(abs(a));
      { heuristics to estimate how many instructions are reasonable to replace the mul,
        this is currently based on XScale timings }
      { in the simplest case, we need a mov to load the constant and a mul to carry out the
        actual multiplication, this requires min. 1+4 cycles
        because the first shift imm. might cause a stall and because we need more instructions
        when replacing the mul we generate max. 3 instructions to replace this mul }
      maxeffort:=3;
      { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
        a ldr, so generating one more operation to replace this is beneficial }
      if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
        inc(maxeffort);
      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(maxeffort);
      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(maxeffort);
      { "symmetric" bit pattern like $10101010 where
        res:=a*$10101010 can be simplified into
        temp:=a*$1010
        res:=temp+temp shl 16
      }
      doshiftadd:=false;
      leftmostbit:=BsrDWord(multiplier);
      shiftvalue:=0;
      if (maxeffort>1) and (leftmostbit>2) then
        begin
          { look for the first split position i at which the part above bit i equals
            the lowest i bits }
          for i:=2 to 31 do
            if (multiplier shr i)=(multiplier and ($ffffffff shr (32-i))) then
              begin
                doshiftadd:=true;
                shiftvalue:=i;
                { the final "dst:=dst+dst shl i" consumes one instruction of the budget }
                dec(maxeffort);
                { only the repeated half still has to be multiplied }
                multiplier:=multiplier shr shiftvalue;
                break;
              end;
        end;
      { bit 0 is masked out, it is for free (see below) }
      bitsset:=popcnt(multiplier and $fffffffe);
      { most simple cases }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { add up ?
        basically, one add is needed for each bit being set in the constant factor
        however, the least significant bit is for free, it can be hidden in the initial
        instruction
      }
      else if (bitsset+cycles<=maxeffort) and
        (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
        begin
          first:=true;
          { every iteration handles the highest bit still set in multiplier }
          while multiplier<>0 do
            begin
              shifterop.shiftimm:=BsrDWord(multiplier);
              if odd(multiplier) then
                begin
                  { dst:=src+(src shl n) handles bit 0 and the highest bit at once;
                    the multiplier can only be odd in the first iteration because bit 0
                    is cleared here.
                    NOTE(review): this assumes an odd multiplier has a second bit set.
                    If the symmetric split above leaves multiplier=1 (a=2^i+1), both
                    decrements hit the same bit and the dword wraps around. The caller
                    visible in this file filters a=2^n+1 beforehand - verify other callers. }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
              first:=false;
              { the highest bit is done }
              dec(multiplier,1 shl shifterop.shiftimm);
            end;
          { recombine the two halves of a symmetric pattern: dst:=dst+(dst shl shiftvalue) }
          if doshiftadd then
            begin
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          { dst:=0-dst }
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
        begin
          first:=true;
          while multiplier<>0 do
            begin
              if first then
                begin
                  { from now on multiplier holds the difference to 2^power, i.e. the
                    amount which has to be subtracted from src shl power }
                  multiplier:=(1 shl power)-multiplier;
                  shifterop.shiftimm:=power;
                end
              else
                shifterop.shiftimm:=BsrDWord(multiplier);
              if odd(multiplier) then
                begin
                  { dst:=(src shl n)-src; clears bit 0 of the difference }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
                    dec(multiplier,1 shl shifterop.shiftimm);
                  end;
              first:=false;
            end;
          { recombine the two halves of a symmetric pattern: dst:=dst+(dst shl shiftvalue) }
          if doshiftadd then
            begin
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          { dst:=0-dst }
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits dst := src op a for a constant operand a, picking the cheapest encoding which
    is available.

    list     : assembler list the instructions are appended to
    op       : generic operation
    size     : operand size, used for the final result adjustment
    a        : constant operand
    src, dst : source and destination register
    setflags : if true (or if the field cgsetflags is set) the arithmetic instruction
               gets the S postfix
    ovloc    : receives LOC_FLAGS and the flag to test if flags were requested for an
               ADD/SUB on OS_8/OS_16/OS_32, otherwise LOC_VOID; if the operation is
               forwarded to a_op_reg_reg_reg_checkoverflow, that routine sets it }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
      imm1, imm2: DWord;
    begin
      { optimize_op_const (defined elsewhere) may change op and a; presumably it reduces
        trivial operations to OP_NONE/OP_MOVE which are handled right below }
      optimize_op_const(size, op, a);
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
        else
          ;
      end;
      ovloc.loc:=LOC_VOID;
      { if -a is encodable as shifter constant, swap ADD and SUB and use -a instead;
        not done if flags are requested through setflags. The check against
        -2147483648 presumably avoids negating the smallest 32 bit value - TODO confirm }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;
      { the constant fits directly into the instruction }
      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              { constant shifts are emitted as MOV with a shifted operand }
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { rol by a is done as ror by 32-a }
              if op = OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            {if (op in [OP_SUB, OP_ADD]) and
            ((a < 0) or
            (a > 4095)) then
            begin
            tmpreg:=getintregister(list,size);
            list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
            list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
            ));
            end
            else}
            begin
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              { ord(boolean)*ord(PF_S) yields PF_S if flags are requested and the
                postfix with ordinal value 0 otherwise }
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
            end;
            { tell the caller which flag to check: F_CS after an ADD, F_CC after a SUB;
              only done for OS_8, OS_16 and OS_32 }
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                  else
                    internalerror(2019050922);
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
                  is_continuous_mask(aword(a), lsb, width) and
                  ((lsb = 0) or ((lsb + width) = 32)) then
            begin
              shifterop_reset(so);
              if (width = 16) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width = 8) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb = 0 then
                begin
                  { mask of the lowest width bits: shift the upper bits out to the left
                    and shift back }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask reaching up to bit 31 (lsb+width=32): shift the lower bits out
                    to the right and shift back }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          { clear the unwanted bits by two BICs if the inverted mask can be split into
            two shifter constants }
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          { apply the constant in two steps if it can be split into two shifter
            constants; not done if flags are requested }
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
          else
            begin
              { last resort: load the constant into a register and use the
                register-register variant }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src2 op src1 for register operands.

    list     : assembler list the instructions are appended to
    op       : generic operation; OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not
               supported here (internalerror 200308283)
    size     : operand size; OP_ROR/OP_ROL are supported for OS_32/OS_S32 only
    src1     : second operand (the shift count for shifts and rotates)
    src2     : first operand
    dst      : destination register
    setflags : if true (or if the field cgsetflags is set), flags are generated
    ovloc    : LOC_VOID on return, except for a flag generating multiplication on a
               CPU with UMULL/SMULL: then LOC_FLAGS with F_NE, i.e. the flag is set
               if the upper 32 bits of the product are not a plain zero (OP_MUL)
               or sign (OP_IMUL) extension of the lower 32 bits }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { dst := src2 shifted by the register src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { 32x32->64 bit multiplication, overflowreg receives the upper half }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: swapping the operands does not help, so
                          multiply with a copy of the operand. The copy has to go into
                          tmpreg; copying into dst (=src2) would be a no-op and leave
                          tmpreg uninitialized }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper half must equal the sign extension of the
                      lower half (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper half must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply with a copy of the operand held in
                          tmpreg (see above) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { ord(boolean)*ord(PF_S) yields PF_S if flags are requested and the
              postfix with ordinal value 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and stores the product in the register pair dsthi:dstlo.

    size  : OS_32 selects UMULL, OS_S32 selects SMULL; other sizes are rejected with
            internalerror 2014060802
    dstlo,
    dsthi : may be NR_NO if the caller is not interested in that half; a scratch
            register is allocated in this case

    On CPUs without UMULL only the lower half can be calculated (plain MUL); asking
    for dsthi there triggers internalerror 2015083022. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              { 32x32=32 bit multiplication is sufficient }
              if dstlo=NR_NO then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
        end
      else
        begin
          list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
          if size=OS_32 then
            mulop:=A_UMULL
          else if size=OS_S32 then
            mulop:=A_SMULL
          else
            InternalError(2014060802);
          { The instruction needs valid registers for both halves, even if the caller
            omitted one of them. In case of dsthi = NR_NO we could fall back to
            32x32=32 bit multiplication }
          if dstlo=NR_NO then
            dstlo:=getintregister(list,size);
          if dsthi=NR_NO then
            dsthi:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
        end;
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref". Before doing so, the
    reference is rewritten until the instruction can address it.

    list      : assembler list the instructions are appended to
    op        : load/store opcode
    oppostfix : size/sign postfix of the instruction
    reg       : register being loaded or stored
    ref       : memory reference; passed by value, so the caller's copy stays untouched

    Returns the reference as it was finally used by the emitted instruction, so the
    caller can derive references to neighbouring locations from it. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      { tmpreg1 keeps track of the scratch register introduced last, if any }
      tmpreg1:=NR_NO;
      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;
      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative
        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.
        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      { fixref (defined elsewhere) is called if the reference contains such a symbol,
        has no register at all or if the offset is outside of the range checked here:
          -4095..4095 in general
          -255..255 for the postfixes SB, H and SH
          -1020..1020 and a multiple of 4 for the FPA/VFP loads and stores
        plus several tighter limits when generating Thumb code }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;
      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              { use a register containing zero as index }
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;
          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;
      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if tmpreg1<>NR_NO then
            begin
              { NOTE(review): the sum ends up in tmpreg2, but neither ref.base nor
                ref.index is redirected to it, so the emitted instruction does not seem
                to see the folded offset although ref.offset is cleared below.
                Presumably this path is not reached in practice because the Thumb
                cases above only create tmpreg1 after fixref ran - verify. }
              tmpreg2:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
              tmpreg1:=tmpreg2;
            end
          else
            begin
              { new base := old base + offset }
              tmpreg1:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          ref.offset:=0;
        end;
      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          { merge the index into the base, reusing the scratch register if there is one }
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { NOTE(review): unlike the two branches above, ref.signindex is not
                taken into account here, the index is always added - confirm that
                this is intended }
              tmpreg1:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores the register reg to the memory location ref.

    list     : assembler list the instructions are appended to
    fromsize : size of the value in the register; clamped to tosize if it is not smaller
    tosize   : size of the memory location, selects the store postfix
    reg      : register to be stored
    ref      : destination

    If the destination is aligned less than tosize (alignment 1 or 2), or if a
    halfword is stored on a CPU without CPUARM_HAS_ALL_MEM, the value is written in
    pieces. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg : tregister;
      dir : integer;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          oppostfix:=PF_B;
        OS_16,
        OS_S16:
          oppostfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          oppostfix:=PF_None;
        else
          InternalError(2003082912);
      end;
      if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
         ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
          (oppostfix =PF_H)) then
        begin
          { The least significant piece is always written first. On big endian targets
            it is located at the highest address, so start there and walk downwards. }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { two byte stores }
                tmpreg:=getintregister(list,OS_INT);
                usedtmpref:=ref;
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                { continue with the reference as it was really used by the first store }
                usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
                inc(usedtmpref.offset,dir);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
                a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                usedtmpref:=ref;
                if ref.alignment=2 then
                  begin
                    { two halfword stores }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    usedtmpref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,usedtmpref);
                    a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,tmpreg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_reg_ref(list,OS_16,OS_16,tmpreg,usedtmpref);
                  end
                else
                  begin
                    { four byte stores; tmpreg is shifted right by 8 bits before each
                      of the three upper bytes }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
                    a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                    a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                    a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                  end;
              end
            else
              { every other size is stored by a single instruction }
              handle_load_store(list,A_STR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_STR,oppostfix,reg,ref);
    end;
  { Stores reg to ref without taking care of the alignment of ref.

    tosize selects the width of the store (fromsize is not evaluated). On CPUs
    without CPUARM_HAS_ALL_MEM a halfword is written by two byte stores: bits 0..7
    go to ref, bits 8..15 to the following byte.

    Returns the reference as used by handle_load_store for the (first) store. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepostfix : toppostfix;
      hibyteref : treference;
      hibytereg : TRegister;
    begin
      case ToSize of
        OS_32,
        OS_S32:
          storepostfix:=PF_None;
        OS_16,
        OS_S16:
          storepostfix:=PF_H;
        OS_8,
        OS_S8:
          storepostfix:=PF_B;
        else
          InternalError(2003082910);
      end;
      if (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) or
         not(tosize in [OS_S16,OS_16]) then
        result:=handle_load_store(list,A_STR,storepostfix,reg,ref)
      else
        begin
          { lower byte }
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          { upper byte, stored right behind the lower one }
          hibytereg:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibytereg);
          hibyteref:=result;
          inc(hibyteref.offset);
          handle_load_store(list,A_STR,PF_B,hibytereg,hibyteref);
        end;
    end;
  { Loads reg from ref without taking care of the alignment of ref.

    fromsize selects the postfix of a regular load. On CPUs without
    CPUARM_HAS_ALL_MEM, tosize decides about a replacement sequence:
      OS_S8        : unsigned byte load followed by an OS_S8 -> OS_32 conversion
      OS_16/OS_S16 : two unsigned byte loads combined by ORR, the byte following ref
                     supplies bits 8..15

    Returns the reference as used by handle_load_store for the (first) load. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpostfix : toppostfix;
      combineshift : tshifterop;
      hibytereg : TRegister;
      hibyteref : treference;
    begin
      case FromSize of
        OS_32,
        OS_S32:
          loadpostfix:=PF_None;
        OS_S16:
          loadpostfix:=PF_SH;
        OS_16:
          loadpostfix:=PF_H;
        OS_S8:
          loadpostfix:=PF_SB;
        OS_8:
          loadpostfix:=PF_B;
        else
          InternalError(200308291);
      end;
      if (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) or
         not(tosize in [OS_S8,OS_S16,OS_16]) then
        result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref)
      else if tosize=OS_S8 then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else
        begin
          { lower byte }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          { upper byte }
          hibytereg:=getintregister(list,OS_INT);
          hibyteref:=result;
          inc(hibyteref.offset);
          handle_load_store(list,A_LDR,PF_B,hibytereg,hibyteref);
          { reg := reg or (hibytereg shl 8) }
          shifterop_reset(combineshift);
          combineshift.shiftmode:=SM_LSL;
          combineshift.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibytereg,combineshift));
        end;
    end;
  { Copies reg1 to reg2 and converts the value from fromsize to tosize.

    list     : assembler list the instructions are appended to
    fromsize : size of the value in reg1
    tosize   : size the value shall have in reg2
    reg1     : source register
    reg2     : destination register

    If the sizes differ, the value is zero or sign extended according to the smaller
    of both sizes: by shifts or AND before ARMv6, by UXTB/SXTB/UXTH/SXTH and friends
    otherwise. If no conversion code was emitted, a plain MOV is generated unless
    both registers are the same. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;
    { Emits reg2 := reg shifted by shiftimm using the given shift mode. For Thumb code
      the shift is forwarded to a_op_const_reg_reg, otherwise a MOV with a shifted
      operand is generated using the variable so of the enclosing procedure. }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if GenerateThumbCode then
          begin
            case shiftmode of
              SM_ASR:
                a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
              SM_LSR:
                a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
              SM_LSL:
                a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
              else
                internalerror(2013090301);
            end;
          end
        else
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
          end;
      end;
    var
      instr: taicpu;
      conv_done: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);
      conv_done:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          conv_done:=true;
          { when not widening, the destination size decides about the extension }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            { no extension instructions used: AND or a pair of shifts }
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      { sign extend to 16 bits only: the arithmetic shift leaves the
                        value in the upper halfword, the logical shift moves it down
                        and clears the upper halfword }
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                { no extension needed, fall back to the plain move below }
                conv_done:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize=OS_16 then
                    begin
                      { SXTB16 with the operand rotated by 16 followed by a logical
                        shift right by 16; so is still set up for the rotation when
                        do_shift overwrites it for the second instruction }
                      so.shiftmode:=SM_ROR;
                      so.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                { no extension needed, fall back to the plain move below }
                conv_done:=false;
            end
        end;
      if not conv_done and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(instr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(instr);
        end;
    end;
  { Passes the floating point value located at ref as parameter described by paraloc.

    All locations of the parameter are processed one after the other:
      - fpu registers are loaded by a_loadfpu_ref_reg
      - integer registers are loaded by a_load_ref_reg; a 64 bit location is handed
        over to cg64.a_load64_ref_cgpara
      - memory locations are filled by g_concatcopy
    After each location the source reference is advanced by the size of that
    location. Other location kinds trigger internalerror 200408241. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      srcref,pararef : treference;
      curloc : pcgparalocation;
    begin
      srcref:=ref;
      curloc:=paraloc.location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REFERENCE :
              begin
                reference_reset_base(pararef,curloc^.reference.index,curloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,srcref,pararef,tcgsize2size[curloc^.size]);
              end;
            LOC_REGISTER :
              if curloc^.size in [OS_32,OS_F32] then
                begin
                  paramanager.allocparaloc(list,paraloc.location);
                  a_load_ref_reg(list,OS_32,OS_32,srcref,curloc^.register);
                end
              else if curloc^.size in [OS_64,OS_F64] then
                cg64.a_load64_ref_cgpara(list,srcref,paraloc)
              else
                a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,paraloc.location);
                { the original, not advanced reference is used here }
                a_loadfpu_ref_reg(list,size,size,ref,curloc^.register);
              end;
            else
              internalerror(200408241);
          end;
          { step to the part of the value belonging to the next location }
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Copies the fpu register reg1 to reg2 by an MVF instruction. The postfix of the
    instruction is taken from cgsize2fpuoppostfix for tosize; fromsize is not
    evaluated. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      move_instr : taicpu;
    begin
      move_instr:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(move_instr,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads the fpu register reg from ref by an LDF instruction. The postfix depends on
    fromsize (S for 32 bit, D for 64 bit, E for OS_F80); other sizes trigger
    internalerror 200309021. If tosize differs from fromsize, a register to register
    conversion of reg is appended. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      loadpostfix : toppostfix;
    begin
      if fromsize in [OS_32,OS_F32] then
        loadpostfix:=PF_S
      else if fromsize in [OS_64,OS_F64] then
        loadpostfix:=PF_D
      else if fromsize=OS_F80 then
        loadpostfix:=PF_E
      else
        InternalError(200309021);
      handle_load_store(list,A_LDF,loadpostfix,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref with an STF instruction.

    The STF precision postfix is derived from tosize (OS_F32 -> S,
    OS_F64 -> D, OS_F80 -> E); any other size triggers
    internalerror(200309022). fromsize is not consulted. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      case tosize of
        OS_F32:
          precision:=PF_S;
        OS_F64:
          precision:=PF_D;
        OS_F80:
          precision:=PF_E;
        else
          InternalError(200309022);
      end;
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  { Emits a check of the FPSCR status bits and a call to
    FPC_THROWFPUEXCEPTION if any of the bits selected by the mask $9f is set.

    Code is only generated when the selected fpu has the VFP extension,
    needs_check_for_fpu_exceptions holds and either force is set or the
    current procedure is flagged as needing the check.
    If clear is set, the "check needed" flag of the current procedure is
    reset after the code has been generated. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipcall : taicpu;
      noexception : TAsmLabel;
    begin
      if not(FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
        exit;
      if not(needs_check_for_fpu_exceptions) then
        exit;
      if not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { read FPSCR and mask it; ANDS sets the Z flag if no selected bit is set }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { jump over the call if the masked value is zero }
      current_asmdata.getjumplabel(noexception);
      skipcall:=taicpu.op_sym(A_B,noexception);
      skipcall.is_jmp:=true;
      skipcall.condition:=C_EQ;
      list.concat(skipcall);

      { the call happens between an alloc/dealloc of the volatile integer registers }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      a_label(list,noexception);
      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1621. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.

    Three encodings are tried in order:
      1. CMP reg,#a   if a can be encoded as an immediate (shifter constant
                      in ARM mode, thumb immediate in Thumb mode)
      2. CMN reg,#-a  (not in Thumb mode) if -a is a shifter constant
      3. load a into a temporary register and CMP reg,tmp
    The flags register is allocated around the compare and the jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      fits_cmp : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      if GenerateThumbCode then
        fits_cmp:=is_thumb_imm(a)
      else
        fits_cmp:=is_shifter_const(a,rot);
      if fits_cmp then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { no immediate form available: materialize the constant first }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.

    reverse=true : CLZ dst,src / RSB dst,dst,#31 / AND dst,dst,#255
    reverse=false: RBIT dst,src / CLZ dst,dst; unless not_zero is set, the
                   result 32 is afterwards replaced by $ff using a
                   conditional MOV (preceded by an IT block for Thumb-2).
    srcsize and dstsize are not consulted. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not(reverse) then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          if not_zero then
            exit;
          { map the result 32 to $ff }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          { dst:=(31-clz(src)) and 255 }
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags register is allocated around the compare and the jump; size is
    not consulted. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s.
    BL is used when generating Thumb code, B otherwise; the instruction is
    marked as a jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to the label l.
    BL is used when generating Thumb code, B otherwise; the instruction is
    marked as a jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to l that is taken when the flags f are set.

    Outside Thumb mode this is a single conditional B. In Thumb mode a
    conditional B with the inverted condition skips over an unconditional
    jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skip : TAsmLabel;
    begin
      if not(GenerateThumbCode) then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skip);
      branch:=setcondition(taicpu.op_sym(A_B,skip),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skip);
    end;
  { Materializes the flags f in reg using two conditional moves:
    reg:=1 under the condition matching f, reg:=0 under the inverse
    condition. size is not consulted. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      set_one,set_zero : taicpu;
    begin
      set_one:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      list.concat(set_one);
      set_zero:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
      list.concat(set_zero);
    end;
  { Emits the profiling hook: pushes R14 and calls __gnu_mcount_nc.
    Only the arm-linux target is handled; every other target triggers
    internalerror(2014091201). }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Generates the procedure prologue.

    list         : assembler list the prologue is appended to
    localsize    : size of the local area requested by the caller; it is
                   rounded up to a multiple of 4 and later adjusted for stack
                   alignment
    nostackframe : if true, no code is generated at all (only the padding
                   register of the current procinfo is reset)

    The generated code, in order:
      1. stores the integer registers that have to be preserved (STMFD);
         Darwin targets use a different save order than all other targets
      2. sets up the frame pointer if the procedure uses one
      3. subtracts the local area from the stack pointer
      4. stores the floating point (FPA) or VFP registers that have to be
         preserved
    CFI information is recorded along the way. The matching epilogue is
    generated by g_proc_exit. }
  procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      mmregs,
      regs, saveregs : tcpuregisterset;
      registerarea, offset,
      r7offset,
      stackmisalignment : pint;
      imm1, imm2: DWord;
      stack_parameters : Boolean;
    begin
      LocalSize:=align(LocalSize,4);
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      { call instruction does not put anything on the stack }
      registerarea:=0;
      { High(TSuperRegister) means "no padding register"; a register below
        RS_R4 is stored here further down if one gets pushed for alignment }
      tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
      lastfloatreg:=RS_NO;
      if not(nostackframe) then
        begin
          firstfloatreg:=RS_NO;
          mmregs:=[];
          { determine which floating point/vfp registers have to be saved;
            the actual store happens at the end of the prologue }
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { save floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                { remember the first and the last used register and count
                  12 bytes per used register }
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      inc(registerarea,12);
                    end;
              end;
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin;
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin;
                { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050924);
          end;
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          { ref addresses the stack through the stack pointer in pre-indexed
            mode; it is the target of the STM instructions below }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if not(target_info.system in systems_darwin) then
            begin
              a_reg_alloc(list,NR_STACK_POINTER_REG);
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { keep a copy of the entry stack pointer in R12; the frame
                    pointer is derived from it after the registers are stored }
                  a_reg_alloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
                end;
              { the (old) ARM APCS requires saving both the stack pointer (to
                crawl the stack) and the PC (to identify the function this
                stack frame belongs to) -> also save R12 (= copy of R13 on entry)
                and R15 -- still needs updating for EABI and Darwin, they don't
                need that }
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
              else
                if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                  include(regs,RS_R14);
              if regs<>[] then
                begin
                  { 4 bytes per stored integer register }
                  for r:=RS_R0 to RS_R15 do
                    if r in regs then
                      inc(registerarea,4);
                  { if the stack is not 8 byte aligned, try to add an extra register,
                    so we can avoid the extra sub/add ...,#4 later (KB) }
                  if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
                    for r:=RS_R3 downto RS_R0 do
                      if not(r in regs) then
                        begin
                          regs:=regs+[r];
                          inc(registerarea,4);
                          { recorded so that g_proc_exit can add the same register again }
                          tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
                          break;
                        end;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
                end;
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { record the CFA-relative slot of every stored register,
                    starting at -4 for the highest register number }
                  offset:=-4;
                  for r:=RS_R15 downto RS_R0 do
                    if r in regs then
                      begin
                        current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),offset);
                        dec(offset,4);
                      end;
                  { the framepointer now points to the saved R15, so the saved
                    framepointer is at R11-12 (for get_caller_frame) }
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                  a_reg_dealloc(list,NR_R12);
                  current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
                end;
            end
          else
            begin
              { always save r14 if we use r7 as the framepointer, because
                the parameter offsets are hardcoded in advance and always
                assume that r14 sits on the stack right behind the saved r7
              }
              if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
                include(regs,RS_FRAME_POINTER_REG);
              if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                include(regs,RS_R14);
              if regs<>[] then
                begin
                  { on Darwin, you first have to save [r4-r7,lr], and then
                    [r8,r10,r11] and make r7 point to the previously saved
                    r7 so that you can perform a stack crawl based on it
                    ([r7] is previous stack frame, [r7+4] is return address
                  }
                  include(regs,RS_FRAME_POINTER_REG);
                  saveregs:=regs-[RS_R8,RS_R10,RS_R11];
                  { r7offset counts the bytes of the registers stored below
                    the frame pointer register, i.e. the distance from the
                    new stack pointer to the saved frame pointer }
                  r7offset:=0;
                  for r:=RS_R0 to RS_R15 do
                    if r in saveregs then
                      begin
                        inc(registerarea,4);
                        if r<RS_FRAME_POINTER_REG then
                          inc(r7offset,4);
                      end;
                  { save the registers }
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                  { make r7 point to the saved r7 (regardless of whether this
                    frame uses the framepointer, for backtrace purposes) }
                  if r7offset<>0 then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                  else
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                  { now save the rest (if any) }
                  saveregs:=regs-saveregs;
                  if saveregs<>[] then
                    begin
                      for r:=RS_R8 to RS_R11 do
                        if r in saveregs then
                          inc(registerarea,4);
                      list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                    end;
                end;
            end;
          { reserve the local area; a misaligned register area alone only
            forces an adjustment if the procedure performs calls or is an
            assembler procedure }
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { pad the local size so that register area plus locals are a
                multiple of localalignmax }
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
                begin
                  { the frame size was estimated in advance: the real size
                    must not exceed it, and the estimate is what gets used }
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    internalerror(2014030901)
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end;
              { subtract localsize from the stack pointer: with one immediate,
                with two immediates, or via R12 if neither form fits }
              if is_shifter_const(localsize,shift) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if split_into_shifter_const(localsize, imm1, imm2) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                end
              else
                begin
                  if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                    a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end;
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;
          { store the floating point/vfp registers selected at the top }
          if (mmregs<>[]) or
             (firstfloatreg<>RS_NO) then
            begin
              reference_reset(ref,4,[]);
              { for a large offset and for every VFP fpu the address of the
                save area is computed into R12 (framepointer minus
                -floatregstart); otherwise the area is addressed directly
                relative to the frame pointer }
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    { one SFM stores the register range firstfloatreg..lastfloatreg }
                    list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VSTM takes the address register in ref.index }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    { same as above, but with the R_SUBFS register subtype }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050923);
              end;
            end;
        end;
    end;
  { Generates the procedure epilogue, the counterpart of g_proc_entry.

    list         : assembler list the epilogue is appended to
    parasize     : not consulted by this implementation
    nostackframe : if true, only the return instruction is generated
                   (BX R14 if the cpu supports BX, MOV PC,R14 otherwise)

    With a stack frame the generated code, in order:
      1. reloads the preserved floating point (FPA) or VFP registers
      2. releases the local area (only if the stack pointer is the frame
         pointer or the target is Darwin)
      3. reloads the preserved integer registers; the return address is
         loaded into R15, which also performs the return }
  procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      LocalSize : longint;
      firstfloatreg,lastfloatreg,
      r,
      shift : byte;
      mmregs,
      saveregs,
      regs : tcpuregisterset;
      registerarea,
      stackmisalignment: pint;
      paddingreg: TSuperRegister;
      imm1, imm2: DWord;
    begin
      if not(nostackframe) then
        begin
          registerarea:=0;
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          mmregs:=[];
          saveregs:=[];
          { determine which floating point/vfp registers have to be reloaded }
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { restore floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { floating point register space is already included in
                        localsize below by calc_stackframe_size
                      inc(registerarea,12);
                      }
                    end;
              end;
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { restore vfp registers? }
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                { NOTE(review): g_proc_entry masks with [0..15] for a VFP fpu
                  without FPUARM_HAS_32REGS, here [0..31] is always used --
                  confirm that both yield the same set in that case }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else
              internalerror(2019050908);
          end;
          if (firstfloatreg<>RS_NO) or
             (mmregs<>[]) then
            begin
              reference_reset(ref,4,[]);
              { for a large offset and for every VFP fpu the address of the
                save area is computed into R12 (framepointer minus
                -floatregstart); otherwise the area is addressed directly
                relative to the frame pointer }
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    { one LFM reloads the register range firstfloatreg..lastfloatreg }
                    list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VLDM takes the address register in ref.index }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    { same as above, but with the R_SUBFS register subtype }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050921);
              end;
            end;
          { integer registers to reload }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or
             (regs<>[]) or
             ((target_info.system in systems_darwin) and
              (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
            begin
              { the slot holding R14 is loaded into R15 instead, which
                returns to the caller }
              exclude(regs,RS_R14);
              include(regs,RS_R15);
              if (target_info.system in systems_darwin) then
                include(regs,RS_FRAME_POINTER_REG);
            end;
          if not(target_info.system in systems_darwin) then
            begin
              { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                The saved PC came after that but is discarded, since we restore
                the stack pointer }
              if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
            end
          else
            begin
              { restore R8-R11 already if necessary (they've been stored
                before the others) }
              saveregs:=regs*[RS_R8,RS_R10,RS_R11];
              if saveregs<>[] then
                begin
                  { ref is prepared here; the LDM using it is emitted after
                    the local area has been released }
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  for r:=RS_R8 to RS_R11 do
                    if r in saveregs then
                      inc(registerarea,4);
                  regs:=regs-saveregs;
                end;
            end;
          { 4 bytes per reloaded integer register }
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);
          { reapply the stack padding reg, in case there was one, see the complimentary
            comment in g_proc_entry() (KB) }
          paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
          { a padding register is always below RS_R4; it must not already be
            part of the register set }
          if paddingreg < RS_R4 then
            if paddingreg in regs then
              internalerror(201306190)
            else
              begin
                regs:=regs+[paddingreg];
                inc(registerarea,4);
              end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin) then
            begin
              LocalSize:=current_procinfo.calc_stackframe_size;
              { same condition as in g_proc_entry for reserving the local area }
              if (LocalSize<>0) or
                 ((stackmisalignment<>0) and
                  ((pi_do_call in current_procinfo.flags) or
                   (po_assembler in current_procinfo.procdef.procoptions))) then
                begin
                  if pi_estimatestacksize in current_procinfo.flags then
                    LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                  else
                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                  { add localsize to the stack pointer: with one immediate,
                    with two immediates, or via R12 if neither form fits }
                  if is_shifter_const(LocalSize,shift) then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                  else if split_into_shifter_const(localsize, imm1, imm2) then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                    end
                  else
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end;
                end;
              { Darwin: reload the registers that were stored last in the prologue }
              if (target_info.system in systems_darwin) and
                 (saveregs<>[]) then
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
              if regs=[] then
                begin
                  { nothing to reload: plain return through R14 }
                  if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                  else
                    list.concat(taicpu.op_reg(A_BX,NR_R14))
                end
              else
                begin
                  { reload the registers from the stack; regs contains R15 }
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;
            end
          else
            begin
              { restore int registers and return }
              { the registers are loaded relative to the frame pointer
                (LDMEA); regs includes R13 and R15 here }
              reference_reset(ref,4,[]);
              ref.index:=NR_FRAME_POINTER_REG;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
            end;
        end
      else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14))
    end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.

    Nothing is generated unless PIC code is being created, the current
    procedure is flagged as needing the GOT and the target uses a GOT for
    PIC. The address is computed in R12: a pc-relative constant
    (_GLOBAL_OFFSET_TABLE_ relative to a local label, minus 8) is loaded
    from the local data of the procedure and added to PC. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel,
      piclabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sr : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) then
        exit;
      if not(pi_needs_got in current_procinfo.flags) then
        exit;
      if not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before GOT initialization code, allocate registers used for procedure parameters
        to prevent usage of these registers for temp operations in later stages of code
        generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sr:=RS_R0 to RS_R3 do
        if sr in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { constant pool entry: label in the local data, loaded pc-relative }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { the pool entry holds the GOT address relative to piclabel, which is
        placed directly in front of the ADD that uses PC }
      current_asmdata.getaddrlabel(piclabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,piclabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,piclabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sr:=RS_R3 downto RS_R0 do
        if sr in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));
    end;
  { Loads the address described by ref into the register r.

    Only references in AM_OFFSET address mode are accepted
    (internalerror(200309071) otherwise). A working copy of the reference is
    normalized first: a lone index register becomes the base, and a symbol
    or an offset that is not encodable (neither offset nor -offset is a
    shifter constant) is resolved through fixref. Afterwards the address is
    computed from base, index and offset. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      rot : byte;
      addr : treference;
      move : taicpu;
      indexop : topcg;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      addr:=ref;
      { Be sure to have a base register }
      if addr.base=NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(2014020702);
          if addr.signindex<0 then
            internalerror(200312023);
          addr.base:=addr.index;
          addr.index:=NR_NO;
        end;

      if assigned(addr.symbol) or
         (not(is_shifter_const(addr.offset,rot)) and
          not(is_shifter_const(-addr.offset,rot))) then
        fixref(list,addr);

      { expect a base here if there is an index }
      if (addr.base=NR_NO) and (addr.index<>NR_NO) then
        internalerror(200312022);

      if addr.index<>NR_NO then
        begin
          { base and index: combine them first, then add the offset }
          if addr.shiftmode<>SM_None then
            internalerror(200312021);
          { NOTE(review): the operand order (base, index) is kept exactly as
            in the original for both operations -- confirm that it gives
            base-index for OP_SUB }
          if addr.signindex<0 then
            indexop:=OP_SUB
          else
            indexop:=OP_ADD;
          a_op_reg_reg_reg(list,indexop,OS_ADDR,addr.base,addr.index,r);
          if addr.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,r,r);
        end
      else if addr.base=NR_NO then
        { neither base nor index: the address is the plain offset }
        a_load_const_reg(list,OS_ADDR,addr.offset,r)
      else if addr.offset<>0 then
        a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,addr.base,r)
      else
        begin
          { base only: a register move that is reported to the register
            allocator through add_move_instruction }
          move:=taicpu.op_reg_reg(A_MOV,r,addr.base);
          list.concat(move);
          add_move_instruction(move);
        end;
    end;
  2305. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2306. var
  2307. tmpreg, tmpreg2 : tregister;
  2308. tmpref : treference;
  2309. l, piclabel : tasmlabel;
  2310. indirection_done : boolean;
  2311. begin
  2312. { absolute symbols can't be handled directly, we've to store the symbol reference
  2313. in the text segment and access it pc relative
  2314. For now, we assume that references where base or index equals to PC are already
  2315. relative, all other references are assumed to be absolute and thus they need
  2316. to be handled extra.
  2317. A proper solution would be to change refoptions to a set and store the information
  2318. if the symbol is absolute or relative there.
  2319. }
  2320. { create consts entry }
  2321. reference_reset(tmpref,4,[]);
  2322. current_asmdata.getjumplabel(l);
  2323. cg.a_label(current_procinfo.aktlocaldata,l);
  2324. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2325. piclabel:=nil;
  2326. tmpreg:=NR_NO;
  2327. indirection_done:=false;
  2328. if assigned(ref.symbol) then
  2329. begin
  2330. if (target_info.system=system_arm_ios) and
  2331. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2332. begin
  2333. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2334. if ref.offset<>0 then
  2335. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2336. indirection_done:=true;
  2337. end
  2338. else if ref.refaddr=addr_gottpoff then
  2339. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2340. else if ref.refaddr=addr_tlsgd then
  2341. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2342. else if ref.refaddr=addr_tlsdesc then
  2343. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2344. else if ref.refaddr=addr_tpoff then
  2345. begin
  2346. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2347. Internalerror(2019092804);
  2348. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2349. end
  2350. else if (cs_create_pic in current_settings.moduleswitches) then
  2351. if (tf_pic_uses_got in target_info.flags) then
  2352. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2353. else
  2354. begin
  2355. { ideally, we would want to generate
  2356. ldr r1, LPICConstPool
  2357. LPICLocal:
  2358. ldr/str r2,[pc,r1]
  2359. ...
  2360. LPICConstPool:
  2361. .long _globsym-(LPICLocal+8)
  2362. However, we cannot be sure that the ldr/str will follow
  2363. right after the call to fixref, so we have to load the
  2364. complete address already in a register.
  2365. }
  2366. current_asmdata.getaddrlabel(piclabel);
  2367. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2368. end
  2369. else
  2370. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2371. end
  2372. else
  2373. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2374. { load consts entry }
  2375. if not indirection_done then
  2376. begin
  2377. tmpreg:=getintregister(list,OS_INT);
  2378. tmpref.symbol:=l;
  2379. tmpref.base:=NR_PC;
  2380. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2381. if (cs_create_pic in current_settings.moduleswitches) and
  2382. (tf_pic_uses_got in target_info.flags) and
  2383. assigned(ref.symbol) then
  2384. begin
  2385. {$ifdef EXTDEBUG}
  2386. if not (pi_needs_got in current_procinfo.flags) then
  2387. Comment(V_warning,'pi_needs_got not included');
  2388. {$endif EXTDEBUG}
  2389. Include(current_procinfo.flags,pi_needs_got);
  2390. reference_reset(tmpref,4,[]);
  2391. tmpref.base:=current_procinfo.got;
  2392. tmpref.index:=tmpreg;
  2393. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2394. if ref.offset<>0 then
  2395. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2396. end;
  2397. end;
  2398. if assigned(piclabel) then
  2399. begin
  2400. cg.a_label(list,piclabel);
  2401. tmpreg2:=getaddressregister(list);
  2402. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2403. tmpreg:=tmpreg2
  2404. end;
  2405. { This routine can be called with PC as base/index in case the offset
  2406. was too large to encode in a load/store. In that case, the entire
  2407. absolute expression has been re-encoded in a new constpool entry, and
  2408. we have to remove the use of PC from the original reference (the code
  2409. above made everything relative to the value loaded from the new
  2410. constpool entry) }
  2411. if is_pc(ref.base) then
  2412. ref.base:=NR_NO;
  2413. if is_pc(ref.index) then
  2414. ref.index:=NR_NO;
  2415. if (ref.base<>NR_NO) then
  2416. begin
  2417. if ref.index<>NR_NO then
  2418. begin
  2419. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2420. ref.base:=tmpreg;
  2421. end
  2422. else
  2423. if ref.base<>NR_PC then
  2424. begin
  2425. ref.index:=tmpreg;
  2426. ref.shiftimm:=0;
  2427. ref.signindex:=1;
  2428. ref.shiftmode:=SM_None;
  2429. end
  2430. else
  2431. ref.base:=tmpreg;
  2432. end
  2433. else
  2434. ref.base:=tmpreg;
  2435. ref.offset:=0;
  2436. ref.symbol:=nil;
  2437. end;
  { Emits code that copies len bytes from source to dest.

    list    - assembler list the instructions are appended to
    source  - reference to the first byte to read
    dest    - reference to the first byte to write
    len     - number of bytes to copy; nothing is emitted when it is 0
    aligned - true if word/halfword accesses may be used directly

    Strategy, as selected in the main body:
      * aligned and len=4 or len=2: one load/store pair of that size;
      * aligned and len<=helpsize: up to maxtmpreg 32 bit loads followed by
        the matching stores, then a tail of 32/16/8 bit copies;
      * not aligned and len<=4: individual byte copies;
      * everything else: a byte copy loop (genloop/genloop_thumb). }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved registers; also +2 is used because of the registers
        required for referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;

    type
      ttmpregisters = array[1..maxtmpreg_arm] of tregister;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:ttmpregisters;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop using post-indexed addressing on srcref/dstref: copies
      count div size units of size bytes, then the count mod size rest.
      Expects srcref/dstref/countreg to be set up by the caller.
      Will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the flag-setting decrement sits between load and store; the
          conditional jump below consumes its flags }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether creating a separate ref is needed or
      whether we can keep the original reference while copying.
      Note: reads the enclosing len at the time of the call. }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Thumb variant of genloop: instead of post-indexed addressing, the
      base registers are advanced with explicit ADDs.
      Will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { advances the base register of ref by value bytes }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { advance by the unit size that was just copied (identical to the
          previous fixed step of 1 for the current size=1 callers) }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      tmpregisters:=Default(ttmpregisters);
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest length that is still copied without a loop }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;
          { load as many words as temporary registers are available }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;
          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;
          { store the words loaded above }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;
          { copy whatever is left in the largest units that still fit }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the internal load/store variants return the reference they
                actually used, which is then advanced byte by byte }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { all properties of srcref are taken from source (this used
                to pass dest.temppos by mistake) }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
              genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as aligned
    unless the alignment of either reference is 1 or 3. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      treat_as_aligned : boolean;
    begin
      treat_as_aligned:=not(source.alignment in [1,3]) and
                        not(dest.alignment in [1,3]);
      g_concatcopy_internal(list,source,dest,len,treat_as_aligned);
    end;
  { Overflow check without a separate overflow location: calls
    g_overflowCheck_loc with a LOC_VOID overflow location. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      voidloc : tlocation;
    begin
      voidloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,voidloc);
    end;
  { Emits an overflow check: a conditional branch that skips a call to
    FPC_OVERFLOW when no overflow occurred. Does nothing unless
    cs_check_overflow is active in the local switches.

    ovloc.loc selects how the "no overflow" condition is obtained:
      LOC_VOID  - derived from def and, for pointer and the listed
                  ordinal types, from the opcode of the last instruction
                  in List;
      LOC_FLAGS - the inverse of ovloc.resflags; the flags register is
                  deallocated afterwards.
    Any other location raises internalerror 200409281. }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      branch : TAiCpu;
      noovflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_VOID:
          begin
            branch:=taicpu.op_sym(A_B,skiplabel);
            branch.is_jmp:=true;
            { pointers and these ordinal types are checked through the
              C_CS/C_CC conditions, everything else through C_VC }
            carrybased:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if not carrybased then
              branch.SetCondition(C_VC)
            else if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
              { the last instruction was a subtraction }
              branch.SetCondition(C_CS)
            else
              branch.SetCondition(C_CC);
            list.concat(branch);
          end;
        LOC_FLAGS:
          begin
            noovflags:=ovloc.resflags;
            inverse_flags(noovflags);
            cg.a_jmp_flags(list,noovflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally empty: emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { nothing to do here, g_proc_entry takes care of this work }
    end;
  { Intentionally empty: emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { nothing to do here, g_proc_exit takes care of this work }
    end;
  { Emits a jump to l that is taken when cond holds.

    Outside Thumb code this is a single conditional branch. For Thumb
    code, a conditional branch with the inverted condition jumps over an
    unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Returns the opcode for a scalar mm register move between the two
    given float sizes: A_VMOV for equal F32/F64 sizes, A_VCVT between F32
    and F64. Every other combination in OS_F32..OS_F128 raises
    internalerror 200312205. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      scalar_ops : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    var
      op : tasmop;
    begin
      op:=scalar_ops[fromsize,tosize];
      result:=op;
      if op=A_NONE then
        internalerror(200312205);
    end;
  { Returns the opcode postfix belonging to the given pair of float
    sizes; combinations other than F32/F64 yield PF_None. The table is
    indexed [fromsize,tosize] exactly as the arguments are passed in. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      scalar_postfixes : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    var
      postfix : TOpPostfix;
    begin
      postfix:=scalar_postfixes[fromsize,tosize];
      result:=postfix;
    end;
  { Moves/converts the scalar value in mm register reg1 (fromsize) into
    mm register reg2 (tosize). Only nil or scalar shuffles are accepted,
    anything else raises internalerror 2009112407.

    Note that the lookup helpers are called with (tosize,fromsize), i.e.
    destination size first. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      moveinstr: taicpu;
    begin
      if assigned(shuffle) and not shufflescalar(shuffle) then
        internalerror(2009112407);

      moveinstr:=taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1);
      moveinstr:=setoppostfix(moveinstr,get_scalar_mm_prefix(tosize,fromsize));
      list.concat(moveinstr);

      if moveinstr.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so no check is needed;
          register it as a move instruction instead }
        add_move_instruction(moveinstr)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar from memory (ref, fromsize) into mm register reg
    (tosize).

    Integer source sizes are mapped to the float size of the same width
    and must then equal tosize. If the sizes differ, the value is loaded
    into a temporary mm register and moved into reg via a_loadmm_reg_reg.
    References with alignment 1 or 2 are loaded through integer
    registers, all others with VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpr,
      loadreg : tregister;
      gprpair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      case fromsize of
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { since we are loading an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { since we are loading an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112901);
          end;
        OS_F32,OS_F64:
          ;
        else
          internalerror(2019050920);
      end;

      { load directly into reg unless a conversion has to follow }
      if fromsize=tosize then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        case fromsize of
          OS_F32:
            begin
              gpr:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,ref,gpr);
              a_loadmm_intreg_reg(list,OS_32,OS_F32,gpr,loadreg,mms_movescalar);
            end;
          OS_F64:
            begin
              gprpair.reglo:=getintregister(list,OS_32);
              gprpair.reghi:=getintregister(list,OS_32);
              cg64.a_load64_ref_reg(list,ref,gprpair);
              cg64.a_loadmm_intreg64_reg(list,OS_F64,gprpair,loadreg);
            end;
          else
            internalerror(2009112412);
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar in mm register reg (fromsize) to memory (ref,
    tosize).

    Integer destination sizes are mapped to the float size of the same
    width and must then equal fromsize. If the sizes differ, the value is
    first converted into a temporary mm register via a_loadmm_reg_reg.
    References with alignment 1 or 2 are stored through integer
    registers, all others with VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpr,
      storereg : tregister;
      gprpair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      case tosize of
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing as an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112802);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing as an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112902);
          end;
        OS_F32,OS_F64:
          ;
        else
          internalerror(2019050919);
      end;

      { store reg directly unless it has to be converted first }
      if fromsize=tosize then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        case tosize of
          OS_F32:
            begin
              gpr:=getintregister(list,OS_32);
              a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpr,shuffle);
              a_load_reg_ref(list,OS_32,OS_32,gpr,ref);
            end;
          OS_F64:
            begin
              gprpair.reglo:=getintregister(list,OS_32);
              gprpair.reghi:=getintregister(list,OS_32);
              cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gprpair);
              cg64.a_load64_reg_ref(list,gprpair,ref);
            end;
          else
            internalerror(2009112417);
        end;
      { VSTR cannot generate an FPU exception and VCVT is handled
        separately, so no check is needed here }
    end;
  { Transfers the raw 32 bit contents of integer register intreg into mm
    register mmreg with VMOV. No conversion is performed: tosize must be
    OS_F32, fromsize OS_32 or OS_S32, and shuffle nil or scalar. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { raw data transfer only, so reject anything that would need a
        conversion }
      if not(tosize=OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);
      { VMOV cannot generate an FPU exception, so no check is needed }
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of mm register mmreg into integer
    register intreg with VMOV. No conversion is performed: fromsize must
    be OS_F32, tosize OS_32 or OS_S32, and shuffle nil or scalar. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { raw data transfer only, so reject anything that would need a
        conversion }
      if not(fromsize=OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112409);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);
      { VMOV cannot generate an FPU exception, so no check is needed }
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Performs an operation on mm registers. Only OP_XOR is implemented;
    every other op raises internalerror 2009112906.

    The vfp doesn't support xor nor any other logical operation, but this
    routine is used to initialise global mm regvars, and an mm register
    can easily be initialised with 0:
      * with NEON and size=OS_F64 a VEOR dst,dst,src is emitted;
      * otherwise src must equal dst and the register is filled from an
        integer register holding 0. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op<>OP_XOR then
        internalerror(2009112906);

      if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size=OS_F64) then
        begin
          if (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2019081301);
          list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
          exit;
        end;

      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);
      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      case size of
        OS_F32:
          list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg));
        OS_F64:
          { both 32 bit halves come from the same zero register }
          list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg));
        else
          internalerror(2009112908);
      end;
    end;
  { After an operation that may leave bits outside an 8/16 bit result
    (OP_MUL, OP_SHL, OP_ADD, OP_SUB, OP_NEG), re-loads dst from its 32 bit
    contents into the requested sub-32-bit size. Does nothing for other
    operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      smallsizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in overflowops) then
        exit;
      if size in smallsizes then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits MLA op1,op2,op3,op4 after replacing every operand that is R15,
    or R13 when generating Thumb/Thumb-2 code, by a fresh integer
    register holding a copy of it. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new register if it is one of the
      registers described above }
    procedure checkreg(var reg : TRegister);
      var
        copyreg : TRegister;
        mustcopy : boolean;
      begin
        mustcopy:=((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
                  (getsupreg(reg)=RS_R15);
        if not mustcopy then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      checkreg(op1);
      checkreg(op2);
      checkreg(op3);
      checkreg(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { If the current procedure has pi_needs_tls set, emits a call to
    fpc_read_tp and copies R0 into current_procinfo.tlsoffset. R0 is
    marked allocated around the call and the copy. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation regdst := op(regsrc, regdst).

    OP_NEG and OP_NOT are handled here and only read regsrc; all other
    operations are forwarded to a_op64_reg_reg_reg with regdst as both
    second source and destination. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { 0-src: flag-setting RSB on the low word, RSC on the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          { both halves are inverted independently }
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation with a constant, in place: forwards to
    a_op64_const_reg_reg with reg as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(
        list,op,size,value,
        reg, { source }
        reg  { destination }
      );
    end;
  { 64 bit operation regdst := regsrc op value without overflow
    tracking: calls the checkoverflow variant with setflags=false and
    discards the overflow location it reports. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      ignoredovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,
        false,ignoredovloc);
    end;
  { 64 bit three-register operation without overflow tracking: calls the
    checkoverflow variant with setflags=false and discards the overflow
    location it reports. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      ignoredovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,
        false,ignoredovloc);
    end;
  { Transfers the raw 64 bit contents of the integer register pair
    intreg (reglo, reghi) into mm register mmreg with a single VMOV.
    No conversion is performed; mmsize must be OS_F64. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { raw data transfer only, conversions are not supported }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      { VMOV cannot generate an FPU exception, so no check is needed }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers the raw 64 bit contents of mm register mmreg into the
    integer register pair intreg (reglo, reghi) with a single VMOV.
    No conversion is performed; mmsize must be OS_F64. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { raw data transfer only, conversions are not supported }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      { VMOV cannot generate an FPU exception, so no check is needed }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
    end;
  3157. procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
  3158. var
  3159. tmpreg : tregister;
  3160. b : byte;
  3161. begin
  3162. ovloc.loc:=LOC_VOID;
  3163. case op of
  3164. OP_NEG,
  3165. OP_NOT :
  3166. internalerror(2012022501);
  3167. else
  3168. ;
  3169. end;
  3170. if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
  3171. begin
  3172. case op of
  3173. OP_ADD:
  3174. begin
  3175. if is_shifter_const(lo(value),b) then
  3176. begin
  3177. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3178. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3179. end
  3180. else
  3181. begin
  3182. tmpreg:=cg.getintregister(list,OS_32);
  3183. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3184. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3185. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3186. end;
  3187. if is_shifter_const(hi(value),b) then
  3188. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
  3189. else
  3190. begin
  3191. tmpreg:=cg.getintregister(list,OS_32);
  3192. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3193. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3194. end;
  3195. end;
  3196. OP_SUB:
  3197. begin
  3198. if is_shifter_const(lo(value),b) then
  3199. begin
  3200. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3201. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3202. end
  3203. else
  3204. begin
  3205. tmpreg:=cg.getintregister(list,OS_32);
  3206. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3207. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3208. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3209. end;
  3210. if is_shifter_const(hi(value),b) then
  3211. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
  3212. else
  3213. begin
  3214. tmpreg:=cg.getintregister(list,OS_32);
  3215. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3216. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3217. end;
  3218. end;
  3219. else
  3220. internalerror(200502131);
  3221. end;
  3222. if size=OS_64 then
  3223. begin
  3224. { the arm has an weired opinion how flags for SUB/ADD are handled }
  3225. ovloc.loc:=LOC_FLAGS;
  3226. case op of
  3227. OP_ADD:
  3228. ovloc.resflags:=F_CS;
  3229. OP_SUB:
  3230. ovloc.resflags:=F_CC;
  3231. else
  3232. internalerror(2019050918);
  3233. end;
  3234. end;
  3235. end
  3236. else
  3237. begin
  3238. case op of
  3239. OP_AND,OP_OR,OP_XOR:
  3240. begin
  3241. cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
  3242. cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
  3243. end;
  3244. OP_ADD:
  3245. begin
  3246. if is_shifter_const(aint(lo(value)),b) then
  3247. begin
  3248. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3249. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3250. end
  3251. else
  3252. begin
  3253. tmpreg:=cg.getintregister(list,OS_32);
  3254. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3255. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3256. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3257. end;
  3258. if is_shifter_const(aint(hi(value)),b) then
  3259. list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3260. else
  3261. begin
  3262. tmpreg:=cg.getintregister(list,OS_32);
  3263. cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
  3264. list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
  3265. end;
  3266. end;
  3267. OP_SUB:
  3268. begin
  3269. if is_shifter_const(aint(lo(value)),b) then
  3270. begin
  3271. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3272. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3273. end
  3274. else
  3275. begin
  3276. tmpreg:=cg.getintregister(list,OS_32);
  3277. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3278. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3279. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3280. end;
  3281. if is_shifter_const(aint(hi(value)),b) then
  3282. list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3283. else
  3284. begin
  3285. tmpreg:=cg.getintregister(list,OS_32);
  3286. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3287. list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
  3288. end;
  3289. end;
  3290. else
  3291. internalerror(2003083101);
  3292. end;
  3293. end;
  3294. end;
  { Emits a 64-bit operation on register pairs: regdst := regsrc2 <op> regsrc1
    (for OP_SUB the operands are passed to SUB/SBC as regsrc2, regsrc1).

    list             - instruction list the code is appended to
    op               - OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT and
                       every other operation raise an internal error
    size             - OS_64 selects carry based overflow flags, any other size
                       selects the signed overflow flag
    setflags         - request that the flags reflect the result; the code
                       generator's cgsetflags field has the same effect
    ovloc            - LOC_VOID on return unless flags were requested for
                       ADD/SUB, in which case it is LOC_FLAGS plus the condition
                       that signals overflow }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      wantflags : boolean;

    { low word with S postfix, high word with carry; the high word instruction
      only updates the flags when the caller wants to inspect them, otherwise
      the flags are released right after it }
    procedure emit_pair(oplo,ophi : tasmop;const lhs,rhs : tregister64);
      begin
        cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(setoppostfix(taicpu.op_reg_reg_reg(oplo,regdst.reglo,lhs.reglo,rhs.reglo),PF_S));
        if wantflags then
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(ophi,regdst.reghi,lhs.reghi,rhs.reghi),PF_S))
        else
          begin
            list.concat(taicpu.op_reg_reg_reg(ophi,regdst.reghi,lhs.reghi,rhs.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
      end;

    begin
      ovloc.loc:=LOC_VOID;
      { unary operations are not handled by this routine }
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      { flags are only tracked for additions and subtractions }
      wantflags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);

      case op of
        OP_ADD:
          emit_pair(A_ADD,A_ADC,regsrc1,regsrc2);
        OP_SUB:
          emit_pair(A_SUB,A_SBC,regsrc2,regsrc1);
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
          end;
        else
          internalerror(2003083104);
      end;

      if wantflags then
        begin
          ovloc.loc:=LOC_FLAGS;
          { for OS_64 the carry is tested: set after ADD, clear after SUB;
            every other size tests the signed overflow flag }
          if size<>OS_64 then
            ovloc.resflags:=F_VS
          else if op=OP_ADD then
            ovloc.resflags:=F_CS
          else
            ovloc.resflags:=F_CC;
        end;
    end;
  { Creates the integer register allocator for Thumb code. R0-R6 are always
    handed to the allocator; R7 is added unless the current procedure uses it
    as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    var
      r7_is_framepointer : boolean;
    begin
      inherited init_register_allocators;
      r7_is_framepointer:=assigned(current_procinfo) and
        (current_procinfo.framepointer=NR_R7);
      if not r7_is_framepointer then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocators (in that order) and
    then lets the inherited implementation finish the cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      i : integer;
    begin
      for i:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[i]].free;
      inherited done_register_allocators;
    end;
  { Emits the Thumb procedure prologue: pushes the registers that must be
    preserved, reserves the local stack area and, if a separate frame pointer
    is used, initialises it from the stack pointer. CFI records are emitted
    alongside.

    list         - instruction list the prologue is appended to
    localsize    - requested size of the local area in bytes (rounded up to a
                   multiple of 4 and then to the local alignment)
    nostackframe - when true nothing is emitted at all }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);

          { save int registers: everything used in the procedure that is not
            volatile according to the calling convention }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

          { with a separate frame pointer R7 and R14 are always saved,
            otherwise R14 is always saved }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            include(regs,RS_R14);

          { safely estimate stack size: large frames need R4 as scratch
            register for the stack pointer adjustment below }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;

          registerarea:=0;
          { do not save integer registers if the procedure does not return }
          if po_noreturn in current_procinfo.procdef.procoptions then
            regs:=[];

          if regs<>[] then
            begin
              { NOTE(review): the total computed by this first pass is reset
                before it is read; kept as is because the CFI offsets below
                may have been meant to depend on it - confirm }
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
              { we need to run the loop twice to get cfi right }
              registerarea:=0;
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  begin
                    inc(registerarea,4);
                    current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
                  end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  { the frame may not have grown beyond the estimate; report
                    this through the regular internal error mechanism only,
                    without writing diagnostics to standard output }
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    internalerror(2013040601)
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

              { 508 itself fits into a single stack pointer adjustment, the
                same limit g_proc_exit uses for its ADD; treating it as the
                two-step case would emit a useless SUB with a zero immediate }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4 }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  { NOTE(review): this no longer influences the PUSH emitted
                    above; R4 is only saved if the estimate earlier added it }
                  include(regs,RS_R4);
                end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;

          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { frame pointer := stack pointer }
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
              current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
            end;
        end;
    end;
  { Emits the Thumb procedure epilogue.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true only a plain return through R14 is emitted

    Procedures marked noreturn get no exit code at all. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      regidx : byte;
      poplist : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      usesstackparas : Boolean;

    { return through R14: BX when the CPU has it, otherwise MOV PC,R14 }
    procedure emit_plain_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          emit_plain_return;
          exit;
        end;

      usesstackparas:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers restored by the final POP: the non-volatile registers used
        in the procedure, the PC and, if present, the frame pointer }
      poplist:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(poplist,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(poplist,getsupreg(current_procinfo.framepointer));

      { four bytes per register in the pop list }
      savedbytes:=0;
      for regidx:=RS_R0 to RS_R15 do
        if regidx in poplist then
          inc(savedbytes,4);
      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if usesstackparas then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      { nothing further is emitted for a separate frame pointer on
        non-darwin targets }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; an empty area needs no instruction }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if poplist=[] then
        emit_plain_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,poplist));
    end;
  { Loads a value of size fromsize from Ref into reg.

    If fromsize is at least as large as tosize the load is done with tosize.
    References whose alignment (1 or 2) is smaller than the access size are
    read in byte or halfword pieces that are shifted and ORed together; all
    other loads go through handle_load_store. Sizes other than 8, 16 and 32
    bit raise an internal error. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      partref: treference;
      partreg,addrreg : tregister;
      step,i : integer;

    { chooses the reference used for the piecewise loads: the original one if
      it is simple enough, otherwise its address is materialised in a fresh
      register first }
    procedure select_part_ref;
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { advances the piece reference by distance, loads the next piece into the
      scratch register, shifts it into position and ORs it into reg }
    procedure merge_next_part(partsize : tcgsize;distance,shiftby : longint);
      begin
        inc(partref.offset,distance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        list.concat(taicpu.op_reg_const(A_LSL,partreg,shiftby));
        list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,partreg),PF_S));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { instruction postfix for the aligned load }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { the least significant piece is loaded first; on big endian
            targets it sits at the highest address, so walk downwards }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                select_part_ref;
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { the upper byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  merge_next_part(OS_8,step,8)
                else
                  merge_next_part(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                select_part_ref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_next_part(OS_16,step*2,16);
                  end
                else
                  begin
                    { four bytes }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for i:=1 to 3 do
                      merge_next_part(OS_8,step,8*i);
                  end;
              end;
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg. Values accepted by is_thumb_imm are loaded
    with a flag setting MOV; everything else is placed as a 32-bit word in the
    local data of the current procedure and fetched with a PC relative LDR.
    Only 8, 16 and 32 bit sizes are accepted. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { emit "label: .long a" into the local data and load it from there }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry before the constant is appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the self parameter of procdef at every caller side
    location of that parameter (register or memory). Offsets that
    is_thumb_imm accepts are subtracted directly; otherwise the offset is
    loaded from the local data into R4, which is saved and restored around
    the operation. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      paraloc : Pcgparalocation;

    { saves R4 and loads ioffset into it from a freshly emitted, 4 byte
      aligned literal in the local data; the caller has to pop R4 again }
    procedure push_r4_and_load_offset;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
        (selfsym.typ=paravarsym)) then
        internalerror(2003052504);

      paraloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while paraloc<>nil do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register)
                else
                  begin
                    push_r4_and_load_offset;
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R4,paraloc^.register);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,paraloc^.reference.index,paraloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,selfref)
                else
                  begin
                    { NOTE(review): selfref was set up before R4 is pushed;
                      if its base is the stack pointer the offset may need to
                      account for that push - confirm }
                    push_r4_and_load_offset;
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R4,selfref);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end
            else
              internalerror(2003091804);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  { Thumb front end for the inherited handle_load_store. If the combination
    of instruction, postfix and reference is one of the cases listed below,
    the address of ref is first computed into a new register and the access
    is done through that register with offset 0. Returns whatever the
    inherited implementation returns for the reference finally used. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      href : treference;
      addrreg : TRegister;
      word_access,
      byte_access,
      half_access,
      sp_involved,
      needs_address : boolean;
    begin
      href:=ref;

      { classify the access; byte and halfword accesses can be spelled with a
        postfix or with a dedicated opcode }
      word_access:=(oppostfix=PF_None) and ((op=A_LDR) or (op=A_STR));
      byte_access:=((oppostfix=PF_B) and ((op=A_LDR) or (op=A_STR))) or
        ((oppostfix=PF_None) and ((op=A_LDRB) or (op=A_STRB)));
      half_access:=((oppostfix=PF_H) and ((op=A_LDR) or (op=A_STR))) or
        ((oppostfix=PF_None) and ((op=A_LDRH) or (op=A_STRH)));
      sp_involved:=(ref.base=NR_STACK_POINTER_REG) or
        (ref.index=NR_STACK_POINTER_REG);

      needs_address:=
        { LDR/STR limitations }
        (word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         (sp_involved or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (sp_involved or (abs(ref.offset)>62) or odd(ref.offset))) or
        { stack pointer relative word loads }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any other load with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if needs_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(href,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  { Emits dst := dst <op> src (for OP_NEG/OP_NOT: dst := <op> src) using
    two operand Thumb instructions.

    OP_DIV and OP_IDIV are not supported and raise an internal error.
    OP_ROL is only supported for 32-bit sizes and is emulated with ROR.
    maybeadjustresult is called afterwards for every operation. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { simulate ROL by ror'ing 32-value: tmpreg:=32-src, and the
              rotation has to use tmpreg as count; rotating by src itself
              would rotate in the wrong direction }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            { generic case: opcode and postfix come from the lookup tables }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := dst <op> a for a constant a.

    Additions and subtractions whose negated constant is accepted by
    is_thumb_imm are turned into the opposite operation first. ADD/SUB with a
    constant accepted by is_thumb_imm use an immediate instruction; a few
    trivial multiplications, divisions and ANDs are simplified; shifts use an
    immediate shift; everything else loads the constant into a new register
    and falls back to a_op_reg_reg. maybeadjustresult is called at the end. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { prefer the operation whose constant can be encoded; flag handling
        (setflags) is not considered here yet }
      if {$ifopt R+}(a<>-2147483648) and{$endif} is_thumb_imm(-a) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
          //!!! overflow location tracking is disabled here:
          //!!!   OP_ADD: ovloc.loc:=LOC_FLAGS; ovloc.resflags:=F_CS;
          //!!!   OP_SUB: ovloc.loc:=LOC_FLAGS; ovloc.resflags:=F_CC;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(2003082903);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(2012051802);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
          { x := x AND $FFFFFFFF leaves the register unchanged, so no
            instruction is emitted }
          else if (op = OP_AND) and (not(dword(a))=0) then
            // do nothing
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              { shift by an immediate amount }
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { general fallback: materialise the constant in a register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg(list,op,size,constreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src <op> a. An addition of a positive multiple of 4 up to
    1020 to R13 with a destination other than R13 is emitted as one three
    operand ADD; every other case is handled by the inherited method. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_plus_imm : boolean;
    begin
      sp_plus_imm:=(op=OP_ADD) and
        (src=NR_R13) and (dst<>NR_R13) and
        (a>0) and (a<=1020) and ((a mod 4)=0);
      if not sp_plus_imm then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition f in reg: reg becomes 1 if the condition
    holds and 0 otherwise. This is done with a conditional branch around two
    flag setting MOVs; the flags register is released once both paths are
    emitted. The size parameter is not used. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      lbl_true,lbl_done : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(lbl_true);
      current_asmdata.getjumplabel(lbl_done);

      { branch to the "true" path when the condition holds }
      condjump:=setcondition(taicpu.op_sym(A_B,lbl_true),flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { false path }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,lbl_done));

      { true path }
      cg.a_label(list,lbl_true);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lbl_done);
    end;
  { Creates the register allocators for Thumb-2 code: the integer allocator
    (without R9 on iOS), an FPU allocator when the selected FPU has FPA
    registers, and the MM allocator via init_mmregister_allocator. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and MM register allocators (in that order) and
    then lets the inherited implementation finish the cleanup. }
  procedure tthumb2cgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      i : integer;
    begin
      for i:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[i]].free;
      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX) and records in the current
    procedure's flags that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
      list.concat(taicpu.op_reg(A_BLX, reg));
    end;
  { Loads the constant a into reg.

    Strategies are tried in this order:
      1. MOV  reg,#a        when a passes is_thumb32_imm
      2. MVN  reg,#not(a)   when not(a) passes is_thumb32_imm
      3. MOVW reg,#a        when a fits in the low 16 bits
      4. LDR  reg,[pc,...]  from a 32 bit entry appended to the local data
                            of the current procedure
    Only 8, 16 and 32 bit integer sizes are accepted; anything else raises
    internalerror(2002090909). }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if a=(a and $FFFF) then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value behind a fresh label in the
        local data and load it PC relative }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);

      reference_reset(poolref,4,[]);
      { symboldata must refer to the label entry, so take it before the
        constant itself is appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;

      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value from memory reference Ref into reg.

    fromsize is clamped to tosize when it is not smaller. When the reference
    alignment is 1 or 2 and smaller than the size to load, 16 and 32 bit
    values are assembled from byte/halfword loads that are ORed together
    with increasing left shifts; everything else is a single load emitted by
    handle_load_store. A signed 8 bit source with a 16 bit destination is
    followed by an extra register-to-register conversion. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      pieceref : treference;
      piecereg,addrreg : tregister;
      shiftop : tshifterop;
      step : integer;

    { Initialises pieceref for the piecewise loads: simple references are
      used directly, complicated ones (symbol, index register, offset
      outside -255..maxoffset, or reg also used by the reference) are first
      resolved into a fresh address register. }
    procedure init_pieceref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-255) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(pieceref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          pieceref:=ref;
      end;

    { Advances pieceref by "advance" bytes, loads the next piece into
      piecereg and ORs it into reg shifted left by "bits". }
    procedure merge_next_piece(piecesize : tcgsize; advance : integer; bits : byte);
      begin
        inc(pieceref.offset,advance);
        a_internal_load_ref_reg(list,piecesize,piecesize,pieceref,piecereg);
        shiftop.shiftimm:=bits;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,piecereg,shiftop));
      end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      { select the load postfix matching the source size }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082913);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { pieces are always read starting with the least significant one,
            so on big endian targets the address decreases from piece to
            piece }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                init_pieceref(4094);
                if target_info.endian=endian_big then
                  inc(pieceref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                piecereg:=getintregister(list,OS_INT);
                { low byte }
                a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                { high byte, sign extended for a signed source }
                if FromSize=OS_16 then
                  merge_next_piece(OS_8,step,8)
                else
                  merge_next_piece(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                piecereg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr }
                init_pieceref(4092);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,reg);
                    merge_next_piece(OS_16,step*2,16);
                  end
                else
                  begin
                    { four bytes }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                    merge_next_piece(OS_8,step,8);
                    merge_next_piece(OS_8,step,16);
                    merge_next_piece(OS_8,step,24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Performs "dst := dst op src". Only OP_NOT is handled here: it becomes
    MVN dst,src followed by a zero/sign extension for sizes below 32 bit.
    All other operations are passed to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { bring the inverted value back into the range of the operand size }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
        OS_32,
        OS_S32:
          { full width, nothing to adjust }
          ;
        else
          internalerror(2019050916);
      end;
    end;
  { Emits "dst := src op a" for a constant a.

    ovloc is set to LOC_VOID at entry; for ADD/SUB with requested flags and
    an unsigned size (OS_8/OS_16/OS_32) it is set to LOC_FLAGS with the
    carry based condition that signals the overflow.

    Outline:
      * ADD/SUB are swapped and the constant negated when -a is a shifter
        constant.
      * If a is a shifter constant (and op is not a multiplication), shifts
        and rotates become a MOV with shifter operand, other operations use
        the immediate form (ADD/SUB outside 0..4095 go through a register).
      * Otherwise several special cases for multiplication and AND are
        tried before falling back to loading the constant into a register
        and calling a_op_reg_reg_reg_checkoverflow.
    Finally maybeadjustresult is applied to dst. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      constreg : tregister;
      shiftop : tshifterop;
      log2 : longint;

    { true if the emitted instruction has to update the flags }
    function want_flags : boolean;
      begin
        Result:=cgsetflags or setflags;
      end;

    { PF_S when flags are wanted, otherwise the postfix with ordinal 0 }
    function flag_postfix : toppostfix;
      begin
        Result:=toppostfix(ord(cgsetflags or setflags)*ord(PF_S));
      end;

    { dst := src shifted/rotated by the immediate "amount" using "mode";
      for a=0 a plain register move is emitted instead }
    procedure emit_imm_shift(mode : tshiftmode; amount : tcgint);
      begin
        if a<>0 then
          begin
            shifterop_reset(shiftop);
            shiftop.shiftmode:=mode;
            shiftop.shiftimm:=amount;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
          end
        else
          list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
      end;

    begin
      ovloc.loc:=LOC_VOID;

      { x+a = x-(-a) and x-a = x+(-a): use the negated constant when it is
        a shifter constant }
      if (a<>-2147483648) and is_shifter_const(-a,shift) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              emit_imm_shift(SM_LSL,a);
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              { rotate left by a = rotate right by 32-a }
              emit_imm_shift(SM_ROR,32-a);
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              emit_imm_shift(SM_ROR,a);
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              emit_imm_shift(SM_LSR,a);
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              emit_imm_shift(SM_ASR,a);
            end;
          else
            begin
              if (op in [OP_SUB, OP_ADD]) and
                 ((a<0) or (a>4095)) then
                begin
                  { constant outside 0..4095: go through a register }
                  constreg:=getintregister(list,size);
                  a_load_const_reg(list,size,a,constreg);
                  if want_flags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,constreg),flag_postfix));
                end
              else
                begin
                  if want_flags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),flag_postfix));
                end;

              { unsigned overflow is reported through the carry flag }
              if want_flags and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      ;
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,log2) and not(want_flags) then
            a_op_const_reg_reg(list,OP_SHL,size,log2,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,log2) and not(want_flags) then
            begin
              if log2>32 then{roozbeh does this ever happen?}
                internalerror(2003082911);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,log2) and not(want_flags) then
            begin
              if log2>32 then{does this ever happen?}
                internalerror(2012051803);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(want_flags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op=OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
          else if (op=OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants. (A variant testing
            is_shifter_const(not(dword(a)),shift) is disabled; the
            is_thumb32_imm based checks below are used instead.) }
          else if (op=OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          else if (op=OP_AND) and (a=$FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          else if (op=OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else if (op=OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
            begin
              { the bits to clear form one contiguous field: copy and BFC }
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
            end
          else
            begin
              { generic fallback: constant in a register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;

      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 opcode used for each generic code generator operation.
      A_NONE marks operations without a direct three register instruction.
      NOTE(review): the per-entry annotations assume the usual TOpCG
      declaration order (NONE, MOVE, ADD, AND, DIV, IDIV, IMUL, MUL, NEG,
      NOT, OR, SAR, SHL, SHR, SUB, XOR, ROL, ROR) - confirm against the
      declaration of TOpCG. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
      (A_NONE,   { presumably OP_NONE }
       A_MOV,    { presumably OP_MOVE }
       A_ADD,    { presumably OP_ADD  }
       A_AND,    { presumably OP_AND  }
       A_UDIV,   { presumably OP_DIV  }
       A_SDIV,   { presumably OP_IDIV }
       A_MUL,    { presumably OP_IMUL }
       A_MUL,    { presumably OP_MUL  }
       A_NONE,   { presumably OP_NEG  }
       A_MVN,    { presumably OP_NOT  }
       A_ORR,    { presumably OP_OR   }
       A_ASR,    { presumably OP_SAR  }
       A_LSL,    { presumably OP_SHL  }
       A_LSR,    { presumably OP_SHR  }
       A_SUB,    { presumably OP_SUB  }
       A_EOR,    { presumably OP_XOR  }
       A_NONE,   { presumably OP_ROL  }
       A_ROR);   { presumably OP_ROR  }
  { Emits "dst := src2 op src1" and optionally provides overflow
    information.

    Parameters:
      list         - instruction list the code is appended to
      op           - operation; OP_NEG/OP_NOT are rejected with
                     internalerror(200308286)
      size         - operand size; rotates require OS_32/OS_S32
      src1, src2   - source registers (src1 is the shift/rotate count for
                     rotates); they are never written by this routine
      dst          - destination register
      setflags     - request flag update in addition to cgsetflags
      ovloc        - set to LOC_VOID, or for multiplications with requested
                     flags to LOC_FLAGS/F_NE after comparing the high word
                     of the 64 bit product

    maybeadjustresult is applied to dst at the end. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value. The rotate count is computed
              in the scratch register: writing it back to src1 would destroy
              the caller's source operand. }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { overflow check requested: compute the full 64 bit product,
                  overflowreg receives the high word }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand; the
                          copy has to go into tmpreg, copying src2 to dst
                          would be a self-move and leave tmpreg undefined }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension
                      of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: see the note in the overflow
                          checking branch, the copy must target tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { generic three register form; the S postfix is added when the
              flags are wanted }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Turns the condition described by f into a 0/1 value in reg using an
    IT block: ITE <cond>; MOV<cond> reg,#1; MOV<not cond> reg,#0.
    The size parameter is not used by this implementation. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,cond));
      { "then" slot: condition holds }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      { "else" slot: condition does not hold }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Generates the procedure prologue.

    Nothing is emitted when nostackframe is set. Otherwise the routine
      * determines which FPA registers have to be saved (FPA targets only),
      * sets up R12/the frame pointer when a frame pointer is in use,
      * pushes the non-volatile integer registers that are in use (plus R14
        and the frame pointer where required) with STMFD,
      * reserves the local stack area, padded so that the stack keeps the
        configured local alignment,
      * stores the FPA registers with SFM. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      memref : treference;
      shift : byte;
      fpfirst,fplast,
      sreg : byte;
      savedregs : tcpuregisterset;
      misalign : pint;
      hasframeptr,hasfpa : boolean;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      misalign:=0;
      if nostackframe then
        exit;

      hasfpa:=FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype];
      hasframeptr:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      fpfirst:=RS_NO;
      fplast:=RS_NO;
      if hasfpa then
        begin
          { save floating point registers? }
          for sreg:=RS_F0 to RS_F7 do
            if sreg in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if fpfirst=RS_NO then
                  fpfirst:=sreg;
                fplast:=sreg;
                inc(misalign,12);
              end;
        end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if hasframeptr then
        begin
          { keep the incoming stack pointer in R12 }
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(memref,4,[]);
      memref.index:=NR_STACK_POINTER_REG;
      memref.addressmode:=AM_PREINDEXED;
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if hasframeptr then
        savedregs:=savedregs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (savedregs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(savedregs,RS_R14);
      if savedregs<>[] then
        begin
          { every pushed register moves the stack pointer by 4 bytes }
          for sreg:=RS_R0 to RS_R15 do
            if sreg in savedregs then
              inc(misalign,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,savedregs),PF_FD));
        end;

      if hasframeptr then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      misalign:=misalign mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { pad the local area so that the stack pointer ends up aligned }
          localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(localsize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size is not encodable as immediate: use R12 as scratch }
              if not(hasframeptr) then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      { store the FPA registers that were found above }
      if hasfpa and (fpfirst<>RS_NO) then
        begin
          reference_reset(memref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the SFM addressing mode: compute the
                address in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,fpfirst,R_SUBWHOLE),
            fplast-fpfirst+1,memref));
        end;
    end;
  { Generates the procedure epilogue.

    Procedures marked noreturn get no exit code at all. Without a stack
    frame only "BX R14" is emitted. Otherwise the FPA registers are reloaded
    with LFM (FPA targets only), the local stack area is released and the
    saved integer registers are popped with LDMFD, loading R15 in place of
    the saved R14; if there is nothing to pop, "BX R14" returns.
    The parasize parameter is not used by this implementation. }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      memref : treference;
      fpfirst,fplast,
      sreg : byte;
      shift : byte;
      restoreregs : tcpuregisterset;
      framesize : longint;
      misalign : pint;
    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      misalign:=0;
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        begin
          { restore floating point register }
          fpfirst:=RS_NO;
          fplast:=RS_NO;
          { find the range of FPA registers that was saved on entry }
          for sreg:=RS_F0 to RS_F7 do
            if sreg in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if fpfirst=RS_NO then
                  fpfirst:=sreg;
                fplast:=sreg;
                { floating point register space is already included in
                  the size returned by calc_stackframe_size below, so
                  misalign is not increased by 12 here }
              end;
          if fpfirst<>RS_NO then
            begin
              reference_reset(memref,4,[]);
              if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  { offset too large for the LFM addressing mode: compute
                    the address in R12 }
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  memref.base:=NR_R12;
                end
              else
                begin
                  memref.base:=current_procinfo.framepointer;
                  memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,fpfirst,R_SUBWHOLE),
                fplast-fpfirst+1,memref));
            end;
        end;

      { integer registers to pop; the return address goes straight into R15 }
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (restoreregs<>[]) then
        begin
          exclude(restoreregs,RS_R14);
          include(restoreregs,RS_R15);
        end;
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        restoreregs:=restoreregs+[RS_FRAME_POINTER_REG,RS_R15];

      for sreg:=RS_R0 to RS_R15 do
        if sreg in restoreregs then
          inc(misalign,4);
      misalign:=misalign mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if (framesize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { same padding computation as in the prologue }
          framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(framesize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,framesize));
            end
          else
            begin
              { size is not encodable as immediate: use R12 as scratch }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if restoreregs=[] then
        list.concat(taicpu.op_reg(A_BX,NR_R14))
      else
        begin
          reference_reset(memref,4,[]);
          memref.index:=NR_STACK_POINTER_REG;
          memref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,memref,R_INTREGISTER,R_SUBWHOLE,restoreregs),PF_FD));
        end;
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" after rewriting
    ref (a local copy) into a form the instruction can address, and returns
    the reference that was finally used.

    Rewrites performed, in order:
      1. a reference with only an index register gets it as base,
      2. symbols that are not PC relative and offsets that are out of range
         are materialised in a temporary register (symbols via an entry in
         the local data of the procedure, plain offsets via
         a_load_const_reg),
      3. base+index+offset is reduced by adding the offset to a register,
      4. a PC base combined with a shifted index is copied to a register,
      5. for floating point/VFP loads and stores an index register is
         folded into the base. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      addrreg : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      isfloatop : boolean;
      combineop : tasmop;

    { true if symbol or offset of ref cannot be encoded directly and have
      to be moved into a register first }
    function needs_fixup : boolean;
      begin
        Result:=true;
        { absolute symbols can't be handled directly, we've to store the
          symbol reference in the text segment and access it pc relative.
          For now, we assume that references where base or index equals to
          PC are already relative, all other references are assumed to be
          absolute and thus they need to be handled extra.
          A proper solution would be to change refoptions to a set and
          store the information if the symbol is absolute or relative
          there. }
        if assigned(ref.symbol) and
           not(is_pc(ref.base)) and
           not(is_pc(ref.index)) then
          exit;
        { [#xxx] isn't a valid address operand }
        if (ref.base=NR_NO) and (ref.index=NR_NO) then
          exit;
        { general offset range accepted here (a lower limit of -4095 is
          not used) }
        if (ref.offset<-255) or (ref.offset>4095) then
          exit;
        { tighter range for signed byte and halfword accesses }
        if (oppostfix in [PF_SB,PF_H,PF_SH]) and
           ((ref.offset<-255) or (ref.offset>255)) then
          exit;
        { floating point accesses: word aligned offset within +/-1020,
          and the usual pc relative symbol handling assumes possible
          offsets of +/- 4095 }
        if isfloatop and
           ((ref.offset<-1020) or
            (ref.offset>1020) or
            ((abs(ref.offset) mod 4)<>0) or
            assigned(ref.symbol)) then
          exit;
        Result:=false;
      end;

    begin
      addrreg:=NR_NO;
      isfloatop:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if ref.base=NR_NO then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      if needs_fixup then
        begin
          reference_reset(poolref,4,[]);
          { load symbol }
          addrreg:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;
              { the kind of entry depends on the addressing kind }
              case ref.refaddr of
                addr_gottpoff:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsgd:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsdesc:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset));
                addr_tpoff:
                  begin
                    if assigned(ref.relsymbol) or (ref.offset<>0) then
                      Internalerror(2019092807);
                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
                  end;
                else
                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              end;

              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,addrreg,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            a_load_const_reg(list,OS_ADDR,ref.offset,addrreg);

          { merge the temporary register into the reference }
          if ref.base=NR_NO then
            ref.base:=addrreg
          else if ref.index<>NR_NO then
            begin
              list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,ref.base,addrreg));
              ref.base:=addrreg;
            end
          else
            begin
              ref.index:=addrreg;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base+index+offset cannot be encoded: add the offset to a register }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if addrreg=NR_NO then
            begin
              addrreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,addrreg);
              ref.base:=addrreg;
            end
          else
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,addrreg,addrreg);
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode<>sm_none) then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV,addrreg,NR_R15));
          ref.base:=addrreg;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if isfloatop and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(2003091202);
          if addrreg=NR_NO then
            begin
              { no temporary yet: base+index goes into a new register }
              addrreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,ref.base,ref.index));
              ref.base:=addrreg;
            end
          else
            begin
              { reuse the temporary, honouring the sign of the index }
              if ref.signindex<0 then
                combineop:=A_SUB
              else
                combineop:=A_ADD;
              if ref.base=addrreg then
                list.concat(taicpu.op_reg_reg_reg(combineop,addrreg,addrreg,ref.index))
              else
                begin
                  if ref.index<>addrreg then
                    internalerror(2004031602);
                  list.concat(taicpu.op_reg_reg_reg(combineop,addrreg,ref.base,addrreg));
                  ref.base:=addrreg;
                end;
            end;
          ref.index:=NR_NO;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result:=ref;
    end;
  { Copies multimedia register reg1 to reg2.

    Only the single precision case (OS_F32 to OS_F32) generates code: a
    VMOV.F32 that is also registered as a move instruction.
    NOTE(review): for every other size combination, including
    OS_F64->OS_F64 and OS_F32->OS_F64, nothing is emitted at all; the
    VMOV/VCVT sequences for these cases are not implemented. Confirm that
    callers never rely on them. The shuffle parameter is not used. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      move : taicpu;
    begin
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      move:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1),PF_F32);
      list.Concat(move);
      add_move_instruction(move);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads an MM register from memory for the Thumb-2 code generator.

    Delegates to handle_load_store with the VLDR opcode and no postfix,
    passing reg as the destination and ref as the source reference.
    fromsize, tosize and shuffle are not consulted here. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list, A_VLDR, PF_None, reg, ref);
    end;
  { Stores an MM register to memory for the Thumb-2 code generator.

    Delegates to handle_load_store with the VSTR opcode and no postfix,
    passing reg as the register to store and ref as the target reference.
    fromsize, tosize and shuffle are not consulted here. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      { VSTR cannot generate an FPU exception, so we do not need a check here }
      handle_load_store(list, A_VSTR, PF_None, reg, ref);
    end;
  { Moves an integer register into an MM register (Thumb-2).

    Only a target size of OS_F32 is supported: a VMOV mmreg,intreg is
    appended to list. Any other tosize raises internalerror 2012100813.
    fromsize is not consulted, and the shuffle=nil test is disabled as in
    the original condition. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves an MM register into an integer register (Thumb-2).

    Only a source size of OS_F32 is supported: a VMOV intreg,mmreg is
    appended to list. Any other fromsize raises internalerror 2012100814.
    tosize is not consulted, and the shuffle=nil test is disabled as in
    the original condition. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          { VMOV cannot generate an FPU exception, so we do not need a check here }
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
        end;
    end;
  { 64-bit register/register operation for the Thumb-2 code generator.

    Only OP_NEG is handled here; every other operation is forwarded
    unchanged to the inherited implementation.

    OP_NEG computes regdst := 0 - regsrc as a flag-linked pair:
      RSB (with S postfix)  regdst.reglo, regsrc.reglo, 0
      MOV                   zeroreg, 0
      SBC                   regdst.reghi, zeroreg, regsrc.reghi
    The default flags register is allocated before the RSB and released
    after the SBC. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low half: 0 - lo, setting the flags for the high half }
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { high half: 0 - hi with borrow; the zero operand is held in a scratch register }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64-bit register/register operation for the Thumb (16-bit) code generator,
    performed on the low and high 32-bit halves of the register pairs.

    list   - instruction list the generated code is appended to
    op     - OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB;
             any other operation raises internalerror 2003083105
    size   - not consulted by this implementation
    regsrc - source register pair
    regdst - destination register pair; for ADD/SUB it is also the left
             operand of the two-operand instructions

    For the carry-linked operations (NEG, ADD, SUB) the default flags
    register is allocated before the low-half instruction and released
    after the high-half instruction. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { regdst := 0; regdst := regdst - regsrc (with borrow into the high half).
              NOTE(review): regdst is zeroed before regsrc is read, so this
              presumably requires regdst and regsrc to be distinct registers;
              the two flag-setting MOVs are also emitted before the flags
              are allocated - confirm both are intended. }
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations have no cross-half dependency }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, matching the allocation above (as OP_NEG does) }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, matching the allocation above (as OP_NEG does) }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64-bit constant/register operation for the Thumb (16-bit) code generator,
    performed on the low and high 32-bit halves of value and reg.

    list  - instruction list the generated code is appended to
    op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any other operation
            raises internalerror 2003083106
    size  - not consulted by this implementation
    value - 64-bit constant operand
    reg   - register pair that is both left operand and destination

    For ADD/SUB the low half uses an immediate when it lies in 0..255 and a
    scratch register otherwise; the high half is always loaded into a
    scratch register and combined with ADC/SBC. The default flags register
    is allocated before the low-half instruction and released after the
    high-half instruction. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations have no cross-half dependency }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                { the constant is loaded before the flags are allocated }
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            { NOTE(review): this constant load sits between the flag-producing
              ADD and the ADC that consumes the carry - confirm that
              a_load_const_reg never emits a carry-modifying instruction. }
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,reg.reghi,tmpreg),PF_S));
            { release the flags allocated for the ADD/ADC pair }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))))
              end
            else
              begin
                { the constant is loaded before the flags are allocated }
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            { NOTE(review): this constant load sits between the flag-producing
              SUB and the SBC that consumes the borrow - confirm that
              a_load_const_reg never emits a carry-modifying instruction. }
            tmpreg:=cg.getintregister(list,OS_32);
            { cast like the OP_ADD branch so the high half is passed as a signed value }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg),PF_S));
            { release the flags allocated for the SUB/SBC pair }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects for the selected
    instruction set.

    GenerateThumb2Code is tested first, then GenerateThumbCode; if neither
    holds the plain ARM generators are created. Each variant assigns cg and
    cg64. casmoptimizer is assigned for the Thumb-2 and ARM variants only:
    the Thumb variant leaves it untouched because its optimizer assignment
    is disabled. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { default: ARM instruction set }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5055. end.