cgcpu.pas 223 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm is shared between all arm architectures.
    It derives from tcg; tcgarm and tthumbcgarm (declared further down in
    this unit) derive from it. }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;
    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  protected
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
    { creates rg[R_MMREGISTER] depending on the capabilities of the selected fpu type }
    procedure init_mmregister_allocator;
  public
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;
    { move instructions }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    { NOTE(review): the two a_internal_* functions return a treference; callers
      in this unit reuse it for a following store (see tcgarm.a_op_const_ref) }
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
    { comparison operations }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
      l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;
    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    { virtual: tthumbcgarm and tthumb2cgarm provide their own versions }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
    { try to generate an optimized 32 bit multiplication, returns true if it was successfully generated }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    { clear out potential overflow bits left by 8 or 16 bit operations,
      i.e. the upper 24/16 bits of a register after an operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
    procedure g_maybe_tls_init(list : TAsmList); override;
  end;
  { tcgarm is shared between normal arm and thumb-2.
    It adds the integer arithmetic and constant/memory load routines on top
    of tbasecgarm; tarmcgarm and tthumb2cgarm derive from it. }
  tcgarm = class(tbasecgarm)
    { two-operand forms; in this unit they forward to the three-operand forms below }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    { three-operand forms; implemented by calling the *_checkoverflow variants with setflags=false }
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
    {Multiply two 32-bit registers into lo and hi 32-bit registers}
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { normal arm cg: only supplies the set-up and tear-down of the register
    allocators on top of tcgarm }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { 64 bit cg for all arm flavours; adds no members of its own to tcg64f32
    and serves as common ancestor of tcg64farm and tthumbcg64farm }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm is shared between normal arm and thumb-2.
    Overrides the 64 bit arithmetic operations and the transfers between a
    64 bit integer register pair and an mm register. }
  tcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { 64 bit counterpart of tarmcgarm (judging by the name); inherits everything
    from tcg64farm unchanged }
  tarmcg64farm = class(tcg64farm)
  end;
  { NOTE(review): presumably the code generator for the original Thumb
    instruction set (name based, confirm). It derives directly from
    tbasecgarm, not from tcgarm, and therefore carries its own versions of
    the arithmetic, load and entry/exit routines. }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
    { replaces the virtual tbasecgarm.handle_load_store }
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { 64 bit companion of tthumbcgarm (judging by the name); overrides only the
    two-operand 64 bit operations of tbasecg64farm }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { NOTE(review): presumably the Thumb-2 code generator (name based, confirm).
    It derives from tcgarm, which is documented as shared between normal arm
    and thumb-2, and overrides the routines listed here. }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    { replaces the virtual tbasecgarm.handle_load_store }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { 64 bit companion of tthumb2cgarm (judging by the name); overrides only
    a_op64_reg_reg of tcg64farm }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { translates a generic comparison operator (topcmp) into the ARM condition
      code used for the conditional instruction; the entries must stay in the
      declaration order of topcmp, the first entry maps to C_NONE }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
      C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
    { NOTE(review): presumably the stack page size in bytes assumed for Windows
      targets; its users are outside the visible part of this unit - confirm }
    winstackpagesize = 4096;
  174. function get_fpu_postfix(def : tdef) : toppostfix;
  175. procedure create_codegen;
  176. implementation
  177. uses
  178. globals,verbose,systems,cutils,
  179. aopt,aoptcpu,
  180. fmodule,
  181. symconst,symsym,symtable,
  182. tgobj,
  183. procinfo,cpupi,
  184. paramgr;
  185. { Range check must be disabled explicitly as conversions between signed and unsigned
  186. 32-bit values are done without explicit typecasts }
  187. {$R-}
  { Returns the opcode postfix that belongs to the float type described by def:
    PF_S for s32real, PF_D for s64real and PF_E for s80real.
    Raises an internal error if def is not a float definition or if its float
    type is none of the three above. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Sets up the register allocators of the normal ARM code generator:
    the integer allocator (its register list depends on the target system and
    on the frame pointer in use), the FPA allocator if the selected fpu has
    FPA registers, and finally the mm allocator. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we always save R14, so we can use it }
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure, so it may be allocated }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { same list without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { the FPA register file is only set up if the fpu type provides it }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, fpu and mm register allocators (in that order) and
    then lets the inherited implementation finish. }
  procedure tarmcgarm.done_register_allocators;
    var
      regtype : tregistertype;
    begin
      for regtype in [R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER] do
        rg[regtype].free;
      inherited done_register_allocators;
    end;
  { Loads the constant a into reg. The cheapest possible form is tried first:
      1. a single MOV if a is a shifter constant,
      2. a single MVN if not(a) is a shifter constant,
      3. MOV+ORR if a can be split into two shifter constants,
      4. MVN+BIC if not(a) can be split into two shifter constants,
      5. otherwise the value is placed in the local data of the current
         procedure and loaded pc-relative with LDR.
    Only 8, 16 and 32 bit sizes are accepted, anything else is an internal error. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rotation : byte;
      poollabel : tasmlabel;
      poolref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      if is_shifter_const(a,rotation) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_shifter_const(not(a),rotation) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: emit the value as 32 bit data behind a fresh label in the
        local data of the procedure and load it relative to the pc }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.

    If the source is at least as large as the destination, only tosize bytes
    are loaded. A plain LDR with the matching postfix is used whenever possible.
    The value is assembled from several smaller loads combined with ORR and a
    left shift when
      - the reference is aligned to 1 or 2 bytes only and this is less than
        the size to load, or
      - the cpu lacks CPUARM_HAS_ALL_MEM and a halfword has to be loaded.
    On cpus without CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned and
    sign extended afterwards. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg,tmpreg2 : tregister;
      so : tshifterop;
      dir : integer;
    begin
      { never load more than the destination can hold }
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        { integer sizes, signed and unsigned: select the postfix of the load }
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308297);
      end;
      { without CPUARM_HAS_ALL_MEM the byte is loaded unsigned here, the sign
        extension follows at the end of this routine }
      if (fromsize=OS_S8) and
         (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
        oppostfix:=PF_B;
      if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
         ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
          (oppostfix in [PF_SH,PF_H])) then
        begin
          { dir is the offset step from one partial load to the next; on big
            endian targets the start offset is moved to the last part first and
            the parts are then read walking downwards }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-4095) or
                  (ref.offset>4094) or
                  { sometimes the compiler reused registers }
                  (reg=ref.index) or
                  (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
                tmpreg:=getintregister(list,OS_INT);
                { first byte loaded goes into bits 0..7 of reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { the second byte is loaded signed for OS_S16, so that the
                  combined value carries the sign }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                { reg:=reg or (tmpreg shl 8) }
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-4095) or
                  (ref.offset>4092) or
                  { sometimes the compiler reused registers }
                  (reg=ref.index) or
                  (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;
                shifterop_reset(so);so.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfword loads: reg:=first or (second shl 16) }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end
                else
                  begin
                    { four byte loads; tmpreg is used for the second and, once
                      its value has been merged, again for the fourth byte }
                    tmpreg2:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
                    so.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
                    so.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end;
              end
            else
              { byte sized loads need no special treatment }
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,oppostfix,reg,ref);
      if (fromsize=OS_S8) and
         (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
        { the byte was loaded unsigned above, sign extend it now }
        a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
      else if (fromsize=OS_S8) and (tosize = OS_16) then
        { signed byte into an unsigned 16 bit destination: zero extend the
          lower 16 bits of the loaded value }
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Subtracts ioffset from the self parameter of procdef, wherever the caller
    side parameter location puts it (register or stack reference).
    If ioffset is no shifter constant it is loaded into R12 first.
    Raises an internal error if procdef has no 'self' parameter symbol or if
    a location of an unexpected kind is found. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      rotation : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(2003052503);

      { walk over all locations that make up the self parameter }
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,rotation) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,rotation) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as the parameter described by paraloc.
    The parameter must have a simple location: it is either loaded into the
    parameter register or stored to the parameter's stack slot. Any other
    kind of location is an internal error. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackslot: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { address the slot via the index register and offset of the location }
            reference_reset(stackslot,paraloc.alignment,[]);
            stackslot.offset:=paraloc.location^.reference.offset;
            stackslot.base:=paraloc.location^.reference.index;
            a_load_const_ref(list,size,a,stackslot);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);
        else
          internalerror(2002081101);
      end;
    end;
  { Copies (part of) a parameter from ref to the parameter location reference
    paralocref. A 64 bit float parameter whose current location is 32 bit wide
    is copied as a 4 byte block, everything else is left to the inherited
    implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Creates the mm register allocator matching the selected fpu type:
      - FPUARM_HAS_32REGS and FPUARM_HAS_VFP_DOUBLE: D0..D31 as R_SUBFD
      - FPUARM_HAS_32REGS only:                      S0..S31 as R_SUBFS
      - FPUARM_HAS_VFP_EXTENSION only:               D0..D15 as R_SUBFD
    With none of these capabilities no allocator is created. }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
                [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                 RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
                 RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                 RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                ],first_mm_imreg,[])
          else
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
                [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
                 RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,
                 RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
                 RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
                ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Passes the address of r as the parameter described by paraloc.
    The parameter must have a simple location. For a register location the
    address is computed directly into that register; for a stack location it
    is computed into a temporary register and stored to the slot. Any other
    kind of location is an internal error. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      stackslot: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { compute the address into a scratch register ... }
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            { ... and write it to the slot described by the location }
            reference_reset(stackslot,paraloc.alignment,[]);
            stackslot.offset:=paraloc.location^.reference.offset;
            stackslot.base:=paraloc.location^.reference.index;
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,stackslot);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call (BL) to the routine with the assembler name s.
    If weak is true, the symbol is referenced weakly. The reference is marked
    as pic if the target uses a GOT for pic and pic code is being created,
    otherwise as a full address. The current procedure is flagged as
    performing a call. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(target,callee,0,sizeof(pint),[]);

      if not(tf_pic_uses_got in target_info.flags) or
         not(cs_create_pic in current_settings.moduleswitches) then
        target.refaddr:=addr_full
      else
        target.refaddr:=addr_pic;

      { always use BL, as newer binutils apparently do not translate blx;
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,target));
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
        if not(pi_do_call in current_procinfo.flags) then
        internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits an indirect call to the address held in reg: BLX if the cpu has it,
    otherwise the pair "mov r14,pc" / "mov pc,reg". The current procedure is
    flagged as performing a call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          { no BLX: set up the return address by hand, then jump }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
        if not(pi_do_call in current_procinfo.flags) then
        internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Applies "reg := reg Op a" by forwarding to the three-operand variant with
    reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      { source and destination are the same register }
      a_op_const_reg_reg(list,Op,size,a,reg,reg);
    end;
  { Applies the operation Op with the constant a to the memory location ref:
    the value is loaded into a temporary register, the result is computed into
    a second temporary register and then stored through the reference that
    the load returned. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      loadedref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      { keep the reference handed back by the load for the store below }
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,Op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { Two-operand register operation "dst := dst Op src".
    OP_NEG is emitted as "rsb dst,src,#0" followed by maybeadjustresult.
    OP_NOT on 8/16 bit sizes is emitted as MVN of the source shifted left by
    24/16 bits, followed by a right shift by the same amount (arithmetic for
    signed sizes, logical for unsigned ones); on other sizes a plain MVN is
    used. All other operations are forwarded to a_op_reg_reg_reg. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shift : tshifterop;
    begin
      case Op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shift.shiftimm:=24
                else
                  shift.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8, OS_S16] then
                  shift.shiftmode:=SM_ASR
                else
                  shift.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
              end;
          end;
        else
          a_op_reg_reg_reg(list,Op,size,src,dst,dst);
      end;
    end;
  { Lookup tables indexed by the generic operation TOpCG. The entries must stay
    in the declaration order of TOpCG; A_NONE marks an operation that has no
    direct opcode in the respective table.
    NOTE(review): the declaration order of TOpCG is not visible in this unit,
    verify it against cgbase before changing any table. }
  const
    { the shift and rotate operations have no entry here (A_NONE), in contrast
      to op_reg_opcg2asmop below }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
    { same as above, but with ASR/LSL/LSR/ROR filled in }
    op_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
    { postfix per operation for thumb: PF_S or PF_None
      NOTE(review): presumably PF_S marks the operations that thumb only offers
      in the flag setting form - confirm at the place of use }
    op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
      (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
       PF_None,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { Three-operand operation with a constant, "dst := src op a".
    Forwards to the overflow checking variant with setflags=false; the
    overflow location it reports is discarded. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      ignoredovloc : tlocation;
    begin
      { only needed to satisfy the var parameter }
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,ignoredovloc);
    end;
  { Three-operand register operation on src1, src2 and dst.
    Forwards to the overflow checking variant with setflags=false; the
    overflow location it reports is discarded. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      ignoredovloc : tlocation;
    begin
      { only needed to satisfy the var parameter }
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,ignoredovloc);
    end;
  { Maps a shift or rotate operation to the shifter operand mode used to encode it.
    OP_ROL maps to SM_ROR as well, so callers have to adjust the rotate amount
    themselves. Any other operation raises internal error 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace the 32 bit multiplication "dst := src * a" by a short sequence of
    MOV/ADD/SUB/RSB instructions with shifted operands.

    list    - instruction list the replacement sequence is appended to
    a       - constant factor
    src,dst - source and destination register
    Returns true if a replacement was emitted; returns false without emitting
    anything if the estimated effort is too high, in which case the caller has to
    generate the multiplication itself. }
  662. function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  663. var
  664. multiplier : dword;
  665. power : longint;
  666. shifterop : tshifterop;
  667. bitsset : byte;
  668. negative : boolean;
  669. first : boolean;
  670. b,
  671. cycles : byte;
  672. maxeffort : byte;
  673. begin
  674. result:=true;
  675. cycles:=0;
  676. negative:=a<0;
  677. shifterop.rs:=NR_NO;
  678. shifterop.shiftmode:=SM_LSL;
  { a negative factor costs one extra instruction: the final RSB dst,dst,#0 }
  679. if negative then
  680. inc(cycles);
  { the sequences below work on the magnitude of the factor }
  681. multiplier:=dword(abs(a));
  { number of set bits, not counting bit 0 }
  682. bitsset:=popcnt(multiplier and $fffffffe);
  683. { heuristics to estimate how much instructions are reasonable to replace the mul,
  684. this is currently based on XScale timings }
  685. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  686. actual multiplication, this requires min. 1+4 cycles
  687. because the first shift imm. might cause a stall and because we need more instructions
  688. when replacing the mul we generate max. 3 instructions to replace this mul }
  689. maxeffort:=3;
  690. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  691. a ldr, so generating one more operation to replace this is beneficial }
  692. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  693. inc(maxeffort);
  694. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  695. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  696. dec(maxeffort);
  697. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  698. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  699. dec(maxeffort);
  700. { most simple cases }
  701. if a=1 then
  702. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  703. else if a=0 then
  704. a_load_const_reg(list,OS_32,0,dst)
  705. else if a=-1 then
  706. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  707. { add up ?
  708. basically, one add is needed for each bit being set in the constant factor
  709. however, the least significant bit is for free, it can be hidden in the initial
  710. instruction
  711. }
  712. else if (bitsset+cycles<=maxeffort) and
  713. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  714. begin
  715. first:=true;
  { consume the factor from its highest set bit downwards, one instruction per bit }
  716. while multiplier<>0 do
  717. begin
  718. shifterop.shiftimm:=BsrDWord(multiplier);
  { an odd factor is only seen in the first iteration, as bit 0 is cleared right here:
    the lowest and the highest bit are covered by a single ADD dst,src,src LSL #n }
  719. if odd(multiplier) then
  720. begin
  721. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  722. dec(multiplier);
  723. end
  724. else
  { the first instruction initialises dst, all later ones accumulate into it }
  725. if first then
  726. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  727. else
  728. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  729. first:=false;
  { the highest bit has been handled }
  730. dec(multiplier,1 shl shifterop.shiftimm);
  731. end;
  732. if negative then
  733. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  734. end
  735. { subtract from the next greater power of two? }
  736. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  737. begin
  738. first:=true;
  739. while multiplier<>0 do
  740. begin
  { first iteration: continue with the difference to 1 shl power and start from
    src shl power; power is presumably the exponent returned by the nextpowerof2 call
    in the condition above - TODO confirm }
  741. if first then
  742. begin
  743. multiplier:=(1 shl power)-multiplier;
  744. shifterop.shiftimm:=power;
  745. end
  746. else
  747. shifterop.shiftimm:=BsrDWord(multiplier);
  { odd difference: RSB dst,src,src LSL #n computes the shifted src minus src }
  748. if odd(multiplier) then
  749. begin
  750. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  751. dec(multiplier);
  752. end
  753. else
  754. if first then
  755. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  756. else
  757. begin
  { subtract the part of the difference given by its highest remaining bit }
  758. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  759. dec(multiplier,1 shl shifterop.shiftimm);
  760. end;
  761. first:=false;
  762. end;
  763. if negative then
  764. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  765. end
  766. else
  767. result:=false;
  768. end;
  { Emits code for "dst := src op a" with a constant operand a.

    list     - instruction list the generated code is appended to
    op       - operation to carry out; OP_NEG and OP_NOT are rejected with an internal
               error when a is encodable as shifter constant
    size     - operand size, passed to maybeadjustresult once the code is emitted
    a        - constant operand
    src,dst  - source and destination register
    setflags - request a flag setting instruction (also requested by the cgsetflags field)
    ovloc    - set to LOC_VOID, or to LOC_FLAGS if a flag setting ADD/SUB was emitted
               for an 8, 16 or 32 bit unsigned size; not written at all on the early
               exits for OP_NONE and OP_MOVE }
  769. procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  770. var
  771. shift, lsb, width : byte;
  772. tmpreg : tregister;
  773. so : tshifterop;
  774. l1 : longint;
  775. imm1, imm2: DWord;
  776. begin
  { op and a are passed by reference here; presumably trivial operations are reduced
    to OP_NONE or OP_MOVE, which are handled right below - TODO confirm }
  777. optimize_op_const(size, op, a);
  778. case op of
  779. OP_NONE:
  780. begin
  781. if src <> dst then
  782. a_load_reg_reg(list, size, size, src, dst);
  783. exit;
  784. end;
  785. OP_MOVE:
  786. begin
  787. a_load_const_reg(list, size, a, dst);
  788. exit;
  789. end;
  790. else
  791. ;
  792. end;
  793. ovloc.loc:=LOC_VOID;
  { if the negated constant is a shifter constant, swap ADD and SUB and negate the
    constant; not done for -2147483648 and not when setflags is set }
  794. if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
  795. case op of
  796. OP_ADD:
  797. begin
  798. op:=OP_SUB;
  799. a:=aint(dword(-a));
  800. end;
  801. OP_SUB:
  802. begin
  803. op:=OP_ADD;
  804. a:=aint(dword(-a));
  805. end
  806. else
  807. ;
  808. end;
  { the constant can be used as immediate operand; multiplications are handled in the
    else branch as they take no immediate }
  809. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  810. case op of
  811. OP_NEG,OP_NOT:
  812. internalerror(200308281);
  813. OP_SHL,
  814. OP_SHR,
  815. OP_ROL,
  816. OP_ROR,
  817. OP_SAR:
  818. begin
  819. if a>32 then
  820. internalerror(200308294);
  821. shifterop_reset(so);
  822. so.shiftmode:=opshift2shiftmode(op);
  { a rotate left by a is emitted as a rotate right by 32-a }
  823. if op = OP_ROL then
  824. so.shiftimm:=32-a
  825. else
  826. so.shiftimm:=a;
  827. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  828. end;
  829. else
  830. {if (op in [OP_SUB, OP_ADD]) and
  831. ((a < 0) or
  832. (a > 4095)) then
  833. begin
  834. tmpreg:=getintregister(list,size);
  835. list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
  836. list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
  837. list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
  838. ));
  839. end
  840. else}
  841. begin
  842. if cgsetflags or setflags then
  843. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression evaluates to PF_S if flags are requested and to the
    postfix with ordinal value 0 otherwise }
  844. list.concat(setoppostfix(
  845. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  846. end;
  { report the flag condition for the caller's overflow check: F_CS after an ADD,
    F_CC after a SUB; any other operation is an internal error here }
  847. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  848. begin
  849. ovloc.loc:=LOC_FLAGS;
  850. case op of
  851. OP_ADD:
  852. ovloc.resflags:=F_CS;
  853. OP_SUB:
  854. ovloc.resflags:=F_CC;
  855. else
  856. internalerror(2019050922);
  857. end;
  858. end;
  859. end
  860. else
  861. begin
  862. { there could be added some more sophisticated optimizations }
  863. if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  864. a_op_reg_reg(list,OP_NEG,size,src,dst)
  865. { we do this here instead in the peephole optimizer because
  866. it saves us a register }
  867. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  868. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  869. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  870. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  871. begin
  872. if l1>32 then{roozbeh does this ever happen?}
  873. internalerror(200308296);
  874. shifterop_reset(so);
  875. so.shiftmode:=SM_LSL;
  876. so.shiftimm:=l1;
  877. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  878. end
  879. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  880. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  881. begin
  882. if l1>32 then{does this ever happen?}
  883. internalerror(201205181);
  884. shifterop_reset(so);
  885. so.shiftmode:=SM_LSL;
  886. so.shiftimm:=l1;
  887. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  888. end
  889. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  890. begin
  891. { nothing to do on success }
  892. end
  893. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  894. broader range of shifterconstants.}
  895. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  896. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  897. { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
  898. into the following instruction}
  899. else if (op = OP_AND) and
  900. is_continuous_mask(aword(a), lsb, width) and
  901. ((lsb = 0) or ((lsb + width) = 32)) then
  902. begin
  903. shifterop_reset(so);
  { masks covering the low 16 or 8 bits are emitted as UXTH/UXTB from cpu_armv6 on }
  904. if (width = 16) and
  905. (lsb = 0) and
  906. (current_settings.cputype >= cpu_armv6) then
  907. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  908. else if (width = 8) and
  909. (lsb = 0) and
  910. (current_settings.cputype >= cpu_armv6) then
  911. list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
  912. else if lsb = 0 then
  913. begin
  { mask at the low end: shift the unwanted upper bits out to the left and shift
    back with zero fill }
  914. so.shiftmode:=SM_LSL;
  915. so.shiftimm:=32-width;
  916. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  917. so.shiftmode:=SM_LSR;
  918. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  919. end
  920. else
  921. begin
  { mask at the high end: shift the unwanted lower bits out to the right and shift back }
  922. so.shiftmode:=SM_LSR;
  923. so.shiftimm:=lsb;
  924. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  925. so.shiftmode:=SM_LSL;
  926. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  927. end;
  928. end
  { the inverted mask can be split into two immediates: clear the bits with two BICs }
  929. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  930. begin
  931. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
  932. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  933. end
  { the constant can be split into two immediates: apply the operation twice; only
    done when no flags are requested }
  934. else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
  935. not(cgsetflags or setflags) and
  936. split_into_shifter_const(a, imm1, imm2) then
  937. begin
  938. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
  939. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  940. end
  941. else
  942. begin
  { general case: load the constant into a temporary register and use the
    register variant, which also takes care of setflags and ovloc }
  943. tmpreg:=getintregister(list,size);
  944. a_load_const_reg(list,size,a,tmpreg);
  945. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  946. end;
  947. end;
  948. maybeadjustresult(list,op,size,dst);
  949. end;
  { Emits code for "dst := src2 op src1" on registers.

    list      - instruction list the generated code is appended to
    op        - operation to carry out; OP_NEG, OP_NOT, OP_DIV and OP_IDIV are
                rejected with an internal error
    size      - operand size, passed to maybeadjustresult once the code is emitted
    src1,src2 - source registers; for shifts and rotates src1 holds the amount
    dst       - destination register
    setflags  - request a flag setting sequence (also requested by the cgsetflags field)
    ovloc     - LOC_VOID on return, except after a flag setting multiplication on a
                CPU with CPUARM_HAS_UMULL: then LOC_FLAGS with condition F_NE }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { MOV dst,src2 shifted by the register src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { overflow checking: do a long multiplication, the upper word of the
                  product goes to overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: swapping the
                          operands does not help, so the operand is copied to a scratch
                          register. The copy must go to tmpreg; copying to dst would
                          be a move of the register onto itself and leave tmpreg
                          undefined. }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: compare the upper word with dst shifted right
                      arithmetically by 31, i.e. with the sign extension of the result }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: compare the upper word with zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { all three registers are the same: copy the operand to tmpreg
                          (not to dst, see above) so that rd<>rm holds }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { the postfix expression evaluates to PF_S if flags are requested and to the
              postfix with ordinal value 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits a multiplication of src1 by src2 whose result goes to the register pair
    dstlo/dsthi.

    With CPUARM_HAS_UMULL an UMULL (size OS_32) or SMULL (size OS_S32) is emitted;
    any other size is an internal error. Without it only the case dsthi=NR_NO is
    supported, which is emitted as a plain MUL into dstlo; asking for the upper
    word is an internal error.
    Result registers passed as NR_NO are replaced by freshly allocated ones. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              if dstlo=NR_NO then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL,dstlo,src1,src2));
            end;
        end
      else
        begin
          list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
          case size of
            OS_S32:
              mulop:=A_SMULL;
            OS_32:
              mulop:=A_UMULL;
            else
              InternalError(2014060802);
          end;
          { The caller may leave out dstlo or dsthi if it is not interested in that
            half, the instruction still needs valid registers for both. For
            dsthi = NR_NO a 32x32=32 bit multiplication could be used instead. }
          if dstlo=NR_NO then
            dstlo:=getintregister(list,size);
          if dsthi=NR_NO then
            dsthi:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg_reg(mulop,dstlo,dsthi,src1,src2));
        end;
    end;
  { Emits the load or store instruction op with postfix oppostfix for register reg and
    the memory reference ref. The reference is first rewritten, using temporary
    registers where needed, into a form the checks below accept for the instruction.

    list      - instruction list the generated code is appended to
    op        - load/store opcode
    oppostfix - postfix of the opcode; selects which offset limits are checked
    reg       - register to load or store
    ref       - reference to access; passed by value, so the caller's copy is not changed
    Returns the rewritten reference that was used in the emitted instruction. }
  1084. function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  1085. var
  1086. tmpreg1,tmpreg2 : tregister;
  1087. begin
  { tmpreg1 records the temporary register introduced last, NR_NO while there is none }
  1088. tmpreg1:=NR_NO;
  1089. { Be sure to have a base register }
  1090. if (ref.base=NR_NO) then
  1091. begin
  1092. if ref.shiftmode<>SM_None then
  1093. internalerror(2014020707);
  1094. ref.base:=ref.index;
  1095. ref.index:=NR_NO;
  1096. end;
  1097. { absolute symbols can't be handled directly, we've to store the symbol reference
  1098. in the text segment and access it pc relative
  1099. For now, we assume that references where base or index equals to PC are already
  1100. relative, all other references are assumed to be absolute and thus they need
  1101. to be handled extra.
  1102. A proper solution would be to change refoptions to a set and store the information
  1103. if the symbol is absolute or relative there.
  1104. }
  { the condition below lists every case in which the reference is handed to fixref:
    a non pc relative symbol, no register at all, or an offset outside the limits
    checked for the given opcode and postfix; fixref presumably rewrites ref in place
    into a plain register based reference - TODO confirm }
  1105. if (assigned(ref.symbol) and
  1106. not(is_pc(ref.base)) and
  1107. not(is_pc(ref.index))
  1108. ) or
  1109. { [#xxx] isn't a valid address operand }
  1110. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  1111. (ref.offset<-4095) or
  1112. (ref.offset>4095) or
  1113. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  1114. ((ref.offset<-255) or
  1115. (ref.offset>255)
  1116. )
  1117. ) or
  1118. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1119. ((ref.offset<-1020) or
  1120. (ref.offset>1020) or
  1121. ((abs(ref.offset) mod 4)<>0)
  1122. )
  1123. ) or
  1124. ((GenerateThumbCode) and
  1125. (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
  1126. ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
  1127. ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
  1128. ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
  1129. ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
  1130. )
  1131. ) then
  1132. begin
  1133. fixref(list,ref);
  1134. end;
  1135. if GenerateThumbCode then
  1136. begin
  1137. { certain thumb load require base and index }
  1138. if (oppostfix in [PF_SB,PF_SH]) and
  1139. (ref.base<>NR_NO) and (ref.index=NR_NO) then
  1140. begin
  { supply an index register holding zero }
  1141. tmpreg1:=getintregister(list,OS_ADDR);
  1142. a_load_const_reg(list,OS_ADDR,0,tmpreg1);
  1143. ref.index:=tmpreg1;
  1144. end;
  1145. { "hi" registers cannot be used as base or index }
  1146. if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
  1147. ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
  1148. begin
  { copy the base to a newly allocated register }
  1149. tmpreg1:=getintregister(list,OS_ADDR);
  1150. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
  1151. ref.base:=tmpreg1;
  1152. end;
  1153. if getsupreg(ref.index) in [RS_R8..RS_R14] then
  1154. begin
  { same for the index }
  1155. tmpreg1:=getintregister(list,OS_ADDR);
  1156. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
  1157. ref.index:=tmpreg1;
  1158. end;
  1159. end;
  1160. { fold if there is base, index and offset, however, don't fold
  1161. for vfp memory instructions because we later fold the index }
  1162. if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1163. (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  1164. begin
  1165. if tmpreg1<>NR_NO then
  1166. begin
  { NOTE(review): the sum ends up in tmpreg2, but neither ref.base nor ref.index is
    visibly switched to tmpreg2 before ref.offset is cleared below - confirm that
    the offset is not lost on this path }
  1167. tmpreg2:=getintregister(list,OS_ADDR);
  1168. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
  1169. tmpreg1:=tmpreg2;
  1170. end
  1171. else
  1172. begin
  { add the offset to the base in a new register and use that as base }
  1173. tmpreg1:=getintregister(list,OS_ADDR);
  1174. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
  1175. ref.base:=tmpreg1;
  1176. end;
  1177. ref.offset:=0;
  1178. end;
  1179. { floating point operations have only limited references
  1180. we expect here, that a base is already set }
  1181. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  1182. begin
  1183. if ref.shiftmode<>SM_none then
  1184. internalerror(200309121);
  { fold the index into the base, reusing the temporary register if there is one }
  1185. if tmpreg1<>NR_NO then
  1186. begin
  1187. if ref.base=tmpreg1 then
  1188. begin
  1189. if ref.signindex<0 then
  1190. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
  1191. else
  1192. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
  1193. ref.index:=NR_NO;
  1194. end
  1195. else
  1196. begin
  { the temporary register has to be the index in this case }
  1197. if ref.index<>tmpreg1 then
  1198. internalerror(200403161);
  1199. if ref.signindex<0 then
  1200. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
  1201. else
  1202. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
  1203. ref.base:=tmpreg1;
  1204. ref.index:=NR_NO;
  1205. end;
  1206. end
  1207. else
  1208. begin
  { NOTE(review): unlike the two branches above, ref.signindex is not evaluated here
    and the index is always added - confirm }
  1209. tmpreg1:=getintregister(list,OS_ADDR);
  1210. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
  1211. ref.base:=tmpreg1;
  1212. ref.index:=NR_NO;
  1213. end;
  1214. end;
  { finally emit the instruction itself with the rewritten reference }
  1215. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  1216. Result := ref;
  1217. end;
  { Stores register reg to the memory reference ref.

    list     - instruction list the generated code is appended to
    fromsize - size of the value in reg; replaced by tosize if it is not smaller
    tosize   - size of the memory access; only 8, 16 and 32 bit integer sizes and
               OS_F32 are accepted, anything else is an internal error
    The store is emitted with a single STR variant unless the reference is less
    aligned than the access, or a halfword store is needed on a CPU without
    CPUARM_HAS_ALL_MEM; then it is split into several smaller stores. }
  1218. procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
  1219. var
  1220. oppostfix:toppostfix;
  1221. usedtmpref: treference;
  1222. tmpreg : tregister;
  1223. dir : integer;
  1224. begin
  1225. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  1226. FromSize := ToSize;
  1227. case ToSize of
  1228. { signed integer registers }
  1229. OS_8,
  1230. OS_S8:
  1231. oppostfix:=PF_B;
  1232. OS_16,
  1233. OS_S16:
  1234. oppostfix:=PF_H;
  1235. OS_32,
  1236. OS_S32,
  1237. { for vfp value stored in integer register }
  1238. OS_F32:
  1239. oppostfix:=PF_None;
  1240. else
  1241. InternalError(2003082912);
  1242. end;
  1243. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
  1244. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  1245. (oppostfix =PF_H)) then
  1246. begin
  { the parts are stored starting with the least significant one; dir is the
    direction in which the address moves from one part to the next }
  1247. if target_info.endian=endian_big then
  1248. dir:=-1
  1249. else
  1250. dir:=1;
  1251. case FromSize of
  1252. OS_16,OS_S16:
  1253. begin
  { two byte stores: reg first, then reg shr 8 at the neighbouring address;
    on big endian targets the first store goes to offset+1 }
  1254. tmpreg:=getintregister(list,OS_INT);
  1255. usedtmpref:=ref;
  1256. if target_info.endian=endian_big then
  1257. inc(usedtmpref.offset,1);
  1258. usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
  1259. inc(usedtmpref.offset,dir);
  1260. a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
  1261. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1262. end;
  1263. OS_32,OS_S32:
  1264. begin
  1265. tmpreg:=getintregister(list,OS_INT);
  1266. usedtmpref:=ref;
  1267. if ref.alignment=2 then
  1268. begin
  { alignment 2: two halfword stores, reg first, then reg shr 16 }
  1269. if target_info.endian=endian_big then
  1270. inc(usedtmpref.offset,2);
  1271. usedtmpref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,usedtmpref);
  1272. a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,tmpreg);
  1273. inc(usedtmpref.offset,dir*2);
  1274. a_internal_load_reg_ref(list,OS_16,OS_16,tmpreg,usedtmpref);
  1275. end
  1276. else
  1277. begin
  { otherwise four byte stores; tmpreg is shifted right by another 8 bits before
    each of the three remaining stores }
  1278. if target_info.endian=endian_big then
  1279. inc(usedtmpref.offset,3);
  1280. usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
  1281. a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
  1282. inc(usedtmpref.offset,dir);
  1283. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1284. a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
  1285. inc(usedtmpref.offset,dir);
  1286. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1287. a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
  1288. inc(usedtmpref.offset,dir);
  1289. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1290. end;
  1291. end
  1292. else
  { any other size is stored with a single instruction }
  1293. handle_load_store(list,A_STR,oppostfix,reg,ref);
  1294. end;
  1295. end
  1296. else
  1297. handle_load_store(list,A_STR,oppostfix,reg,ref);
  1298. end;
  { Stores register reg to ref with the access size tosize and returns the reference
    that was used for the (first) store instruction, as returned by handle_load_store.

    Only 8, 16 and 32 bit integer sizes are accepted, anything else is an internal
    error. On CPUs without CPUARM_HAS_ALL_MEM a 16 bit store is emitted as two byte
    stores: reg at the reference and reg shr 8 at the following byte. fromsize is
    not evaluated. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      upperref : treference;
      upperreg : TRegister;
    begin
      { select the store postfix, this also validates tosize }
      if tosize in [OS_8,OS_S8] then
        postfix:=PF_B
      else if tosize in [OS_16,OS_S16] then
        postfix:=PF_H
      else if tosize in [OS_32,OS_S32] then
        postfix:=PF_None
      else
        InternalError(2003082910);

      if not(tosize in [OS_S16,OS_16]) or
         (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        result:=handle_load_store(list,A_STR,postfix,reg,ref)
      else
        begin
          { no halfword store available: store the two bytes one after the other }
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          upperreg:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,upperreg);
          upperref:=result;
          inc(upperref.offset);
          handle_load_store(list,A_STR,PF_B,upperreg,upperref);
        end;
    end;
  { Loads register reg from ref and returns the reference that was used for the
    (first) load instruction, as returned by handle_load_store.

    fromsize selects the load postfix; only 8, 16 and 32 bit integer sizes are
    accepted, anything else is an internal error. On CPUs without CPUARM_HAS_ALL_MEM
    two cases are emitted differently, selected by tosize:
      - OS_S8: an unsigned byte load followed by a sign extension in the register
      - OS_16/OS_S16: two byte loads that are combined with ORR reg,reg,tmp LSL #8 }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shiftop : tshifterop;
      upperreg : TRegister;
      upperref : treference;
      has_all_mem : boolean;
    begin
      { select the load postfix, this also validates fromsize }
      if fromsize=OS_8 then
        postfix:=PF_B
      else if fromsize=OS_S8 then
        postfix:=PF_SB
      else if fromsize=OS_16 then
        postfix:=PF_H
      else if fromsize=OS_S16 then
        postfix:=PF_SH
      else if fromsize in [OS_32,OS_S32] then
        postfix:=PF_None
      else
        InternalError(200308291);

      has_all_mem:=CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype];
      if has_all_mem or not(tosize in [OS_S8,OS_S16,OS_16]) then
        result:=handle_load_store(list,A_LDR,postfix,reg,ref)
      else if tosize=OS_S8 then
        begin
          { no signed byte load available: load unsigned and extend afterwards }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else
        begin
          { no halfword load available: load both bytes and merge them }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          upperreg:=getintregister(list,OS_INT);
          upperref:=result;
          inc(upperref.offset);
          handle_load_store(list,A_LDR,PF_B,upperreg,upperref);
          shifterop_reset(shiftop);
          shiftop.shiftmode:=SM_LSL;
          shiftop.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,upperreg,shiftop));
        end;
    end;
  { Copies register reg1 to reg2, converting the value from fromsize to tosize.

    list     - instruction list the generated code is appended to
    fromsize - size of the value in reg1
    tosize   - size the value shall have in reg2
    If the sizes differ, the value is zero or sign extended according to the smaller
    of the two sizes: with pairs of shifts (or an AND for OS_8) before cpu_armv6 and
    with the extend instructions from cpu_armv6 on. If no conversion code was emitted
    and the registers differ, a plain MOV is emitted and registered with
    add_move_instruction. }
  1377. procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
  1378. var
  1379. so : tshifterop;
  { emits reg2 := reg shifted by shiftimm using shiftmode; reg2 and so are taken from
    the enclosing procedure. In thumb code the shift goes through a_op_const_reg_reg,
    otherwise a MOV with a shifter operand is emitted directly. }
  1380. procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
  1381. begin
  1382. if GenerateThumbCode then
  1383. begin
  1384. case shiftmode of
  1385. SM_ASR:
  1386. a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
  1387. SM_LSR:
  1388. a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
  1389. SM_LSL:
  1390. a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
  1391. else
  1392. internalerror(2013090301);
  1393. end;
  1394. end
  1395. else
  1396. begin
  1397. so.shiftmode:=shiftmode;
  1398. so.shiftimm:=shiftimm;
  1399. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
  1400. end;
  1401. end;
  1402. var
  1403. instr: taicpu;
  1404. conv_done: boolean;
  1405. begin
  1406. if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
  1407. internalerror(2002090901);
  1408. conv_done:=false;
  1409. if tosize<>fromsize then
  1410. begin
  1411. shifterop_reset(so);
  { assume a conversion is emitted; the else branches of the case statements below
    reset this for sizes that need no conversion code }
  1412. conv_done:=true;
  { the smaller of the two sizes determines the kind of extension }
  1413. if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
  1414. fromsize:=tosize;
  1415. if current_settings.cputype<cpu_armv6 then
  1416. case fromsize of
  1417. OS_8:
  1418. if GenerateThumbCode then
  1419. a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
  1420. else
  1421. list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
  1422. OS_S8:
  1423. begin
  1424. do_shift(SM_LSL,24,reg1);
  1425. if tosize=OS_16 then
  1426. begin
  { sign extend the byte to 16 bits in the upper half, then move it down with zero fill }
  1427. do_shift(SM_ASR,8,reg2);
  1428. do_shift(SM_LSR,16,reg2);
  1429. end
  1430. else
  1431. do_shift(SM_ASR,24,reg2);
  1432. end;
  1433. OS_16:
  1434. begin
  1435. do_shift(SM_LSL,16,reg1);
  1436. do_shift(SM_LSR,16,reg2);
  1437. end;
  1438. OS_S16:
  1439. begin
  1440. do_shift(SM_LSL,16,reg1);
  1441. do_shift(SM_ASR,16,reg2)
  1442. end;
  1443. else
  1444. conv_done:=false;
  1445. end
  1446. else
  1447. case fromsize of
  1448. OS_8:
  1449. if GenerateThumbCode then
  1450. list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
  1451. else
  1452. list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
  1453. OS_S8:
  1454. begin
  1455. if tosize=OS_16 then
  1456. begin
  { SXTB16 on reg1 rotated right by 16, followed by a logical shift right by 16 }
  1457. so.shiftmode:=SM_ROR;
  1458. so.shiftimm:=16;
  1459. list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
  1460. do_shift(SM_LSR,16,reg2);
  1461. end
  1462. else
  1463. list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
  1464. end;
  1465. OS_16:
  1466. list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
  1467. OS_S16:
  1468. list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
  1469. else
  1470. conv_done:=false;
  1471. end
  1472. end;
  1473. if not conv_done and (reg1<>reg2) then
  1474. begin
  1475. { same size, only a register mov required }
  1476. instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
  1477. list.Concat(instr);
  1478. { Notify the register allocator that we have written a move instruction so
  1479. it can try to eliminate it. }
  1480. add_move_instruction(instr);
  1481. end;
  1482. end;
  { Loads the floating point value at ref into the parameter described by paraloc.

    list    - instruction list the generated code is appended to
    size    - size of the floating point value
    ref     - memory location of the value
    paraloc - parameter description; its chain of locations is walked from
              paraloc.location via the next pointers
    href starts as a copy of ref and is advanced by the size of each location that
    has been handled. }
  1483. procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
  1484. var
  1485. href,href2 : treference;
  1486. hloc : pcgparalocation;
  1487. begin
  1488. href:=ref;
  1489. hloc:=paraloc.location;
  1490. while assigned(hloc) do
  1491. begin
  1492. case hloc^.loc of
  1493. LOC_FPUREGISTER,LOC_CFPUREGISTER:
  1494. begin
  { NOTE(review): this load uses ref and not the advancing href, and allocparaloc is
    called with paraloc.location and not hloc; both only make a difference if this is
    not the first location of the chain - confirm that this cannot happen }
  1495. paramanager.allocparaloc(list,paraloc.location);
  1496. a_loadfpu_ref_reg(list,size,size,ref,hloc^.register);
  1497. end;
  1498. LOC_REGISTER :
  1499. case hloc^.size of
  1500. OS_32,
  1501. OS_F32:
  1502. begin
  { value passed in an integer register: plain 32 bit load }
  1503. paramanager.allocparaloc(list,paraloc.location);
  1504. a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
  1505. end;
  1506. OS_64,
  1507. OS_F64:
  { 64 bit locations are delegated to the 64 bit code generator, which gets the
    whole paraloc }
  1508. cg64.a_load64_ref_cgpara(list,href,paraloc);
  1509. else
  1510. a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
  1511. end;
  1512. LOC_REFERENCE :
  1513. begin
  { parameter passed in memory: build the destination reference from the
    location and copy the data }
  1514. reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
  1515. { concatcopy should choose the best way to copy the data }
  1516. g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
  1517. end;
  1518. else
  1519. internalerror(200408241);
  1520. end;
  { continue behind the part that has just been handled }
  1521. inc(href.offset,tcgsize2size[hloc^.size]);
  1522. hloc:=hloc^.next;
  1523. end;
  1524. end;
  { Copies the floating point register reg1 to reg2 with an MVF instruction whose
    postfix is looked up in cgsize2fpuoppostfix for tosize. fromsize is not
    evaluated. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      instr : taicpu;
    begin
      instr:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(instr,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads the floating point register reg from ref with an LDF instruction.

    The postfix is chosen by fromsize: PF_S for OS_32/OS_F32, PF_D for OS_64/OS_F64
    and PF_E for OS_F80; any other size is an internal error. If tosize differs from
    fromsize, the loaded value is additionally passed through a_loadfpu_reg_reg. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      postfix : toppostfix;
    begin
      if fromsize in [OS_32,OS_F32] then
        postfix:=PF_S
      else if fromsize in [OS_64,OS_F64] then
        postfix:=PF_D
      else if fromsize=OS_F80 then
        postfix:=PF_E
      else
        InternalError(200309021);
      handle_load_store(list,A_LDF,postfix,reg,ref);
      { convert in place if the register shall hold a different size }
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref with an STF instruction.

    The postfix is chosen by tosize: PF_S for OS_F32, PF_D for OS_F64 and PF_E for
    OS_F80; any other size is an internal error. fromsize is not evaluated. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      postfix : toppostfix;
    begin
      if tosize=OS_F32 then
        postfix:=PF_S
      else if tosize=OS_F64 then
        postfix:=PF_D
      else if tosize=OS_F80 then
        postfix:=PF_E
      else
        InternalError(200309022);
      handle_load_store(list,A_STF,postfix,reg,ref);
    end;
  { Emits a software check for pending floating point exceptions.

    Code is only generated if cs_check_fpu_exceptions is active, the selected FPU
    does not have FPUARM_HAS_EXCEPTION_TRAPPING, and either force is set or the
    current procedure is marked with FPUExceptionCheckNeeded.
    The emitted code reads FPSCR, masks it with $9f and calls FPC_THROWFPUEXCEPTION
    unless the masked value is zero. If clear is set, the FPUExceptionCheckNeeded
    mark of the current procedure is reset afterwards. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipjump : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not(cs_check_fpu_exceptions in current_settings.localswitches) then
        exit;
      if FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype] then
        exit;
      if not(force) and not(current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { read the status register and set the flags from the masked value }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { jump over the call if the masked value is zero }
      current_asmdata.getjumplabel(skiplabel);
      skipjump:=taicpu.op_sym(A_B,skiplabel);
      skipjump.is_jmp:=true;
      skipjump.condition:=C_EQ;
      list.concat(skipjump);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      a_label(list,skiplabel);

      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1591. { comparison operations }
  { Compares a register with a constant and jumps to a label if the
    comparison holds.
    list   - assembler list receiving the instructions
    size   - operand size, used when the constant has to go through a register
    cmp_op - comparison deciding whether the jump is taken
    a      - constant operand
    reg    - register operand
    l      - jump target }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      use_cmp,
      use_cmn : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);

      { can the constant be encoded directly in a CMP? }
      if GenerateThumbCode then
        use_cmp:=is_thumb_imm(a)
      else
        use_cmp:=is_shifter_const(a,rot);

      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      use_cmn:=not(use_cmp) and
        (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,rot);

      if use_cmp then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      else if use_cmn then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { not encodable: materialise the constant in a scratch register first }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;

      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.
    list             - assembler list receiving the instructions
    reverse          - true: CLZ based sequence (31-CLZ, masked to a byte);
                       false: RBIT+CLZ based sequence, yielding $ff when the
                       CLZ result is 32
    srcsize, dstsize - operand sizes (not consulted here)
    src, dst         - source and destination register }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not(reverse) then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          { a CLZ result of 32 is replaced by $ff through a conditional move }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { Thumb-2 needs an IT instruction in front of the conditional MOV }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          { dst:=(31-clz(src)) and 255 }
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Compares two registers and jumps to a label if the comparison holds.
    list       - assembler list receiving the instructions
    size       - operand size (not consulted here)
    cmp_op     - comparison deciding whether the jump is taken
    reg1, reg2 - operands; the emitted instruction is CMP reg2,reg1
    l          - jump target }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      { the flags are live from the CMP until the conditional jump }
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s.
    list - assembler list receiving the instruction
    s    - name of the target symbol (referenced as AT_FUNCTION) }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to a label.
    list - assembler list receiving the instruction
    l    - jump target }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to l that is taken when the flag condition f holds.
    list - assembler list receiving the instructions
    f    - flag state selecting the branch condition
    l    - jump target
    In Thumb mode the condition is inverted to hop over an unconditional
    jump; otherwise a single conditional branch is emitted. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skip : TAsmLabel;
    begin
      if not(GenerateThumbCode) then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skip);
      branch:=setcondition(taicpu.op_sym(A_B,skip),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skip);
    end;
  { Turns a flag condition into 0/1 in a register using two conditional moves.
    list - assembler list receiving the instructions
    size - size of the result (not consulted here)
    f    - flag state to materialise
    reg  - receives 1 if the condition holds, 0 otherwise }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Emits the profiling hook: pushes R14 and calls __gnu_mcount_nc.
    list - assembler list receiving the instructions
    Only system_arm_linux is handled; any other target triggers internal
    error 2014091201. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Generates the entry code (prologue) of the current procedure.

    list         - assembler list receiving the instructions
    localsize    - number of bytes of local storage requested; rounded up
                   and padded below before it is subtracted from SP
    nostackframe - when true no instructions are emitted at all

    With a stack frame the routine, in this order:
      1. determines which non-volatile FPA/VFP registers are in use,
      2. pushes the non-volatile integer registers with STM (two STMs on
         Darwin) and sets up the frame pointer when one is used,
      3. lowers SP by the padded local size,
      4. stores the FPA/VFP registers found in step 1 at floatregstart.
    CFI directives are emitted alongside for the non-Darwin path. }
  procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      mmregs,
      regs, saveregs : tcpuregisterset;
      registerarea, offset,
      r7offset,
      stackmisalignment : pint;
      imm1, imm2: DWord;
      stack_parameters : Boolean;
    begin
      { round the requested local size up to a multiple of 4 }
      LocalSize:=align(LocalSize,4);
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      { call instruction does not put anything on the stack }
      registerarea:=0;
      { no padding register recorded yet; overwritten below if one is pushed }
      tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
      lastfloatreg:=RS_NO;
      if not(nostackframe) then
        begin
          firstfloatreg:=RS_NO;
          mmregs:=[];
          { step 1: find the floating point registers that must be preserved }
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { save floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                { remember the first and last used register; each one
                  accounts for 12 bytes of register area }
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      inc(registerarea,12);
                    end;
              end;
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin;
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones but with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin;
                { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones but with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050924);
          end;
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          { step 2: ref addresses the stack through SP, pre-indexed }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if not(target_info.system in systems_darwin) then
            begin
              a_reg_alloc(list,NR_STACK_POINTER_REG);
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { keep a copy of the entry SP in R12; it is pushed and later
                    used to derive the frame pointer }
                  a_reg_alloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
                end;
              { the (old) ARM APCS requires saving both the stack pointer (to
                crawl the stack) and the PC (to identify the function this
                stack frame belongs to) -> also save R12 (= copy of R13 on entry)
                and R15 -- still needs updating for EABI and Darwin, they don't
                need that }
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
              else
                { without a frame pointer R14 is only pushed when something
                  else is pushed or the procedure performs calls }
                if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                  include(regs,RS_R14);
              if regs<>[] then
                begin
                  { 4 bytes of register area per pushed register }
                  for r:=RS_R0 to RS_R15 do
                    if r in regs then
                      inc(registerarea,4);
                  { if the stack is not 8 byte aligned, try to add an extra register,
                    so we can avoid the extra sub/add ...,#4 later (KB) }
                  if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
                    for r:=RS_R3 downto RS_R0 do
                      if not(r in regs) then
                        begin
                          regs:=regs+[r];
                          inc(registerarea,4);
                          { recorded so that g_proc_exit can pop the same register }
                          tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
                          break;
                        end;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
                end;
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { describe the save slot of every pushed register, highest
                    register first, in steps of 4 starting at -4 }
                  offset:=-4;
                  for r:=RS_R15 downto RS_R0 do
                    if r in regs then
                      begin
                        current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),offset);
                        dec(offset,4);
                      end;
                  { the framepointer now points to the saved R15, so the saved
                    framepointer is at R11-12 (for get_caller_frame) }
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                  a_reg_dealloc(list,NR_R12);
                  current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
                end;
            end
          else
            begin
              { always save r14 if we use r7 as the framepointer, because
                the parameter offsets are hardcoded in advance and always
                assume that r14 sits on the stack right behind the saved r7
              }
              if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
                include(regs,RS_FRAME_POINTER_REG);
              if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                include(regs,RS_R14);
              if regs<>[] then
                begin
                  { on Darwin, you first have to save [r4-r7,lr], and then
                    [r8,r10,r11] and make r7 point to the previously saved
                    r7 so that you can perform a stack crawl based on it
                    ([r7] is previous stack frame, [r7+4] is return address)
                  }
                  include(regs,RS_FRAME_POINTER_REG);
                  saveregs:=regs-[RS_R8,RS_R10,RS_R11];
                  { r7offset counts the bytes occupied by the pushed registers
                    numbered below the frame pointer register }
                  r7offset:=0;
                  for r:=RS_R0 to RS_R15 do
                    if r in saveregs then
                      begin
                        inc(registerarea,4);
                        if r<RS_FRAME_POINTER_REG then
                          inc(r7offset,4);
                      end;
                  { save the registers }
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                  { make r7 point to the saved r7 (regardless of whether this
                    frame uses the framepointer, for backtrace purposes) }
                  if r7offset<>0 then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                  else
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                  { now save the rest (if any) }
                  saveregs:=regs-saveregs;
                  if saveregs<>[] then
                    begin
                      for r:=RS_R8 to RS_R11 do
                        if r in saveregs then
                          inc(registerarea,4);
                      list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                    end;
                end;
            end;
          { step 3: reserve the locals; also entered with LocalSize=0 when the
            register area leaves the stack misaligned and the procedure calls
            others or is an assembler procedure }
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { pad so that register area plus locals is a multiple of localalignmax }
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
                begin
                  { with an estimated frame size the estimate must cover the
                    real need; the remainder after the register area is used }
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    internalerror(2014030901)
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end;
              { lower SP with one SUB, two SUBs, or via R12, depending on how
                the size can be encoded }
              if is_shifter_const(localsize,shift) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if split_into_shifter_const(localsize, imm1, imm2) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                end
              else
                begin
                  if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                    a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end;
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;
          { step 4: store the floating point registers collected in step 1 }
          if (mmregs<>[]) or
             (firstfloatreg<>RS_NO) then
            begin
              reference_reset(ref,4,[]);
              { compute the save address into R12 when the offset is large or a
                VFP unit is used; otherwise address frame pointer + floatregstart
                directly }
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    { one SFM stores the whole range firstfloatreg..lastfloatreg }
                    list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VSTM takes the address register in the index field }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    { same as above, but with the R_SUBFS register subtype }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050923);
              end;
            end;
        end;
    end;
  { Generates the exit code (epilogue) of the current procedure.

    list         - assembler list receiving the instructions
    parasize     - size of the parameters (not consulted here)
    nostackframe - when true only the return instruction is emitted
                   (BX R14 if the CPU has BX, otherwise MOV PC,R14)

    With a stack frame the routine reloads the preserved FPA/VFP registers,
    rebuilds the set of integer registers that g_proc_entry pushed
    (including a possible padding register), releases the local area when
    SP is the frame base or on Darwin, and finally pops the registers with
    LDM, which also performs the return by loading R15. }
  procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      LocalSize : longint;
      firstfloatreg,lastfloatreg,
      r,
      shift : byte;
      mmregs,
      saveregs,
      regs : tcpuregisterset;
      registerarea,
      stackmisalignment: pint;
      paddingreg: TSuperRegister;
      imm1, imm2: DWord;
    begin
      if not(nostackframe) then
        begin
          registerarea:=0;
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          mmregs:=[];
          saveregs:=[];
          { find the floating point registers that were saved on entry }
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { restore floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { floating point register space is already included in
                        localsize below by calc_stackframe_size
                        inc(registerarea,12);
                      }
                    end;
              end;
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { restore vfp registers? }
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones but with a different subtype as it is done on x86 with al/ah }
                { NOTE(review): g_proc_entry masks with [0..15] when the FPU lacks
                  FPUARM_HAS_32REGS, here the mask is always [0..31] -- confirm
                  that both sets are guaranteed to be equal }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else
              internalerror(2019050908);
          end;
          if (firstfloatreg<>RS_NO) or
             (mmregs<>[]) then
            begin
              reference_reset(ref,4,[]);
              { compute the save address into R12 when the offset is large or a
                VFP unit is used; otherwise address frame pointer + floatregstart
                directly }
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    { one LFM reloads the whole range firstfloatreg..lastfloatreg }
                    list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VLDM takes the address register in the index field }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    { same as above, but with the R_SUBFS register subtype }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050921);
              end;
            end;
          { rebuild the set of integer registers to pop }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or
             (regs<>[]) or
             ((target_info.system in systems_darwin) and
              (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
            begin
              { the saved return address is popped straight into the PC }
              exclude(regs,RS_R14);
              include(regs,RS_R15);
              if (target_info.system in systems_darwin) then
                include(regs,RS_FRAME_POINTER_REG);
            end;
          if not(target_info.system in systems_darwin) then
            begin
              { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                The saved PC came after that but is discarded, since we restore
                the stack pointer }
              if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
            end
          else
            begin
              { restore R8-R11 already if necessary (they've been stored
                before the others) }
              saveregs:=regs*[RS_R8,RS_R10,RS_R11];
              if saveregs<>[] then
                begin
                  { ref is prepared here and used by the LDM of saveregs below }
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  for r:=RS_R8 to RS_R11 do
                    if r in saveregs then
                      inc(registerarea,4);
                  regs:=regs-saveregs;
                end;
            end;
          { 4 bytes of register area per register that is popped }
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);
          { reapply the stack padding reg, in case there was one, see the complementary
            comment in g_proc_entry() (KB) }
          paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
          { a value below RS_R4 means a padding register was recorded; it must
            not already be part of the register set }
          if paddingreg < RS_R4 then
            if paddingreg in regs then
              internalerror(201306190)
            else
              begin
                regs:=regs+[paddingreg];
                inc(registerarea,4);
              end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin) then
            begin
              { SP still points below the locals: release them first }
              LocalSize:=current_procinfo.calc_stackframe_size;
              if (LocalSize<>0) or
                 ((stackmisalignment<>0) and
                  ((pi_do_call in current_procinfo.flags) or
                   (po_assembler in current_procinfo.procdef.procoptions))) then
                begin
                  if pi_estimatestacksize in current_procinfo.flags then
                    LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                  else
                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                  { raise SP with one ADD, two ADDs, or via R12, depending on
                    how the size can be encoded }
                  if is_shifter_const(LocalSize,shift) then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                  else if split_into_shifter_const(localsize, imm1, imm2) then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                    end
                  else
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end;
                end;
              { Darwin: pop the separately stored R8/R10/R11 group first }
              if (target_info.system in systems_darwin) and
                 (saveregs<>[]) then
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
              if regs=[] then
                begin
                  { nothing was pushed: plain return through R14 }
                  if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                  else
                    list.concat(taicpu.op_reg(A_BX,NR_R14))
                end
              else
                begin
                  { pop the registers through SP }
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;
            end
          else
            begin
              { restore int registers and return }
              { the register block is addressed through the frame pointer }
              reference_reset(ref,4,[]);
              ref.index:=NR_FRAME_POINTER_REG;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
            end;
        end
      else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14))
    end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.
    list - assembler list receiving the instructions
    Nothing is emitted unless PIC code is generated, the procedure needs
    the GOT and the target uses a GOT for PIC. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel,
      anchor : TAsmLabel;
      usedregs : tcpuregisterset;
      sr : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before GOT initialization code, allocate registers used for procedure parameters
        to prevent usage of these registers for temp operations in later stages of code
        generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sr:=RS_R0 to RS_R3 do
        if sr in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { constant pool entry: label first, the value is appended further down }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { the pool entry holds _GLOBAL_OFFSET_TABLE_ relative to the anchor label,
        with an offset of -8; the ADD right after the anchor adds PC back in }
      current_asmdata.getaddrlabel(anchor);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,anchor,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,anchor);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sr:=RS_R3 downto RS_R0 do
        if sr in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.
    list - assembler list receiving the instructions
    ref  - reference whose address is computed; must use AM_OFFSET addressing
    r    - destination register

    Internal errors:
      200309071  - ref does not use AM_OFFSET addressing
      2014020702 - index-only reference with a shift mode
      200312023  - index-only reference with a negative index sign
      200312022  - an index without a base remains after fixref
      200312021  - base+index reference with a shift mode }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      { work on a copy, the reference is rewritten below }
      tmpref:=ref;
      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;
      { symbols, and offsets that cannot be encoded as an immediate in
        either sign, are resolved by fixref }
      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);
      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);
      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          { a_op_reg_reg_reg computes dst:=src2 op src1. A negative index
            sign means base-index, so the index has to be passed as src1 and
            the base as src2; the previous operand order produced index-base. }
          if tmpref.signindex<0 then
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            { neither base nor index: the address is the plain offset }
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                { base only: a plain move, registered as a move instruction }
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                add_move_instruction(instr);
              end;
        end;
    end;
  2272. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2273. var
  2274. tmpreg, tmpreg2 : tregister;
  2275. tmpref : treference;
  2276. l, piclabel : tasmlabel;
  2277. indirection_done : boolean;
  2278. begin
  2279. { absolute symbols can't be handled directly, we've to store the symbol reference
  2280. in the text segment and access it pc relative
  2281. For now, we assume that references where base or index equals to PC are already
  2282. relative, all other references are assumed to be absolute and thus they need
  2283. to be handled extra.
  2284. A proper solution would be to change refoptions to a set and store the information
  2285. if the symbol is absolute or relative there.
  2286. }
  2287. { create consts entry }
  2288. reference_reset(tmpref,4,[]);
  2289. current_asmdata.getjumplabel(l);
  2290. cg.a_label(current_procinfo.aktlocaldata,l);
  2291. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2292. piclabel:=nil;
  2293. tmpreg:=NR_NO;
  2294. indirection_done:=false;
  2295. if assigned(ref.symbol) then
  2296. begin
  2297. if (target_info.system=system_arm_ios) and
  2298. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2299. begin
  2300. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2301. if ref.offset<>0 then
  2302. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2303. indirection_done:=true;
  2304. end
  2305. else if ref.refaddr=addr_gottpoff then
  2306. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2307. else if ref.refaddr=addr_tlsgd then
  2308. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2309. else if ref.refaddr=addr_tlsdesc then
  2310. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2311. else if ref.refaddr=addr_tpoff then
  2312. begin
  2313. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2314. Internalerror(2019092804);
  2315. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2316. end
  2317. else if (cs_create_pic in current_settings.moduleswitches) then
  2318. if (tf_pic_uses_got in target_info.flags) then
  2319. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2320. else
  2321. begin
  2322. { ideally, we would want to generate
  2323. ldr r1, LPICConstPool
  2324. LPICLocal:
  2325. ldr/str r2,[pc,r1]
  2326. ...
  2327. LPICConstPool:
  2328. .long _globsym-(LPICLocal+8)
  2329. However, we cannot be sure that the ldr/str will follow
  2330. right after the call to fixref, so we have to load the
  2331. complete address already in a register.
  2332. }
  2333. current_asmdata.getaddrlabel(piclabel);
  2334. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2335. end
  2336. else
  2337. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2338. end
  2339. else
  2340. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2341. { load consts entry }
  2342. if not indirection_done then
  2343. begin
  2344. tmpreg:=getintregister(list,OS_INT);
  2345. tmpref.symbol:=l;
  2346. tmpref.base:=NR_PC;
  2347. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2348. if (cs_create_pic in current_settings.moduleswitches) and
  2349. (tf_pic_uses_got in target_info.flags) and
  2350. assigned(ref.symbol) then
  2351. begin
  2352. {$ifdef EXTDEBUG}
  2353. if not (pi_needs_got in current_procinfo.flags) then
  2354. Comment(V_warning,'pi_needs_got not included');
  2355. {$endif EXTDEBUG}
  2356. Include(current_procinfo.flags,pi_needs_got);
  2357. reference_reset(tmpref,4,[]);
  2358. tmpref.base:=current_procinfo.got;
  2359. tmpref.index:=tmpreg;
  2360. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2361. if ref.offset<>0 then
  2362. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2363. end;
  2364. end;
  2365. if assigned(piclabel) then
  2366. begin
  2367. cg.a_label(list,piclabel);
  2368. tmpreg2:=getaddressregister(list);
  2369. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2370. tmpreg:=tmpreg2
  2371. end;
  2372. { This routine can be called with PC as base/index in case the offset
  2373. was too large to encode in a load/store. In that case, the entire
  2374. absolute expression has been re-encoded in a new constpool entry, and
  2375. we have to remove the use of PC from the original reference (the code
  2376. above made everything relative to the value loaded from the new
  2377. constpool entry) }
  2378. if is_pc(ref.base) then
  2379. ref.base:=NR_NO;
  2380. if is_pc(ref.index) then
  2381. ref.index:=NR_NO;
  2382. if (ref.base<>NR_NO) then
  2383. begin
  2384. if ref.index<>NR_NO then
  2385. begin
  2386. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2387. ref.base:=tmpreg;
  2388. end
  2389. else
  2390. if ref.base<>NR_PC then
  2391. begin
  2392. ref.index:=tmpreg;
  2393. ref.shiftimm:=0;
  2394. ref.signindex:=1;
  2395. ref.shiftmode:=SM_None;
  2396. end
  2397. else
  2398. ref.base:=tmpreg;
  2399. end
  2400. else
  2401. ref.base:=tmpreg;
  2402. ref.offset:=0;
  2403. ref.symbol:=nil;
  2404. end;
  2405. procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  2406. var
  2407. paraloc1,paraloc2,paraloc3 : TCGPara;
  2408. pd : tprocdef;
  2409. begin
  2410. pd:=search_system_proc('MOVE');
  2411. paraloc1.init;
  2412. paraloc2.init;
  2413. paraloc3.init;
  2414. paramanager.getcgtempparaloc(list,pd,1,paraloc1);
  2415. paramanager.getcgtempparaloc(list,pd,2,paraloc2);
  2416. paramanager.getcgtempparaloc(list,pd,3,paraloc3);
  2417. a_load_const_cgpara(list,OS_SINT,len,paraloc3);
  2418. a_loadaddr_ref_cgpara(list,dest,paraloc2);
  2419. a_loadaddr_ref_cgpara(list,source,paraloc1);
  2420. paramanager.freecgpara(list,paraloc3);
  2421. paramanager.freecgpara(list,paraloc2);
  2422. paramanager.freecgpara(list,paraloc1);
  2423. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2424. alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2425. a_call_name(list,'FPC_MOVE',false);
  2426. dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2427. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2428. paraloc3.done;
  2429. paraloc2.done;
  2430. paraloc1.done;
  2431. end;
  2432. procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  2433. const
  2434. maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if might conflick with reserved ones,also +2 is used becouse of regs required for referencing}
  2435. maxtmpreg_thumb = 5;
  2436. var
  2437. srcref,dstref,usedtmpref,usedtmpref2:treference;
  2438. srcreg,destreg,countreg,r,tmpreg:tregister;
  2439. helpsize:aint;
  2440. copysize:byte;
  2441. cgsize:Tcgsize;
  2442. tmpregisters:array[1..maxtmpreg_arm] of tregister;
  2443. maxtmpreg,
  2444. tmpregi,tmpregi2:byte;
  2445. { will never be called with count<=4 }
  2446. procedure genloop(count : aword;size : byte);
  2447. const
  2448. size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  2449. var
  2450. l : tasmlabel;
  2451. begin
  2452. current_asmdata.getjumplabel(l);
  2453. if count<size then size:=1;
  2454. a_load_const_reg(list,OS_INT,count div size,countreg);
  2455. cg.a_label(list,l);
  2456. srcref.addressmode:=AM_POSTINDEXED;
  2457. dstref.addressmode:=AM_POSTINDEXED;
  2458. srcref.offset:=size;
  2459. dstref.offset:=size;
  2460. r:=getintregister(list,size2opsize[size]);
  2461. a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
  2462. a_reg_alloc(list,NR_DEFAULTFLAGS);
  2463. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
  2464. a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
  2465. a_jmp_flags(list,F_NE,l);
  2466. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2467. srcref.offset:=1;
  2468. dstref.offset:=1;
  2469. case count mod size of
  2470. 1:
  2471. begin
  2472. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2473. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2474. end;
  2475. 2:
  2476. if aligned then
  2477. begin
  2478. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2479. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2480. end
  2481. else
  2482. begin
  2483. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2484. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2485. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2486. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2487. end;
  2488. 3:
  2489. if aligned then
  2490. begin
  2491. srcref.offset:=2;
  2492. dstref.offset:=2;
  2493. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2494. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2495. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2496. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2497. end
  2498. else
  2499. begin
  2500. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2501. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2502. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2503. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2504. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2505. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2506. end;
  2507. end;
  2508. { keep the registers alive }
  2509. list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
  2510. list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
  2511. list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
  2512. end;
  2513. { save estimation, if a creating a separate ref is needed or
  2514. if we can keep the original reference while copying }
  2515. function SimpleRef(const ref : treference) : boolean;
  2516. begin
  2517. result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
  2518. ((ref.symbol=nil) and
  2519. (ref.addressmode=AM_OFFSET) and
  2520. (((ref.offset>=0) and (ref.offset+len<=31)) or
  2521. (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
  2522. { ldrh has a limited offset range }
  2523. (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
  2524. )
  2525. );
  2526. end;
  2527. { will never be called with count<=4 }
  2528. procedure genloop_thumb(count : aword;size : byte);
  2529. procedure refincofs(const ref : treference;const value : longint = 1);
  2530. begin
  2531. a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
  2532. end;
  2533. const
  2534. size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  2535. var
  2536. l : tasmlabel;
  2537. begin
  2538. current_asmdata.getjumplabel(l);
  2539. if count<size then size:=1;
  2540. a_load_const_reg(list,OS_INT,count div size,countreg);
  2541. cg.a_label(list,l);
  2542. r:=getintregister(list,size2opsize[size]);
  2543. a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
  2544. refincofs(srcref);
  2545. a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
  2546. refincofs(dstref);
  2547. a_reg_alloc(list,NR_DEFAULTFLAGS);
  2548. list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
  2549. a_jmp_flags(list,F_NE,l);
  2550. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2551. case count mod size of
  2552. 1:
  2553. begin
  2554. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2555. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2556. end;
  2557. 2:
  2558. if aligned then
  2559. begin
  2560. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2561. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2562. end
  2563. else
  2564. begin
  2565. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2566. refincofs(srcref);
  2567. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2568. refincofs(dstref);
  2569. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2570. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2571. end;
  2572. 3:
  2573. if aligned then
  2574. begin
  2575. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2576. refincofs(srcref,2);
  2577. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2578. refincofs(dstref,2);
  2579. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2580. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2581. end
  2582. else
  2583. begin
  2584. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2585. refincofs(srcref);
  2586. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2587. refincofs(dstref);
  2588. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2589. refincofs(srcref);
  2590. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2591. refincofs(dstref);
  2592. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2593. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2594. end;
  2595. end;
  2596. { keep the registers alive }
  2597. list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
  2598. list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
  2599. list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
  2600. end;
  2601. begin
  2602. if len=0 then
  2603. exit;
  2604. if GenerateThumbCode then
  2605. maxtmpreg:=maxtmpreg_thumb
  2606. else
  2607. maxtmpreg:=maxtmpreg_arm;
  2608. helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
  2609. dstref:=dest;
  2610. srcref:=source;
  2611. if cs_opt_size in current_settings.optimizerswitches then
  2612. helpsize:=8;
  2613. if aligned and (len=4) then
  2614. begin
  2615. tmpreg:=getintregister(list,OS_32);
  2616. a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
  2617. a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
  2618. end
  2619. else if aligned and (len=2) then
  2620. begin
  2621. tmpreg:=getintregister(list,OS_16);
  2622. a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
  2623. a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
  2624. end
  2625. else if (len<=helpsize) and aligned then
  2626. begin
  2627. tmpregi:=0;
  2628. { loading address in a separate register needed? }
  2629. if SimpleRef(source) then
  2630. begin
  2631. { ... then we don't need a loadaddr }
  2632. srcref:=source;
  2633. end
  2634. else
  2635. begin
  2636. srcreg:=getintregister(list,OS_ADDR);
  2637. a_loadaddr_ref_reg(list,source,srcreg);
  2638. reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
  2639. end;
  2640. while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
  2641. begin
  2642. inc(tmpregi);
  2643. tmpregisters[tmpregi]:=getintregister(list,OS_32);
  2644. a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
  2645. inc(srcref.offset,4);
  2646. dec(len,4);
  2647. end;
  2648. { loading address in a separate register needed? }
  2649. if SimpleRef(dest) then
  2650. dstref:=dest
  2651. else
  2652. begin
  2653. destreg:=getintregister(list,OS_ADDR);
  2654. a_loadaddr_ref_reg(list,dest,destreg);
  2655. reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
  2656. end;
  2657. tmpregi2:=1;
  2658. while (tmpregi2<=tmpregi) do
  2659. begin
  2660. a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
  2661. inc(dstref.offset,4);
  2662. inc(tmpregi2);
  2663. end;
  2664. copysize:=4;
  2665. cgsize:=OS_32;
  2666. while len<>0 do
  2667. begin
  2668. if len<2 then
  2669. begin
  2670. copysize:=1;
  2671. cgsize:=OS_8;
  2672. end
  2673. else if len<4 then
  2674. begin
  2675. copysize:=2;
  2676. cgsize:=OS_16;
  2677. end;
  2678. dec(len,copysize);
  2679. r:=getintregister(list,cgsize);
  2680. a_load_ref_reg(list,cgsize,cgsize,srcref,r);
  2681. a_load_reg_ref(list,cgsize,cgsize,r,dstref);
  2682. inc(srcref.offset,copysize);
  2683. inc(dstref.offset,copysize);
  2684. end;{end of while}
  2685. end
  2686. else
  2687. begin
  2688. cgsize:=OS_32;
  2689. if (len<=4) then{len<=4 and not aligned}
  2690. begin
  2691. r:=getintregister(list,cgsize);
  2692. usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2693. if Len=1 then
  2694. a_load_reg_ref(list,OS_8,OS_8,r,dstref)
  2695. else
  2696. begin
  2697. tmpreg:=getintregister(list,cgsize);
  2698. usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2699. inc(usedtmpref.offset,1);
  2700. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2701. inc(usedtmpref2.offset,1);
  2702. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2703. if len>2 then
  2704. begin
  2705. inc(usedtmpref.offset,1);
  2706. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2707. inc(usedtmpref2.offset,1);
  2708. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2709. if len>3 then
  2710. begin
  2711. inc(usedtmpref.offset,1);
  2712. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2713. inc(usedtmpref2.offset,1);
  2714. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2715. end;
  2716. end;
  2717. end;
  2718. end{end of if len<=4}
  2719. else
  2720. begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
  2721. destreg:=getintregister(list,OS_ADDR);
  2722. a_loadaddr_ref_reg(list,dest,destreg);
  2723. reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
  2724. srcreg:=getintregister(list,OS_ADDR);
  2725. a_loadaddr_ref_reg(list,source,srcreg);
  2726. reference_reset_base(srcref,srcreg,0,dest.temppos,source.alignment,source.volatility);
  2727. countreg:=getintregister(list,OS_32);
  2728. // if cs_opt_size in current_settings.optimizerswitches then
  2729. { roozbeh : it seems loading 1 byte is faster becouse of caching/fetching(?) }
  2730. {if aligned then
  2731. genloop(len,4)
  2732. else}
  2733. if GenerateThumbCode then
  2734. genloop_thumb(len,1)
  2735. else
  2736. genloop(len,1);
  2737. end;
  2738. end;
  2739. end;
  2740. procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
  2741. begin
  2742. g_concatcopy_internal(list,source,dest,len,false);
  2743. end;
  2744. procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
  2745. begin
  2746. if (source.alignment in [1,3]) or
  2747. (dest.alignment in [1,3]) then
  2748. g_concatcopy_internal(list,source,dest,len,false)
  2749. else
  2750. g_concatcopy_internal(list,source,dest,len,true);
  2751. end;
  2752. procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
  2753. var
  2754. ovloc : tlocation;
  2755. begin
  2756. ovloc.loc:=LOC_VOID;
  2757. g_overflowCheck_loc(list,l,def,ovloc);
  2758. end;
  2759. procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
  2760. var
  2761. hl : tasmlabel;
  2762. ai:TAiCpu;
  2763. hflags : tresflags;
  2764. begin
  2765. if not(cs_check_overflow in current_settings.localswitches) then
  2766. exit;
  2767. current_asmdata.getjumplabel(hl);
  2768. case ovloc.loc of
  2769. LOC_VOID:
  2770. begin
  2771. ai:=taicpu.op_sym(A_B,hl);
  2772. ai.is_jmp:=true;
  2773. if not((def.typ=pointerdef) or
  2774. ((def.typ=orddef) and
  2775. (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
  2776. pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]))) then
  2777. ai.SetCondition(C_VC)
  2778. else
  2779. if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
  2780. ai.SetCondition(C_CS)
  2781. else
  2782. ai.SetCondition(C_CC);
  2783. list.concat(ai);
  2784. end;
  2785. LOC_FLAGS:
  2786. begin
  2787. hflags:=ovloc.resflags;
  2788. inverse_flags(hflags);
  2789. cg.a_jmp_flags(list,hflags,hl);
  2790. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2791. end;
  2792. else
  2793. internalerror(200409281);
  2794. end;
  2795. a_call_name(list,'FPC_OVERFLOW',false);
  2796. a_label(list,hl);
  2797. end;
  2798. procedure tbasecgarm.g_save_registers(list : TAsmList);
  2799. begin
  2800. { this work is done in g_proc_entry }
  2801. end;
  2802. procedure tbasecgarm.g_restore_registers(list : TAsmList);
  2803. begin
  2804. { this work is done in g_proc_exit }
  2805. end;
  2806. procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  2807. var
  2808. ai : taicpu;
  2809. hlabel : TAsmLabel;
  2810. begin
  2811. if GenerateThumbCode then
  2812. begin
  2813. { the optimizer has to fix this if jump range is sufficient short }
  2814. current_asmdata.getjumplabel(hlabel);
  2815. ai:=Taicpu.Op_sym(A_B,hlabel);
  2816. ai.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
  2817. ai.is_jmp:=true;
  2818. list.concat(ai);
  2819. a_jmp_always(list,l);
  2820. a_label(list,hlabel);
  2821. end
  2822. else
  2823. begin
  2824. ai:=Taicpu.Op_sym(A_B,l);
  2825. ai.SetCondition(OpCmp2AsmCond[cond]);
  2826. ai.is_jmp:=true;
  2827. list.concat(ai);
  2828. end;
  2829. end;
  2830. function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
  2831. const
  2832. convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
  2833. (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
  2834. (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
  2835. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2836. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2837. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
  2838. begin
  2839. result:=convertop[fromsize,tosize];
  2840. if result=A_NONE then
  2841. internalerror(200312205);
  2842. end;
  2843. function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
  2844. const
  2845. convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
  2846. (PF_F32, PF_F32F64,PF_None,PF_None,PF_None),
  2847. (PF_F64F32,PF_F64, PF_None,PF_None,PF_None),
  2848. (PF_None, PF_None, PF_None,PF_None,PF_None),
  2849. (PF_None, PF_None, PF_None,PF_None,PF_None),
  2850. (PF_None, PF_None, PF_None,PF_None,PF_None));
  2851. begin
  2852. result:=convertop[fromsize,tosize];
  2853. end;
  2854. procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
  2855. var
  2856. instr: taicpu;
  2857. begin
  2858. if (shuffle=nil) or shufflescalar(shuffle) then
  2859. instr:=setoppostfix(taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),get_scalar_mm_prefix(tosize,fromsize))
  2860. else
  2861. internalerror(2009112407);
  2862. list.concat(instr);
  2863. case instr.opcode of
  2864. A_VMOV:
  2865. { VMOV cannot generate an FPU exception, so we do not need a check here }
  2866. add_move_instruction(instr);
  2867. else
  2868. { VCVT can generate an exception }
  2869. maybe_check_for_fpu_exception(list);
  2870. end;
  2871. end;
  2872. procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  2873. var
  2874. intreg,
  2875. tmpmmreg : tregister;
  2876. reg64 : tregister64;
  2877. begin
  2878. if assigned(shuffle) and
  2879. not(shufflescalar(shuffle)) then
  2880. internalerror(2009112413);
  2881. case fromsize of
  2882. OS_32,OS_S32:
  2883. begin
  2884. fromsize:=OS_F32;
  2885. { since we are loading an integer, no conversion may be required }
  2886. if (fromsize<>tosize) then
  2887. internalerror(2009112801);
  2888. end;
  2889. OS_64,OS_S64:
  2890. begin
  2891. fromsize:=OS_F64;
  2892. { since we are loading an integer, no conversion may be required }
  2893. if (fromsize<>tosize) then
  2894. internalerror(2009112901);
  2895. end;
  2896. OS_F32,OS_F64:
  2897. ;
  2898. else
  2899. internalerror(2019050920);
  2900. end;
  2901. if (fromsize<>tosize) then
  2902. tmpmmreg:=getmmregister(list,fromsize)
  2903. else
  2904. tmpmmreg:=reg;
  2905. if (ref.alignment in [1,2]) then
  2906. begin
  2907. case fromsize of
  2908. OS_F32:
  2909. begin
  2910. intreg:=getintregister(list,OS_32);
  2911. a_load_ref_reg(list,OS_32,OS_32,ref,intreg);
  2912. a_loadmm_intreg_reg(list,OS_32,OS_F32,intreg,tmpmmreg,mms_movescalar);
  2913. end;
  2914. OS_F64:
  2915. begin
  2916. reg64.reglo:=getintregister(list,OS_32);
  2917. reg64.reghi:=getintregister(list,OS_32);
  2918. cg64.a_load64_ref_reg(list,ref,reg64);
  2919. cg64.a_loadmm_intreg64_reg(list,OS_F64,reg64,tmpmmreg);
  2920. end;
  2921. else
  2922. internalerror(2009112412);
  2923. end;
  2924. end
  2925. else
  2926. handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
  2927. if (tmpmmreg<>reg) then
  2928. a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
  2929. end;
  2930. procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  2931. var
  2932. intreg,
  2933. tmpmmreg : tregister;
  2934. reg64 : tregister64;
  2935. begin
  2936. if assigned(shuffle) and
  2937. not(shufflescalar(shuffle)) then
  2938. internalerror(2009112416);
  2939. case tosize of
  2940. OS_32,OS_S32:
  2941. begin
  2942. tosize:=OS_F32;
  2943. { since we are loading an integer, no conversion may be required }
  2944. if (fromsize<>tosize) then
  2945. internalerror(2009112802);
  2946. end;
  2947. OS_64,OS_S64:
  2948. begin
  2949. tosize:=OS_F64;
  2950. { since we are loading an integer, no conversion may be required }
  2951. if (fromsize<>tosize) then
  2952. internalerror(2009112902);
  2953. end;
  2954. OS_F32,OS_F64:
  2955. ;
  2956. else
  2957. internalerror(2019050919);
  2958. end;
  2959. if (fromsize<>tosize) then
  2960. begin
  2961. tmpmmreg:=getmmregister(list,tosize);
  2962. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpmmreg,shuffle);
  2963. end
  2964. else
  2965. tmpmmreg:=reg;
  2966. if (ref.alignment in [1,2]) then
  2967. begin
  2968. case tosize of
  2969. OS_F32:
  2970. begin
  2971. intreg:=getintregister(list,OS_32);
  2972. a_loadmm_reg_intreg(list,OS_F32,OS_32,tmpmmreg,intreg,shuffle);
  2973. a_load_reg_ref(list,OS_32,OS_32,intreg,ref);
  2974. end;
  2975. OS_F64:
  2976. begin
  2977. reg64.reglo:=getintregister(list,OS_32);
  2978. reg64.reghi:=getintregister(list,OS_32);
  2979. cg64.a_loadmm_reg_intreg64(list,OS_F64,tmpmmreg,reg64);
  2980. cg64.a_load64_reg_ref(list,reg64,ref);
  2981. end;
  2982. else
  2983. internalerror(2009112417);
  2984. end;
  2985. end
  2986. else
  2987. handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
  2988. { VSTR cannot generate an FPU exception, VCVT is handled seperately, so we do not need a check here }
  2989. end;
  2990. procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  2991. begin
  2992. { this code can only be used to transfer raw data, not to perform
  2993. conversions }
  2994. if (tosize<>OS_F32) then
  2995. internalerror(2009112419);
  2996. if not(fromsize in [OS_32,OS_S32]) then
  2997. internalerror(2009112420);
  2998. if assigned(shuffle) and
  2999. not shufflescalar(shuffle) then
  3000. internalerror(2009112516);
  3001. list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
  3002. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3003. end;
  3004. procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
  3005. begin
  3006. { this code can only be used to transfer raw data, not to perform
  3007. conversions }
  3008. if (fromsize<>OS_F32) then
  3009. internalerror(2009112430);
  3010. if not(tosize in [OS_32,OS_S32]) then
  3011. internalerror(2009112409);
  3012. if assigned(shuffle) and
  3013. not shufflescalar(shuffle) then
  3014. internalerror(2009112514);
  3015. list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
  3016. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3017. end;
  3018. procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  3019. var
  3020. tmpreg: tregister;
  3021. begin
  3022. { the vfp doesn't support xor nor any other logical operation, but
  3023. this routine is used to initialise global mm regvars. We can
  3024. easily initialise an mm reg with 0 though. }
  3025. case op of
  3026. OP_XOR:
  3027. begin
  3028. if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
  3029. begin
  3030. if (reg_cgsize(src)<>size) or
  3031. assigned(shuffle) then
  3032. internalerror(2019081301);
  3033. list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
  3034. end
  3035. else
  3036. begin
  3037. if (src<>dst) or
  3038. (reg_cgsize(src)<>size) or
  3039. assigned(shuffle) then
  3040. internalerror(2009112907);
  3041. tmpreg:=getintregister(list,OS_32);
  3042. a_load_const_reg(list,OS_32,0,tmpreg);
  3043. case size of
  3044. OS_F32:
  3045. list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
  3046. OS_F64:
  3047. list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
  3048. else
  3049. internalerror(2009112908);
  3050. end;
  3051. end;
  3052. end
  3053. else
  3054. internalerror(2009112906);
  3055. end;
  3056. end;
  3057. procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  3058. const
  3059. overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
  3060. begin
  3061. if (op in overflowops) and
  3062. (size in [OS_8,OS_S8,OS_16,OS_S16]) then
  3063. a_load_reg_reg(list,OS_32,size,dst,dst);
  3064. end;
  3065. procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);
  3066. procedure checkreg(var reg : TRegister);
  3067. var
  3068. tmpreg : TRegister;
  3069. begin
  3070. if ((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
  3071. (getsupreg(reg)=RS_R15) then
  3072. begin
  3073. tmpreg:=getintregister(list,OS_INT);
  3074. a_load_reg_reg(list,OS_INT,OS_INT,reg,tmpreg);
  3075. reg:=tmpreg;
  3076. end;
  3077. end;
  3078. begin
  3079. checkreg(op1);
  3080. checkreg(op2);
  3081. checkreg(op3);
  3082. checkreg(op4);
  3083. list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
  3084. end;
  3085. procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
  3086. begin
  3087. if pi_needs_tls in current_procinfo.flags then
  3088. begin
  3089. list.concat(tai_regalloc.alloc(NR_R0,nil));
  3090. a_call_name(list,'fpc_read_tp',false);
  3091. a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
  3092. list.concat(tai_regalloc.dealloc(NR_R0,nil));
  3093. end;
  3094. end;
  3095. procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
  3096. begin
  3097. case op of
  3098. OP_NEG:
  3099. begin
  3100. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3101. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
  3102. list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
  3103. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  3104. end;
  3105. OP_NOT:
  3106. begin
  3107. cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
  3108. cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
  3109. end;
  3110. else
  3111. a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
  3112. end;
  3113. end;
  3114. procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
  3115. begin
  3116. a_op64_const_reg_reg(list,op,size,value,reg,reg);
  3117. end;
  3118. procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
  3119. var
  3120. ovloc : tlocation;
  3121. begin
  3122. a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,ovloc);
  3123. end;
  3124. procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
  3125. var
  3126. ovloc : tlocation;
  3127. begin
  3128. a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,ovloc);
  3129. end;
  3130. procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
  3131. begin
  3132. { this code can only be used to transfer raw data, not to perform
  3133. conversions }
  3134. if (mmsize<>OS_F64) then
  3135. internalerror(2009112405);
  3136. list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
  3137. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3138. end;
  3139. procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
  3140. begin
  3141. { this code can only be used to transfer raw data, not to perform
  3142. conversions }
  3143. if (mmsize<>OS_F64) then
  3144. internalerror(2009112406);
  3145. list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
  3146. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3147. end;
  3148. procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
  3149. var
  3150. tmpreg : tregister;
  3151. b : byte;
  3152. begin
  3153. ovloc.loc:=LOC_VOID;
  3154. case op of
  3155. OP_NEG,
  3156. OP_NOT :
  3157. internalerror(2012022501);
  3158. else
  3159. ;
  3160. end;
  3161. if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
  3162. begin
  3163. case op of
  3164. OP_ADD:
  3165. begin
  3166. if is_shifter_const(lo(value),b) then
  3167. begin
  3168. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3169. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3170. end
  3171. else
  3172. begin
  3173. tmpreg:=cg.getintregister(list,OS_32);
  3174. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3175. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3176. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3177. end;
  3178. if is_shifter_const(hi(value),b) then
  3179. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
  3180. else
  3181. begin
  3182. tmpreg:=cg.getintregister(list,OS_32);
  3183. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3184. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3185. end;
  3186. end;
  3187. OP_SUB:
  3188. begin
  3189. if is_shifter_const(lo(value),b) then
  3190. begin
  3191. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3192. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3193. end
  3194. else
  3195. begin
  3196. tmpreg:=cg.getintregister(list,OS_32);
  3197. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3198. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3199. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3200. end;
  3201. if is_shifter_const(hi(value),b) then
  3202. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
  3203. else
  3204. begin
  3205. tmpreg:=cg.getintregister(list,OS_32);
  3206. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3207. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3208. end;
  3209. end;
  3210. else
  3211. internalerror(200502131);
  3212. end;
  3213. if size=OS_64 then
  3214. begin
  3215. { the ARM has a weird opinion on how flags for SUB/ADD are handled }
  3216. ovloc.loc:=LOC_FLAGS;
  3217. case op of
  3218. OP_ADD:
  3219. ovloc.resflags:=F_CS;
  3220. OP_SUB:
  3221. ovloc.resflags:=F_CC;
  3222. else
  3223. internalerror(2019050918);
  3224. end;
  3225. end;
  3226. end
  3227. else
  3228. begin
  3229. case op of
  3230. OP_AND,OP_OR,OP_XOR:
  3231. begin
  3232. cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
  3233. cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
  3234. end;
  3235. OP_ADD:
  3236. begin
  3237. if is_shifter_const(aint(lo(value)),b) then
  3238. begin
  3239. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3240. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3241. end
  3242. else
  3243. begin
  3244. tmpreg:=cg.getintregister(list,OS_32);
  3245. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3246. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3247. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3248. end;
  3249. if is_shifter_const(aint(hi(value)),b) then
  3250. list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3251. else
  3252. begin
  3253. tmpreg:=cg.getintregister(list,OS_32);
  3254. cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
  3255. list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
  3256. end;
  3257. end;
  3258. OP_SUB:
  3259. begin
  3260. if is_shifter_const(aint(lo(value)),b) then
  3261. begin
  3262. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3263. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3264. end
  3265. else
  3266. begin
  3267. tmpreg:=cg.getintregister(list,OS_32);
  3268. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3269. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3270. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3271. end;
  3272. if is_shifter_const(aint(hi(value)),b) then
  3273. list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3274. else
  3275. begin
  3276. tmpreg:=cg.getintregister(list,OS_32);
  3277. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3278. list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
  3279. end;
  3280. end;
  3281. else
  3282. internalerror(2003083101);
  3283. end;
  3284. end;
  3285. end;
  { Emits a 64 bit operation on register pairs and reports where an overflow
    of that operation can be read from.

    list            - assembler list the instructions are appended to
    op              - operation to perform; OP_NEG and OP_NOT raise
                      internalerror(2012022502), anything other than
                      AND/OR/XOR/ADD/SUB raises internalerror(2003083104)
    size            - operand size; only OS_64 produces an overflow location
    regsrc1,regsrc2 - source register pairs (OP_SUB computes regsrc2-regsrc1)
    regdst          - destination register pair
    setflags        - ask for the flags to reflect the result of OP_ADD/OP_SUB
                      (tbasecgarm(cg).cgsetflags has the same effect)
    ovloc           - LOC_FLAGS with F_CS (OP_ADD) or F_CC (OP_SUB) when the
                      flags were requested and size is OS_64, else LOC_VOID }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      keepflags : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      { when true, the high word operation also updates the flags and the
        flags register stays allocated for the caller }
      keepflags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);

      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { both halves are handled by the 32 bit code generator }
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            { low word sets the carry consumed by the high word }
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
            if keepflags then
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S))
            else
              begin
                list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
            if keepflags then
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S))
            else
              begin
                list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
          end;
        else
          internalerror(2003083104);
      end;

      if keepflags and (size=OS_64) then
        begin
          { the ARM reports overflow of ADD through "carry set" but overflow
            of SUB through "carry clear" }
          ovloc.loc:=LOC_FLAGS;
          case op of
            OP_ADD:
              ovloc.resflags:=F_CS;
            OP_SUB:
              ovloc.resflags:=F_CC;
            else
              internalerror(2019050917);
          end;
        end;
    end;
  { Creates the integer register allocator for Thumb code. r0-r6 are always
    offered to the allocator; r7 is added unless the current procedure uses
    it as its frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or (current_procinfo.framepointer<>NR_R7) then
        { r7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { r7 is the frame pointer: keep it away from the allocator }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocators of the Thumb code
    generator and then lets the inherited implementation finish the cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      { free is safe on allocators that were never created }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits the procedure prologue for Thumb code: pushes the registers that
    have to be preserved, reserves the local stack area and, if a separate
    frame pointer is used, initialises it from the stack pointer. CFI
    information is emitted along the way.

    list         - assembler list receiving the prologue
    localsize    - size of the local area in bytes; it is rounded up to a
                   multiple of 4 and, unless stack parameters are accessed,
                   padded so that pushed registers plus locals are a multiple
                   of current_settings.alignment.localalignmax
    nostackframe - when true, no code is emitted at all

    Raises internalerror(2013040601) when stack parameters are accessed and
    localsize exceeds the previously estimated stack frame size. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      if nostackframe then
        exit;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { save int registers: everything used by the procedure that is not
        volatile, plus the link register and, when a separate frame pointer
        is in use, r7 }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        regs:=regs+[RS_R7,RS_R14]
      else
        include(regs,RS_R14);

      { safely estimate stack size: a frame this large may need r4 as scratch
        register for the stack pointer adjustment below, so save it as well }
      if localsize+current_settings.alignment.localalignmax+4>508 then
        begin
          include(rg[R_INTREGISTER].used_in_proc,RS_R4);
          include(regs,RS_R4);
        end;

      registerarea:=0;
      if regs<>[] then
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
          { count the pushed bytes and describe every saved register to the
            CFI generator }
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              begin
                inc(registerarea,4);
                current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
              end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
        end;

      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
      if stack_parameters or (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { do we access stack parameters?
            if yes, the previously estimated stacksize must be used }
          if stack_parameters then
            begin
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                internalerror(2013040601);
              localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
            end
          else
            localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

          { 508 is the largest immediate of the Thumb "sub sp,#imm"; the same
            limits are used by g_proc_exit when releasing the frame }
          if localsize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if localsize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
            end
          else
            begin
              { too large for immediates: add the negated size through r4 }
              a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
            end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
          current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
        end;
    end;
  { Emits the procedure epilogue for Thumb code.

    list         - assembler list receiving the epilogue
    parasize     - not used by this implementation
    nostackframe - when true only a return through the link register is
                   emitted

    With a stack frame, code is only emitted when the stack pointer is the
    frame pointer or the target is a Darwin system: the local area is
    released and the saved registers (including the PC) are popped. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framebytes : longint;
      regidx : byte;
      popset : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      uses_stackparas : Boolean;

    { return through r14, using BX when the CPU supports it }
    procedure emit_lr_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      if nostackframe then
        begin
          emit_lr_return;
          exit;
        end;

      uses_stackparas:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore; the return happens by popping into the PC }
      popset:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(popset,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(popset,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for regidx:=RS_R0 to RS_R15 do
        if regidx in popset then
          inc(savedbytes,4);
      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      { recompute the size of the local area the way g_proc_entry did }
      framebytes:=current_procinfo.calc_stackframe_size;
      if uses_stackparas then
        framebytes:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framebytes:=align(framebytes+misalign,current_settings.alignment.localalignmax)-misalign;

      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; nothing has to be done for an empty one }
      if framebytes<>0 then
        begin
          if framebytes<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framebytes))
          else if framebytes<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framebytes-508));
            end
          else
            begin
              { too large for immediates: go through r3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framebytes,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if popset=[] then
        emit_lr_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,popset));
    end;
  { Loads a value from the memory reference Ref into reg (Thumb code generator).

    list     - assembler list the instructions are appended to
    fromsize - size of the value in memory; replaced by tosize when it is at
               least as large as tosize
    tosize   - size of the value wanted in reg
    Ref      - memory location to read
    reg      - destination register

    A reference whose alignment (1 or 2) is smaller than the access size is
    read piecewise (bytes or halfwords) and the pieces are combined with
    LSL/ORR; every other load goes through handle_load_store with LDR and a
    size postfix. Sizes other than 8/16/32 bit raise InternalError(200308298). }
  3544. procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  3545. var
  3546. oppostfix:toppostfix;
  3547. usedtmpref: treference;
  3548. tmpreg,tmpreg2 : tregister;
  3549. dir : integer;
  3550. begin
  { never load more than the destination size asks for }
  3551. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  3552. FromSize := ToSize;
  { select the LDR postfix matching the size and signedness of the load }
  3553. case FromSize of
  3554. { signed integer registers }
  3555. OS_8:
  3556. oppostfix:=PF_B;
  3557. OS_S8:
  3558. oppostfix:=PF_SB;
  3559. OS_16:
  3560. oppostfix:=PF_H;
  3561. OS_S16:
  3562. oppostfix:=PF_SH;
  3563. OS_32,
  3564. OS_S32:
  3565. oppostfix:=PF_None;
  3566. else
  3567. InternalError(200308298);
  3568. end;
  { under-aligned reference: assemble the value from smaller loads }
  3569. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  3570. begin
  { dir is the offset step between pieces: backwards on big endian targets,
    forwards otherwise. The first piece is loaded into reg unshifted, so it
    supplies the low-order bits of the result. }
  3571. if target_info.endian=endian_big then
  3572. dir:=-1
  3573. else
  3574. dir:=1;
  3575. case FromSize of
  3576. OS_16,OS_S16:
  3577. begin
  3578. { only complicated references need an extra loadaddr }
  3579. if assigned(ref.symbol) or
  3580. (ref.index<>NR_NO) or
  3581. (ref.offset<-124) or
  3582. (ref.offset>124) or
  3583. { sometimes the compiler reused registers }
  3584. (reg=ref.index) or
  3585. (reg=ref.base) then
  3586. begin
  { compute the address once and continue with a plain base+0 reference }
  3587. tmpreg2:=getintregister(list,OS_INT);
  3588. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3589. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  3590. end
  3591. else
  3592. usedtmpref:=ref;
  { on big endian start at the second byte and step backwards }
  3593. if target_info.endian=endian_big then
  3594. inc(usedtmpref.offset,1);
  3595. tmpreg:=getintregister(list,OS_INT);
  3596. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3597. inc(usedtmpref.offset,dir);
  { the second byte becomes bits 8 and up; it is loaded as signed byte when a
    signed 16 bit value was requested }
  3598. if FromSize=OS_16 then
  3599. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  3600. else
  3601. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  3602. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3603. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3604. end;
  3605. OS_32,OS_S32:
  3606. begin
  3607. tmpreg:=getintregister(list,OS_INT);
  3608. { only complicated references need an extra loadaddr }
  3609. if assigned(ref.symbol) or
  3610. (ref.index<>NR_NO) or
  3611. (ref.offset<-124) or
  3612. (ref.offset>124) or
  3613. { sometimes the compiler reused registers }
  3614. (reg=ref.index) or
  3615. (reg=ref.base) then
  3616. begin
  3617. tmpreg2:=getintregister(list,OS_INT);
  3618. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3619. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  3620. end
  3621. else
  3622. usedtmpref:=ref;
  { halfword aligned: two 16 bit loads, the second shifted left by 16 }
  3623. if ref.alignment=2 then
  3624. begin
  3625. if target_info.endian=endian_big then
  3626. inc(usedtmpref.offset,2);
  3627. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  3628. inc(usedtmpref.offset,dir*2);
  3629. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  3630. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3631. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3632. end
  3633. else
  3634. begin
  { byte aligned: four 8 bit loads, shifted left by 0, 8, 16 and 24 bits }
  3635. if target_info.endian=endian_big then
  3636. inc(usedtmpref.offset,3);
  3637. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3638. inc(usedtmpref.offset,dir);
  3639. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3640. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3641. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3642. inc(usedtmpref.offset,dir);
  3643. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3644. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3645. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3646. inc(usedtmpref.offset,dir);
  3647. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3648. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,24));
  3649. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3650. end;
  3651. end
  { any other size in this branch is loaded with a single instruction }
  3652. else
  3653. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  3654. end;
  3655. end
  3656. else
  { sufficiently aligned reference: a single load }
  3657. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { signed byte wanted as unsigned 16 bit value: pass reg through a 16->32 bit
    register move; presumably this clears the upper half left behind by the
    sign extension - TODO confirm }
  3658. if (fromsize=OS_S8) and (tosize = OS_16) then
  3659. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  3660. end;
  { Loads the constant a into reg (Thumb code generator).

    list - assembler list the instructions are appended to
    size - operand size; anything but 8/16/32 bit (signed or unsigned)
           raises internalerror(2002090908)
    a    - constant to load
    reg  - destination register

    Constants accepted by is_thumb_imm are loaded with MOVS. All others are
    stored as a 32 bit word in the local data of the current procedure and
    fetched with a PC-relative LDR. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { place the constant behind a fresh label in the local data ... }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      reference_reset(poolref,4,[]);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      { the label just emitted is the last entry of the local data }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      { ... and load it from there }
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the "self" parameter of procdef, wherever the
    caller-side parameter locations place it.

    list    - assembler list the instructions are appended to
    procdef - procedure whose self parameter is adjusted
    ioffset - value subtracted from self

    Offsets accepted by is_thumb_imm are subtracted directly; other offsets
    are loaded into r4 from the local data, with r4 pushed before and popped
    after its use. Raises internalerror(2003052504) when procdef has no self
    parameter and internalerror(2003091804) for a location that is neither a
    register nor a memory reference. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);

    { stores ioffset as an aligned 32 bit word in the local data of the
      current procedure and loads it into r4 with a PC-relative LDR }
    procedure load_ioffset_to_r4;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym)) or (selfsym.typ<>paravarsym) then
        internalerror(2003052504);

      { walk all locations the self parameter is spread over }
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    load_ioffset_to_r4;
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R4,curloc^.register);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    load_ioffset_to_r4;
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R4,selfref);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end
            else
              internalerror(2003091804);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Thumb front end for the inherited handle_load_store: when the reference
    does not satisfy the limits checked below for the requested load/store,
    its address is first computed into a temporary register and the access
    is done through that register with offset 0.

    list      - assembler list the instructions are appended to
    op        - load/store opcode
    oppostfix - size postfix of the opcode
    reg       - register to load or store
    ref       - memory reference to access

    Returns whatever the inherited handle_load_store returns for the
    (possibly simplified) reference. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      effref : treference;
      addrreg : TRegister;
      absoffset : tcgint;
      wordaccess,
      byteaccess,
      halfaccess,
      touches_sp,
      needaddr : boolean;
    begin
      absoffset:=abs(ref.offset);
      touches_sp:=(ref.base=NR_STACK_POINTER_REG) or
                  (ref.index=NR_STACK_POINTER_REG);

      { classify the access; the size may be given by opcode or by postfix }
      wordaccess:=((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      byteaccess:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_B)) or
                  (((op=A_LDRB) or (op=A_STRB)) and (oppostfix=PF_None));
      halfaccess:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_H)) or
                  (((op=A_LDRH) or (op=A_STRH)) and (oppostfix=PF_None));

      needaddr:=
        { LDR/STR limitations }
        (wordaccess and
         (ref.base<>NR_STACK_POINTER_REG) and
         (absoffset>124)) or
        { LDRB/STRB limitations }
        (byteaccess and
         (touches_sp or (absoffset>31))) or
        { LDRH/STRH limitations }
        (halfaccess and
         (touches_sp or (absoffset>62) or ((absoffset mod 2)<>0))) or
        { stack pointer relative word load with a large offset }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (absoffset>1020)) or
        { sign extending loads and every remaining LDR with an offset above 124 }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or (absoffset>124)));

      effref:=ref;
      if needaddr then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(effref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;
      Result:=inherited handle_load_store(list, op, oppostfix, reg, effref);
    end;
  { Emits the two-operand operation "dst := dst op src" (for OP_NEG/OP_NOT:
    "dst := op src") for the Thumb code generator.

    list - assembler list the instructions are appended to
    Op   - operation to perform; OP_DIV/OP_IDIV raise internalerror(200308284)
    size - operand size; OP_ROL is only supported for 32 bit operands and
           raises internalerror(2008072805) otherwise
    src  - source register
    dst  - destination (and, for binary operations, first source) register

    maybeadjustresult is called on dst after the operation. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { simulate ROL by ror'ing 32-value: tmpreg:=32-src, then rotate
              dst right by tmpreg. Rotating by src itself would emit a rotate
              right instead of the requested rotate left. }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            { all remaining operations map directly to one instruction taken
              from the opcode/postfix tables }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := dst op a" with a constant operand for the Thumb code generator.

    list - assembler list the instructions are appended to
    op   - operation to perform
    size - operand size
    a    - constant operand
    dst  - register that is both source and destination

    Strategy, in order:
      - an ADD/SUB whose negated constant is a Thumb immediate is turned into
        the opposite operation with the negated constant
      - ADD/SUB with a Thumb immediate is emitted as a single instruction
      - a few trivial constants are special-cased (multiply/divide by 1,
        multiply by 0, signed multiply/divide by -1, AND with 0, AND with
        all 32 bits set)
      - shifts are emitted with the constant as shift count
      - everything else loads the constant into a temporary register and
        calls a_op_reg_reg
    maybeadjustresult is called on dst at the end.

    NOTE(review): the sections guarded by DUMMY use identifiers (so, shift,
    imm1, imm2, setflags) that are not declared in this procedure; they are
    only compiled when DUMMY is defined. }
  3840. procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
  3841. var
  3842. tmpreg : tregister;
  3843. {$ifdef DUMMY}
  3844. l1 : longint;
  3845. {$endif DUMMY}
  3846. begin
  3847. //!!! ovloc.loc:=LOC_VOID;
  { prefer the opposite operation when -a is a Thumb immediate; with range
    checking enabled -2147483648 is excluded, presumably because negating it
    would trigger a range error - TODO confirm }
  3848. if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
  3849. case op of
  3850. OP_ADD:
  3851. begin
  3852. op:=OP_SUB;
  3853. a:=aint(dword(-a));
  3854. end;
  3855. OP_SUB:
  3856. begin
  3857. op:=OP_ADD;
  3858. a:=aint(dword(-a));
  3859. end
  3860. else
  3861. ;
  3862. end;
  { ADD/SUB with a Thumb immediate: a single instruction on dst }
  3863. if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
  3864. begin
  3865. // if cgsetflags or setflags then
  3866. a_reg_alloc(list,NR_DEFAULTFLAGS);
  3867. list.concat(setoppostfix(
  3868. taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
  { the overflow location handling below is disabled: every branch of the
    case statement is empty }
  3869. if (cgsetflags {!!! or setflags }) and (size in [OS_8,OS_16,OS_32]) then
  3870. begin
  3871. //!!! ovloc.loc:=LOC_FLAGS;
  3872. case op of
  3873. OP_ADD:
  3874. //!!! ovloc.resflags:=F_CS;
  3875. ;
  3876. OP_SUB:
  3877. //!!! ovloc.resflags:=F_CC;
  3878. ;
  3879. else
  3880. ;
  3881. end;
  3882. end;
  3883. end
  3884. else
  3885. begin
  3886. { there could be added some more sophisticated optimizations }
  { multiplying or dividing by 1: only a register move of dst onto itself }
  3887. if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
  3888. a_load_reg_reg(list,size,size,dst,dst)
  { multiplying by 0: load the constant 0 }
  3889. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  3890. a_load_const_reg(list,size,0,dst)
  { signed multiply or divide by -1: negate }
  3891. else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  3892. a_op_reg_reg(list,OP_NEG,size,dst,dst)
  3893. { we do this here instead in the peephole optimizer because
  3894. it saves us a register }
  3895. {$ifdef DUMMY}
  3896. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  3897. a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
  3898. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  3899. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  3900. begin
  3901. if l1>32 then{roozbeh does this ever happen?}
  3902. internalerror(2003082903);
  3903. shifterop_reset(so);
  3904. so.shiftmode:=SM_LSL;
  3905. so.shiftimm:=l1;
  3906. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
  3907. end
  3908. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  3909. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  3910. begin
  3911. if l1>32 then{does this ever happen?}
  3912. internalerror(2012051802);
  3913. shifterop_reset(so);
  3914. so.shiftmode:=SM_LSL;
  3915. so.shiftimm:=l1;
  3916. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
  3917. end
  3918. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
  3919. begin
  3920. { nothing to do on success }
  3921. end
  3922. {$endif DUMMY}
  3923. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  3924. Just using mov x, #0 might allow some easier optimizations down the line. }
  3925. else if (op = OP_AND) and (dword(a)=0) then
  3926. list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
  3927. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  3928. else if (op = OP_AND) and (not(dword(a))=0) then
  3929. // do nothing
  3930. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  3931. broader range of shifterconstants.}
  3932. {$ifdef DUMMY}
  3933. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  3934. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
  3935. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  3936. begin
  3937. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
  3938. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  3939. end
  3940. else if (op in [OP_ADD, OP_SUB, OP_OR]) and
  3941. not(cgsetflags or setflags) and
  3942. split_into_shifter_const(a, imm1, imm2) then
  3943. begin
  3944. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
  3945. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  3946. end
  3947. {$endif DUMMY}
  { shifts take the constant directly as shift count }
  3948. else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
  3949. begin
  3950. list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
  3951. end
  3952. else
  3953. begin
  { general case: materialise the constant and use the register variant }
  3954. tmpreg:=getintregister(list,size);
  3955. a_load_const_reg(list,size,a,tmpreg);
  3956. a_op_reg_reg(list,op,size,tmpreg,dst);
  3957. end;
  3958. end;
  3959. maybeadjustresult(list,op,size,dst);
  3960. end;
  { Emits "dst := src op a" for the Thumb code generator.

    Adding a positive multiple of 4 of at most 1020 to r13 with a destination
    other than r13 is emitted as a single three-operand ADD; every other
    combination is passed on to the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_plus_imm : boolean;
    begin
      sp_plus_imm:=(op=OP_ADD) and (src=NR_R13) and (dst<>NR_R13) and
                   ((a mod 4)=0) and (a>0) and (a<=1020);
      if not(sp_plus_imm) then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Converts the condition described by f into 0 or 1 in reg using a
    conditional branch (Thumb code generator).

    list - assembler list the instructions are appended to
    size - not used by this implementation
    f    - flags to test
    reg  - receives 1 when the condition holds and 0 otherwise

    The flags register is deallocated as part of the emitted sequence. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      lbl_true,lbl_end : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(lbl_true);
      current_asmdata.getjumplabel(lbl_end);

      { jump to the "true" part when the condition holds }
      condjump:=setcondition(taicpu.op_sym(A_B,lbl_true),flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition false: reg:=0 and skip the "true" part }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,lbl_end));

      { condition true: reg:=1 }
      cg.a_label(list,lbl_true);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lbl_end);
    end;
  { Creates the register allocators of the Thumb-2 code generator: the
    integer allocator (without r9 on iOS), the FPU allocator when the
    selected FPU has FPA registers, and the MM register allocator. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and MM register allocators of the Thumb-2 code
    generator and then lets the inherited implementation finish the cleanup. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { free is safe on allocators that were never created }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg using BLX and records in the flags of
    the current procedure that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
      list.concat(taicpu.op_reg(A_BLX, reg));
    end;
  { Loads the constant a into reg.

    Only 8, 16 and 32 bit integer sizes are accepted; any other size raises
    internalerror 2002090909. The cheapest encoding is tried first:
      1. MOV  reg,#a       when a passes is_thumb32_imm
      2. MVN  reg,#not(a)  when not(a) passes is_thumb32_imm
      3. MOVW reg,#a       when a fits in the low 16 bits
      4. otherwise a 32 bit literal is appended to the procedure's local
         data and loaded with a PC-relative LDR. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate encoding fits: go through the literal pool }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry that was just appended, the constant follows it }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value from the memory reference Ref into reg.

    If the source size is at least as large as the destination size, the load
    is performed at destination size. References whose alignment is 1 or 2 and
    smaller than the load size are read piecewise (bytes or halfwords) and
    combined with ORR using an LSL shifter operand; all other loads are emitted
    through handle_load_store with A_LDR and the postfix selected below.
    Sizes other than the 8/16/32 bit integer sizes raise internalerror
    2003082913. }
  4046. procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  4047. var
  4048. oppostfix:toppostfix;
  4049. usedtmpref: treference;
  4050. tmpreg,tmpreg2 : tregister;
  4051. so : tshifterop;
  4052. dir : integer;
  4053. begin
  { never load more than the destination can hold }
  4054. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  4055. FromSize := ToSize;
  4056. case FromSize of
  4057. { integer sizes: pick the matching load postfix (B/SB/H/SH/none) }
  4058. OS_8:
  4059. oppostfix:=PF_B;
  4060. OS_S8:
  4061. oppostfix:=PF_SB;
  4062. OS_16:
  4063. oppostfix:=PF_H;
  4064. OS_S16:
  4065. oppostfix:=PF_SH;
  4066. OS_32,
  4067. OS_S32:
  4068. oppostfix:=PF_None;
  4069. else
  4070. InternalError(2003082913);
  4071. end;
  { under-aligned reference: assemble the value from smaller loads }
  4072. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  4073. begin
  { dir is the offset step from the least significant part towards the most
    significant part: downwards on big endian, upwards on little endian }
  4074. if target_info.endian=endian_big then
  4075. dir:=-1
  4076. else
  4077. dir:=1;
  4078. case FromSize of
  4079. OS_16,OS_S16:
  4080. begin
  4081. { only complicated references need an extra loadaddr }
  { reg aliasing base/index also forces this path: the first partial load
    writes reg while the reference is still needed for the second load }
  4082. if assigned(ref.symbol) or
  4083. (ref.index<>NR_NO) or
  4084. (ref.offset<-255) or
  4085. (ref.offset>4094) or
  4086. { sometimes the compiler reused registers }
  4087. (reg=ref.index) or
  4088. (reg=ref.base) then
  4089. begin
  4090. tmpreg2:=getintregister(list,OS_INT);
  4091. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4092. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4093. end
  4094. else
  4095. usedtmpref:=ref;
  { on big endian the low byte is the second byte of the halfword }
  4096. if target_info.endian=endian_big then
  4097. inc(usedtmpref.offset,1);
  4098. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  4099. tmpreg:=getintregister(list,OS_INT);
  { low byte goes directly into reg }
  4100. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4101. inc(usedtmpref.offset,dir);
  { high byte: loaded as a signed byte when the halfword itself is signed }
  4102. if FromSize=OS_16 then
  4103. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  4104. else
  4105. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  4106. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4107. end;
  4108. OS_32,OS_S32:
  4109. begin
  4110. tmpreg:=getintregister(list,OS_INT);
  4111. { only complicated references need an extra loadaddr }
  4112. if assigned(ref.symbol) or
  4113. (ref.index<>NR_NO) or
  4114. (ref.offset<-255) or
  4115. (ref.offset>4092) or
  4116. { sometimes the compiler reused registers }
  4117. (reg=ref.index) or
  4118. (reg=ref.base) then
  4119. begin
  4120. tmpreg2:=getintregister(list,OS_INT);
  4121. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4122. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4123. end
  4124. else
  4125. usedtmpref:=ref;
  4126. shifterop_reset(so);so.shiftmode:=SM_LSL;
  { halfword aligned: two 16 bit loads, upper half ORed in shifted by 16 }
  4127. if ref.alignment=2 then
  4128. begin
  4129. if target_info.endian=endian_big then
  4130. inc(usedtmpref.offset,2);
  4131. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  4132. inc(usedtmpref.offset,dir*2);
  4133. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  4134. so.shiftimm:=16;
  4135. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4136. end
  4137. else
  { byte aligned: four byte loads, ORed in with shifts of 8, 16 and 24 }
  4138. begin
  4139. if target_info.endian=endian_big then
  4140. inc(usedtmpref.offset,3);
  4141. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4142. inc(usedtmpref.offset,dir);
  4143. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4144. so.shiftimm:=8;
  4145. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4146. inc(usedtmpref.offset,dir);
  4147. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4148. so.shiftimm:=16;
  4149. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4150. inc(usedtmpref.offset,dir);
  4151. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4152. so.shiftimm:=24;
  4153. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4154. end;
  4155. end
  4156. else
  { remaining sizes are loaded with a single instruction }
  4157. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  4158. end;
  4159. end
  4160. else
  { sufficiently aligned: plain load }
  4161. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { signed byte requested as an unsigned 16 bit value: post-process reg with
    a 16 -> 32 bit move; presumably this clears the sign bits above bit 15
    (a_load_reg_reg is not visible here - confirm) }
  4162. if (fromsize=OS_S8) and (tosize = OS_16) then
  4163. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  4164. end;
  { Two-operand register operation dst := op(src, dst).

    Only OP_NOT is handled here: it is emitted as MVN dst,src followed, for
    8 and 16 bit sizes, by the zero/sign extension that brings the result
    back into the range of the operand size. Unsupported sizes raise
    internalerror 2019050916. Every other operation is delegated to the
    inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      extendop : tasmop;
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { pick the extension instruction, A_NONE meaning "nothing to do" }
      extendop:=A_NONE;
      case size of
        OS_8:
          extendop:=A_UXTB;
        OS_S8:
          extendop:=A_SXTB;
        OS_16:
          extendop:=A_UXTH;
        OS_S16:
          extendop:=A_SXTH;
        OS_32,
        OS_S32:
          ;
        else
          internalerror(2019050916);
      end;
      if extendop<>A_NONE then
        list.concat(taicpu.op_reg_reg(extendop,dst,dst));
    end;
  { Emits dst := src op a for a constant operand a.

    ovloc starts as LOC_VOID. In the generic immediate path it becomes
    LOC_FLAGS when flags are requested (cgsetflags or setflags) and size is
    one of OS_8/OS_16/OS_32; the fallback at the end lets
    a_op_reg_reg_reg_checkoverflow fill it in.

    Overall structure:
      1. ADD/SUB whose negated constant is a shifter constant are flipped to
         SUB/ADD of -a.
      2. If a is a shifter constant and op is not a multiplication, shifts and
         rotates become MOV with a shifter operand, the remaining operations
         use an immediate (or a temp register for ADD/SUB outside 0..4095).
      3. Otherwise multiplications and AND get a series of special cases before
         falling back to loading a into a temp register.
    maybeadjustresult is always called on dst at the end. }
  4185. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4186. var
  4187. shift, width : byte;
  4188. tmpreg : tregister;
  4189. so : tshifterop;
  4190. l1 : longint;
  4191. begin
  4192. ovloc.loc:=LOC_VOID;
  { -2147483648 is excluded because it cannot be negated in 32 bits }
  4193. if (a<>-2147483648) and is_shifter_const(-a,shift) then
  4194. case op of
  4195. OP_ADD:
  4196. begin
  4197. op:=OP_SUB;
  4198. a:=aint(dword(-a));
  4199. end;
  4200. OP_SUB:
  4201. begin
  4202. op:=OP_ADD;
  4203. a:=aint(dword(-a));
  4204. end
  4205. else
  4206. ;
  4207. end;
  4208. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4209. case op of
  { these operations are not expected with a constant operand here }
  4210. OP_NEG,OP_NOT,
  4211. OP_DIV,OP_IDIV:
  4212. internalerror(200308285);
  { shifts/rotates: MOV dst,src,<shift> #a, or a plain MOV for a count of 0 }
  4213. OP_SHL:
  4214. begin
  4215. if a>32 then
  4216. internalerror(2014020703);
  4217. if a<>0 then
  4218. begin
  4219. shifterop_reset(so);
  4220. so.shiftmode:=SM_LSL;
  4221. so.shiftimm:=a;
  4222. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4223. end
  4224. else
  4225. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4226. end;
  4227. OP_ROL:
  4228. begin
  4229. if a>32 then
  4230. internalerror(2014020704);
  4231. if a<>0 then
  4232. begin
  { rotate left by a is emitted as rotate right by 32-a }
  4233. shifterop_reset(so);
  4234. so.shiftmode:=SM_ROR;
  4235. so.shiftimm:=32-a;
  4236. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4237. end
  4238. else
  4239. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4240. end;
  4241. OP_ROR:
  4242. begin
  4243. if a>32 then
  4244. internalerror(2014020705);
  4245. if a<>0 then
  4246. begin
  4247. shifterop_reset(so);
  4248. so.shiftmode:=SM_ROR;
  4249. so.shiftimm:=a;
  4250. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4251. end
  4252. else
  4253. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4254. end;
  4255. OP_SHR:
  4256. begin
  4257. if a>32 then
  4258. internalerror(200308292);
  4259. shifterop_reset(so);
  4260. if a<>0 then
  4261. begin
  4262. so.shiftmode:=SM_LSR;
  4263. so.shiftimm:=a;
  4264. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4265. end
  4266. else
  4267. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4268. end;
  4269. OP_SAR:
  4270. begin
  4271. if a>32 then
  4272. internalerror(200308295);
  4273. if a<>0 then
  4274. begin
  4275. shifterop_reset(so);
  4276. so.shiftmode:=SM_ASR;
  4277. so.shiftimm:=a;
  4278. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4279. end
  4280. else
  4281. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4282. end;
  4283. else
  { all remaining operations; ADD/SUB with a constant outside 0..4095 take
    the constant from a temp register, everything else uses the immediate }
  4284. if (op in [OP_SUB, OP_ADD]) and
  4285. ((a < 0) or
  4286. (a > 4095)) then
  4287. begin
  4288. tmpreg:=getintregister(list,size);
  4289. a_load_const_reg(list, size, a, tmpreg);
  4290. if cgsetflags or setflags then
  4291. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression yields PF_S when flags are wanted; otherwise it
    evaluates to toppostfix(0) }
  4292. list.concat(setoppostfix(
  4293. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4294. end
  4295. else
  4296. begin
  4297. if cgsetflags or setflags then
  4298. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4299. list.concat(setoppostfix(
  4300. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4301. end;
  { unsigned sizes with flags requested: report the result in the flags.
    resflags is only assigned for ADD (F_CS) and SUB (F_CC). }
  4302. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4303. begin
  4304. ovloc.loc:=LOC_FLAGS;
  4305. case op of
  4306. OP_ADD:
  4307. ovloc.resflags:=F_CS;
  4308. OP_SUB:
  4309. ovloc.resflags:=F_CC;
  4310. else
  4311. ;
  4312. end;
  4313. end;
  4314. end
  4315. else
  4316. begin
  4317. { there could be added some more sophisticated optimizations }
  { multiplication by 1, 0 and (signed only) -1 }
  4318. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4319. a_load_reg_reg(list,size,size,src,dst)
  4320. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4321. a_load_const_reg(list,size,0,dst)
  4322. else if (op in [OP_IMUL]) and (a=-1) then
  4323. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4324. { we do this here instead in the peephole optimizer because
  4325. it saves us a register }
  { the strength reductions below are skipped when flags are requested }
  4326. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4327. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4328. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4329. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4330. begin
  4331. if l1>32 then{roozbeh does this ever happen?}
  4332. internalerror(2003082911);
  4333. shifterop_reset(so);
  4334. so.shiftmode:=SM_LSL;
  4335. so.shiftimm:=l1;
  4336. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4337. end
  4338. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4339. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4340. begin
  4341. if l1>32 then{does this ever happen?}
  4342. internalerror(2012051803);
  4343. shifterop_reset(so);
  4344. so.shiftmode:=SM_LSL;
  4345. so.shiftimm:=l1;
  4346. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4347. end
  { last multiplication shortcut: the helper emits the code itself and
    returns true on success }
  4348. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4349. begin
  4350. { nothing to do on success }
  4351. end
  4352. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4353. Just using mov x, #0 might allow some easier optimizations down the line. }
  4354. else if (op = OP_AND) and (dword(a)=0) then
  4355. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4356. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4357. else if (op = OP_AND) and (not(dword(a))=0) then
  4358. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4359. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4360. broader range of shifterconstants.}
  4361. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4362. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  { AND with an encodable immediate, with $FFFF (UXTH), with an encodable
    inverted mask (BIC), or with a mask whose cleared bits form one
    contiguous field (copy + BFC) }
  4363. else if (op = OP_AND) and is_thumb32_imm(a) then
  4364. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  4365. else if (op = OP_AND) and (a = $FFFF) then
  4366. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  4367. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4368. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  4369. else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
  4370. begin
  4371. a_load_reg_reg(list,size,size,src,dst);
  4372. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4373. end
  4374. else
  { generic fallback: materialise the constant and use the register form,
    which also takes care of ovloc }
  4375. begin
  4376. tmpreg:=getintregister(list,size);
  4377. a_load_const_reg(list,size,a,tmpreg);
  4378. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4379. end;
  4380. end;
  4381. maybeadjustresult(list,op,size,dst);
  4382. end;
  { Lookup table from a generic TOpCG operation to the Thumb-2 opcode used by
    the three-register form in a_op_reg_reg_reg_checkoverflow. A_NONE marks
    operations without a directly corresponding instruction.
    NOTE(review): the entries must follow the declaration order of TOpCG,
    which is not visible in this unit section - verify when TOpCG changes. }
  4383. const
  4384. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4385. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4386. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Emits dst := src2 op src1 for register operands.

    list     - instruction list the code is appended to
    op       - operation; OP_NEG/OP_NOT are rejected (internalerror 200308286)
    size     - operand size; rotates require a 32 bit size
    src1     - second operand (shift/rotate count for shifts and rotates);
               it is never modified by the generated code
    src2     - first operand
    dst      - destination register
    setflags - request a flag-setting instruction (also forced by cgsetflags)
    ovloc    - LOC_VOID on return, except for flag-checked multiplications
               where it is LOC_FLAGS/F_NE (high result word compared against
               the expected extension of the low word)

    maybeadjustresult is called on dst at the end. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the adjusted count is built in
              a temp register so that the caller's src1 stays intact }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { overflow check: do a 32x32->64 multiply and inspect the
                  high word }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply through a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: high word must equal the sign extension of the
                      low word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply through a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { the postfix expression yields PF_S when flags are wanted }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the flag condition f as 0/1 in reg using an ITE block:
    a conditional MOV reg,#1 for the condition itself followed by a
    MOV reg,#0 for the inverse condition. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,truecond));
      { "then" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      { "else" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Generates the procedure prologue.

    Nothing is emitted when nostackframe is set. Otherwise the code
      - determines which non-volatile FPA registers are used (FPA only),
      - copies SP to R12 when a frame pointer different from SP is in use,
      - stores the used non-volatile integer registers (plus frame pointer and
        R14 where required) with STM..FD on the stack pointer,
      - sets the frame pointer to R12-4,
      - lowers SP by the local size, padded so the stack keeps
        current_settings.alignment.localalignmax alignment,
      - stores the used FPA registers with SFM.
    stackmisalignment counts the bytes stored by the register saves (12 per
    FPA register, 4 per integer register). }
  4502. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4503. var
  4504. ref : treference;
  4505. shift : byte;
  4506. firstfloatreg,lastfloatreg,
  4507. r : byte;
  4508. regs : tcpuregisterset;
  4509. stackmisalignment: pint;
  4510. begin
  4511. LocalSize:=align(LocalSize,4);
  4512. { call instruction does not put anything on the stack }
  4513. stackmisalignment:=0;
  4514. if not(nostackframe) then
  4515. begin
  4516. firstfloatreg:=RS_NO;
  4517. lastfloatreg:=RS_NO;
  4518. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4519. begin
  4520. { save floating point registers? }
  { record the first and last used non-volatile FPA register; the store
    itself happens at the end of the prologue }
  4521. for r:=RS_F0 to RS_F7 do
  4522. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4523. begin
  4524. if firstfloatreg=RS_NO then
  4525. firstfloatreg:=r;
  4526. lastfloatreg:=r;
  4527. inc(stackmisalignment,12);
  4528. end;
  4529. end;
  4530. a_reg_alloc(list,NR_STACK_POINTER_REG);
  { with a separate frame pointer, R12 keeps the entry value of SP }
  4531. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4532. begin
  4533. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4534. a_reg_alloc(list,NR_R12);
  4535. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4536. end;
  4537. { save int registers }
  4538. reference_reset(ref,4,[]);
  4539. ref.index:=NR_STACK_POINTER_REG;
  4540. ref.addressmode:=AM_PREINDEXED;
  4541. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { R14 is saved whenever anything else is saved or the procedure makes a
    call; a separate frame pointer forces saving it together with R14 }
  4542. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4543. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4544. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4545. include(regs,RS_R14);
  4546. if regs<>[] then
  4547. begin
  4548. for r:=RS_R0 to RS_R15 do
  4549. if (r in regs) then
  4550. inc(stackmisalignment,4);
  4551. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4552. end;
  4553. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4554. begin
  4555. { the framepointer now points to the saved R15, so the saved
  4556. framepointer is at R11-12 (for get_caller_frame) }
  4557. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4558. a_reg_dealloc(list,NR_R12);
  4559. end;
  { only the remainder relative to the required stack alignment matters }
  4560. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4561. if (LocalSize<>0) or
  4562. ((stackmisalignment<>0) and
  4563. ((pi_do_call in current_procinfo.flags) or
  4564. (po_assembler in current_procinfo.procdef.procoptions))) then
  4565. begin
  { pad the locals so that saved registers + locals end up aligned }
  4566. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4567. if not(is_shifter_const(localsize,shift)) then
  4568. begin
  { size not encodable as an immediate: go through R12.
    NOTE(review): with a separate frame pointer R12 was already deallocated
    above and is not re-allocated before this use - confirm this is intended }
  4569. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4570. a_reg_alloc(list,NR_R12);
  4571. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4572. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4573. a_reg_dealloc(list,NR_R12);
  4574. end
  4575. else
  4576. begin
  { NOTE(review): on this path R12 is either never allocated (SP used as
    frame pointer) or already deallocated above - confirm the dealloc is
    harmless }
  4577. a_reg_dealloc(list,NR_R12);
  4578. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4579. end;
  4580. end;
  4581. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4582. begin
  4583. if firstfloatreg<>RS_NO then
  4584. begin
  4585. reference_reset(ref,4,[]);
  { large save-area offsets are computed into R12, small ones are used
    directly as an offset from the frame pointer }
  4586. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4587. begin
  4588. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4589. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4590. ref.base:=NR_R12;
  4591. end
  4592. else
  4593. begin
  4594. ref.base:=current_procinfo.framepointer;
  4595. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4596. end;
  { store the whole range firstfloatreg..lastfloatreg with one SFM }
  4597. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4598. lastfloatreg-firstfloatreg+1,ref));
  4599. end;
  4600. end;
  4601. end;
  4602. end;
  { Generates the procedure epilogue.

    With nostackframe only BX R14 is emitted. Otherwise the code
      - reloads the used non-volatile FPA registers with LFM (FPA only),
      - releases the locals by adding the padded stack frame size to SP,
      - returns either with BX R14 (no registers to restore) or with an
        LDM..FD on the stack pointer whose register set contains R15 in
        place of R14, so restoring the registers also performs the return. }
  4603. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4604. var
  4605. ref : treference;
  4606. firstfloatreg,lastfloatreg,
  4607. r : byte;
  4608. shift : byte;
  4609. regs : tcpuregisterset;
  4610. LocalSize : longint;
  4611. stackmisalignment: pint;
  4612. begin
  4613. if not(nostackframe) then
  4614. begin
  4615. stackmisalignment:=0;
  4616. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4617. begin
  4618. { restore floating point register }
  4619. firstfloatreg:=RS_NO;
  4620. lastfloatreg:=RS_NO;
  4621. { save floating point registers? }
  { same scan as in g_proc_entry: find first/last used non-volatile FPA
    register.
    NOTE(review): g_proc_entry adds 12 bytes per register to its
    stackmisalignment while this routine does not (see the comment below) -
    confirm both compute the same stack adjustment }
  4622. for r:=RS_F0 to RS_F7 do
  4623. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4624. begin
  4625. if firstfloatreg=RS_NO then
  4626. firstfloatreg:=r;
  4627. lastfloatreg:=r;
  4628. { floating point register space is already included in
  4629. localsize below by calc_stackframe_size
  4630. inc(stackmisalignment,12);
  4631. }
  4632. end;
  4633. if firstfloatreg<>RS_NO then
  4634. begin
  4635. reference_reset(ref,4,[]);
  { large save-area offsets are computed into R12, small ones are used
    directly as an offset from the frame pointer }
  4636. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4637. begin
  4638. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4639. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4640. ref.base:=NR_R12;
  4641. end
  4642. else
  4643. begin
  4644. ref.base:=current_procinfo.framepointer;
  4645. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4646. end;
  4647. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4648. lastfloatreg-firstfloatreg+1,ref));
  4649. end;
  4650. end;
  4651. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { the prologue saved R14 in this case; load that slot into R15 instead so
    the register restore doubles as the return }
  4652. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4653. begin
  4654. exclude(regs,RS_R14);
  4655. include(regs,RS_R15);
  4656. end;
  4657. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4658. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
  { 4 bytes per restored integer register }
  4659. for r:=RS_R0 to RS_R15 do
  4660. if (r in regs) then
  4661. inc(stackmisalignment,4);
  4662. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4663. LocalSize:=current_procinfo.calc_stackframe_size;
  4664. if (LocalSize<>0) or
  4665. ((stackmisalignment<>0) and
  4666. ((pi_do_call in current_procinfo.flags) or
  4667. (po_assembler in current_procinfo.procdef.procoptions))) then
  4668. begin
  { same padding formula as in the prologue }
  4669. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4670. if not(is_shifter_const(LocalSize,shift)) then
  4671. begin
  { size not encodable as an immediate: add it via R12 }
  4672. a_reg_alloc(list,NR_R12);
  4673. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4674. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4675. a_reg_dealloc(list,NR_R12);
  4676. end
  4677. else
  4678. begin
  { NOTE(review): R12 is deallocated here without an allocation in this
    routine on this path - confirm this is harmless }
  4679. a_reg_dealloc(list,NR_R12);
  4680. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4681. end;
  4682. end;
  { nothing was saved: plain return through the link register }
  4683. if regs=[] then
  4684. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4685. else
  4686. begin
  4687. reference_reset(ref,4,[]);
  4688. ref.index:=NR_STACK_POINTER_REG;
  4689. ref.addressmode:=AM_PREINDEXED;
  4690. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4691. end;
  4692. end
  4693. else
  4694. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4695. end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" after rewriting
    ref into a form this routine accepts, and returns the reference as it was
    finally used.

    ref is a by-value parameter, so the caller's reference is not modified.
    Rewriting steps, in order:
      1. make sure a base register is set (an index-only reference is turned
         into a base-only one),
      2. references with a non PC-relative symbol, without any register, or
         with an offset outside the accepted range get their symbol/offset
         loaded into a temp register that becomes base or index,
      3. base+index+offset references get the offset folded into a register,
      4. a PC base combined with a shifted index is replaced by a copy of PC,
      5. for floating point/VFP opcodes an index register is folded into the
         base, because only base+offset is expected there. }
  4696. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4697. var
  4698. tmpreg : tregister;
  4699. tmpref : treference;
  4700. l : tasmlabel;
  4701. begin
  { NR_NO doubles as "no temp register allocated yet" further down }
  4702. tmpreg:=NR_NO;
  4703. { Be sure to have a base register }
  4704. if (ref.base=NR_NO) then
  4705. begin
  4706. if ref.shiftmode<>SM_None then
  4707. internalerror(2014020706);
  4708. ref.base:=ref.index;
  4709. ref.index:=NR_NO;
  4710. end;
  4711. { absolute symbols can't be handled directly, we've to store the symbol reference
  4712. in the text segment and access it pc relative
  4713. For now, we assume that references where base or index equals to PC are already
  4714. relative, all other references are assumed to be absolute and thus they need
  4715. to be handled extra.
  4716. A proper solution would be to change refoptions to a set and store the information
  4717. if the symbol is absolute or relative there.
  4718. }
  4719. if (assigned(ref.symbol) and
  4720. not(is_pc(ref.base)) and
  4721. not(is_pc(ref.index))
  4722. ) or
  4723. { [#xxx] isn't a valid address operand }
  4724. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4725. //(ref.offset<-4095) or
  { accepted immediate offset range for plain loads/stores: -255..4095 }
  4726. (ref.offset<-255) or
  4727. (ref.offset>4095) or
  { signed byte and halfword accesses are limited to -255..255 here }
  4728. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4729. ((ref.offset<-255) or
  4730. (ref.offset>255)
  4731. )
  4732. ) or
  { floating point/VFP accesses: offset within -1020..1020, a multiple of 4,
    and no symbol }
  4733. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4734. ((ref.offset<-1020) or
  4735. (ref.offset>1020) or
  4736. ((abs(ref.offset) mod 4)<>0) or
  4737. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4738. assigned(ref.symbol)
  4739. )
  4740. ) then
  4741. begin
  4742. reference_reset(tmpref,4,[]);
  4743. { load symbol }
  4744. tmpreg:=getintregister(list,OS_INT);
  4745. if assigned(ref.symbol) then
  4746. begin
  { place a labelled constant describing symbol+offset in the procedure's
    local data; the kind of constant depends on ref.refaddr }
  4747. current_asmdata.getjumplabel(l);
  4748. cg.a_label(current_procinfo.aktlocaldata,l);
  4749. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4750. if ref.refaddr=addr_gottpoff then
  4751. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  4752. else if ref.refaddr=addr_tlsgd then
  4753. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  4754. else if ref.refaddr=addr_tlsdesc then
  4755. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  4756. else if ref.refaddr=addr_tpoff then
  4757. begin
  { tpoff constants carry neither a relative symbol nor an offset }
  4758. if assigned(ref.relsymbol) or (ref.offset<>0) then
  4759. Internalerror(2019092807);
  4760. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  4761. end
  4762. else
  4763. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4764. { load consts entry }
  4765. tmpref.symbol:=l;
  4766. tmpref.base:=NR_R15;
  4767. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4768. { in case of LDF/STF, we got rid of the NR_R15 }
  4769. if is_pc(ref.base) then
  4770. ref.base:=NR_NO;
  4771. if is_pc(ref.index) then
  4772. ref.index:=NR_NO;
  4773. end
  4774. else
  { no symbol: only the offset is out of range, load it as a constant }
  4775. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
  { merge tmpreg into the reference: as index when only a base exists, added
    to the base when an index is already present, or as the base itself }
  4776. if (ref.base<>NR_NO) then
  4777. begin
  4778. if ref.index<>NR_NO then
  4779. begin
  4780. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4781. ref.base:=tmpreg;
  4782. end
  4783. else
  4784. begin
  4785. ref.index:=tmpreg;
  4786. ref.shiftimm:=0;
  4787. ref.signindex:=1;
  4788. ref.shiftmode:=SM_None;
  4789. end;
  4790. end
  4791. else
  4792. ref.base:=tmpreg;
  { symbol and offset now live in tmpreg }
  4793. ref.offset:=0;
  4794. ref.symbol:=nil;
  4795. end;
  { base + index + offset cannot be encoded together: fold the offset into a
    register }
  4796. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4797. begin
  { NOTE(review): tmpreg is only assigned by the rewrite above, which also
    zeroes ref.offset, so the first branch looks unreachable from here }
  4798. if tmpreg<>NR_NO then
  4799. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4800. else
  4801. begin
  4802. tmpreg:=getintregister(list,OS_ADDR);
  4803. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4804. ref.base:=tmpreg;
  4805. end;
  4806. ref.offset:=0;
  4807. end;
  4808. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  4809. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4810. begin
  4811. tmpreg:=getintregister(list,OS_ADDR);
  4812. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4813. ref.base := tmpreg;
  4814. end;
  4815. { floating point operations have only limited references
  4816. we expect here, that a base is already set }
  4817. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4818. begin
  4819. if ref.shiftmode<>SM_none then
  4820. internalerror(2003091202);
  4821. if tmpreg<>NR_NO then
  4822. begin
  { a temp register already exists: reuse it as the combined base, honouring
    the sign of the index }
  4823. if ref.base=tmpreg then
  4824. begin
  4825. if ref.signindex<0 then
  4826. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4827. else
  4828. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4829. ref.index:=NR_NO;
  4830. end
  4831. else
  4832. begin
  { otherwise the temp register must be the index }
  4833. if ref.index<>tmpreg then
  4834. internalerror(2004031602);
  4835. if ref.signindex<0 then
  4836. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4837. else
  4838. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4839. ref.base:=tmpreg;
  4840. ref.index:=NR_NO;
  4841. end;
  4842. end
  4843. else
  4844. begin
  { no temp register yet: compute base+index into a fresh one }
  4845. tmpreg:=getintregister(list,OS_ADDR);
  4846. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4847. ref.base:=tmpreg;
  4848. ref.index:=NR_NO;
  4849. end;
  4850. end;
  { finally emit the access itself and hand back the reference that was used }
  4851. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4852. Result := ref;
  4853. end;
  { Copies MM register reg1 into reg2.

    Only the single precision to single precision case emits code
    (VMOV.F32, registered as a move instruction). Every other size
    combination currently emits nothing; shuffle is ignored.
    NOTE(review): the double precision and single->double cases are only
    present as disabled code below - confirm callers never rely on them. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if not((fromsize=OS_F32) and (tosize=OS_F32)) then
        begin
          // disabled F64 -> F64 variant:
          //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
          //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
          // disabled F32 -> F64 variant:
          //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32))
          exit;
        end;

      movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads MM register reg from the memory reference ref by emitting a VLDR
    (no postfix) through handle_load_store.
    fromsize, tosize and shuffle are not consulted. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VLDR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Stores MM register reg to the memory reference ref by emitting a VSTR
    (no postfix) through handle_load_store.
    fromsize, tosize and shuffle are not consulted. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      { VSTR cannot generate an FPU exception, so we do not need a check here }
      handle_load_store(list,
                        A_VSTR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Moves the integer register intreg into the MM register mmreg with a
    single VMOV.  Only a 32 bit float destination (tosize = OS_F32) is
    handled; any other tosize raises internal error 2012100813.
    fromsize is ignored and shuffle is deliberately not checked (the
    shuffle=nil test is disabled). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { guard: reject everything but single precision } //(shuffle=nil) and
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves the MM register mmreg into the integer register intreg with a
    single VMOV.  Only a 32 bit float source (fromsize = OS_F32) is handled;
    any other fromsize raises internal error 2012100814.
    tosize is ignored and shuffle is deliberately not checked (the
    shuffle=nil test is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      { guard: reject everything but single precision } //(shuffle=nil) and
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          { VMOV cannot generate an FPU exception, so we do not need a check here }
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is implemented here; every other operation is forwarded
    unchanged to the inherited implementation.

    The negation is emitted as
      RSBS  dst.lo, src.lo, #0
      MOV   zero, #0
      SBC   dst.hi, zero, src.hi
    with NR_DEFAULTFLAGS marked allocated around the whole sequence. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: reverse subtract from zero, S postfix so the flags are updated }
      list.concat(
        setoppostfix(
          taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),
          PF_S));
      { high word: a scratch register holding zero is the minuend of the SBC }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation for Thumb.

    list   - assembler list the instructions are appended to
    op     - operation; OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD and
             OP_SUB are supported, anything else raises internal error
             2003083105
    size   - not consulted by this implementation
    regsrc - source register pair
    regdst - destination register pair (also the left operand of the
             two-operand ADD/ADC/SUB/SBC forms emitted here)

    The carry-propagating operations (OP_NEG, OP_ADD, OP_SUB) mark
    NR_DEFAULTFLAGS as allocated before the low-word instruction and release
    it again after the high-word instruction, so that the flags are only
    reported live for the duration of the pair. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { dst := 0, then dst := dst - src with borrow propagation.
              NOTE(review): the destination is zeroed before the source is
              read, so this presumably relies on regdst and regsrc being
              different registers - confirm against callers. }
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            { no carry between the halves: handle each word independently }
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations: handle each word independently }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { the carry has been consumed: release the flags again, as OP_NEG does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { the borrow has been consumed: release the flags again, as OP_NEG does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg op value.

    list  - assembler list the instructions are appended to
    op    - operation; OP_AND, OP_OR, OP_XOR, OP_ADD and OP_SUB are
            supported, anything else raises internal error 2003083106
    size  - not consulted by this implementation
    value - 64 bit constant operand, split with lo()/hi() into two words
    reg   - register pair that is both operand and destination

    For OP_ADD/OP_SUB the low word is combined first (with an immediate when
    it lies in 0..255, otherwise through a scratch register) and the high
    word follows with ADC/SBC.  All constants are materialised BEFORE
    NR_DEFAULTFLAGS is allocated, so that nothing is emitted between the
    flag-producing low-word instruction and the carry-consuming high-word
    instruction; a_load_const_reg is not visible from here and is therefore
    not assumed to preserve the carry flag.  The flags are released again
    once the high-word instruction has been emitted. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg,
      hireg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations: handle each word independently }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            { load the high word first, while the flags are not yet live }
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),hireg);
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,reg.reghi,hireg),PF_S));
            { the carry has been consumed: release the flags }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            { load the high word first, while the flags are not yet live }
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),hireg);
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,reg.reghi,hireg),PF_S));
            { the borrow has been consumed: release the flags }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects matching the selected
    instruction set:
      Thumb-2 -> tthumb2cgarm / tthumb2cg64farm / TCpuThumb2AsmOptimizer
      Thumb   -> tthumbcgarm  / tthumbcg64farm  (casmoptimizer is left
                 untouched; the assignment of a Thumb optimizer is disabled)
      ARM     -> tarmcgarm    / tarmcg64farm    / TCpuAsmOptimizer
    Thumb-2 takes precedence over Thumb. }
  procedure create_codegen;
    begin
      if not(GenerateThumb2Code or GenerateThumbCode) then
        begin
          { plain ARM }
          cg:=tarmcgarm.create;
          cg64:=tarmcg64farm.create;
          casmoptimizer:=TCpuAsmOptimizer;
        end
      else if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end
      else
        begin
          { Thumb }
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
        end;
    end;
  5033. end.