cgcpu.pas 223 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  28. { tbasecgarm is shared between all arm architectures }
  { Base ARM code generator class derived from tcg. It declares the parameter
    passing, call, load/store, FPU, MM register, compare/jump and procedure
    entry/exit routines that the more specific ARM code generators below
    inherit or override. }
  29. tbasecgarm = class(tcg)
  30. { true, if the next arithmetic operation should modify the flags }
  31. cgsetflags : boolean;
  32. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  33. protected
  34. procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  { creates rg[R_MMREGISTER] depending on the capabilities of the selected FPU type }
  35. procedure init_mmregister_allocator;
  36. public
  37. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
  38. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  39. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  40. { move instructions }
  41. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  42. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
  { the a_internal_load_* variants return a treference; a_op_const_ref reuses
    the returned reference for the subsequent store back to memory }
  43. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  44. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  45. { fpu move instructions }
  46. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  47. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  48. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  49. procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
  50. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  51. { comparison operations }
  52. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  53. l : tasmlabel);override;
  54. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  55. procedure a_jmp_name(list : TAsmList;const s : string); override;
  56. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  57. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  58. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  59. procedure g_profilecode(list : TAsmList); override;
  60. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  61. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  62. procedure g_maybe_got_init(list : TAsmList); override;
  63. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
  { g_concatcopy and g_concatcopy_unaligned take the same parameters as
    g_concatcopy_internal minus the aligned flag; presumably both delegate
    to it -- confirm in the implementation }
  64. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  65. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
  66. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  67. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  68. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  69. procedure g_save_registers(list : TAsmList);override;
  70. procedure g_restore_registers(list : TAsmList);override;
  71. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  72. procedure fixref(list : TAsmList;var ref : treference);
  { virtual so that the Thumb and Thumb-2 code generators can override it }
  73. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
  74. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  75. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  76. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  77. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  78. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  79. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  80. { Transform unsupported methods into internal errors }
  81. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
  82. { try to generate an optimized 32 bit multiplication, returns true if the code was generated successfully }
  83. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  84. { clear out potential overflow bits from 8 or 16 bit operations, i.e.
  85. the upper 24/16 bits of a register after an operation }
  86. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  87. { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
  88. procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  89. procedure g_maybe_tls_init(list : TAsmList); override;
  90. end;
  91. { tcgarm is shared between normal arm and thumb-2 }
  { Adds the integer arithmetic/logic operations, constant and memory loads,
    the self pointer adjustment and the 32x32 multiplication into a register
    pair on top of tbasecgarm. }
  92. tcgarm = class(tbasecgarm)
  93. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  94. procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
  95. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  { the two non-checking three operand variants forward to the
    *_checkoverflow variants with setflags=false }
  96. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  97. size: tcgsize; a: tcgint; src, dst: tregister); override;
  98. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  99. size: tcgsize; src1, src2, dst: tregister); override;
  100. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  101. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  102. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  103. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  104. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  105. {Multiply two 32-bit registers into lo and hi 32-bit registers}
  106. procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  107. end;
  108. { normal arm cg }
  { Only adds creation and destruction of the register allocators to tcgarm. }
  109. tarmcgarm = class(tcgarm)
  110. procedure init_register_allocators;override;
  111. procedure done_register_allocators;override;
  112. end;
  113. { 64 bit cg for all arm flavours }
  { Empty common ancestor derived from tcg64f32; it declares no members of its own. }
  114. tbasecg64farm = class(tcg64f32)
  115. end;
  116. { tcg64farm is shared between normal arm and thumb-2 }
  { Overrides the 64 bit operations (with and without overflow checking) and
    the moves between a 64 bit integer register pair and an MM register. }
  117. tcg64farm = class(tbasecg64farm)
  118. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  119. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  120. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  121. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
  122. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  123. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  124. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  125. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  126. end;
  { 64 bit counterpart of tarmcgarm (judging by the name); inherits everything
    from tcg64farm without changes. }
  127. tarmcg64farm = class(tcg64farm)
  128. end;
  { Thumb code generator (judging by the name). It derives directly from
    tbasecgarm rather than tcgarm, so it provides its own versions of the
    arithmetic, load, flag and procedure entry/exit routines. }
  129. tthumbcgarm = class(tbasecgarm)
  130. procedure init_register_allocators;override;
  131. procedure done_register_allocators;override;
  132. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  133. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  134. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
  135. procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
  136. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
  137. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  138. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
  139. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  140. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  { replaces the virtual load/store helper declared in tbasecgarm }
  141. function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  142. end;
  { 64 bit code generator paired with tthumbcgarm (judging by the name);
    overrides only the two operand 64 bit operations of tbasecg64farm. }
  143. tthumbcg64farm = class(tbasecg64farm)
  144. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  145. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  146. end;
  { Thumb-2 code generator (judging by the name). It builds on tcgarm and
    overrides register allocator setup, calls through a register, loads,
    arithmetic with overflow checking, flag materialisation, procedure
    entry/exit, the load/store helper and the MM register moves. }
  147. tthumb2cgarm = class(tcgarm)
  148. procedure init_register_allocators;override;
  149. procedure done_register_allocators;override;
  150. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  151. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  152. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  153. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  154. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  155. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  156. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  157. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  158. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  159. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
  160. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  161. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  162. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  163. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  164. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  165. end;
  { 64 bit code generator paired with tthumb2cgarm (judging by the name);
    overrides only a_op64_reg_reg of tcg64farm. }
  166. tthumb2cg64farm = class(tcg64farm)
  167. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  168. end;
  { Interface level constants and routines of the unit. }
  169. const
  { Lookup table from a generic comparison operator (topcmp) to an ARM
    condition code.
    NOTE(review): the entries depend on the declaration order of topcmp, which
    is not visible in this unit -- keep both in sync. }
  170. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  171. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
  { NOTE(review): presumably the stack page size in bytes for Windows targets;
    confirm at the places where it is used. }
  172. winstackpagesize = 4096;
  { returns the instruction postfix (PF_S, PF_D or PF_E) for a float definition }
  173. function get_fpu_postfix(def : tdef) : toppostfix;
  174. procedure create_codegen;
  175. implementation
  176. uses
  177. globals,verbose,systems,cutils,
  178. aopt,aoptcpu,
  179. fmodule,
  180. symconst,symsym,symtable,
  181. tgobj,
  182. procinfo,cpupi,
  183. paramgr;
  184. { Range check must be disabled explicitly as conversions between signed and unsigned
  185. 32-bit values are done without explicit typecasts }
  186. {$R-}
  { Returns the instruction postfix that belongs to the floating point type
    described by def: PF_S for s32real, PF_D for s64real and PF_E for s80real.
    Any other float type, or a definition that is not a floatdef at all,
    triggers an internal error. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        begin
          case tfloatdef(def).floattype of
            s32real:
              result:=PF_S;
            s64real:
              result:=PF_D;
            s80real:
              result:=PF_E;
            else
              internalerror(200401272);
          end;
        end;
    end;
  { Creates the integer, FPU and MM register allocators for plain ARM.
    The order of the registers in each list is significant and is kept as is. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { R14 is always saved at the moment, hence it is available to the
        allocator in all configurations below }
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin, it is always used as frame pointer
          (for backtrace support -- also in gcc/clang), so R11 can be used.
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 does not serve as frame pointer here, so hand it to the allocator }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { the FPA register file only exists if the selected FPU type has it }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and MM register allocators and then lets the
    inherited implementation finish the clean-up. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { the FPU and MM allocators are only created for some FPU types }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg using the first strategy that applies:
      1. a single MOV if a is a shifter constant,
      2. a single MVN if not(a) is a shifter constant,
      3. MOV followed by ORR if a splits into two shifter constants,
      4. MVN followed by BIC if not(a) splits into two shifter constants,
      5. otherwise the value is stored in the local data of the current
         procedure and fetched with a PC relative LDR.
    Only 8, 16 and 32 bit sizes are accepted. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      shift : byte;
      datalabel : tasmlabel;
      dataref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      if is_shifter_const(a,shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_shifter_const(not(a),shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: emit the value as data behind a fresh label and load it
        relative to the program counter }
      reference_reset(dataref,4,[]);
      current_asmdata.getjumplabel(datalabel);
      cg.a_label(current_procinfo.aktlocaldata,datalabel);
      { must be taken right after the label was added and before the constant
        is appended, so that it refers to the label entry }
      dataref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      dataref.symbol:=datalabel;
      dataref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,dataref));
    end;
  { Loads a value from the memory reference Ref into reg.
    - If fromsize is at least as large as tosize, the load is done with tosize.
    - If the reference is only 1 or 2 byte aligned and that is less than the
      access size, or if the CPU lacks CPUARM_HAS_ALL_MEM and a halfword access
      would be needed, the value is assembled from smaller loads that are
      combined with ORR and a left shift.
    - Otherwise a single LDR with the matching postfix is emitted through
      handle_load_store. }
  276. procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  277. var
  278. oppostfix:toppostfix;
  279. usedtmpref: treference;
  280. tmpreg,tmpreg2 : tregister;
  281. so : tshifterop;
  282. dir : integer;
  283. begin
  { never load more than the destination size asks for }
  284. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  285. FromSize := ToSize;
  286. case FromSize of
  287. { integer sizes, signed and unsigned: select the load postfix }
  288. OS_8:
  289. oppostfix:=PF_B;
  290. OS_S8:
  291. oppostfix:=PF_SB;
  292. OS_16:
  293. oppostfix:=PF_H;
  294. OS_S16:
  295. oppostfix:=PF_SH;
  296. OS_32,
  297. OS_S32:
  298. oppostfix:=PF_None;
  299. else
  300. InternalError(200308297);
  301. end;
  { without CPUARM_HAS_ALL_MEM a signed byte is loaded with the plain byte
    postfix; the conversion is done with a_load_reg_reg at the end }
  302. if (fromsize=OS_S8) and
  303. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  304. oppostfix:=PF_B;
  305. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
  306. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  307. (oppostfix in [PF_SH,PF_H])) then
  308. begin
  { dir is the step applied to the offset between the partial loads; on
    big endian targets the first load starts at the highest offset and the
    following loads walk downwards }
  309. if target_info.endian=endian_big then
  310. dir:=-1
  311. else
  312. dir:=1;
  313. case FromSize of
  314. OS_16,OS_S16:
  315. begin
  316. { only complicated references need an extra loadaddr }
  317. if assigned(ref.symbol) or
  318. (ref.index<>NR_NO) or
  319. (ref.offset<-4095) or
  320. (ref.offset>4094) or
  321. { sometimes the compiler reused registers }
  322. (reg=ref.index) or
  323. (reg=ref.base) then
  324. begin
  325. tmpreg2:=getintregister(list,OS_INT);
  326. a_loadaddr_ref_reg(list,ref,tmpreg2);
  327. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  328. end
  329. else
  330. usedtmpref:=ref;
  331. if target_info.endian=endian_big then
  332. inc(usedtmpref.offset,1);
  333. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  334. tmpreg:=getintregister(list,OS_INT);
  { first byte goes straight into reg, the second one into tmpreg; for
    OS_S16 the second byte is loaded as a signed byte }
  335. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  336. inc(usedtmpref.offset,dir);
  337. if FromSize=OS_16 then
  338. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  339. else
  340. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  341. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  342. end;
  343. OS_32,OS_S32:
  344. begin
  345. tmpreg:=getintregister(list,OS_INT);
  346. { only complicated references need an extra loadaddr }
  347. if assigned(ref.symbol) or
  348. (ref.index<>NR_NO) or
  349. (ref.offset<-4095) or
  350. (ref.offset>4092) or
  351. { sometimes the compiler reused registers }
  352. (reg=ref.index) or
  353. (reg=ref.base) then
  354. begin
  355. tmpreg2:=getintregister(list,OS_INT);
  356. a_loadaddr_ref_reg(list,ref,tmpreg2);
  357. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  358. end
  359. else
  360. usedtmpref:=ref;
  361. shifterop_reset(so);so.shiftmode:=SM_LSL;
  362. if ref.alignment=2 then
  363. begin
  { two halfword loads: reg := first or (second shl 16) }
  364. if target_info.endian=endian_big then
  365. inc(usedtmpref.offset,2);
  366. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  367. inc(usedtmpref.offset,dir*2);
  368. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  369. so.shiftimm:=16;
  370. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  371. end
  372. else
  373. begin
  { four byte loads; tmpreg is used for the 2nd and, after it has been
    merged with a shift of 8, again for the 4th byte, tmpreg2 holds the 3rd }
  374. tmpreg2:=getintregister(list,OS_INT);
  375. if target_info.endian=endian_big then
  376. inc(usedtmpref.offset,3);
  377. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  378. inc(usedtmpref.offset,dir);
  379. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  380. inc(usedtmpref.offset,dir);
  381. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  382. so.shiftimm:=8;
  383. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  384. inc(usedtmpref.offset,dir);
  385. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  386. so.shiftimm:=16;
  387. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  388. so.shiftimm:=24;
  389. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  390. end;
  391. end
  392. else
  { byte sized accesses need no splitting }
  393. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  394. end;
  395. end
  396. else
  397. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { post-processing of signed byte loads: without CPUARM_HAS_ALL_MEM the byte
    was loaded with PF_B above and is converted from OS_S8 to OS_32 here;
    otherwise, when the destination size is OS_16, the register is converted
    from OS_16 to OS_32 }
  398. if (fromsize=OS_S8) and
  399. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  400. a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
  401. else if (fromsize=OS_S8) and (tosize = OS_16) then
  402. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  403. end;
  { Subtracts ioffset from the 'self' parameter of procdef in every location
    the parameter occupies on the caller side. If ioffset is a shifter
    constant the subtraction uses it directly, otherwise it is first loaded
    into R12. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
      dummyshift : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not assigned(selfsym) or (selfsym.typ<>paravarsym) then
        internalerror(2003052503);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,dummyshift) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,dummyshift) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,selfref);
                  end;
              end;
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as a parameter. The parameter must have a simple
    (single) location: a register receives the constant directly, a memory
    location is written through a reference built from the index register and
    offset recorded in the parameter location. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      destref: treference;
      destloc: PCGParaLocation;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      destloc:=paraloc.location;
      case destloc^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,destloc^.register);
        LOC_REFERENCE:
          begin
            reference_reset(destref,paraloc.alignment,[]);
            destref.base:=destloc^.reference.index;
            destref.offset:=destloc^.reference.offset;
            a_load_const_ref(list,size,a,destref);
          end;
        else
          internalerror(2002081101);
      end;
    end;
  { Copies (part of) a parameter from ref to the parameter location reference.
    A 64 bit float parameter that is passed in 32 bit pieces is copied as a
    plain 4 byte block; every other case is left to the inherited
    implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Creates the MM register allocator according to the capabilities of the
    selected FPU type. No allocator is created if none of the tested
    capabilities is present. }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            { 32 registers with double precision support }
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
                [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                 RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                 RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                ],first_mm_imreg,[])
          else
            { 32 registers, single precision only }
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
                [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
                 RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
                 RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
                ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        { VFP extension with 16 double precision registers }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Passes the address of r as a parameter. The parameter must have a simple
    (single) location: for a register the address is computed directly into
    it, for a memory location it is computed into a temporary register which
    is then stored to the parameter slot. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      slotref: treference;
      addrreg: tregister;
      destloc: PCGParaLocation;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      destloc:=paraloc.location;
      case destloc^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,destloc^.register);
        LOC_REFERENCE:
          begin
            reference_reset(slotref,paraloc.alignment,[]);
            slotref.base:=destloc^.reference.index;
            slotref.offset:=destloc^.reference.offset;
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,slotref);
          end;
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call to the routine named s. With weak=true a weak symbol
    reference is used. The reference is marked as PIC when the target uses a
    GOT for PIC and PIC code is being created, otherwise as a full address.
    The current procedure is flagged as performing a call. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callee : TAsmSymbol;
      callref : treference;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);

      reference_reset_symbol(callref,callee,0,sizeof(pint),[]);
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_pic
      else
        callref.refaddr:=addr_full;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,callref));

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set here
        instead of raising internalerror(2003060703) when it is missing }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg: a single BLX when the CPU
    supports it, otherwise the return address is set up by copying PC to R14
    before reg is moved into PC. The current procedure is flagged as
    performing a call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX,reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set here
        instead of raising internalerror(2003060703) when it is missing }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Two operand form of a constant operation: reg := reg op a. Forwards to
    the three operand variant with reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,
                         op,
                         size,
                         a,
                         reg,   { source }
                         reg);  { destination }
    end;
  { Applies "op a" to the value stored at ref: the value is loaded into a
    temporary register, the operation writes its result into a second
    temporary register, and that result is stored back through the reference
    returned by the load. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      storeref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      { load }
      storeref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      { modify }
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      { store }
      a_load_reg_ref(list,size,size,resultreg,storeref);
    end;
  { Two operand register operation.
    - OP_NEG is emitted as RSB dst,src,#0 followed by maybeadjustresult.
    - OP_NOT is a plain MVN for 32 bit sizes; for 8 and 16 bit sizes the
      source is shifted left by 24 or 16 inside the MVN and the result is
      shifted back right by the same amount (ASR for signed, LSR for unsigned
      sizes).
    - Every other operation is forwarded to a_op_reg_reg_reg with dst as the
      second source. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shiftop.shiftimm:=24
                else
                  shiftop.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8, OS_S16] then
                  shiftop.shiftmode:=SM_ASR
                else
                  shiftop.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  { Lookup tables indexed by the generic operation TOpCG. A_NONE marks an
    operation without a direct instruction in the respective table.
    NOTE(review): the entries depend on the declaration order of TOpCG, which
    is not visible in this unit -- keep them in sync. }
  626. const
  { opcode table for operations on register operands; unlike the table below
    it contains no shift or rotate instructions }
  627. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  628. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  629. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
  { same as above, but with the shift and rotate instructions (ASR, LSL, LSR, ROR) }
  630. op_reg_opcg2asmop: array[TOpCG] of tasmop =
  631. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  632. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { instruction postfix per operation, presumably used by the Thumb code
    generator (name-based; no use is visible in this part of the file) }
  633. op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
  634. (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
  635. PF_None,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { dst := src op a without overflow detection. Forwards to the
    overflow-checking variant with setflags=false; the overflow location it
    requires is a local whose value is not used afterwards. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,
        false,unusedovloc);
    end;
  { Three register operation without overflow detection. Forwards to the
    overflow-checking variant with setflags=false; the overflow location it
    requires is a local whose value is not used afterwards. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,
        false,unusedovloc);
    end;
  { Maps a shift/rotate TOpCg onto the shifter-operand mode used to encode it.
    OP_ROL maps to SM_ROR as well: the callers in this unit turn a rotate left
    into a rotate right by (32 - amount).
    Any other operation raises internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace dst := src * a by a short sequence of shifted
    ADD/SUB/RSB/MOV instructions.

    Returns true when a replacement sequence was appended to list, false when
    nothing was emitted and the caller has to generate a real multiplication.

    An instruction budget is derived first from the shape of the constant;
    then either the set bits of the factor are added up, or the factor is
    expressed as "next power of two minus something", whichever fits into
    the budget. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      factor : dword;
      ua : dword;
      power : longint;
      shiftop : tshifterop;
      addcount : byte;
      isneg : boolean;
      initial, foldhalves: boolean;
      dummyshift,
      extracost : byte;
      budget : byte;
      topbit,i,foldshift: DWord;

    { Common tail of both expansion strategies: apply the optional
      "dst := dst + dst shl foldshift" step and the final negation. }
    procedure finish_product;
      begin
        if foldhalves then
          begin
            shiftop.shiftimm:=foldshift;
            list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shiftop));
          end;
        if isneg then
          list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
      end;

    begin
      result:=true;
      extracost:=0;
      isneg:=a<0;
      ua:=dword(a);
      shiftop.rs:=NR_NO;
      shiftop.shiftmode:=SM_LSL;
      { the final negation costs one instruction }
      if isneg then
        inc(extracost);
      factor:=dword(abs(a));

      { heuristics to estimate how many instructions are reasonable to replace the mul,
        this is currently based on XScale timings }

      { in the simplest case, we need a mov to load the constant and a mul to carry out the
        actual multiplication, this requires min. 1+4 cycles

        because the first shift imm. might cause a stall and because we need more instructions
        when replacing the mul we generate max. 3 instructions to replace this mul }
      budget:=3;

      { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
        a ldr, so generating one more operation to replace this is beneficial }
      if not(is_shifter_const(ua,dummyshift)) and not(is_shifter_const(not(ua),dummyshift)) then
        inc(budget);

      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((ua and $f8000000)=0) or ((ua and $f8000000)=$f8000000) then
        dec(budget);

      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((ua and $ffff8000)=0) or ((ua and $ffff8000)=$ffff8000) then
        dec(budget);

      { "symmetric" bit pattern like $10101010 where
        res:=a*$10101010 can be simplified into

        temp:=a*$1010
        res:=temp+temp shl 16
      }
      foldhalves:=false;
      topbit:=BsrDWord(factor);
      foldshift:=0;
      if (budget>1) and (topbit>2) then
        begin
          for i:=2 to 31 do
            if (factor shr i)=(factor and ($ffffffff shr (32-i))) then
              begin
                { only the lower part is multiplied; the fold costs one instruction }
                foldhalves:=true;
                foldshift:=i;
                dec(budget);
                factor:=factor shr foldshift;
                break;
              end;
        end;

      { bit 0 is excluded: it is covered by the first emitted instruction }
      addcount:=popcnt(factor and $fffffffe);

      { most simple cases }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { add up ?

        basically, one add is needed for each bit being set in the constant factor
        however, the least significant bit is for free, it can be hidden in the initial
        instruction
      }
      else if (addcount+extracost<=budget) and
        (addcount<=popcnt(dword(nextpowerof2(factor,power)-factor) and $fffffffe)) then
        begin
          initial:=true;
          while factor<>0 do
            begin
              shiftop.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  { dst := src + src shl n, which also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
                  dec(factor);
                end
              else
                if initial then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shiftop));
              initial:=false;
              { the highest set bit has been handled }
              dec(factor,1 shl shiftop.shiftimm);
            end;
          finish_product;
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(factor,power)-factor) and $fffffffe)+extracost+1<=budget then
        begin
          initial:=true;
          while factor<>0 do
            begin
              if initial then
                begin
                  { from here on factor holds what has to be subtracted
                    from src shl power }
                  factor:=(1 shl power)-factor;
                  shiftop.shiftimm:=power;
                end
              else
                shiftop.shiftimm:=BsrDWord(factor);

              if odd(factor) then
                begin
                  { dst := src shl n - src, which also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
                  dec(factor);
                end
              else
                if initial then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shiftop));
                    dec(factor,1 shl shiftop.shiftimm);
                  end;
              initial:=false;
            end;
          finish_product;
        end
      else
        result:=false;
    end;
  { dst := src <op> a for a constant a, optionally producing flags that let
    the caller test for overflow.

    list     : instruction list the code is appended to
    op,size  : operation and operand size
    a        : constant operand (may be rewritten by optimize_op_const)
    src,dst  : source and destination register
    setflags : request a flag-setting instruction (cgsetflags does the same)
    ovloc    : set to LOC_FLAGS plus the condition signalling overflow when
               flags were produced for ADD/SUB on an unsigned size,
               LOC_VOID otherwise (the generic fallback may set it as well)

    Constants that fit a shifter operand are encoded directly; otherwise a
    series of special cases (multiplications by 2^n, 2^n+1, 2^n-1, BIC for
    AND masks, shift pairs for contiguous masks, two-instruction splits) is
    tried before the constant is loaded into a temporary register. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      constreg : tregister;
      so : tshifterop;
      log2 : longint;
      part1, part2: DWord;
      wantflags : boolean;
    begin
      optimize_op_const(size, op, a);
      { optimize_op_const may have reduced the operation to a plain move }
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
        else
          ;
      end;
      ovloc.loc:=LOC_VOID;
      wantflags:=cgsetflags or setflags;

      { when -a is encodable, ADD and SUB swap roles (not done when the
        caller itself asks for flags, nor for the most negative value) }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { rotate left is encoded as rotate right by the complement }
              if op = OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            { a MOVT/MOV based variant for ADD/SUB constants outside 0..4095
              used to be sketched here but is disabled }
            begin
              if wantflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(wantflags)*ord(PF_S))));
            end;
            if wantflags and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                  else
                    internalerror(2019050922);
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,log2) and not(wantflags) then
            a_op_const_reg_reg(list,OP_SHL,size,log2,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,log2) and not(wantflags) then
            begin
              if log2>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=log2;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,log2) and not(wantflags) then
            begin
              if log2>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=log2;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(wantflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
            is_continuous_mask(aword(a), lsb, width) and
            ((lsb = 0) or ((lsb + width) = 32)) then
            begin
              shifterop_reset(so);
              if (width = 16) and
                (lsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width = 8) and
                (lsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb = 0 then
                begin
                  { mask at the bottom: shift the unwanted bits out at the
                    top, then move the rest back down }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask at the top: shift the unwanted bits out at the
                    bottom, then move the rest back up }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,part2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
            not(wantflags) and
            split_into_shifter_const(a, part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,part2));
            end
          else
            begin
              { generic fallback: materialise the constant in a register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { dst := src2 <op> src1, optionally producing flags that let the caller
    test for overflow.

    list      : instruction list the code is appended to
    op,size   : operation and operand size
    src1,src2 : operand registers (for shifts src1 is the shift count)
    dst       : destination register
    setflags  : request overflow information (cgsetflags does the same)
    ovloc     : LOC_VOID unless the overflow-checked multiplication path is
                taken, in which case it is LOC_FLAGS/F_NE

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not handled here and raise
    internalerror 200308283. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { MOV dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { long multiplication: the high word lands in overflowreg
                  and is inspected afterwards }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: the
                          operand has to be copied into the temporary, which
                          then serves as rm }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow iff the high word equals the sign
                      extension of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow iff the high word is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { see above: copy the operand into the temporary
                          instead of onto itself }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { plain three-register data processing instruction, with the S
              postfix when flags are wanted }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { 32x32 multiplication with the result delivered in a register pair
    (dstlo/dsthi).

    With UMULL support: emits UMULL (size=OS_32) or SMULL (size=OS_S32);
    missing destination registers are replaced by freshly allocated ones.
    Without UMULL support: only the low word can be computed (plain MUL), so
    a requested dsthi raises internalerror 2015083022.
    Any other size raises internalerror 2014060802. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi=NR_NO then
            begin
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end
          else
            begin
              internalerror(2015083022);
            end;
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_32:
          mulop:=A_UMULL;
        OS_S32:
          mulop:=A_SMULL;
        else
          InternalError(2014060802);
      end;
      { The caller might omit dstlo or dsthi, when he is not interested in it, we still
        need valid registers everywhere. In case of dsthi = NR_NO we could fall back to
        32x32=32 bit multiplication}
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits a single load or store instruction (op + oppostfix) for reg and
    ref, first rewriting ref into a form the instruction can encode.

    list      : instruction list the code is appended to
    op        : load/store opcode
    oppostfix : size/sign postfix of the access
    reg       : register being loaded or stored
    ref       : memory reference (passed by value, modified locally)

    Returns the reference that was actually used in the emitted instruction,
    so that callers can derive neighbouring addresses from it.

    Steps: make sure a base register exists; call fixref for references the
    instruction cannot encode (symbols, out-of-range or misaligned offsets);
    apply the Thumb register restrictions; fold base+index+offset; fold the
    index for floating point loads/stores, which accept no index register. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1 : tregister;
    begin
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { The offset is always added to the base and the sum becomes the
            new base, whether or not a temporary was created above.
            Previously, when a temporary already existed, the sum went into
            a register that never made it into ref while ref.offset was
            still cleared, so the offset was lost. }
          tmpreg1:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
          ref.base:=tmpreg1;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { the temporary is the base: fold the index into it }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { the temporary must then be the index: combine it with
                    the base and use the result as new base }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { no temporary yet: compute base +/- index into a new one,
                honouring the sign of the index like the branches above }
              tmpreg1:=getintregister(list,OS_ADDR);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register reg into memory at ref.

    The store width follows tosize (fromsize is clamped to tosize when it is
    not smaller).  When the reference is aligned to only 1 or 2 bytes and
    that is less than the store size, or when a halfword store is needed on
    a CPU without CPUARM_HAS_ALL_MEM, the value is written in smaller pieces
    in the byte order of the target; otherwise a single STR is emitted via
    handle_load_store. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step : integer;
      needsplit : boolean;
      i : integer;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          postfix:=PF_None;
        else
          InternalError(2003082912);
      end;

      needsplit:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
                 ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
                  (postfix =PF_H));
      if not needsplit then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { direction in which the address moves from the least significant
        piece towards the most significant one }
      if target_info.endian=endian_big then
        step:=-1
      else
        step:=1;

      case FromSize of
        OS_16,OS_S16:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if target_info.endian=endian_big then
              inc(partref.offset,1);
            { low byte first, then the high byte }
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
            a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          end;
        OS_32,OS_S32:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                if target_info.endian=endian_big then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
              end
            else
              begin
                { four byte stores }
                if target_info.endian=endian_big then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                for i:=1 to 2 do
                  begin
                    a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                    inc(partref.offset,step);
                    a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                  end;
              end;
          end
        else
          handle_load_store(list,A_STR,postfix,reg,ref);
      end;
    end;
  { Stores reg to ref with the width given by tosize and returns the
    reference actually used by (the first) store instruction.

    On CPUs without CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte
    stores: reg at the returned reference and reg shr 8 one byte above it.
    NOTE(review): the split always puts the low byte at the lower address -
    confirm this is intended for big endian targets.
    Sizes other than 8/16/32 bit raise internalerror 2003082910. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      highref : treference;
      highbyte : TRegister;
      splithalfword : boolean;
    begin
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      splithalfword:=(tosize in [OS_S16,OS_16]) and
        (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]));
      if not splithalfword then
        begin
          result:=handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { low byte }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      { high byte, one address further }
      highbyte:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,highbyte);
      highref:=result;
      inc(highref.offset);
      handle_load_store(list,A_STR,PF_B,highbyte,highref);
    end;
  { Loads reg from ref and returns the reference actually used by (the
    first) load instruction.

    The load postfix is chosen from fromsize.  On CPUs without
    CPUARM_HAS_ALL_MEM two cases are handled manually, selected by tosize:
      OS_S8       : unsigned byte load followed by a register sign extension
      OS_16/OS_S16: two byte loads combined with ORR reg,reg,tmp LSL #8
    Sizes other than 8/16/32 bit raise internalerror 200308291. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      so : tshifterop;
      highbyte : TRegister;
      highref : treference;
      limitedmem : boolean;
    begin
      case FromSize of
        { signed integer registers }
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limitedmem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      if (tosize=OS_S8) and limitedmem then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if (tosize in [OS_S16,OS_16]) and limitedmem then
        begin
          { byte at the lower address }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          { byte at the following address }
          highbyte:=getintregister(list,OS_INT);
          highref:=result;
          inc(highref.offset);
          handle_load_store(list,A_LDR,PF_B,highbyte,highref);
          { reg := reg or (highbyte shl 8) }
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,highbyte,so));
        end
      else
        result:=handle_load_store(list,A_LDR,postfix,reg,ref);
    end;
  { Register to register move reg2 := reg1 with size conversion.

    When the sizes differ, the value is zero or sign extended according to
    the (clamped) source size: with shift pairs / AND on CPUs older than
    ARMv6, with the UXTB/UXTH/SXTB/SXTH/SXTB16 instructions otherwise.
    When no conversion instruction was emitted and the registers differ, a
    plain MOV is generated and reported to the register allocator through
    add_move_instruction.
    An OS_NO size (or a size above the checked limit) raises internalerror
    2002090901. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { reg2 := reg shifted by shiftimm using shiftmode; Thumb code goes
      through a_op_const_reg_reg, ARM code uses MOV with a shifter operand }
    procedure emit_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
            exit;
          end;
        case shiftmode of
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    var
      movinstr: taicpu;
      converted: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          converted:=true;
          { a narrowing move is a truncation to the target size }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            begin
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    emit_shift(SM_LSL,24,reg1);
                    if tosize=OS_16 then
                      begin
                        { sign extend to 16 bit, keep the upper half clear }
                        emit_shift(SM_ASR,8,reg2);
                        emit_shift(SM_LSR,16,reg2);
                      end
                    else
                      emit_shift(SM_ASR,24,reg2);
                  end;
                OS_16:
                  begin
                    emit_shift(SM_LSL,16,reg1);
                    emit_shift(SM_LSR,16,reg2);
                  end;
                OS_S16:
                  begin
                    emit_shift(SM_LSL,16,reg1);
                    emit_shift(SM_ASR,16,reg2)
                  end;
                else
                  converted:=false;
              end;
            end
          else
            begin
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    if tosize=OS_16 then
                      begin
                        { sign extend the byte into the upper halfword, then
                          move it down so the upper half ends up clear }
                        so.shiftmode:=SM_ROR;
                        so.shiftimm:=16;
                        list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                        emit_shift(SM_LSR,16,reg2);
                      end
                    else
                      list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                  end;
                OS_16:
                  list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
                OS_S16:
                  list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
                else
                  converted:=false;
              end;
            end;
        end;

      if not converted and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.

    list    : instruction list the code is appended to
    size    : size of the floating point value
    ref     : memory location of the value
    paraloc : parameter description; every location of its location list is
              filled in turn while a running reference (href) advances by the
              size of each processed location

    FPU register locations are loaded with a_loadfpu_ref_reg, integer
    register locations with a_load_ref_reg (or cg64.a_load64_ref_cgpara for
    64 bit locations), memory locations are filled with g_concatcopy.
    Any other location kind raises internalerror 200408241. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { allocate and load the location currently processed, from
                  the running reference, rather than always the first
                  location and the start of the value }
                paramanager.allocparaloc(list,hloc);
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    { allocate the register of this location, not that of
                      the first one }
                    paramanager.allocparaloc(list,hloc);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue behind the part that was just passed }
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Floating point register move reg2 := reg1: emits MVF with the postfix
    that cgsize2fpuoppostfix assigns to tosize. fromsize is not used. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads floating point register reg from memory at ref using LDF with the
    precision postfix selected by fromsize (S for 32 bit, D for 64 bit, E for
    OS_F80).  When tosize differs from fromsize, an additional register move
    with the target precision is emitted afterwards.
    Other sizes raise internalerror 200309021. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_32,
        OS_F32:
          precision:=PF_S;
        OS_64,
        OS_F64:
          precision:=PF_D;
        OS_F80:
          precision:=PF_E;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      { convert in place when the requested precision is a different one }
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the FPU register reg to memory (ref) using STF. The precision
    postfix is derived from tosize; sizes other than OS_F32, OS_F64 and
    OS_F80 raise InternalError(200309022). }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      if tosize=OS_F32 then
        precision:=PF_S
      else if tosize=OS_F64 then
        precision:=PF_D
      else if tosize=OS_F80 then
        precision:=PF_E
      else
        InternalError(200309022);

      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  { Generates a check of the FPSCR exception bits: the status register is
    read, masked with $9f and FPC_THROWFPUEXCEPTION is called unless the
    masked result is zero. Nothing is generated when no check is needed.
    With clear set, the pending check request of the current procedure is
    reset afterwards. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipbranch : taicpu;
      noexception : TAsmLabel;
    begin
      if not needs_check_for_fpu_exceptions then
        exit;
      if not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { fetch the status register and isolate the exception bits }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { skip the call when none of the bits is set }
      current_asmdata.getjumplabel(noexception);
      skipbranch:=taicpu.op_sym(A_B,noexception);
      skipbranch.is_jmp:=true;
      skipbranch.condition:=C_EQ;
      list.concat(skipbranch);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      a_label(list,noexception);

      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1620. { comparison operations }
  { Compares reg with the constant a and jumps to l if the condition cmp_op
    holds. Depending on the encodability of the constant, the comparison is
    done by CMP with an immediate, by CMN with the negated immediate or by
    CMP against a temporary register holding the constant. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      shift : byte;
      fits_cmp : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);

      { can the constant be used as immediate operand of CMP? }
      if GenerateThumbCode then
        fits_cmp:=is_thumb_imm(a)
      else
        fits_cmp:=is_shifter_const(a,shift);

      if fits_cmp then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,shift) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { no immediate form available: materialize the constant first }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;

      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Generates a bit scan of src into dst.
    reverse=true : CLZ, then 31-result, masked with 255.
    reverse=false: RBIT followed by CLZ; a result of 32 is replaced by $ff. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { the conditional move below needs an IT instruction in Thumb-2 code }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits CMP reg2,reg1 followed by a conditional jump to l for cmp_op.
    The flags register is marked as allocated around the sequence. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s. Thumb code
    uses BL, all other code uses B; the instruction is flagged as a jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jumpop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to the label l. Thumb code uses BL, all
    other code uses B; the instruction is flagged as a jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jumpop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Jumps to l if the flags f are set. Outside of Thumb code a single
    conditional branch is emitted. In Thumb code a conditional branch with
    the inverted condition skips over an unconditional jump to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Converts the flags f into a boolean value in reg by means of two
    conditional moves: 1 if the condition holds, 0 for the inverse
    condition. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      setone,setzero : taicpu;
    begin
      setone:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      list.concat(setone);
      setzero:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
      list.concat(setzero);
    end;
  { Emits the profiling hook: R14 is pushed and __gnu_mcount_nc is called.
    Only the arm-linux target is handled, any other target ends in
    internalerror(2014091201). }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201);

      list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
      a_call_name(list,'__gnu_mcount_nc',false);
    end;
  { Generates the procedure prologue into list.
    localsize    : size of the local data; it is aligned to 4 on entry and
                   padded further below
    nostackframe : if true, no instructions are generated at all
    With a stack frame, the routine determines the floating point/vector and
    integer registers to preserve, stores the integer registers with STM
    (using a separate layout on Darwin targets), sets up the frame pointer,
    subtracts the size of the locals from the stack pointer and finally
    stores the floating point/vector registers. The number of bytes pushed
    for registers is tracked in registerarea. }
  1737. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1738. var
  1739. ref : treference;
  1740. shift : byte;
  1741. firstfloatreg,lastfloatreg,
  1742. r : byte;
  1743. mmregs,
  1744. regs, saveregs : tcpuregisterset;
  1745. registerarea, offset,
  1746. r7offset,
  1747. stackmisalignment : pint;
  1748. imm1, imm2: DWord;
  1749. stack_parameters : Boolean;
  1750. begin
  1751. LocalSize:=align(LocalSize,4);
  1752. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1753. { call instruction does not put anything on the stack }
  1754. registerarea:=0;
  { High(TSuperRegister) is the initial value; it is only replaced below if a
    padding register gets pushed (g_proc_exit acts on values below RS_R4 only) }
  1755. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1756. lastfloatreg:=RS_NO;
  1757. if not(nostackframe) then
  1758. begin
  1759. firstfloatreg:=RS_NO;
  1760. mmregs:=[];
  { step 1: find out which floating point/vector registers must be preserved }
  1761. case current_settings.fputype of
  1762. fpu_none,
  1763. fpu_soft,
  1764. fpu_libgcc:
  1765. ;
  1766. fpu_fpa,
  1767. fpu_fpa10,
  1768. fpu_fpa11:
  1769. begin
  1770. { save floating point registers? }
  1771. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  { remember the first and the last used register; every used register adds
    12 bytes to registerarea }
  1772. for r:=RS_F0 to RS_F7 do
  1773. if r in regs then
  1774. begin
  1775. if firstfloatreg=RS_NO then
  1776. firstfloatreg:=r;
  1777. lastfloatreg:=r;
  1778. inc(registerarea,12);
  1779. end;
  1780. end;
  1781. else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
  1782. begin;
  1783. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1784. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1785. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1786. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1787. end
  1788. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  1789. begin;
  1790. { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
  1791. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1792. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1793. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
  1794. end
  1795. else
  1796. internalerror(2019050924);
  1797. end;
  1798. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1799. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1800. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1801. { save int registers }
  { step 2: ref describes a pre-indexed access through the stack pointer; it
    is the memory operand of the STM instructions below }
  1802. reference_reset(ref,4,[]);
  1803. ref.index:=NR_STACK_POINTER_REG;
  1804. ref.addressmode:=AM_PREINDEXED;
  1805. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { all targets except Darwin }
  1806. if not(target_info.system in systems_darwin) then
  1807. begin
  1808. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1809. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1810. begin
  { keep a copy of the stack pointer at entry in R12 }
  1811. a_reg_alloc(list,NR_R12);
  1812. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1813. end;
  1814. { the (old) ARM APCS requires saving both the stack pointer (to
  1815. crawl the stack) and the PC (to identify the function this
  1816. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1817. and R15 -- still needs updating for EABI and Darwin, they don't
  1818. need that }
  1819. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1820. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1821. else
  1822. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1823. include(regs,RS_R14);
  1824. if regs<>[] then
  1825. begin
  { 4 bytes of registerarea per pushed integer register }
  1826. for r:=RS_R0 to RS_R15 do
  1827. if r in regs then
  1828. inc(registerarea,4);
  1829. { if the stack is not 8 byte aligned, try to add an extra register,
  1830. so we can avoid the extra sub/add ...,#4 later (KB) }
  1831. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1832. for r:=RS_R3 downto RS_R0 do
  1833. if not(r in regs) then
  1834. begin
  1835. regs:=regs+[r];
  1836. inc(registerarea,4);
  1837. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1838. break;
  1839. end;
  { push all selected registers with one STM (FD postfix) }
  1840. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1841. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
  1842. end;
  1843. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1844. begin
  { report the save slot of every pushed register to the CFI generator:
    offsets start at -4 for the highest register and decrease by 4 }
  1845. offset:=-4;
  1846. for r:=RS_R15 downto RS_R0 do
  1847. if r in regs then
  1848. begin
  1849. current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),offset);
  1850. dec(offset,4);
  1851. end;
  1852. { the framepointer now points to the saved R15, so the saved
  1853. framepointer is at R11-12 (for get_caller_frame) }
  1854. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1855. a_reg_dealloc(list,NR_R12);
  1856. current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
  1857. current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
  1858. end;
  1859. end
  1860. else
  { Darwin targets }
  1861. begin
  1862. { always save r14 if we use r7 as the framepointer, because
  1863. the parameter offsets are hardcoded in advance and always
  1864. assume that r14 sits on the stack right behind the saved r7
  1865. }
  1866. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1867. include(regs,RS_FRAME_POINTER_REG);
  1868. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1869. include(regs,RS_R14);
  1870. if regs<>[] then
  1871. begin
  1872. { on Darwin, you first have to save [r4-r7,lr], and then
  1873. [r8,r10,r11] and make r7 point to the previously saved
  1874. r7 so that you can perform a stack crawl based on it
  1875. ([r7] is previous stack frame, [r7+4] is return address
  1876. }
  1877. include(regs,RS_FRAME_POINTER_REG);
  1878. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset counts the bytes taken by the registers of the first group that
    are lower than the frame pointer register }
  1879. r7offset:=0;
  1880. for r:=RS_R0 to RS_R15 do
  1881. if r in saveregs then
  1882. begin
  1883. inc(registerarea,4);
  1884. if r<RS_FRAME_POINTER_REG then
  1885. inc(r7offset,4);
  1886. end;
  1887. { save the registers }
  1888. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1889. { make r7 point to the saved r7 (regardless of whether this
  1890. frame uses the framepointer, for backtrace purposes) }
  1891. if r7offset<>0 then
  1892. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1893. else
  1894. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1895. { now save the rest (if any) }
  1896. saveregs:=regs-saveregs;
  1897. if saveregs<>[] then
  1898. begin
  1899. for r:=RS_R8 to RS_R11 do
  1900. if r in saveregs then
  1901. inc(registerarea,4);
  1902. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1903. end;
  1904. end;
  1905. end;
  { step 3: reserve the local data. The size is padded in such a way that
    registerarea+localsize becomes a multiple of localalignmax }
  1906. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1907. if (LocalSize<>0) or
  1908. ((stackmisalignment<>0) and
  1909. ((pi_do_call in current_procinfo.flags) or
  1910. (po_assembler in current_procinfo.procdef.procoptions))) then
  1911. begin
  1912. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { with an estimated stack size, the estimate (less the register area)
    replaces the computed size; a computed size above the estimate is an
    internal error }
  1913. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1914. begin
  1915. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1916. internalerror(2014030901)
  1917. else
  1918. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1919. end;
  { subtract localsize from the stack pointer: with one immediate, with two
    immediates, or through R12 if the constant cannot be split either }
  1920. if is_shifter_const(localsize,shift) then
  1921. begin
  1922. a_reg_dealloc(list,NR_R12);
  1923. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1924. end
  1925. else if split_into_shifter_const(localsize, imm1, imm2) then
  1926. begin
  1927. a_reg_dealloc(list,NR_R12);
  1928. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1929. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1930. end
  1931. else
  1932. begin
  1933. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1934. a_reg_alloc(list,NR_R12);
  1935. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1936. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1937. a_reg_dealloc(list,NR_R12);
  1938. end;
  1939. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1940. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
  1941. end;
  { step 4: store the floating point/vector registers found in step 1 }
  1942. if (mmregs<>[]) or
  1943. (firstfloatreg<>RS_NO) then
  1944. begin
  1945. reference_reset(ref,4,[]);
  { the address of the save area (frame pointer + floatregstart) is computed
    into R12 for VFP targets or if tg.direction*floatregstart>=1023;
    otherwise the area is addressed relative to the frame pointer }
  1946. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1947. (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
  1948. begin
  1949. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1950. begin
  1951. a_reg_alloc(list,NR_R12);
  1952. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1953. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1954. a_reg_dealloc(list,NR_R12);
  1955. end
  1956. else
  1957. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1958. ref.base:=NR_R12;
  1959. end
  1960. else
  1961. begin
  1962. ref.base:=current_procinfo.framepointer;
  1963. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1964. end;
  1965. case current_settings.fputype of
  1966. fpu_fpa,
  1967. fpu_fpa10,
  1968. fpu_fpa11:
  1969. begin
  { one SFM stores the whole range firstfloatreg..lastfloatreg }
  1970. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1971. lastfloatreg-firstfloatreg+1,ref));
  1972. end;
  1973. else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
  1974. begin
  { the address register is moved from ref.base to ref.index for VSTM }
  1975. ref.index:=ref.base;
  1976. ref.base:=NR_NO;
  1977. if mmregs<>[] then
  1978. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1979. end
  1980. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  1981. begin
  1982. ref.index:=ref.base;
  1983. ref.base:=NR_NO;
  1984. if mmregs<>[] then
  1985. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
  1986. end
  1987. else
  1988. internalerror(2019050923);
  1989. end;
  1990. end;
  1991. end;
  1992. end;
  { Generates the procedure epilogue into list.
    parasize     : not referenced in this routine
    nostackframe : if true, only the return instruction is generated
                   (MOV PC,R14, or BX R14 if the cpu supports BX)
    With a stack frame, the floating point/vector registers are reloaded
    first. Then the set of integer registers to restore is determined and,
    depending on the frame pointer and the target, either the stack pointer
    is adjusted and the registers are loaded through it, or the registers
    are loaded relative to the frame pointer. }
  1993. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1994. var
  1995. ref : treference;
  1996. LocalSize : longint;
  1997. firstfloatreg,lastfloatreg,
  1998. r,
  1999. shift : byte;
  2000. mmregs,
  2001. saveregs,
  2002. regs : tcpuregisterset;
  2003. registerarea,
  2004. stackmisalignment: pint;
  2005. paddingreg: TSuperRegister;
  2006. imm1, imm2: DWord;
  2007. begin
  2008. if not(nostackframe) then
  2009. begin
  2010. registerarea:=0;
  2011. firstfloatreg:=RS_NO;
  2012. lastfloatreg:=RS_NO;
  2013. mmregs:=[];
  2014. saveregs:=[];
  { step 1: find out which floating point/vector registers were preserved }
  2015. case current_settings.fputype of
  2016. fpu_none,
  2017. fpu_soft,
  2018. fpu_libgcc:
  2019. ;
  2020. fpu_fpa,
  2021. fpu_fpa10,
  2022. fpu_fpa11:
  2023. begin
  2024. { restore floating point registers? }
  2025. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  2026. for r:=RS_F0 to RS_F7 do
  2027. if r in regs then
  2028. begin
  2029. if firstfloatreg=RS_NO then
  2030. firstfloatreg:=r;
  2031. lastfloatreg:=r;
  2032. { floating point register space is already included in
  2033. localsize below by calc_stackframe_size
  2034. inc(registerarea,12);
  2035. }
  2036. end;
  2037. end;
  2038. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  2039. begin
  2040. { restore vfp registers? }
  2041. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  2042. they have numbers>$1f which is not really correct as they should simply have the same numbers
  2043. as the even ones by with a different subtype as it is done on x86 with al/ah }
  2044. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  2045. end
  2046. else
  2047. internalerror(2019050908);
  2048. end;
  { step 2: reload them from the save area. Its address is computed into R12
    for VFP targets or if tg.direction*floatregstart>=1023; otherwise the
    area is addressed relative to the frame pointer }
  2049. if (firstfloatreg<>RS_NO) or
  2050. (mmregs<>[]) then
  2051. begin
  2052. reference_reset(ref,4,[]);
  2053. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  2054. (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
  2055. begin
  2056. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  2057. begin
  2058. a_reg_alloc(list,NR_R12);
  2059. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  2060. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  2061. a_reg_dealloc(list,NR_R12);
  2062. end
  2063. else
  2064. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  2065. ref.base:=NR_R12;
  2066. end
  2067. else
  2068. begin
  2069. ref.base:=current_procinfo.framepointer;
  2070. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  2071. end;
  2072. case current_settings.fputype of
  2073. fpu_fpa,
  2074. fpu_fpa10,
  2075. fpu_fpa11:
  2076. begin
  { one LFM reloads the whole range firstfloatreg..lastfloatreg }
  2077. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2078. lastfloatreg-firstfloatreg+1,ref));
  2079. end;
  2080. else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
  2081. begin
  { the address register is moved from ref.base to ref.index for VLDM }
  2082. ref.index:=ref.base;
  2083. ref.base:=NR_NO;
  2084. if mmregs<>[] then
  2085. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2086. end
  2087. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  2088. begin
  2089. ref.index:=ref.base;
  2090. ref.base:=NR_NO;
  2091. if mmregs<>[] then
  2092. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
  2093. end
  2094. else
  2095. internalerror(2019050921);
  2096. end;
  2097. end;
  { step 3: determine the integer registers to restore }
  2098. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2099. if (pi_do_call in current_procinfo.flags) or
  2100. (regs<>[]) or
  2101. ((target_info.system in systems_darwin) and
  2102. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2103. begin
  { R15 takes the place of R14 in the set, so the register load also
    performs the return }
  2104. exclude(regs,RS_R14);
  2105. include(regs,RS_R15);
  2106. if (target_info.system in systems_darwin) then
  2107. include(regs,RS_FRAME_POINTER_REG);
  2108. end;
  2109. if not(target_info.system in systems_darwin) then
  2110. begin
  2111. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2112. The saved PC came after that but is discarded, since we restore
  2113. the stack pointer }
  2114. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2115. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2116. end
  2117. else
  2118. begin
  2119. { restore R8-R11 already if necessary (they've been stored
  2120. before the others) }
  2121. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2122. if saveregs<>[] then
  2123. begin
  { ref is only prepared here; the LDM using it is emitted further below,
    after the stack pointer adjustment }
  2124. reference_reset(ref,4,[]);
  2125. ref.index:=NR_STACK_POINTER_REG;
  2126. ref.addressmode:=AM_PREINDEXED;
  2127. for r:=RS_R8 to RS_R11 do
  2128. if r in saveregs then
  2129. inc(registerarea,4);
  2130. regs:=regs-saveregs;
  2131. end;
  2132. end;
  { 4 bytes of registerarea per integer register to restore }
  2133. for r:=RS_R0 to RS_R15 do
  2134. if r in regs then
  2135. inc(registerarea,4);
  2136. { reapply the stack padding reg, in case there was one, see the complimentary
  2137. comment in g_proc_entry() (KB) }
  2138. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
  2139. if paddingreg < RS_R4 then
  2140. if paddingreg in regs then
  2141. internalerror(201306190)
  2142. else
  2143. begin
  2144. regs:=regs+[paddingreg];
  2145. inc(registerarea,4);
  2146. end;
  2147. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  { step 4a: frames addressed through the stack pointer and all Darwin
    frames: release the locals by adding their size to the stack pointer,
    then load the registers through the stack pointer }
  2148. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2149. (target_info.system in systems_darwin) then
  2150. begin
  2151. LocalSize:=current_procinfo.calc_stackframe_size;
  2152. if (LocalSize<>0) or
  2153. ((stackmisalignment<>0) and
  2154. ((pi_do_call in current_procinfo.flags) or
  2155. (po_assembler in current_procinfo.procdef.procoptions))) then
  2156. begin
  2157. if pi_estimatestacksize in current_procinfo.flags then
  2158. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2159. else
  2160. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { add LocalSize to the stack pointer: with one immediate, with two
    immediates, or through R12 if the constant cannot be split either }
  2161. if is_shifter_const(LocalSize,shift) then
  2162. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2163. else if split_into_shifter_const(localsize, imm1, imm2) then
  2164. begin
  2165. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2166. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2167. end
  2168. else
  2169. begin
  2170. a_reg_alloc(list,NR_R12);
  2171. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2172. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2173. a_reg_dealloc(list,NR_R12);
  2174. end;
  2175. end;
  { Darwin: the second register group is reloaded before the main group }
  2176. if (target_info.system in systems_darwin) and
  2177. (saveregs<>[]) then
  2178. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  { nothing to load: return through R14; otherwise the LDM loads the set,
    which contains R15 whenever the condition of step 3 held }
  2179. if regs=[] then
  2180. begin
  2181. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2182. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2183. else
  2184. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2185. end
  2186. else
  2187. begin
  2188. reference_reset(ref,4,[]);
  2189. ref.index:=NR_STACK_POINTER_REG;
  2190. ref.addressmode:=AM_PREINDEXED;
  2191. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2192. end;
  2193. end
  2194. else
  { step 4b: frames with a separate frame pointer on non-Darwin targets:
    one LDM (EA postfix) relative to the frame pointer loads the set, which
    includes R13 and R15 in this case }
  2195. begin
  2196. { restore int registers and return }
  2197. reference_reset(ref,4,[]);
  2198. ref.index:=NR_FRAME_POINTER_REG;
  2199. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2200. end;
  2201. end
  { no stack frame: plain return through R14 }
  2202. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2203. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2204. else
  2205. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2206. end;
  { Generates the code which loads the address of the global offset table
    into current_procinfo.got. Code is only generated if PIC code is being
    created, the current procedure needs the GOT and the target uses a GOT
    for PIC. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      gotref : treference;
      lab : TAsmLabel;
      usedregs : tcpuregisterset;
      sreg : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { The procedure parameters have not been initialized yet at this
        stage. The registers which carry parameters are allocated before the
        GOT initialization code so that later stages of the code generation
        do not use them for temporary operations. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sreg:=RS_R0 to RS_R3 do
        if sreg in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));

      { The scratch register R12 is allocated and used for the GOT
        calculations directly, otherwise the init code can be distorted in
        later stages of the code generation. }
      a_reg_alloc(list,NR_R12);

      { pc relative load of the constant which is placed in the local data }
      reference_reset(gotref,4,[]);
      current_asmdata.getglobaldatalabel(lab);
      cg.a_label(current_procinfo.aktlocaldata,lab);
      gotref.symbol:=lab;
      gotref.base:=NR_PC;
      gotref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,gotref));

      { the constant is the distance between _GLOBAL_OFFSET_TABLE_ and the
        label placed in front of the add instruction, with an offset of -8 }
      current_asmdata.getaddrlabel(lab);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,lab,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,lab);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { release the registers again, in reverse order of the allocation }
      a_reg_dealloc(list,NR_R12);
      for sreg:=RS_R3 downto RS_R0 do
        if sreg in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into the register r.

    list : assembler list the generated instructions are appended to
    ref  : reference whose address is taken; only the addressing mode
           AM_OFFSET is accepted (internalerror 200309071 otherwise)
    r    : destination register

    A reference without base register gets its index promoted to the base;
    a shifted or negative index is rejected in that case. References with a
    symbol, or with an offset that neither directly nor negated fits a
    shifter constant, are simplified by fixref first. Afterwards the address
    is computed as base +/- index + offset. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      tmpref:=ref;

      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;

      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);

      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);

      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          if tmpref.signindex<0 then
            { a_op_reg_reg_reg calculates dst:=src2 op src1, so the index has
              to be passed as src1 and the base as src2 to get base-index }
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                { let the register allocator know about the move so that it
                  can try to eliminate it }
                add_move_instruction(instr);
              end;
        end;
    end;
  2301. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2302. var
  2303. tmpreg, tmpreg2 : tregister;
  2304. tmpref : treference;
  2305. l, piclabel : tasmlabel;
  2306. indirection_done : boolean;
  2307. begin
  2308. { absolute symbols can't be handled directly, we've to store the symbol reference
  2309. in the text segment and access it pc relative
  2310. For now, we assume that references where base or index equals to PC are already
  2311. relative, all other references are assumed to be absolute and thus they need
  2312. to be handled extra.
  2313. A proper solution would be to change refoptions to a set and store the information
  2314. if the symbol is absolute or relative there.
  2315. }
  2316. { create consts entry }
  2317. reference_reset(tmpref,4,[]);
  2318. current_asmdata.getjumplabel(l);
  2319. cg.a_label(current_procinfo.aktlocaldata,l);
  2320. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2321. piclabel:=nil;
  2322. tmpreg:=NR_NO;
  2323. indirection_done:=false;
  2324. if assigned(ref.symbol) then
  2325. begin
  2326. if (target_info.system=system_arm_ios) and
  2327. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2328. begin
  2329. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2330. if ref.offset<>0 then
  2331. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2332. indirection_done:=true;
  2333. end
  2334. else if ref.refaddr=addr_gottpoff then
  2335. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2336. else if ref.refaddr=addr_tlsgd then
  2337. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2338. else if ref.refaddr=addr_tlsdesc then
  2339. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2340. else if ref.refaddr=addr_tpoff then
  2341. begin
  2342. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2343. Internalerror(2019092804);
  2344. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2345. end
  2346. else if (cs_create_pic in current_settings.moduleswitches) then
  2347. if (tf_pic_uses_got in target_info.flags) then
  2348. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2349. else
  2350. begin
  2351. { ideally, we would want to generate
  2352. ldr r1, LPICConstPool
  2353. LPICLocal:
  2354. ldr/str r2,[pc,r1]
  2355. ...
  2356. LPICConstPool:
  2357. .long _globsym-(LPICLocal+8)
  2358. However, we cannot be sure that the ldr/str will follow
  2359. right after the call to fixref, so we have to load the
  2360. complete address already in a register.
  2361. }
  2362. current_asmdata.getaddrlabel(piclabel);
  2363. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2364. end
  2365. else
  2366. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2367. end
  2368. else
  2369. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2370. { load consts entry }
  2371. if not indirection_done then
  2372. begin
  2373. tmpreg:=getintregister(list,OS_INT);
  2374. tmpref.symbol:=l;
  2375. tmpref.base:=NR_PC;
  2376. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2377. if (cs_create_pic in current_settings.moduleswitches) and
  2378. (tf_pic_uses_got in target_info.flags) and
  2379. assigned(ref.symbol) then
  2380. begin
  2381. {$ifdef EXTDEBUG}
  2382. if not (pi_needs_got in current_procinfo.flags) then
  2383. Comment(V_warning,'pi_needs_got not included');
  2384. {$endif EXTDEBUG}
  2385. Include(current_procinfo.flags,pi_needs_got);
  2386. reference_reset(tmpref,4,[]);
  2387. tmpref.base:=current_procinfo.got;
  2388. tmpref.index:=tmpreg;
  2389. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2390. if ref.offset<>0 then
  2391. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2392. end;
  2393. end;
  2394. if assigned(piclabel) then
  2395. begin
  2396. cg.a_label(list,piclabel);
  2397. tmpreg2:=getaddressregister(list);
  2398. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2399. tmpreg:=tmpreg2
  2400. end;
  2401. { This routine can be called with PC as base/index in case the offset
  2402. was too large to encode in a load/store. In that case, the entire
  2403. absolute expression has been re-encoded in a new constpool entry, and
  2404. we have to remove the use of PC from the original reference (the code
  2405. above made everything relative to the value loaded from the new
  2406. constpool entry) }
  2407. if is_pc(ref.base) then
  2408. ref.base:=NR_NO;
  2409. if is_pc(ref.index) then
  2410. ref.index:=NR_NO;
  2411. if (ref.base<>NR_NO) then
  2412. begin
  2413. if ref.index<>NR_NO then
  2414. begin
  2415. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2416. ref.base:=tmpreg;
  2417. end
  2418. else
  2419. if ref.base<>NR_PC then
  2420. begin
  2421. ref.index:=tmpreg;
  2422. ref.shiftimm:=0;
  2423. ref.signindex:=1;
  2424. ref.shiftmode:=SM_None;
  2425. end
  2426. else
  2427. ref.base:=tmpreg;
  2428. end
  2429. else
  2430. ref.base:=tmpreg;
  2431. ref.offset:=0;
  2432. ref.symbol:=nil;
  2433. end;
  { Emits code that copies len bytes from source to dest.

    Strategy, in the order it is tested below:
      - aligned and len=4 / len=2: one 32 bit / 16 bit load+store pair;
      - aligned and len<=helpsize: up to maxtmpreg word loads, then the
        matching word stores, then the remaining tail;
      - len<=4 (not aligned): byte-by-byte load/store;
      - everything else: a counted copy loop (genloop / genloop_thumb).

    list    : assembler list the instructions are appended to
    source  : reference to read from
    dest    : reference to write to
    len     : number of bytes to copy; nothing is emitted for len=0
    aligned : selects the aligned strategies above and the 16 bit tail
              moves in the loops }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved ones; also +2 is used because of regs required for
        referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;

    type
      ttmpregisters = array[1..maxtmpreg_arm] of tregister;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:ttmpregisters;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Counted copy loop using post-indexed addressing on srcref/dstref.
      Copies count div size elements of size bytes in the loop and the
      remaining count mod size bytes afterwards.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the S postfix makes the decrement produce the flags tested by
          the conditional jump below }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remainder }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether creating a separate ref is needed or
      if we can keep the original reference while copying }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Counted copy loop for Thumb code: the base registers of srcref and
      dstref are advanced with explicit ADDs instead of post-indexed
      addressing.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { advances the base register of ref by value bytes }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { step by the element size so the loop stays correct for size<>1 }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { mark the decrement as flag setting (as genloop does): the
          conditional jump below consumes its flags }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remainder }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      tmpregisters:=Default(ttmpregisters);
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      { when optimizing for size, only very short copies are unrolled }
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { load as many whole words as fit into the temp registers }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;

          { store the words loaded above, in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy what is left using the widest move that still fits }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the references returned by the internal load/store are
                reused for the following bytes }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference takes its temppos from source, like
                its alignment and volatility }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

  //            if cs_opt_size in current_settings.optimizerswitches  then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest; always forwards to
    g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    begin
      g_concatcopy_internal(list,source,dest,len,{aligned=}false);
    end;
  { Copies len bytes from source to dest. The copy is passed to
    g_concatcopy_internal as unaligned when either reference has an
    alignment of 1 or 3, and as aligned otherwise.
    NOTE(review): an alignment of 2 is passed on as aligned - confirm this
    is intended. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    begin
      g_concatcopy_internal(list,source,dest,len,
        not((source.alignment in [1,3]) or
            (dest.alignment in [1,3])));
    end;
  { Overflow check without an explicit overflow location: calls
    g_overflowCheck_loc with a location whose loc is LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      voidloc : tlocation;
    begin
      voidloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,voidloc);
    end;
  { Emits an overflow check: a conditional branch that skips a call to
    FPC_OVERFLOW. Nothing is emitted unless cs_check_overflow is active in
    the local switches.

    ovloc.loc=LOC_VOID  : the branch condition is derived from def and,
                          for pointers and the listed ordinal types, from
                          the opcode of the last instruction in List
    ovloc.loc=LOC_FLAGS : the branch uses the inverse of ovloc.resflags
    any other location raises internalerror(200409281) }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      jump : TAiCpu;
      invflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_VOID:
          begin
            jump:=taicpu.op_sym(A_B,skiplabel);
            jump.is_jmp:=true;

            { pointers and these ordinal types are checked via the carry
              flag, everything else via the overflow flag }
            carrybased:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));

            if not carrybased then
              jump.SetCondition(C_VC)
            else if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
              jump.SetCondition(C_CS)
            else
              jump.SetCondition(C_CC);

            list.concat(jump);
          end;
        LOC_FLAGS:
          begin
            invflags:=ovloc.resflags;
            inverse_flags(invflags);
            cg.a_jmp_flags(list,invflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;

      { reached only when the branch above is not taken }
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { saving the registers is done in g_proc_entry }
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { restoring the registers is done in g_proc_exit }
    end;
  { Emits a jump to l that is taken when cond holds.
    For Thumb code, a branch with the inverse condition skips over an
    unconditional jump to l; otherwise a single conditional branch to l is
    emitted. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skip : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
        end
      else
        begin
          { the optimizer has to fix this if jump range is sufficient short }
          current_asmdata.getjumplabel(skip);
          branch:=Taicpu.Op_sym(A_B,skip);
          branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
          branch.is_jmp:=true;
          list.concat(branch);
          a_jmp_always(list,l);
          a_label(list,skip);
        end;
    end;
  { Looks up the opcode for the size pair [fromsize,tosize]: A_VMOV when
    both are the same of OS_F32/OS_F64, A_VCVT when they differ within
    OS_F32/OS_F64. Every other combination raises
    internalerror(200312205). }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      optable : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=optable[fromsize,tosize];
      if result=A_NONE then
        internalerror(200312205);
    end;
  { Looks up the instruction postfix for the size pair [fromsize,tosize].
    Combinations outside OS_F32/OS_F64 yield PF_None; no error is raised
    here. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      postfixtable : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      result:=postfixtable[fromsize,tosize];
    end;
  { Moves/converts the scalar in mm register reg1 (fromsize) into mm
    register reg2 (tosize). A non-scalar shuffle raises
    internalerror(2009112407). }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407)
      else
        { the lookup tables are indexed with (tosize,fromsize) here }
        mov:=setoppostfix(
          taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),
          get_scalar_mm_prefix(tosize,fromsize));
      list.concat(mov);
      if mov.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so we do not need a check here }
        add_move_instruction(mov)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar of size fromsize from ref into mm register reg,
    converting to tosize when the sizes differ.
    Integer source sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as
    OS_F32/OS_F64 and must then equal tosize. References with alignment 1
    or 2 are loaded through integer registers, all others with VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpreg,
      loadreg : tregister;
      gppair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      { normalise integer sizes to the float size of the same width }
      case fromsize of
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { since we are loading an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { since we are loading an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112901);
          end;
        OS_F32,OS_F64:
          ;
        else
          internalerror(2019050920);
      end;

      { a conversion needs an intermediate register of the source size }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        begin
          { go through integer registers for alignment 1 or 2 }
          case fromsize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_load_ref_reg(list,OS_32,OS_32,ref,gpreg);
                a_loadmm_intreg_reg(list,OS_32,OS_F32,gpreg,loadreg,mms_movescalar);
              end;
            OS_F64:
              begin
                gppair.reglo:=getintregister(list,OS_32);
                gppair.reghi:=getintregister(list,OS_32);
                cg64.a_load64_ref_reg(list,ref,gppair);
                cg64.a_loadmm_intreg64_reg(list,OS_F64,gppair,loadreg);
              end;
            else
              internalerror(2009112412);
          end;
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar in mm register reg (fromsize) to ref as tosize,
    converting through a temporary mm register when the sizes differ.
    Integer target sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as
    OS_F32/OS_F64 and must then equal fromsize. References with alignment 1
    or 2 are stored through integer registers, all others with VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpreg,
      storereg : tregister;
      gppair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      { normalise integer sizes to the float size of the same width }
      case tosize of
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112802);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112902);
          end;
        OS_F32,OS_F64:
          ;
        else
          internalerror(2019050919);
      end;

      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          { convert first, then store the converted value }
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        begin
          { go through integer registers for alignment 1 or 2 }
          case tosize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpreg,shuffle);
                a_load_reg_ref(list,OS_32,OS_32,gpreg,ref);
              end;
            OS_F64:
              begin
                gppair.reglo:=getintregister(list,OS_32);
                gppair.reghi:=getintregister(list,OS_32);
                cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gppair);
                cg64.a_load64_reg_ref(list,gppair,ref);
              end;
            else
              internalerror(2009112417);
          end;
        end;
      { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
    end;
  { Emits a VMOV from integer register intreg to mm register mmreg.
    Only tosize=OS_F32 with fromsize OS_32/OS_S32 and a nil or scalar
    shuffle is accepted; anything else raises an internalerror. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if tosize<>OS_F32 then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Emits a VMOV from mm register mmreg to integer register intreg.
    Only fromsize=OS_F32 with tosize OS_32/OS_S32 and a nil or scalar
    shuffle is accepted; anything else raises an internalerror. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if fromsize<>OS_F32 then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112409);
      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Only OP_XOR is accepted; any other op raises internalerror(2009112906).
    With NEON available and size=OS_F64 a VEOR dst,dst,src is emitted.
    Otherwise src must equal dst and the register is cleared by moving an
    integer zero into it. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op=OP_XOR then
        begin
          if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size=OS_F64) then
            begin
              if (reg_cgsize(src)<>size) or
                 assigned(shuffle) then
                internalerror(2019081301);
              list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
            end
          else
            begin
              if (src<>dst) or
                 (reg_cgsize(src)<>size) or
                 assigned(shuffle) then
                internalerror(2009112907);
              { load 0 into an integer register and transfer it }
              zeroreg:=getintregister(list,OS_32);
              a_load_const_reg(list,OS_32,0,zeroreg);
              case size of
                OS_F32:
                  list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg));
                OS_F64:
                  list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg));
                else
                  internalerror(2009112908);
              end;
            end;
        end
      else
        internalerror(2009112906);
    end;
  { After OP_MUL, OP_SHL, OP_ADD, OP_SUB or OP_NEG on an 8 or 16 bit size,
    reloads dst into itself from OS_32 to size; does nothing for other
    operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      ops_needing_adjust = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      narrow_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in ops_needing_adjust) then
        exit;
      if size in narrow_sizes then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits an MLA with op1..op4 as operands. Before that, every operand
    that is R15, or R13 when generating Thumb/Thumb-2 code, is copied into
    a newly allocated integer register which is used in its place. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces operand by a fresh register holding its value when operand
      is one of the registers described above }
    procedure substitute(var operand : TRegister);
      var
        copyreg : TRegister;
      begin
        if ((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(operand)=RS_R13)) or
           (getsupreg(operand)=RS_R15) then
          begin
            copyreg:=getintregister(list,OS_INT);
            a_load_reg_reg(list,OS_INT,OS_INT,operand,copyreg);
            operand:=copyreg;
          end;
      end;

    begin
      substitute(op1);
      substitute(op2);
      substitute(op3);
      substitute(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { When the current procedure has pi_needs_tls set: calls fpc_read_tp and
    copies R0 into current_procinfo.tlsoffset, with R0 marked as allocated
    around the sequence. Emits nothing otherwise. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation on a register pair.
    OP_NEG : RSB with S postfix on the low words, then RSC on the high
             words, both with constant 0
    OP_NOT : OP_NOT applied separately to the low and the high word
    others : forwarded to a_op64_reg_reg_reg with regdst as second source }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags stay allocated across both instructions }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { In-place variant: forwards to a_op64_const_reg_reg with reg as both
    source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,op,size,value,{src=}reg,{dst=}reg);
    end;
  { Forwards to a_op64_const_reg_reg_checkoverflow with setflags=false;
    the overflow location it returns is discarded. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { Forwards to a_op64_reg_reg_reg_checkoverflow with setflags=false;
    the overflow location it returns is discarded. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Emits a VMOV with operands mmreg, intreg.reglo, intreg.reghi.
    mmsize must be OS_F64, otherwise internalerror(2009112405) is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if mmsize<>OS_F64 then
        internalerror(2009112405);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Emits a VMOV with operands intreg.reglo, intreg.reghi, mmreg.
    mmsize must be OS_F64, otherwise internalerror(2009112406) is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if mmsize<>OS_F64 then
        internalerror(2009112406);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
    end;
  3153. procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
  3154. var
  3155. tmpreg : tregister;
  3156. b : byte;
  3157. begin
  3158. ovloc.loc:=LOC_VOID;
  3159. case op of
  3160. OP_NEG,
  3161. OP_NOT :
  3162. internalerror(2012022501);
  3163. else
  3164. ;
  3165. end;
  3166. if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
  3167. begin
  3168. case op of
  3169. OP_ADD:
  3170. begin
  3171. if is_shifter_const(lo(value),b) then
  3172. begin
  3173. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3174. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3175. end
  3176. else
  3177. begin
  3178. tmpreg:=cg.getintregister(list,OS_32);
  3179. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3180. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3181. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3182. end;
  3183. if is_shifter_const(hi(value),b) then
  3184. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
  3185. else
  3186. begin
  3187. tmpreg:=cg.getintregister(list,OS_32);
  3188. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3189. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3190. end;
  3191. end;
  3192. OP_SUB:
  3193. begin
  3194. if is_shifter_const(lo(value),b) then
  3195. begin
  3196. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3197. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3198. end
  3199. else
  3200. begin
  3201. tmpreg:=cg.getintregister(list,OS_32);
  3202. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3203. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3204. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3205. end;
  3206. if is_shifter_const(hi(value),b) then
  3207. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
  3208. else
  3209. begin
  3210. tmpreg:=cg.getintregister(list,OS_32);
  3211. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3212. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3213. end;
  3214. end;
  3215. else
  3216. internalerror(200502131);
  3217. end;
  3218. if size=OS_64 then
  3219. begin
  3220. { the arm has an weired opinion how flags for SUB/ADD are handled }
  3221. ovloc.loc:=LOC_FLAGS;
  3222. case op of
  3223. OP_ADD:
  3224. ovloc.resflags:=F_CS;
  3225. OP_SUB:
  3226. ovloc.resflags:=F_CC;
  3227. else
  3228. internalerror(2019050918);
  3229. end;
  3230. end;
  3231. end
  3232. else
  3233. begin
  3234. case op of
  3235. OP_AND,OP_OR,OP_XOR:
  3236. begin
  3237. cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
  3238. cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
  3239. end;
  3240. OP_ADD:
  3241. begin
  3242. if is_shifter_const(aint(lo(value)),b) then
  3243. begin
  3244. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3245. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3246. end
  3247. else
  3248. begin
  3249. tmpreg:=cg.getintregister(list,OS_32);
  3250. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3251. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3252. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3253. end;
  3254. if is_shifter_const(aint(hi(value)),b) then
  3255. list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3256. else
  3257. begin
  3258. tmpreg:=cg.getintregister(list,OS_32);
  3259. cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
  3260. list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
  3261. end;
  3262. end;
  3263. OP_SUB:
  3264. begin
  3265. if is_shifter_const(aint(lo(value)),b) then
  3266. begin
  3267. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3268. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3269. end
  3270. else
  3271. begin
  3272. tmpreg:=cg.getintregister(list,OS_32);
  3273. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3274. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3275. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3276. end;
  3277. if is_shifter_const(aint(hi(value)),b) then
  3278. list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3279. else
  3280. begin
  3281. tmpreg:=cg.getintregister(list,OS_32);
  3282. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3283. list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
  3284. end;
  3285. end;
  3286. else
  3287. internalerror(2003083101);
  3288. end;
  3289. end;
  3290. end;
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    { Emits a 64 bit AND/OR/XOR/ADD/SUB working on lo/hi register pairs.

      list      : assembler list receiving the instructions
      op        : operation; OP_NEG and OP_NOT raise an internal error
      size      : OS_64 selects the carry based result flags, any other
                  size selects F_VS
      regsrc1/2 : source register pairs (SUB is emitted as regsrc2-regsrc1)
      regdst    : destination register pair
      setflags  : request that the flags describe the result
      ovloc     : set to LOC_FLAGS plus the flag to test when the flags were
                  requested for ADD/SUB, LOC_VOID otherwise }
    var
      flagsrequested : boolean;
    begin
      ovloc.loc:=LOC_VOID;

      { unary operations are not handled by this routine }
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      flagsrequested:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);

      if flagsrequested then
        begin
          { op can only be OP_ADD or OP_SUB here; both halves update the flags
            and the flags register is not released in this branch }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          if op=OP_ADD then
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
            end
          else
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
            end;

          ovloc.loc:=LOC_FLAGS;
          { for OS_64 the carry is tested: set after ADD, clear after SUB }
          if size<>OS_64 then
            ovloc.resflags:=F_VS
          else if op=OP_ADD then
            ovloc.resflags:=F_CS
          else
            ovloc.resflags:=F_CC;
        end
      else
        case op of
          OP_AND,OP_OR,OP_XOR:
            begin
              { bitwise operations are done independently on both halves }
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
            end;
          OP_ADD:
            begin
              { only the low half sets the flags; they carry into the high half }
              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          OP_SUB:
            begin
              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          else
            internalerror(2003083104);
        end;
    end;
  procedure tthumbcgarm.init_register_allocators;
    { Creates the integer register allocator for Thumb code. R7 is part of
      the allocatable set unless the current procedure uses it as frame
      pointer. }
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or
         (current_procinfo.framepointer<>NR_R7) then
        { R7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 is the frame pointer, keep it away from the allocator }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  procedure tthumbcgarm.done_register_allocators;
    { Frees the integer, fpu and mm register allocators, then lets the
      ancestor finish its own cleanup. }
    begin
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;

      inherited done_register_allocators;
    end;
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    { Emits the Thumb procedure prologue: pushes the registers that have to
      be preserved, reserves the local stack area and sets up the frame
      pointer if one is used, emitting CFI information along the way.

      list         : assembler list receiving the prologue
      localsize    : size of the local area in bytes (rounded up to 4 here)
      nostackframe : when true no prologue code is generated at all }
    var
      ref : treference;
      regidx : byte;
      saveregs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters,
      hasframepointer : Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;

      if nostackframe then
        exit;

      hasframepointer:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if hasframepointer then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { save int registers }
      reference_reset(ref,4,[]);
      ref.index:=NR_STACK_POINTER_REG;
      ref.addressmode:=AM_PREINDEXED;
      saveregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

      { disabled code that used to run when a frame pointer is in use:
        //!!!! a_reg_alloc(list,NR_R12);
        //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); }

      { the (old) ARM APCS requires saving both the stack pointer (to
        crawl the stack) and the PC (to identify the function this
        stack frame belongs to) -> also save R12 (= copy of R13 on entry)
        and R15 -- still needs updating for EABI and Darwin, they don't
        need that }
      if hasframepointer then
        saveregs:=saveregs+[RS_R7,RS_R14]
      else
        // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(saveregs,RS_R14);

      { safely estimate stack size: a big frame needs R4 as scratch register }
      if localsize+current_settings.alignment.localalignmax+4>508 then
        begin
          include(rg[R_INTREGISTER].used_in_proc,RS_R4);
          include(saveregs,RS_R4);
        end;

      registerarea:=0;
      { do not save integer registers if the procedure does not return }
      if po_noreturn in current_procinfo.procdef.procoptions then
        saveregs:=[];

      if saveregs<>[] then
        begin
          for regidx:=RS_R0 to RS_R15 do
            if regidx in saveregs then
              inc(registerarea,4);
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,saveregs));
          { we need to run the loop twice to get cfi right }
          registerarea:=0;
          for regidx:=RS_R0 to RS_R15 do
            if regidx in saveregs then
              begin
                inc(registerarea,4);
                current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,regidx,R_SUBWHOLE),-registerarea);
              end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
        end;

      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;

      if stack_parameters or (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { do we access stack parameters?
            if yes, the previously estimated stacksize must be used }
          if stack_parameters then
            begin
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                begin
                  writeln(localsize);
                  writeln(tcpuprocinfo(current_procinfo).stackframesize);
                  internalerror(2013040601);
                end
              else
                localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
            end
          else
            localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

          { lower the stack pointer with one SUB, two SUBs, or via R4 }
          if localsize<508 then
            list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if localsize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
            end
          else
            begin
              a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
              include(saveregs,RS_R4);
            end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
        end;

      if hasframepointer then
        begin
          { frame pointer := stack pointer }
          list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
          current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
        end;
    end;
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    { Emits the Thumb procedure epilogue: releases the local stack area and
      pops the saved registers together with the PC. Without a stack frame
      only the return instruction is generated.

      list         : assembler list receiving the epilogue
      parasize     : not used by this implementation
      nostackframe : when true only a plain return is emitted }
    var
      LocalSize : longint;
      regidx: byte;
      restoreregs : tcpuregisterset;
      registerarea : DWord;
      stackmisalignment: pint;
      stack_parameters : Boolean;
    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          { plain return: BX when the cpu has it, MOV PC,LR otherwise }
          if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
            list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
          else
            list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(restoreregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(restoreregs,getsupreg(current_procinfo.framepointer));

      { size of the area occupied by the registers to pop }
      registerarea:=0;
      for regidx:=RS_R0 to RS_R15 do
        if regidx in restoreregs then
          inc(registerarea,4);

      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
      LocalSize:=current_procinfo.calc_stackframe_size;
      if stack_parameters then
        localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
      else
        localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

      { nothing more is emitted for a non-Darwin target that uses a
        dedicated frame pointer }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; an empty area needs no instruction }
      if LocalSize<>0 then
        begin
          if LocalSize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if LocalSize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize-508));
            end
          else
            begin
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if restoreregs=[] then
        begin
          if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
            list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
          else
            list.concat(taicpu.op_reg(A_BX,NR_R14))
        end
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,restoreregs));
    end;
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    { Loads a value of size fromsize from Ref into reg. References whose
      alignment (1 or 2) is smaller than the access size are read in
      byte/halfword pieces which are combined with LSL and ORR; everything
      else is passed on to handle_load_store.

      list     : assembler list receiving the instructions
      fromsize : size in memory; replaced by tosize if it is not smaller
      tosize   : size of the value wanted in reg
      Ref      : memory reference to load from
      reg      : destination register }
    var
      oppostfix:toppostfix;
      pieceref: treference;
      piecereg,addrreg : tregister;
      step : integer;
      bigendian,
      needsaddr : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix belonging to the memory size }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          bigendian:=target_info.endian=endian_big;
          { direction in which the pieces are walked through }
          if bigendian then
            step:=-1
          else
            step:=1;

          { only complicated references need an extra loadaddr }
          needsaddr:=assigned(ref.symbol) or
            (ref.index<>NR_NO) or
            (ref.offset<-124) or
            (ref.offset>124) or
            { sometimes the compiler reused registers }
            (reg=ref.index) or
            (reg=ref.base);

          case FromSize of
            OS_16,OS_S16:
              begin
                if needsaddr then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(pieceref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  pieceref:=ref;
                if bigendian then
                  inc(pieceref.offset,1);
                piecereg:=getintregister(list,OS_INT);
                { first byte goes directly into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                inc(pieceref.offset,step);
                { second byte: loaded signed for OS_S16 }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,piecereg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,pieceref,piecereg);
                list.concat(taicpu.op_reg_const(A_LSL,piecereg,8));
                list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,piecereg),PF_S));
              end;
            OS_32,OS_S32:
              begin
                piecereg:=getintregister(list,OS_INT);
                if needsaddr then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(pieceref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  pieceref:=ref;
                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if bigendian then
                      inc(pieceref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,reg);
                    inc(pieceref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,piecereg);
                    list.concat(taicpu.op_reg_const(A_LSL,piecereg,16));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,piecereg),PF_S));
                  end
                else
                  begin
                    { four byte loads, shifted by 8, 16 and 24 bits }
                    if bigendian then
                      inc(pieceref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);

                    inc(pieceref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,piecereg);
                    list.concat(taicpu.op_reg_const(A_LSL,piecereg,8));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,piecereg),PF_S));

                    inc(pieceref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,piecereg);
                    list.concat(taicpu.op_reg_const(A_LSL,piecereg,16));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,piecereg),PF_S));

                    inc(pieceref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,piecereg);
                    list.concat(taicpu.op_reg_const(A_LSL,piecereg,24));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,piecereg),PF_S));
                  end;
              end
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,oppostfix,reg,ref);

      { a signed byte wanted as 16 bit value gets an extra 16->32 bit move }
      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    { Loads the constant a into reg. Values accepted by is_thumb_imm are
      moved directly; any other value is placed as a 32 bit constant in the
      local data of the current procedure and loaded PC relative.

      list : assembler list receiving the instruction
      size : must be an 8, 16 or 32 bit integer size
      a    : constant to load
      reg  : destination register }
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { emit label and constant into the local data, then load from there }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    { Subtracts ioffset from the 'self' parameter of procdef at every
      caller side location of that parameter (register or memory).

      list    : assembler list receiving the instructions
      procdef : procedure whose 'self' parameter is adjusted
      ioffset : value to subtract }

    { Pushes R4 and loads ioffset into it from a constant placed in the
      local data of the current procedure. The caller has to pop R4 again. }
    procedure load_ioffset_into_r4;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    var
      selfsym : tsym;
      href : treference;
      paraloc : Pcgparalocation;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(2003052504);

      paraloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while paraloc<>nil do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register)
                else
                  begin
                    { offset does not fit an immediate: go through R4 }
                    load_ioffset_into_r4;
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R4,paraloc^.register);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(href,paraloc^.reference.index,paraloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,href)
                else
                  begin
                    load_ioffset_into_r4;
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R4,href);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end
            else
              internalerror(2003091804);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    { Thumb front end for the inherited handle_load_store. If the
      reference does not pass the checks below, its address is first
      loaded into a fresh register and the access is done through a plain
      base register reference with offset 0.

      list      : assembler list receiving the instructions
      op        : load/store opcode
      oppostfix : size/sign postfix of the access
      reg       : register to load or store
      ref       : memory reference
      Result    : whatever the inherited implementation returns }
    var
      href : treference;
      addrreg : TRegister;
      wordaccess,
      byteaccess,
      halfaccess,
      needsaddr : boolean;
    begin
      href:=ref;

      wordaccess:=((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      byteaccess:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_B)) or
                  (((op=A_LDRB) or (op=A_STRB)) and (oppostfix=PF_None));
      halfaccess:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_H)) or
                  (((op=A_LDRH) or (op=A_STRH)) and (oppostfix=PF_None));

      needsaddr:=
        { LDR/STR limitations }
        (wordaccess and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byteaccess and
         ((ref.base=NR_STACK_POINTER_REG) or
          (ref.index=NR_STACK_POINTER_REG) or
          (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (halfaccess and
         ((ref.base=NR_STACK_POINTER_REG) or
          (ref.index=NR_STACK_POINTER_REG) or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word load with a large offset }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any other LDR with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if needsaddr then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(href,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    { Emits the Thumb two operand form of op with source register src and
      destination register dst, then calls maybeadjustresult for the
      destination.

      list : assembler list receiving the instructions
      Op   : operation; OP_DIV and OP_IDIV raise an internal error
      size : operand size; OP_ROL accepts only OS_32/OS_S32
      src  : source register (the rotate/shift count for those operations)
      dst  : destination register }
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { the rotate count is the 32-src value computed in tmpreg;
              rotating by src itself would perform a ROR instead of a ROL }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            { all remaining operations map directly onto one instruction }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits Thumb code applying operation op with the constant a to register
    dst, followed by maybeadjustresult for dst.

    list : assembler list receiving the instructions
    op   : operation to perform
    size : operand size
    a    : constant operand
    dst  : register that is operated on

    The parts enclosed in ifdef DUMMY are inactive unless DUMMY is defined;
    they refer to identifiers (setflags, so, shift, imm1, imm2) that are not
    declared in this procedure. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      tmpreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { if the negated constant is a valid thumb immediate, turn ADD into SUB
        (and vice versa) with the negated constant; with range checking
        enabled the value -2147483648 is excluded from this }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;
      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          { ADD/SUB with an immediate operand: a single instruction }
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
          { placeholder: every branch below is empty, the overflow location
            handling is commented out }
          if (cgsetflags {!!! or setflags }) and (size in [OS_8,OS_16,OS_32]) then
            begin
              //!!! ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  //!!! ovloc.resflags:=F_CS;
                  ;
                OP_SUB:
                  //!!! ovloc.resflags:=F_CC;
                  ;
                else
                  ;
              end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          { multiplication/division by 1: emitted as a move of dst onto itself }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          { multiplication by 0: load 0 }
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          { signed multiplication/division by -1: negate }
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(2003082903);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(2012051802);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
          { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
          { note: the then-branch below is an empty statement, no instruction is emitted }
          else if (op = OP_AND) and (not(dword(a))=0) then
            // do nothing
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          { shifts take the constant as immediate shift count }
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { general case: load the constant into a temporary register and
                use the register/register variant }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg(list,op,size,tmpreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    { Three operand operation with a constant. An ADD of a positive multiple
      of 4 (at most 1020) to R13 with a destination other than R13 is
      emitted as a single ADD dst,src,a; everything else is delegated to
      the inherited implementation. }
    var
      sp_relative_add : boolean;
    begin
      sp_relative_add:=(op=OP_ADD) and
        (src=NR_R13) and (dst<>NR_R13) and
        ((a mod 4)=0) and (a>0) and (a<=1020);

      if not sp_relative_add then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    { Stores the state of the flags f in reg as 0 or 1 by means of a
      conditional branch around two moves.

      list : assembler list receiving the instructions
      size : not used by this implementation
      f    : flags condition to materialize
      reg  : register receiving 1 if the condition holds, 0 otherwise }
    var
      truelabel,
      endlabel : tasmlabel;
      condbranch : taicpu;
    begin
      current_asmdata.getjumplabel(truelabel);
      current_asmdata.getjumplabel(endlabel);

      { branch to the "true" part if the condition holds }
      condbranch:=setcondition(taicpu.op_sym(A_B,truelabel),flags_to_cond(f));
      condbranch.is_jmp:=true;
      list.concat(condbranch);

      { condition false: reg:=0 and skip the "true" part }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,endlabel));

      { condition true: reg:=1 }
      cg.a_label(list,truelabel);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,endlabel);
    end;
  procedure tthumb2cgarm.init_register_allocators;
    { Creates the register allocators for Thumb-2 code: the integer
      allocator (without R9 on iOS), the FPA allocator if the selected fpu
      has FPA, and the mm allocator via init_mmregister_allocator. }
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if (target_info.system=system_arm_ios) then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  procedure tthumb2cgarm.done_register_allocators;
    { Frees the integer, fpu and mm register allocators, then lets the
      ancestor finish its own cleanup. }
    begin
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;

      inherited done_register_allocators;
    end;
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    { Emits an indirect call through reg using BLX and records in the
      flags of the current procedure that it performs a call. }
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinstr);
{
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
}
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into integer register reg.

    Only 8, 16 and 32 bit sizes are accepted (internal error 2002090909
    otherwise). The cheapest form that can encode the value is chosen, in
    this order:
      1. MOV  when a passes is_thumb32_imm
      2. MVN  when not(a) passes is_thumb32_imm
      3. MOVW when a fits into the lower 16 bits
      4. LDR  from a 32 bit entry appended to the procedure's local data,
              addressed through a fresh label with PC as base }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value in the local data and load it }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry just appended, the constant follows it }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value from the memory reference Ref into the integer register reg.

    list     - instruction list the generated code is appended to
    fromsize - size of the value in memory; narrowed to tosize when it is
               at least as large as tosize
    tosize   - size the value is loaded as
    Ref      - source memory reference
    reg      - destination register

    When the reference alignment is 1 or 2 and smaller than the access size,
    the value is assembled from several narrower loads that are merged with
    ORR and an LSL shifter operand; otherwise a single load is emitted through
    handle_load_store. Unsupported sizes raise internal error 2003082913. }
  4060. procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  4061. var
  4062. oppostfix:toppostfix;
  4063. usedtmpref: treference;
  4064. tmpreg,tmpreg2 : tregister;
  4065. so : tshifterop;
  4066. dir : integer;
  4067. begin
  { never read more bytes than the destination size asks for }
  4068. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  4069. FromSize := ToSize;
  { pick the load postfix (byte/halfword, signed/unsigned) from the source size }
  4070. case FromSize of
  4071. { signed integer registers }
  4072. OS_8:
  4073. oppostfix:=PF_B;
  4074. OS_S8:
  4075. oppostfix:=PF_SB;
  4076. OS_16:
  4077. oppostfix:=PF_H;
  4078. OS_S16:
  4079. oppostfix:=PF_SH;
  4080. OS_32,
  4081. OS_S32:
  4082. oppostfix:=PF_None;
  4083. else
  4084. InternalError(2003082913);
  4085. end;
  { under-aligned access: build the value out of smaller pieces }
  4086. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  4087. begin
  { dir is the offset step from the least significant piece towards the
    more significant ones: backwards on big endian, forwards otherwise }
  4088. if target_info.endian=endian_big then
  4089. dir:=-1
  4090. else
  4091. dir:=1;
  4092. case FromSize of
  4093. OS_16,OS_S16:
  4094. begin
  4095. { only complicated references need an extra loadaddr }
  4096. if assigned(ref.symbol) or
  4097. (ref.index<>NR_NO) or
  4098. (ref.offset<-255) or
  4099. (ref.offset>4094) or
  4100. { sometimes the compiler reused registers }
  4101. (reg=ref.index) or
  4102. (reg=ref.base) then
  4103. begin
  { compute the address into a fresh register and address relative to it }
  4104. tmpreg2:=getintregister(list,OS_INT);
  4105. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4106. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4107. end
  4108. else
  4109. usedtmpref:=ref;
  { on big endian the low byte is the second one }
  4110. if target_info.endian=endian_big then
  4111. inc(usedtmpref.offset,1);
  4112. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  4113. tmpreg:=getintregister(list,OS_INT);
  { low byte goes into reg, the other byte into tmpreg; for OS_S16 the
    second byte is loaded with the signed byte size }
  4114. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4115. inc(usedtmpref.offset,dir);
  4116. if FromSize=OS_16 then
  4117. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  4118. else
  4119. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  4120. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4121. end;
  4122. OS_32,OS_S32:
  4123. begin
  4124. tmpreg:=getintregister(list,OS_INT);
  4125. { only complicated references need an extra loadaddr }
  4126. if assigned(ref.symbol) or
  4127. (ref.index<>NR_NO) or
  4128. (ref.offset<-255) or
  4129. (ref.offset>4092) or
  4130. { sometimes the compiler reused registers }
  4131. (reg=ref.index) or
  4132. (reg=ref.base) then
  4133. begin
  4134. tmpreg2:=getintregister(list,OS_INT);
  4135. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4136. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4137. end
  4138. else
  4139. usedtmpref:=ref;
  4140. shifterop_reset(so);so.shiftmode:=SM_LSL;
  4141. if ref.alignment=2 then
  4142. begin
  { halfword aligned: two 16 bit loads, second one shifted left by 16 }
  4143. if target_info.endian=endian_big then
  4144. inc(usedtmpref.offset,2);
  4145. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  4146. inc(usedtmpref.offset,dir*2);
  4147. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  4148. so.shiftimm:=16;
  4149. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4150. end
  4151. else
  4152. begin
  { byte aligned: four 8 bit loads merged with shifts of 8, 16 and 24 }
  4153. if target_info.endian=endian_big then
  4154. inc(usedtmpref.offset,3);
  4155. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4156. inc(usedtmpref.offset,dir);
  4157. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4158. so.shiftimm:=8;
  4159. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4160. inc(usedtmpref.offset,dir);
  4161. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4162. so.shiftimm:=16;
  4163. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4164. inc(usedtmpref.offset,dir);
  4165. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4166. so.shiftimm:=24;
  4167. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4168. end;
  4169. end
  4170. else
  { remaining sizes: plain load }
  4171. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  4172. end;
  4173. end
  4174. else
  { sufficiently aligned: a single load instruction }
  4175. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { a signed byte loaded as an unsigned 16 bit value gets an extra
    OS_16 to OS_32 register move on the result }
  4176. if (fromsize=OS_S8) and (tosize = OS_16) then
  4177. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  4178. end;
  { Two-register operation dst := op(src) / dst := dst op src.

    Only OP_NOT is handled here: it is emitted as MVN followed, for sizes
    below 32 bit, by the zero or sign extension matching the size. Every
    other operation is forwarded to the inherited implementation. Sizes other
    than 8, 16 or 32 bit raise internal error 2019050916 for OP_NOT. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      extendop : tasmop;
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { A_NONE means that the result needs no extension afterwards }
      extendop:=A_NONE;
      case size of
        OS_32,
        OS_S32:
          ;
        OS_8:
          extendop:=A_UXTB;
        OS_S8:
          extendop:=A_SXTB;
        OS_16:
          extendop:=A_UXTH;
        OS_S16:
          extendop:=A_SXTH;
        else
          internalerror(2019050916);
      end;

      if extendop<>A_NONE then
        list.concat(taicpu.op_reg_reg(extendop,dst,dst));
    end;
  { Emits dst := src <op> a for a constant operand a and reports where an
    overflow indication can be found.

    list     - instruction list the generated code is appended to
    op       - operation to perform; may be rewritten internally (ADD/SUB swap)
    size     - operand size
    a        - constant operand
    src, dst - source and destination registers
    setflags - request a flag setting instruction (cgsetflags has the same effect)
    ovloc    - out: LOC_VOID by default; LOC_FLAGS with F_CS (ADD) or F_CC (SUB)
               for the unsigned sizes when flags were requested on the
               immediate path; may also be set by the register fallback

    The routine first tries forms that encode the constant directly, then a
    list of multiplication and AND special cases, and finally falls back to
    loading the constant into a register and using the three-register variant.
    maybeadjustresult is called on dst at the end in all cases. }
  4199. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4200. var
  4201. shift, width : byte;
  4202. tmpreg : tregister;
  4203. so : tshifterop;
  4204. l1 : longint;
  4205. begin
  4206. ovloc.loc:=LOC_VOID;
  { if the negated constant is encodable, turn ADD into SUB and vice versa;
    the minimum 32 bit value is excluded because it has no positive negation }
  4207. if (a<>-2147483648) and is_shifter_const(-a,shift) then
  4208. case op of
  4209. OP_ADD:
  4210. begin
  4211. op:=OP_SUB;
  4212. a:=aint(dword(-a));
  4213. end;
  4214. OP_SUB:
  4215. begin
  4216. op:=OP_ADD;
  4217. a:=aint(dword(-a));
  4218. end
  4219. else
  4220. ;
  4221. end;
  { first path: the constant is a shifter constant and op is not a multiplication }
  4222. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4223. case op of
  { these operations are not supported with a constant operand here }
  4224. OP_NEG,OP_NOT,
  4225. OP_DIV,OP_IDIV:
  4226. internalerror(200308285);
  { shifts and rotates become a MOV with a shifter operand, or a plain
    register MOV when the count is zero }
  4227. OP_SHL:
  4228. begin
  4229. if a>32 then
  4230. internalerror(2014020703);
  4231. if a<>0 then
  4232. begin
  4233. shifterop_reset(so);
  4234. so.shiftmode:=SM_LSL;
  4235. so.shiftimm:=a;
  4236. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4237. end
  4238. else
  4239. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4240. end;
  4241. OP_ROL:
  4242. begin
  4243. if a>32 then
  4244. internalerror(2014020704);
  4245. if a<>0 then
  4246. begin
  { rotate left by a is expressed as rotate right by 32-a }
  4247. shifterop_reset(so);
  4248. so.shiftmode:=SM_ROR;
  4249. so.shiftimm:=32-a;
  4250. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4251. end
  4252. else
  4253. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4254. end;
  4255. OP_ROR:
  4256. begin
  4257. if a>32 then
  4258. internalerror(2014020705);
  4259. if a<>0 then
  4260. begin
  4261. shifterop_reset(so);
  4262. so.shiftmode:=SM_ROR;
  4263. so.shiftimm:=a;
  4264. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4265. end
  4266. else
  4267. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4268. end;
  4269. OP_SHR:
  4270. begin
  4271. if a>32 then
  4272. internalerror(200308292);
  4273. shifterop_reset(so);
  4274. if a<>0 then
  4275. begin
  4276. so.shiftmode:=SM_LSR;
  4277. so.shiftimm:=a;
  4278. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4279. end
  4280. else
  4281. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4282. end;
  4283. OP_SAR:
  4284. begin
  4285. if a>32 then
  4286. internalerror(200308295);
  4287. if a<>0 then
  4288. begin
  4289. shifterop_reset(so);
  4290. so.shiftmode:=SM_ASR;
  4291. so.shiftimm:=a;
  4292. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4293. end
  4294. else
  4295. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4296. end;
  4297. else
  { all remaining operations: ADD/SUB with a constant outside 0..4095 go
    through a temporary register, everything else uses the immediate form }
  4298. if (op in [OP_SUB, OP_ADD]) and
  4299. ((a < 0) or
  4300. (a > 4095)) then
  4301. begin
  4302. tmpreg:=getintregister(list,size);
  4303. a_load_const_reg(list, size, a, tmpreg);
  4304. if cgsetflags or setflags then
  4305. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression yields PF_S when flags are requested and the
    postfix with ordinal 0 otherwise (presumably PF_None - verify) }
  4306. list.concat(setoppostfix(
  4307. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4308. end
  4309. else
  4310. begin
  4311. if cgsetflags or setflags then
  4312. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4313. list.concat(setoppostfix(
  4314. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4315. end;
  { for unsigned sizes the overflow result is reported through the flags:
    F_CS after an addition, F_CC after a subtraction }
  4316. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4317. begin
  4318. ovloc.loc:=LOC_FLAGS;
  4319. case op of
  4320. OP_ADD:
  4321. ovloc.resflags:=F_CS;
  4322. OP_SUB:
  4323. ovloc.resflags:=F_CC;
  4324. else
  4325. ;
  4326. end;
  4327. end;
  4328. end
  4329. else
  { second path: multiplications and constants that are not shifter constants }
  4330. begin
  4331. { there could be added some more sophisticated optimizations }
  4332. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4333. a_load_reg_reg(list,size,size,src,dst)
  4334. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4335. a_load_const_reg(list,size,0,dst)
  4336. else if (op in [OP_IMUL]) and (a=-1) then
  4337. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4338. { we do this here instead in the peephole optimizer because
  4339. it saves us a register }
  4340. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4341. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4342. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4343. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4344. begin
  4345. if l1>32 then{roozbeh does this ever happen?}
  4346. internalerror(2003082911);
  4347. shifterop_reset(so);
  4348. so.shiftmode:=SM_LSL;
  4349. so.shiftimm:=l1;
  4350. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4351. end
  4352. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4353. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4354. begin
  4355. if l1>32 then{does this ever happen?}
  4356. internalerror(2012051803);
  4357. shifterop_reset(so);
  4358. so.shiftmode:=SM_LSL;
  4359. so.shiftimm:=l1;
  4360. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4361. end
  4362. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4363. begin
  4364. { nothing to do on success }
  4365. end
  4366. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4367. Just using mov x, #0 might allow some easier optimizations down the line. }
  4368. else if (op = OP_AND) and (dword(a)=0) then
  4369. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4370. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4371. else if (op = OP_AND) and (not(dword(a))=0) then
  4372. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4373. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4374. broader range of shifterconstants.}
  4375. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4376. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  4377. else if (op = OP_AND) and is_thumb32_imm(a) then
  4378. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  4379. else if (op = OP_AND) and (a = $FFFF) then
  4380. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  4381. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4382. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  4383. else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
  4384. begin
  4385. a_load_reg_reg(list,size,size,src,dst);
  4386. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4387. end
  4388. else
  4389. begin
  { generic fallback: materialise the constant and use the register variant }
  4390. tmpreg:=getintregister(list,size);
  4391. a_load_const_reg(list,size,a,tmpreg);
  4392. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4393. end;
  4394. end;
  4395. maybeadjustresult(list,op,size,dst);
  4396. end;
  { Maps every TOpCG value to the Thumb-2 opcode that the generic
    three-register path of tthumb2cgarm.a_op_reg_reg_reg_checkoverflow emits
    for it. A_NONE marks entries that have no opcode in this table. The
    entries are positional, so their order has to follow the declaration
    order of TOpCG (declared elsewhere - verify when editing). }
  4397. const
  4398. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4399. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4400. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Emits dst := src2 <op> src1 on registers and reports where an overflow
    indication can be found.

    list       - instruction list the generated code is appended to
    op         - operation; OP_NEG and OP_NOT are rejected (internal error 200308286)
    size       - operand size; rotates require a 32 bit size
    src1, src2 - source registers; neither of them is modified
    dst        - destination register
    setflags   - request flag setting/overflow checking (cgsetflags has the
                 same effect)
    ovloc      - out: LOC_VOID by default; for checked multiplications
                 LOC_FLAGS with F_NE

    Handling per operation:
      OP_ROL       - simulated as ROR by (32 - src1), computed in a temporary
      OP_ROR       - single ROR
      OP_MUL/IMUL  - MUL, or UMULL/SMULL plus a CMP on the high word when
                     overflow checking is requested
      others       - opcode from op_reg_reg_opcg2asmopThumb2, with the S
                     postfix when flags are requested
    maybeadjustresult is called on dst at the end in all cases. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the rotate count is built in
              a temporary so that the source register src1 stays untouched }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { checked multiplication: produce the full 64 bit product,
                  overflowreg receives the high word }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: use a
                          copy of the operand as first multiplicand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word has to equal the sign extension
                      of the low word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word has to be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: use a
                          copy of the operand as first multiplicand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { generic case: table driven opcode, S postfix when flags are requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the flag state f as 1 or 0 in reg: an ITE instruction on
    the condition derived from f, followed by a MOV of 1 under that
    condition and a MOV of 0 under the inverse condition. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : TAsmCond;
      movtrue,movfalse : taicpu;
    begin
      truecond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,truecond));

      movtrue:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond);
      list.concat(movtrue);

      movfalse:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond));
      list.concat(movfalse);
    end;
  { Emits the procedure prologue.

    list         - instruction list the generated code is appended to
    localsize    - size of the local stack area; rounded up to a multiple of 4
                   and later adjusted for stack alignment
    nostackframe - when true, nothing is emitted at all

    Steps, in emission order: optional copy of SP into R12 and push of the
    non-volatile integer registers that are in use (plus frame pointer and/or
    R14), optional setup of the frame pointer from R12, adjustment of SP by
    the local size, and finally saving of the used non-volatile FPA registers
    with SFM when the FPU type has FPA. }
  4516. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4517. var
  4518. ref : treference;
  4519. shift : byte;
  4520. firstfloatreg,lastfloatreg,
  4521. r : byte;
  4522. regs : tcpuregisterset;
  4523. stackmisalignment: pint;
  4524. begin
  4525. LocalSize:=align(LocalSize,4);
  4526. { call instruction does not put anything on the stack }
  4527. stackmisalignment:=0;
  4528. if not(nostackframe) then
  4529. begin
  4530. firstfloatreg:=RS_NO;
  4531. lastfloatreg:=RS_NO;
  4532. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4533. begin
  4534. { save floating point registers? }
  { determine the first and last used non-volatile FPA register; each one
    found adds 12 bytes to the misalignment bookkeeping }
  4535. for r:=RS_F0 to RS_F7 do
  4536. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4537. begin
  4538. if firstfloatreg=RS_NO then
  4539. firstfloatreg:=r;
  4540. lastfloatreg:=r;
  4541. inc(stackmisalignment,12);
  4542. end;
  4543. end;
  4544. a_reg_alloc(list,NR_STACK_POINTER_REG);
  { with a separate frame pointer, R12 keeps the incoming SP value }
  4545. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4546. begin
  4547. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4548. a_reg_alloc(list,NR_R12);
  4549. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4550. end;
  4551. { save int registers }
  4552. reference_reset(ref,4,[]);
  4553. ref.index:=NR_STACK_POINTER_REG;
  4554. ref.addressmode:=AM_PREINDEXED;
  { registers to push: used non-volatile ones, plus frame pointer and R14
    when a frame pointer is used, or R14 alone when anything is pushed or
    the procedure performs calls }
  4555. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4556. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4557. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4558. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4559. include(regs,RS_R14);
  4560. if regs<>[] then
  4561. begin
  { every pushed register moves SP by 4 bytes }
  4562. for r:=RS_R0 to RS_R15 do
  4563. if (r in regs) then
  4564. inc(stackmisalignment,4);
  4565. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4566. end;
  4567. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4568. begin
  4569. { the framepointer now points to the saved R15, so the saved
  4570. framepointer is at R11-12 (for get_caller_frame) }
  4571. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4572. a_reg_dealloc(list,NR_R12);
  4573. end;
  { reduce the byte count to the remainder relative to the maximum local alignment }
  4574. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  { SP is adjusted when there are locals, or when the stack is misaligned
    and the procedure calls others or is an assembler procedure }
  4575. if (LocalSize<>0) or
  4576. ((stackmisalignment<>0) and
  4577. ((pi_do_call in current_procinfo.flags) or
  4578. (po_assembler in current_procinfo.procdef.procoptions))) then
  4579. begin
  { pad the local size so that SP ends up aligned after the pushes above }
  4580. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4581. if not(is_shifter_const(localsize,shift)) then
  4582. begin
  { size not encodable as immediate: load it into R12 and subtract }
  4583. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4584. a_reg_alloc(list,NR_R12);
  4585. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4586. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4587. a_reg_dealloc(list,NR_R12);
  4588. end
  4589. else
  4590. begin
  { NOTE(review): R12 is released here although this procedure only
    allocated it in the frame pointer case, where it was already released
    above - confirm that this deallocation is intended }
  4591. a_reg_dealloc(list,NR_R12);
  4592. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4593. end;
  4594. end;
  4595. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4596. begin
  4597. if firstfloatreg<>RS_NO then
  4598. begin
  { store the FPA register range at floatregstart relative to the frame
    pointer; large offsets are computed into R12 first }
  4599. reference_reset(ref,4,[]);
  4600. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4601. begin
  4602. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4603. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4604. ref.base:=NR_R12;
  4605. end
  4606. else
  4607. begin
  4608. ref.base:=current_procinfo.framepointer;
  4609. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4610. end;
  4611. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4612. lastfloatreg-firstfloatreg+1,ref));
  4613. end;
  4614. end;
  4615. end;
  4616. end;
  { Emits the procedure epilogue.

    list         - instruction list the generated code is appended to
    parasize     - not used by this implementation
    nostackframe - when true, only a BX R14 is emitted

    Nothing is emitted for procedures marked po_noreturn. Otherwise the used
    non-volatile FPA registers are reloaded with LFM (when the FPU type has
    FPA), SP is moved back over the local area, and the routine returns
    either with BX R14 (nothing was pushed) or by an LDM that pops the saved
    registers, with R15 taking the place of R14. }
  4617. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4618. var
  4619. ref : treference;
  4620. firstfloatreg,lastfloatreg,
  4621. r : byte;
  4622. shift : byte;
  4623. regs : tcpuregisterset;
  4624. LocalSize : longint;
  4625. stackmisalignment: pint;
  4626. begin
  4627. { a routine not returning needs no exit code,
  4628. we trust this directive as arm thumb is normally used if small code shall be generated }
  4629. if po_noreturn in current_procinfo.procdef.procoptions then
  4630. exit;
  4631. if not(nostackframe) then
  4632. begin
  4633. stackmisalignment:=0;
  4634. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4635. begin
  4636. { restore floating point register }
  4637. firstfloatreg:=RS_NO;
  4638. lastfloatreg:=RS_NO;
  4639. { save floating point registers? }
  { determine the first and last used non-volatile FPA register }
  4640. for r:=RS_F0 to RS_F7 do
  4641. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4642. begin
  4643. if firstfloatreg=RS_NO then
  4644. firstfloatreg:=r;
  4645. lastfloatreg:=r;
  4646. { floating point register space is already included in
  4647. localsize below by calc_stackframe_size
  4648. inc(stackmisalignment,12);
  4649. }
  4650. end;
  4651. if firstfloatreg<>RS_NO then
  4652. begin
  { reload the FPA register range from floatregstart relative to the frame
    pointer; large offsets are computed into R12 first }
  4653. reference_reset(ref,4,[]);
  4654. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4655. begin
  4656. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4657. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4658. ref.base:=NR_R12;
  4659. end
  4660. else
  4661. begin
  4662. ref.base:=current_procinfo.framepointer;
  4663. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4664. end;
  4665. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4666. lastfloatreg-firstfloatreg+1,ref));
  4667. end;
  4668. end;
  { registers to pop: the used non-volatile ones; where the prologue pushed
    R14, R15 is popped instead so that the LDM also performs the return }
  4669. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4670. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4671. begin
  4672. exclude(regs,RS_R14);
  4673. include(regs,RS_R15);
  4674. end;
  4675. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4676. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
  { every popped register accounts for 4 bytes of stack }
  4677. for r:=RS_R0 to RS_R15 do
  4678. if (r in regs) then
  4679. inc(stackmisalignment,4);
  4680. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4681. LocalSize:=current_procinfo.calc_stackframe_size;
  { same condition and padding computation as in the prologue, so SP is
    moved back by the amount it was lowered there }
  4682. if (LocalSize<>0) or
  4683. ((stackmisalignment<>0) and
  4684. ((pi_do_call in current_procinfo.flags) or
  4685. (po_assembler in current_procinfo.procdef.procoptions))) then
  4686. begin
  4687. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4688. if not(is_shifter_const(LocalSize,shift)) then
  4689. begin
  { size not encodable as immediate: load it into R12 and add it to SP }
  4690. a_reg_alloc(list,NR_R12);
  4691. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4692. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4693. a_reg_dealloc(list,NR_R12);
  4694. end
  4695. else
  4696. begin
  { NOTE(review): R12 is released here without a matching allocation in
    this branch - confirm that this deallocation is intended }
  4697. a_reg_dealloc(list,NR_R12);
  4698. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4699. end;
  4700. end;
  { nothing was pushed: plain return through the link register }
  4701. if regs=[] then
  4702. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4703. else
  4704. begin
  { pop the saved registers; R15 is part of regs on this path }
  4705. reference_reset(ref,4,[]);
  4706. ref.index:=NR_STACK_POINTER_REG;
  4707. ref.addressmode:=AM_PREINDEXED;
  4708. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4709. end;
  4710. end
  4711. else
  4712. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4713. end;
  { Emits the load or store instruction op (with postfix oppostfix) between
    register reg and memory reference ref, first rewriting the reference
    into a form the instruction can encode.

    list      - instruction list the generated code is appended to
    op        - load/store opcode to emit
    oppostfix - postfix for the emitted instruction
    reg       - data register of the load/store
    ref       - memory reference; passed by value and modified locally

    Returns the reference as it was finally used in the emitted instruction.

    Rewrites performed when needed: moving a lone index into the base,
    loading a symbol address or an out-of-range offset into a temporary
    register, folding an offset into the base when both base and index are
    present, replacing a PC base combined with a shifted index, and
    collapsing base+index into a single base for floating point and VFP
    loads/stores. }
  4714. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4715. var
  4716. tmpreg : tregister;
  4717. tmpref : treference;
  4718. l : tasmlabel;
  4719. begin
  { NR_NO in tmpreg means that no temporary register has been allocated yet }
  4720. tmpreg:=NR_NO;
  4721. { Be sure to have a base register }
  4722. if (ref.base=NR_NO) then
  4723. begin
  { a shifted index cannot simply be turned into a base }
  4724. if ref.shiftmode<>SM_None then
  4725. internalerror(2014020706);
  4726. ref.base:=ref.index;
  4727. ref.index:=NR_NO;
  4728. end;
  4729. { absolute symbols can't be handled directly, we've to store the symbol reference
  4730. in the text segment and access it pc relative
  4731. For now, we assume that references where base or index equals to PC are already
  4732. relative, all other references are assumed to be absolute and thus they need
  4733. to be handled extra.
  4734. A proper solution would be to change refoptions to a set and store the information
  4735. if the symbol is absolute or relative there.
  4736. }
  { the reference needs a temporary register when it has a non PC relative
    symbol, has no register at all, has an offset outside -255..4095
    (-255..255 for the SB, H and SH postfixes), or belongs to a floating
    point/VFP access whose offset is outside -1020..1020, is not a multiple
    of 4, or that carries a symbol }
  4737. if (assigned(ref.symbol) and
  4738. not(is_pc(ref.base)) and
  4739. not(is_pc(ref.index))
  4740. ) or
  4741. { [#xxx] isn't a valid address operand }
  4742. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4743. //(ref.offset<-4095) or
  4744. (ref.offset<-255) or
  4745. (ref.offset>4095) or
  4746. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4747. ((ref.offset<-255) or
  4748. (ref.offset>255)
  4749. )
  4750. ) or
  4751. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4752. ((ref.offset<-1020) or
  4753. (ref.offset>1020) or
  4754. ((abs(ref.offset) mod 4)<>0) or
  4755. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4756. assigned(ref.symbol)
  4757. )
  4758. ) then
  4759. begin
  4760. reference_reset(tmpref,4,[]);
  4761. { load symbol }
  4762. tmpreg:=getintregister(list,OS_INT);
  4763. if assigned(ref.symbol) then
  4764. begin
  { append a labelled constant holding symbol+offset to the local data;
    the kind of constant depends on the address type of the reference }
  4765. current_asmdata.getjumplabel(l);
  4766. cg.a_label(current_procinfo.aktlocaldata,l);
  4767. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4768. if ref.refaddr=addr_gottpoff then
  4769. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  4770. else if ref.refaddr=addr_tlsgd then
  4771. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  4772. else if ref.refaddr=addr_tlsdesc then
  4773. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  4774. else if ref.refaddr=addr_tpoff then
  4775. begin
  4776. if assigned(ref.relsymbol) or (ref.offset<>0) then
  4777. Internalerror(2019092807);
  4778. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  4779. end
  4780. else
  4781. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4782. { load consts entry }
  4783. tmpref.symbol:=l;
  4784. tmpref.base:=NR_R15;
  4785. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4786. { in case of LDF/STF, we got rid of the NR_R15 }
  4787. if is_pc(ref.base) then
  4788. ref.base:=NR_NO;
  4789. if is_pc(ref.index) then
  4790. ref.index:=NR_NO;
  4791. end
  4792. else
  { no symbol: only the offset has to be materialised }
  4793. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
  { merge the temporary into the reference: as the new base when base and
    index are both taken (after adding the old base), as the index when
    only a base exists, or as the base when no register is left }
  4794. if (ref.base<>NR_NO) then
  4795. begin
  4796. if ref.index<>NR_NO then
  4797. begin
  4798. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4799. ref.base:=tmpreg;
  4800. end
  4801. else
  4802. begin
  4803. ref.index:=tmpreg;
  4804. ref.shiftimm:=0;
  4805. ref.signindex:=1;
  4806. ref.shiftmode:=SM_None;
  4807. end;
  4808. end
  4809. else
  4810. ref.base:=tmpreg;
  { symbol and offset are now contained in the temporary register }
  4811. ref.offset:=0;
  4812. ref.symbol:=nil;
  4813. end;
  { base, index and offset together: fold the offset into a register }
  4814. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4815. begin
  4816. if tmpreg<>NR_NO then
  4817. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4818. else
  4819. begin
  4820. tmpreg:=getintregister(list,OS_ADDR);
  4821. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4822. ref.base:=tmpreg;
  4823. end;
  4824. ref.offset:=0;
  4825. end;
  4826. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  4827. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4828. begin
  { copy PC into a temporary register and use that as base }
  4829. tmpreg:=getintregister(list,OS_ADDR);
  4830. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4831. ref.base := tmpreg;
  4832. end;
  4833. { floating point operations have only limited references
  4834. we expect here, that a base is already set }
  4835. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4836. begin
  4837. if ref.shiftmode<>SM_none then
  4838. internalerror(2003091202);
  { combine base and index into one register; an already allocated temporary
    is reused, and ref.signindex selects between adding and subtracting }
  4839. if tmpreg<>NR_NO then
  4840. begin
  4841. if ref.base=tmpreg then
  4842. begin
  4843. if ref.signindex<0 then
  4844. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4845. else
  4846. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4847. ref.index:=NR_NO;
  4848. end
  4849. else
  4850. begin
  { the temporary has to be either the base or the index }
  4851. if ref.index<>tmpreg then
  4852. internalerror(2004031602);
  4853. if ref.signindex<0 then
  4854. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4855. else
  4856. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4857. ref.base:=tmpreg;
  4858. ref.index:=NR_NO;
  4859. end;
  4860. end
  4861. else
  4862. begin
  4863. tmpreg:=getintregister(list,OS_ADDR);
  4864. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4865. ref.base:=tmpreg;
  4866. ref.index:=NR_NO;
  4867. end;
  4868. end;
  { emit the actual load/store with the legalised reference }
  4869. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4870. Result := ref;
  4871. end;
  { Moves a value between two multimedia registers.

    Only the OS_F32 to OS_F32 case generates code: a VMOV with the F32
    postfix that is also registered through add_move_instruction. All other
    size combinations, including OS_F64 to OS_F64 and OS_F32 to OS_F64, emit
    nothing. The shuffle parameter is not used. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      { anything but single to single precision is silently ignored here }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
      movinstr:=setoppostfix(movinstr, PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads multimedia register reg from the memory reference ref.

    The work is delegated to handle_load_store, which is asked to emit a
    VLDR without any opcode postfix. fromsize, tosize and shuffle are not
    consulted by this routine, and the value returned by handle_load_store
    is discarded. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list,A_VLDR,PF_None,reg,ref);
    end;
  { Stores multimedia register reg to the memory reference ref.

    The work is delegated to handle_load_store, which is asked to emit a
    VSTR without any opcode postfix. fromsize, tosize and shuffle are not
    consulted by this routine, and the value returned by handle_load_store
    is discarded. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(list,A_VSTR,PF_None,reg,ref);
      { VSTR cannot generate an FPU exception, so we do not need a check here }
    end;
  { Transfers the contents of integer register intreg into multimedia
    register mmreg with a single VMOV.

    Only tosize = OS_F32 is supported; any other target size raises
    internal error 2012100813. fromsize and shuffle are not checked. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the contents of multimedia register mmreg into integer
    register intreg with a single VMOV.

    Only fromsize = OS_F32 is supported; any other source size raises
    internal error 2012100814. tosize and shuffle are not checked. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
          { VMOV cannot generate an FPU exception, so we do not need a check here }
        end;
    end;
  { 64 bit register/register operation: regdst := op(regsrc).

    OP_NEG is expanded locally as a three instruction sequence that runs
    with NR_DEFAULTFLAGS allocated:
      RSB (with S postfix)  regdst.reglo, regsrc.reglo, 0
      MOV                   zero, 0
      SBC                   regdst.reghi, zero, regsrc.reghi
    Every other operation is forwarded unchanged to the inherited
    implementation. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      { only the negation needs special treatment here }
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: 0 - lo, flag-setting form so the high word can pick up the borrow }
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { the high word is subtracted from a zero held in a scratch register }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation: regdst := regdst op regsrc
    (for the unary operations OP_NEG and OP_NOT: regdst := op(regsrc)).

    Supported operations:
      OP_NEG             zero regdst, then SUB/SBC regsrc from it
      OP_NOT             delegated per 32 bit half to cg.a_op_reg_reg
      OP_AND/OP_OR/
      OP_XOR             delegated per 32 bit half to cg.a_op_reg_reg
      OP_ADD             ADD on the low halves, ADC on the high halves
      OP_SUB             SUB on the low halves, SBC on the high halves
    Any other operation raises internal error 2003083105.

    Every sequence that passes a carry/borrow from the low to the high half
    brackets its instructions with a_reg_alloc/a_reg_dealloc of
    NR_DEFAULTFLAGS, so that the flags are not reported as live beyond the
    sequence. The size parameter is not consulted. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { NOTE(review): regdst is cleared before regsrc is read; this
              looks unsafe if a caller passes overlapping source and
              destination registers - confirm against the callers }
            { the flags are allocated before the first flag-setting
              instruction (the MOVs carry the S postfix) }
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, matching the allocation above }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, matching the allocation above }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation: reg := reg op value.

    Supported operations:
      OP_AND/OP_OR/
      OP_XOR             delegated per 32 bit half to cg.a_op_const_reg
      OP_ADD             ADD on the low half, ADC on the high half
      OP_SUB             SUB on the low half, SBC on the high half
    Any other operation raises internal error 2003083106.

    For OP_ADD/OP_SUB the low half of the constant is used as an immediate
    when it lies in 0..255 and is loaded into a scratch register otherwise;
    the high half is always loaded into a scratch register.
    All constants are materialised BEFORE NR_DEFAULTFLAGS is allocated, so
    that nothing is emitted between the instruction producing the
    carry/borrow and the ADC/SBC consuming it (a_load_const_reg may emit
    flag-modifying instructions of its own). The flags are released again
    once the high half has been computed. The size parameter is not
    consulted. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      loreg,
      hireg : tregister;
      lovalue,
      hivalue : aint;
      oplo,
      ophi : tasmop;
      loisimm : boolean;
    begin
      lovalue:=aint(lo(value));
      hivalue:=aint(hi(value));
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_const_reg(list,op,OS_32,lovalue,reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,hivalue,reg.reghi);
          end;
        OP_ADD,OP_SUB:
          begin
            if op=OP_ADD then
              begin
                oplo:=A_ADD;
                ophi:=A_ADC;
              end
            else
              begin
                oplo:=A_SUB;
                ophi:=A_SBC;
              end;

            { load every constant that needs a register first ... }
            loisimm:=(lovalue>=0) and (lovalue<=255);
            loreg:=NR_NO;
            if not loisimm then
              begin
                loreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,lovalue,loreg);
              end;
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hivalue,hireg);

            { ... so that the carry chain below is not interrupted }
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            if loisimm then
              list.concat(taicpu.op_reg_const(oplo,reg.reglo,lovalue))
            else
              list.concat(taicpu.op_reg_reg(oplo,reg.reglo,loreg));
            list.concat(setoppostfix(taicpu.op_reg_reg(ophi,reg.reghi,hireg),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects cg and cg64 and selects
    the peephole optimizer class according to the instruction set in use:
      GenerateThumb2Code  -> tthumb2cgarm / tthumb2cg64farm / TCpuThumb2AsmOptimizer
      GenerateThumbCode   -> tthumbcgarm  / tthumbcg64farm  (casmoptimizer is not assigned)
      otherwise           -> tarmcgarm    / tarmcg64farm    / TCpuAsmOptimizer
    GenerateThumb2Code is tested first and takes precedence. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          { no optimizer class is selected for Thumb: }
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { plain ARM code }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5051. end.