cgcpu.pas 223 KB


  {
      Copyright (c) 2003 by Florian Klaempfl
      Member of the Free Pascal development team

      This unit implements the code generator for the ARM

      This program is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
      the Free Software Foundation; either version 2 of the License, or
      (at your option) any later version.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      GNU General Public License for more details.

      You should have received a copy of the GNU General Public License
      along with this program; if not, write to the Free Software
      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

   ****************************************************************************
  }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm: code generator base class shared between all arm architectures }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;

    procedure a_load_const_cgpara(list: TAsmList; size: tcgsize; a: tcgint; const paraloc: TCGPara); override;
  protected
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint;
      const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
    procedure init_mmregister_allocator;
  public
    { parameter passing and calls }
    procedure a_loadaddr_ref_cgpara(list: TAsmList; const r: treference; const paraloc: TCGPara); override;
    procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
    procedure a_call_reg(list: TAsmList; reg: tregister); override;

    { move instructions }
    procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    function a_internal_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference): treference;
    function a_internal_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister): treference;

    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean); override;
    procedure a_loadfpu_ref_cgpara(list: TAsmList; size: tcgsize; const ref: treference; const paraloc: TCGPara); override;

    { comparison operations }
    procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister;
      l: tasmlabel); override;
    procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;

    { jumps and flag handling }
    procedure a_jmp_name(list: TAsmList; const s: string); override;
    procedure a_jmp_always(list: TAsmList; l: tasmlabel); override;
    procedure a_jmp_flags(list: TAsmList; const f: TResFlags; l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { procedure entry/exit related code }
    procedure g_profilecode(list: TAsmList); override;
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;
    procedure g_maybe_got_init(list: TAsmList); override;

    procedure a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister); override;

    { block copies }
    procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint); override;
    procedure g_concatcopy_unaligned(list: TAsmList; const source, dest: treference; len: tcgint); override;
    procedure g_concatcopy_internal(list: TAsmList; const source, dest: treference; len: tcgint; aligned: boolean);

    { overflow checking }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List: TAsmList; const Loc: TLocation; def: TDef; ovloc: tlocation); override;

    procedure g_save_registers(list: TAsmList); override;
    procedure g_restore_registers(list: TAsmList); override;

    procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);
    procedure fixref(list: TAsmList; var ref: treference);
    { virtual: the thumb and thumb-2 generators declared in this unit override it }
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; virtual;

    { mm register moves and operations }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;

    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { try to generate an optimized 32 bit multiplication, returns true if
      the code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister): boolean;

    { clear out potential overflow bits from 8 or 16 bit operations:
      the upper 24/16 bits of a register after an operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla for thumb requires that none of the registers is equal to r13/r15,
      this method ensures this }
    procedure safe_mla(list: TAsmList; op1, op2, op3, op4: TRegister);

    procedure g_maybe_tls_init(list: TAsmList); override;
  end;
  { tcgarm: shared between normal arm and thumb-2 }
  tcgarm = class(tbasecgarm)
    { integer operations }
    procedure a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list: TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint;
      src, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize;
      src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation); override;

    { loads }
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;

    procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint); override;

    { Multiply two 32-bit registers into lo and hi 32-bit registers }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1, src2, dstlo, dsthi: tregister); override;
  end;
  { tarmcgarm: normal arm cg; only supplies its own register allocator setup }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;
  end;
  { tbasecg64farm: 64 bit cg for all arm flavours (adds nothing to tcg64f32) }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm: 64 bit cg shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { 64 bit integer operations }
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
    procedure a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64); override;
    procedure a_op64_const_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64;
      regsrc, regdst: tregister64); override;
    procedure a_op64_reg_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize;
      regsrc1, regsrc2, regdst: tregister64); override;
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCG; size: tcgsize; value: int64;
      regsrc, regdst: tregister64; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCG; size: tcgsize;
      regsrc1, regsrc2, regdst: tregister64; setflags: boolean; var ovloc: tlocation); override;

    { moves between a 64 bit integer register pair and an mm register }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister); override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64); override;
  end;
  { tarmcg64farm: adds no members of its own to tcg64farm }
  tarmcg64farm = class(tcg64farm)
  end;
  { tthumbcgarm: derives directly from tbasecgarm (not from tcgarm) and
    brings its own integer operations, loads and prologue/epilogue code }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;

    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;

    procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint); override;

    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { tthumbcg64farm: 64 bit counterpart of tthumbcgarm, judging by the name;
    derives from tbasecg64farm }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
    procedure a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64); override;
  end;
  { tthumb2cgarm: thumb-2 code generator, built on top of tcgarm }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    procedure a_call_reg(list: TAsmList; reg: tregister); override;

    { loads }
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;

    { integer operations }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint;
      src, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize;
      src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation); override;

    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;

    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;

    { mm register moves }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
  end;
  { tthumb2cg64farm: overrides only the 64 bit register/register operation
    of tcg64farm }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
  end;
  const
    { lookup table indexed by topcmp, giving the TAsmCond used for that
      comparison; the entry order must follow the declaration order of topcmp }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (
      C_NONE,
      C_EQ,
      C_GT,
      C_LT,
      C_GE,
      C_LE,
      C_NE,
      C_LS,
      C_CC,
      C_CS,
      C_HI
    );

    { NOTE(review): presumably the stack page size in bytes on Windows
      targets - the constant is not used in the visible part of the unit }
    winstackpagesize = 4096;
  { returns the instruction postfix (PF_S, PF_D or PF_E) matching the
    float type described by def }
  function get_fpu_postfix(def: tdef): toppostfix;

  { NOTE(review): presumably instantiates the code generator objects of this
    unit - the implementation lies outside the reviewed part of the file }
  procedure create_codegen;
  175. implementation
  176. uses
  177. globals,verbose,systems,cutils,
  178. aopt,aoptcpu,
  179. fmodule,
  180. symconst,symsym,symtable,
  181. tgobj,
  182. procinfo,cpupi,
  183. paramgr;
  { Range checking must be disabled explicitly, because conversions between
    signed and unsigned 32-bit values are done without explicit typecasts }
  186. {$R-}
  { Maps the float type described by def onto an instruction postfix:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Raises internal error 200401271 if def is not a floatdef and internal
    error 200401272 for any other float type. }
  function get_fpu_postfix(def : tdef) : toppostfix;
  begin
    if def.typ<>floatdef then
      internalerror(200401271)
    else
      case tfloatdef(def).floattype of
        s32real:
          result:=PF_S;
        s64real:
          result:=PF_D;
        s80real:
          result:=PF_E;
        else
          internalerror(200401272);
      end;
  end;
  { Creates the integer register allocator (its register set depends on the
    target system and on the frame pointer in use), the FPA register
    allocator if the selected FPU has FPA, and finally the mm register
    allocator. }
  procedure tarmcgarm.init_register_allocators;
  begin
    inherited init_register_allocators;

    { currently, we always save R14, so we can use it }
    if target_info.system=system_arm_ios then
      { r7 is not available on Darwin, it's used as frame pointer (always,
        for backtrace support -- also in gcc/clang -> R11 can be used).
        r9 is volatile }
      rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
         RS_R10,RS_R11,RS_R14],first_int_imreg,[])
    else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
      { R11 is not the frame pointer of the current procedure, so it can be
        handed to the allocator }
      rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
         RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
    else
      { no current_procinfo, or R11 is the frame pointer: leave R11 out }
      rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
         RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

    { the FPA registers F0..F7 are only made available if the FPU has them }
    if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

    init_mmregister_allocator;
  end;
  { Frees the integer, fpu and mm register allocators, then calls the
    inherited done_register_allocators. }
  procedure tarmcgarm.done_register_allocators;
  begin
    { not every allocator is necessarily created by init_register_allocators
      (the fpu and mm ones depend on the selected FPU); all three slots are
      freed unconditionally here }
    rg[R_INTREGISTER].free;
    rg[R_FPUREGISTER].free;
    rg[R_MMREGISTER].free;

    inherited done_register_allocators;
  end;
  { Loads the constant a into reg. The strategies are tried in this order:
      1. a single MOV, if a is a shifter constant
      2. a single MVN, if not(a) is a shifter constant
      3. MOV + ORR, if a can be split into two shifter constants
      4. MVN + BIC, if not(a) can be split into two shifter constants
      5. a pc-relative LDR from a 32 bit constant appended to the local data
         of the current procedure
    Sizes other than the 8, 16 and 32 bit integer sizes raise internal error
    2002090907. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
  var
    rot : byte;
    poollabel : tasmlabel;
    poolref : treference;
    part1, part2 : DWord;
  begin
    if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
      internalerror(2002090907);

    if is_shifter_const(a,rot) then
      begin
        list.concat(taicpu.op_reg_const(A_MOV,reg,a));
        exit;
      end;

    if is_shifter_const(not(a),rot) then
      begin
        list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
        exit;
      end;

    { loading of constants with mov and orr }
    if split_into_shifter_const(a,part1,part2) then
      begin
        list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
        list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
        exit;
      end;

    { loading of constants with mvn and bic }
    if split_into_shifter_const(not(a),part1,part2) then
      begin
        list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
        list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
        exit;
      end;

    { last resort: put the value into the local data of the procedure and
      load it relative to the pc }
    reference_reset(poolref,4,[]);
    current_asmdata.getjumplabel(poollabel);
    cg.a_label(current_procinfo.aktlocaldata,poollabel);
    { symboldata has to be taken right after the label was added, i.e.
      before the constant itself is appended }
    poolref.symboldata:=current_procinfo.aktlocaldata.last;
    current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
    poolref.symbol:=poollabel;
    poolref.base:=NR_PC;
    list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
  end;
  { Loads a value of size fromsize from Ref into reg, extending it to tosize.

    If fromsize is at least as large as tosize, the load is done with tosize.
    A plain load through handle_load_store is used unless
      - the reference has alignment 1 or 2 and that is less than the size
        being loaded, or
      - the cpu lacks CPUARM_HAS_ALL_MEM and a halfword load would be needed.
    In those cases 16 and 32 bit values are assembled from byte (or halfword)
    loads which are combined with ORR and a left shift. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  var
    postfix : toppostfix;
    partref : treference;
    partreg, partreg2 : tregister;
    shiftop : tshifterop;
    step : integer;
    has_all_mem : boolean;

    { Sets up partref for the piecewise loads. Only complicated references
      need an extra loadaddr: those with a symbol or an index register, with
      an offset outside -4095..max_offset, or which use the destination
      register themselves. The address of such a reference is first loaded
      into a new register (kept in partreg2). }
    procedure init_partref(max_offset : longint);
    begin
      if assigned(Ref.symbol) or
         (Ref.index<>NR_NO) or
         (Ref.offset<-4095) or
         (Ref.offset>max_offset) or
         { sometimes the compiler reused registers }
         (reg=Ref.index) or
         (reg=Ref.base) then
        begin
          partreg2:=getintregister(list,OS_INT);
          a_loadaddr_ref_reg(list,Ref,partreg2);
          reference_reset_base(partref,partreg2,0,Ref.temppos,Ref.alignment,Ref.volatility);
        end
      else
        partref:=Ref;
    end;

  begin
    if TCGSize2Size[fromsize]>=TCGSize2Size[tosize] then
      fromsize:=tosize;

    case fromsize of
      OS_8:
        postfix:=PF_B;
      OS_S8:
        postfix:=PF_SB;
      OS_16:
        postfix:=PF_H;
      OS_S16:
        postfix:=PF_SH;
      OS_32,
      OS_S32:
        postfix:=PF_None;
      else
        InternalError(200308297);
    end;

    has_all_mem:=CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype];

    { without CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned here and
      sign extended at the end of this routine }
    if (fromsize=OS_S8) and (not has_all_mem) then
      postfix:=PF_B;

    if ((Ref.alignment in [1,2]) and (Ref.alignment<tcgsize2size[fromsize])) or
       ((not has_all_mem) and (postfix in [PF_SH,PF_H])) then
      begin
        { The least significant part is always loaded first. On big endian
          targets it lives at the highest offset, so the reference starts
          there and walks downwards. }
        if target_info.endian=endian_big then
          step:=-1
        else
          step:=1;

        case fromsize of
          OS_16,OS_S16:
            begin
              init_partref(4094);
              if target_info.endian=endian_big then
                inc(partref.offset,1);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=8;
              partreg:=getintregister(list,OS_INT);
              { low byte goes directly into the destination }
              a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
              inc(partref.offset,step);
              { the high byte carries the sign for OS_S16 }
              if fromsize=OS_16 then
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg)
              else
                a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg);
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
            end;
          OS_32,OS_S32:
            begin
              partreg:=getintregister(list,OS_INT);
              init_partref(4092);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              if Ref.alignment=2 then
                begin
                  { two halfword loads }
                  if target_info.endian=endian_big then
                    inc(partref.offset,2);
                  a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                  inc(partref.offset,step*2);
                  a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                  shiftop.shiftimm:=16;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                end
              else
                begin
                  { four byte loads; partreg and partreg2 hold the bytes that
                    are still to be merged into reg }
                  partreg2:=getintregister(list,OS_INT);
                  if target_info.endian=endian_big then
                    inc(partref.offset,3);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                  inc(partref.offset,step);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                  inc(partref.offset,step);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg2);
                  shiftop.shiftimm:=8;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  inc(partref.offset,step);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                  shiftop.shiftimm:=16;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg2,shiftop));
                  shiftop.shiftimm:=24;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                end;
            end;
          else
            { 8 bit values never need to be pieced together }
            handle_load_store(list,A_LDR,postfix,reg,Ref);
        end;
      end
    else
      handle_load_store(list,A_LDR,postfix,reg,Ref);

    { fix up the extension of signed bytes where the load did not do it }
    if (fromsize=OS_S8) and (not has_all_mem) then
      a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
    else if (fromsize=OS_S8) and (tosize=OS_16) then
      a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  end;
  { Subtracts ioffset from the self parameter of procdef, at every location
    (register or stack reference) the caller side parameter info assigns to
    it. If ioffset is not a shifter constant, it is loaded into R12 first.
    Raises internal error 2003052503 if procdef has no 'self' parameter
    symbol and internal error 2003091803 for other kinds of location. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  var
    selfsym : tsym;
    selfref : treference;
    curloc : Pcgparalocation;
    rot : byte;
  begin
    { calculate the parameter info for the procdef }
    procdef.init_paraloc_info(callerside);
    selfsym:=tsym(procdef.parast.Find('self'));
    if not(assigned(selfsym) and
           (selfsym.typ=paravarsym)) then
      internalerror(2003052503);

    { walk the chain of locations making up the self parameter }
    curloc:=tparavarsym(selfsym).paraloc[callerside].location;
    while curloc<>nil do
      begin
        case curloc^.loc of
          LOC_REGISTER:
            if is_shifter_const(ioffset,rot) then
              a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
            else
              begin
                a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
              end;
          LOC_REFERENCE:
            begin
              { offset in the wrapper needs to be adjusted for the stored
                return address }
              reference_reset_base(selfref,curloc^.reference.index,
                curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
              if is_shifter_const(ioffset,rot) then
                a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
              else
                begin
                  a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                  a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,selfref);
                end;
            end;
          else
            internalerror(2003091803);
        end;
        curloc:=curloc^.next;
      end;
  end;
  { Passes the constant a as parameter: it is loaded into the parameter
    register, or stored to the parameter's stack slot. paraloc must be a
    simple location; location kinds other than register or reference raise
    internal error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
  var
    slot : treference;
    target : PCGParaLocation;
  begin
    paraloc.check_simple_location;
    paramanager.allocparaloc(list,paraloc.location);
    target:=paraloc.location;
    case target^.loc of
      LOC_REGISTER,
      LOC_CREGISTER:
        a_load_const_reg(list,size,a,target^.register);
      LOC_REFERENCE:
        begin
          { build the reference from the parameter location: its index
            register acts as the base register of the store }
          reference_reset(slot,paraloc.alignment,[]);
          slot.base:=target^.reference.index;
          slot.offset:=target^.reference.offset;
          a_load_const_ref(list,size,a,slot);
        end;
      else
        internalerror(2002081101);
    end;
  end;
  { Copies (a part of) a parameter from ref to the parameter location
    reference paralocref. Only one case is handled here, everything else is
    passed on to the inherited implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
  begin
    if (cgpara.size<>OS_F64) or
       (location^.size<>OS_32) then
      inherited
    else
      { doubles in softemu mode have a strange order of registers and
        references: copy exactly the 4 bytes of this location }
      g_concatcopy(list,ref,paralocref,4);
  end;
  { Creates the mm register allocator that fits the selected FPU:
      - FPUARM_HAS_32REGS together with FPUARM_HAS_VFP_DOUBLE: D0..D31
      - FPUARM_HAS_32REGS alone: S0..S31
      - otherwise, with FPUARM_HAS_VFP_EXTENSION: D0..D15
    If none of these applies, no mm register allocator is created. }
  procedure tbasecgarm.init_mmregister_allocator;
  var
    has_32regs : boolean;
  begin
    { The register allocator currently cannot deal with multiple
      non-overlapping subregs per register, so we can only use
      half the single precision registers for now (as sub registers of the
      double precision ones). }
    has_32regs:=FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype];
    if has_32regs then
      begin
        if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
             RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[])
        else
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
            [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
             RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,
             RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
             RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
            ],first_mm_imreg,[]);
      end
    else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
        [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
         RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
        ],first_mm_imreg,[]);
  end;
  { Passes the address of r as parameter: the address is computed directly
    into the parameter register, or via a temporary register and then stored
    to the parameter's stack slot. paraloc must be a simple location;
    location kinds other than register or reference raise internal error
    2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
  var
    slot : treference;
    addrreg : tregister;
    target : PCGParaLocation;
  begin
    paraloc.check_simple_location;
    paramanager.allocparaloc(list,paraloc.location);
    target:=paraloc.location;
    case target^.loc of
      LOC_REGISTER,
      LOC_CREGISTER:
        a_loadaddr_ref_reg(list,r,target^.register);
      LOC_REFERENCE:
        begin
          { build the reference from the parameter location: its index
            register acts as the base register of the store }
          reference_reset(slot,paraloc.alignment,[]);
          slot.base:=target^.reference.index;
          slot.offset:=target^.reference.offset;
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,r,addrreg);
          a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,slot);
        end;
      else
        internalerror(2002080701);
    end;
  end;
  { Emits a BL to the function symbol named s (referenced as a weak symbol
    if weak is set) and marks the current procedure as performing calls. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
  var
    callref : treference;
    target : TAsmSymbol;
  begin
    if weak then
      target:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
    else
      target:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
    reference_reset_symbol(callref,target,0,sizeof(pint),[]);

    { pic addressing is only used if the target uses a got for pic and pic
      code is actually requested for this module }
    if (tf_pic_uses_got in target_info.flags) and
       (cs_create_pic in current_settings.moduleswitches) then
      callref.refaddr:=addr_pic
    else
      callref.refaddr:=addr_full;

    { use always BL as newer binutils do not translate blx apparently
      generating BL is also what clang and gcc do by default }
    list.concat(taicpu.op_ref(A_BL,callref));

    {
      the compiler does not properly set this flag anymore in pass 1, and
      for now we only need it after pass 2 (I hope) (JM)
        if not(pi_do_call in current_procinfo.flags) then
          internalerror(2003060703);
    }
    include(current_procinfo.flags,pi_do_call);
  end;
  { Emits an indirect call through reg and marks the current procedure as
    performing calls. BLX is used if the cpu has it; otherwise the call is
    done by copying PC to R14 followed by a move of reg to PC. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
  begin
    { check not really correct: should only be used for non-Thumb cpus }
    if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
      list.concat(taicpu.op_reg(A_BLX, reg))
    else
      begin
        list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
      end;

    {
      the compiler does not properly set this flag anymore in pass 1, and
      for now we only need it after pass 2 (I hope) (JM)
        if not(pi_do_call in current_procinfo.flags) then
          internalerror(2003060703);
    }
    include(current_procinfo.flags,pi_do_call);
  end;
  { reg := reg <Op> a. Implemented via the three operand form, with reg as
    both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
  begin
    a_op_const_reg_reg(list,Op,size,a,reg,reg);
  end;
  { Applies Op with the constant a to the memory location ref: the value is
    loaded into a temporary register, the operation is carried out into a
    second temporary register, and the result is stored back. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
  var
    valuereg, resultreg : tregister;
    loadedref : treference;
  begin
    valuereg:=getintregister(list,size);
    resultreg:=getintregister(list,size);
    { the store below goes through the reference returned by the load
      instead of the original ref }
    loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
    a_op_const_reg_reg(list,Op,size,a,valuereg,resultreg);
    a_load_reg_ref(list,size,size,resultreg,loadedref);
  end;
  { dst := dst <Op> src for binary operations; for the unary operations
    OP_NEG and OP_NOT the result is dst := <Op> src. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
  var
    shift : tshifterop;
  begin
    case Op of
      OP_NEG:
        begin
          { dst := 0 - src }
          list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
          maybeadjustresult(list,OP_NEG,size,dst);
        end;
      OP_NOT:
        if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
        else
          begin
            { 8 and 16 bit values: MVN of the value shifted into the top bits
              of the register, followed by a shift back down }
            shifterop_reset(shift);
            shift.shiftmode:=SM_LSL;
            if size in [OS_8, OS_S8] then
              shift.shiftimm:=24
            else
              shift.shiftimm:=16;
            list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
            {Using a shift here allows this to be folded into another instruction}
            if size in [OS_S8, OS_S16] then
              shift.shiftmode:=SM_ASR
            else
              shift.shiftmode:=SM_LSR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
          end;
      else
        a_op_reg_reg_reg(list,Op,size,src,dst,dst);
    end;
  end;
  const
    { The three tables below are indexed by TOpCG; their entries must follow
      the declaration order of that enumeration. }

    { assembler opcode per TOpCG value; A_NONE where this table offers no
      opcode (that includes every shift and rotate slot) }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,A_NONE,
      A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE
    );

    { same as above, but with ASR/LSL/LSR/ROR entries in shift and rotate
      slots }
    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,A_ASR,
      A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
    );

    { instruction postfix per TOpCG value: PF_S or PF_None;
      NOTE(review): presumably used when emitting thumb code - the users of
      this table are outside the reviewed part of the file }
    op_reg_postfix_thumb: array[TOpCG] of TOpPostfix = (
      PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_S,PF_None,
      PF_S,PF_S,PF_None,PF_S,PF_None,PF_S
    );
  { dst := src <op> a. Delegates to the overflow checking variant with
    setflags=false; the overflow location it takes is a local that is not
    looked at afterwards. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
  var
    ignored_ovloc : tlocation;
  begin
    a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,ignored_ovloc);
  end;
  { Three register operation on src1, src2 and dst. Delegates to the
    overflow checking variant with setflags=false; the overflow location it
    takes is a local that is not looked at afterwards. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
  var
    ignored_ovloc : tlocation;
  begin
    a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,ignored_ovloc);
  end;
  { Maps a shift/rotate operation to the shifter-operand mode used to encode it.
    OP_ROL maps to SM_ROR as well: the callers in this unit convert the
    rotate amount to its ROR equivalent themselves.
    Any non-shift operation raises internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace "dst := src * a" by a short sequence of shifted
    MOV/ADD/SUB/RSB instructions.
    Returns true if code was emitted; returns false (nothing emitted) if the
    estimated cost exceeds the budget, in which case the caller has to emit
    a real multiplication. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      factor : dword;
      pow2exp : longint;
      so : tshifterop;
      termcount : byte;
      isnegative : boolean;
      firstinsn, foldhalves : boolean;
      dummyshift,
      extracost : byte;
      budget : byte;
      topbit, splitpos, foldshift : DWord;
    begin
      result:=true;
      extracost:=0;
      isnegative:=a<0;
      so.rs:=NR_NO;
      so.shiftmode:=SM_LSL;
      { a negative factor needs one more instruction for the final negation }
      if isnegative then
        inc(extracost);
      factor:=dword(abs(a));

      { Heuristics to estimate how many instructions are reasonable to replace
        the mul; currently based on XScale timings.
        In the simplest case a mov loads the constant and a mul does the work,
        which takes at least 1+4 cycles. Because the first shift immediate
        might stall and the replacement needs more instructions, at most
        3 instructions are generated by default. }
      budget:=3;

      { a constant that is no shifter operand needs a mov/mvn/bic/orr sequence
        or an ldr, so one more replacement instruction still pays off }
      if not(is_shifter_const(dword(a),dummyshift)) and not(is_shifter_const(not(dword(a)),dummyshift)) then
        inc(budget);

      { upper 5 bits all set or all clear: mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(budget);

      { upper 17 bits all set or all clear: mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(budget);

      { "symmetric" bit patterns like $10101010:
          res:=a*$10101010
        can be computed as
          temp:=a*$1010
          res:=temp+temp shl 16 }
      foldhalves:=false;
      topbit:=BsrDWord(factor);
      foldshift:=0;
      if (budget>1) and (topbit>2) then
        begin
          for splitpos:=2 to 31 do
            if (factor shr splitpos)=(factor and ($ffffffff shr (32-splitpos))) then
              begin
                foldhalves:=true;
                foldshift:=splitpos;
                dec(budget);
                { continue with the lower half only }
                factor:=factor shr foldshift;
                break;
              end;
        end;

      { one add per set bit; bit 0 is free because it is merged into the
        initial instruction }
      termcount:=popcnt(factor and $fffffffe);

      { trivial factors first }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { sum of shifted copies of src? }
      else if (termcount+extracost<=budget) and
        (termcount<=popcnt(dword(nextpowerof2(factor,pow2exp)-factor) and $fffffffe)) then
        begin
          firstinsn:=true;
          while factor<>0 do
            begin
              { always consume the highest remaining bit }
              so.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  { dst := src + (src shl n): handles the top bit and bit 0 at once }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
                  dec(factor);
                end
              else
                if firstinsn then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,so));
              firstinsn:=false;
              dec(factor,1 shl so.shiftimm);
            end;
          if foldhalves then
            begin
              { dst := dst + (dst shl foldshift) restores the symmetric upper half }
              so.shiftimm:=foldshift;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end;
          if isnegative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(factor,pow2exp)-factor) and $fffffffe)+extracost+1<=budget then
        begin
          firstinsn:=true;
          while factor<>0 do
            begin
              if firstinsn then
                begin
                  { from here on factor holds what has to be subtracted from 2^pow2exp }
                  factor:=(1 shl pow2exp)-factor;
                  so.shiftimm:=pow2exp;
                end
              else
                so.shiftimm:=BsrDWord(factor);

              if odd(factor) then
                begin
                  { dst := (src shl n) - src }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
                  dec(factor);
                end
              else
                if firstinsn then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,so));
                    dec(factor,1 shl so.shiftimm);
                  end;
              firstinsn:=false;
            end;
          if foldhalves then
            begin
              so.shiftimm:=foldshift;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end;
          if isnegative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits "dst := src op a" for a constant operand.
    list     : instruction list the code is appended to
    setflags : request flag-setting code (cgsetflags has the same effect)
    ovloc    : receives LOC_FLAGS plus the flag to test when a flag-setting
               ADD/SUB with an encodable constant was emitted, else LOC_VOID
               (or whatever the register/register fallback reports)
    Strategy: use an immediate if the constant is encodable, otherwise try a
    number of cheaper equivalent sequences and finally fall back to loading
    the constant into a register. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      rot, maskstart, masklen : byte;
      constreg : tregister;
      shifter : tshifterop;
      log2val : longint;
      part1, part2 : DWord;
    begin
      optimize_op_const(size, op, a);

      { the generic optimizer may have reduced the operation to a move }
      if op=OP_NONE then
        begin
          if src <> dst then
            a_load_reg_reg(list, size, size, src, dst);
          exit;
        end;
      if op=OP_MOVE then
        begin
          a_load_const_reg(list, size, a, dst);
          exit;
        end;

      ovloc.loc:=LOC_VOID;

      { if -a is encodable, turn add into sub and vice versa }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,rot) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_shifter_const(a,rot) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(shifter);
              shifter.shiftmode:=opshift2shiftmode(op);
              { rol n is encoded as ror 32-n }
              if op = OP_ROL then
                shifter.shiftimm:=32-a
              else
                shifter.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifter));
            end;
          else
            begin
              { note: a variant that built out-of-range ADD/SUB constants with
                MOVT/MOV into a temporary register was disabled here }
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));

              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      internalerror(2019050922);
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { done here rather than in the peephole optimizer because it saves a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,log2val) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,log2val,src,dst)
          { e.g. b=a*5 -> b=a*4+a using add with a shifted operand }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,log2val) and not(cgsetflags or setflags) then
            begin
              if log2val>32 then
                internalerror(200308296);
              shifterop_reset(shifter);
              shifter.shiftmode:=SM_LSL;
              shifter.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifter));
            end
          { e.g. b=a*7 -> b=a*8-a using rsb with a shifted operand }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,log2val) and not(cgsetflags or setflags) then
            begin
              if log2val>32 then
                internalerror(201205181);
              shifterop_reset(shifter);
              shifter.shiftmode:=SM_LSL;
              shifter.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifter));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { the helper already emitted the code }
            end
          { BIC clears the given bits while AND keeps them, so BIC makes a
            broader range of constants encodable }
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),rot) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { two shifts instead of two bics might allow the peephole optimizer
            to fold the second shift into the following instruction }
          else if (op = OP_AND) and
            is_continuous_mask(aword(a), maskstart, masklen) and
            ((maskstart = 0) or ((maskstart + masklen) = 32)) then
            begin
              shifterop_reset(shifter);
              if (masklen = 16) and
                (maskstart = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (masklen = 8) and
                (maskstart = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if maskstart = 0 then
                begin
                  { mask covers the low bits: shift the unwanted high bits out and back }
                  shifter.shiftmode:=SM_LSL;
                  shifter.shiftimm:=32-masklen;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifter));
                  shifter.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shifter));
                end
              else
                begin
                  { mask covers the high bits: shift the unwanted low bits out and back }
                  shifter.shiftmode:=SM_LSR;
                  shifter.shiftimm:=maskstart;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifter));
                  shifter.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shifter));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,part2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, part1, part2) then
            begin
              { apply the constant in two encodable pieces }
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,part2));
            end
          else
            begin
              { last resort: materialise the constant and use the register form }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src2 op src1" for register operands.
    list     : instruction list the code is appended to
    setflags : request flag-setting code (cgsetflags has the same effect)
    ovloc    : set to LOC_VOID on entry; for a multiplication with flag
               tracking on CPUs with UMULL it becomes LOC_FLAGS/F_NE, i.e. the
               flags signal that the high word is no mere extension of the low
               word
    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported here and raise
    internalerror 200308283. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst, src2, <shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
              (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { long multiply: the high word lands in overflowreg and is
                  inspected afterwards to detect overflow }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { all three registers are identical: copy the operand
                          into a temporary so rm differs from rd.
                          (The copy used to target dst, which left tmpreg
                          undefined when it was read by the multiply.) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { same fix as above: the temporary has to receive the
                          copy, not dst }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result in the pair dstlo/dsthi.
    With UMULL support a long multiply is emitted (OS_32 -> UMULL,
    OS_S32 -> SMULL, anything else raises internalerror 2014060802).
    Without it only the low word can be produced: the caller must pass
    dsthi=NR_NO, otherwise internalerror 2015083022 is raised. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          { no long multiply available: plain 32x32=32 bit MUL only }
          if dsthi<>NR_NO then
            internalerror(2015083022);
          if (dstlo = NR_NO) then
            dstlo:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      if size=OS_32 then
        mulop:=A_UMULL
      else if size=OS_S32 then
        mulop:=A_SMULL
      else
        InternalError(2014060802);

      { The caller might omit dstlo or dsthi when not interested in it, but the
        instruction still needs valid registers everywhere. In case of
        dsthi = NR_NO a 32x32=32 bit multiplication could be used instead. }
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load/store instruction "op" (with postfix "oppostfix") for
    register "reg" and memory reference "ref", first rewriting the reference
    into a form the instruction can encode.
    ref is passed by value; the (possibly rewritten) reference that was
    actually used is returned so callers can derive follow-up addresses. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      { tmpreg1 names the temporary (if any) currently sitting in ref.base or ref.index }
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, the symbol reference has to be stored
        in the text segment and accessed pc relative

        For now, references where base or index equals PC are assumed to be already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        whether the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { Fold the offset into the base using a fresh register and make the
            reference use that register.
            (Previously, when a temporary already existed, the sum was computed
            into a second temporary that never made it into ref while
            ref.offset was cleared, so the displacement got lost.) }
          tmpreg2:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg2);
          { keep tmpreg1 pointing at the temporary that is part of ref: it
            follows the base if it was the base (or unset), and keeps naming
            the index otherwise }
          if (tmpreg1=NR_NO) or (tmpreg1=ref.base) then
            tmpreg1:=tmpreg2;
          ref.base:=tmpreg2;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { the temporary is the base: fold the index into it in place }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { otherwise the temporary has to be the index }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { no temporary yet: combine base and index in a new register,
                honouring the sign of the index like the branches above }
              tmpreg1:=getintregister(list,OS_ADDR);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register "reg" to memory reference "ref" with size "tosize".
    If the reference is less aligned than the store size (alignment 1 or 2),
    or a halfword store is needed on a CPU without CPUARM_HAS_ALL_MEM, the
    value is written in smaller pieces, shifting it right between pieces and
    walking the address in the direction dictated by the target endianness. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      oppostfix:toppostfix;
      partref: treference;
      shifted : tregister;
      step : integer;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        OS_8,
        OS_S8:
          oppostfix:=PF_B;
        OS_16,
        OS_S16:
          oppostfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          oppostfix:=PF_None;
        else
          InternalError(2003082912);
      end;

      { the common case: a single store instruction is enough }
      if not(((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
             ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
              (oppostfix =PF_H))) then
        begin
          handle_load_store(list,A_STR,oppostfix,reg,ref);
          exit;
        end;

      { the least significant piece is stored first; on big endian it lives at
        the highest address, so the address walks downwards }
      if target_info.endian=endian_big then
        step:=-1
      else
        step:=1;

      case FromSize of
        OS_16,OS_S16:
          begin
            { two byte stores }
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if target_info.endian=endian_big then
              inc(partref.offset,1);
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
            a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          end;
        OS_32,OS_S32:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                if target_info.endian=endian_big then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
              end
            else
              begin
                { four byte stores }
                if target_info.endian=endian_big then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
              end;
          end
        else
          handle_load_store(list,A_STR,oppostfix,reg,ref);
      end;
    end;
  { Stores "reg" to "ref" with size "tosize" and returns the reference that
    was actually used for the (first) store instruction.
    On CPUs without CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte
    stores: reg's low byte at the reference and reg shr 8 at offset+1. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      oppostfix:toppostfix;
      secondref: treference;
      highpart: TRegister;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          oppostfix:=PF_B;
        OS_16,
        OS_S16:
          oppostfix:=PF_H;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      if not((tosize in [OS_S16,OS_16]) and
             (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]))) then
        result:=handle_load_store(list,A_STR,oppostfix,reg,ref)
      else
        begin
          { first byte: low 8 bits of reg }
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          { second byte: reg shr 8, stored one byte further }
          highpart:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,highpart);
          secondref:=result;
          inc(secondref.offset);
          handle_load_store(list,A_STR,PF_B,highpart,secondref);
        end;
    end;
  { Loads "reg" from "ref" and returns the reference that was actually used
    for the (first) load instruction. The load postfix is derived from
    "fromsize".
    On CPUs without CPUARM_HAS_ALL_MEM:
      - tosize=OS_S8 is done as an unsigned byte load followed by a sign
        extension in the register,
      - 16 bit tosize is assembled from two byte loads
        (reg := byte[ref] or (byte[ref+1] shl 8)). }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      oppostfix:toppostfix;
      shifter: tshifterop;
      highpart: TRegister;
      secondref: treference;
      limitedmem: boolean;
    begin
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limitedmem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if limitedmem and (tosize=OS_S8) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limitedmem and (tosize in [OS_S16,OS_16]) then
        begin
          { first byte into reg }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          { second byte into a temporary }
          highpart:=getintregister(list,OS_INT);
          secondref:=result;
          inc(secondref.offset);
          handle_load_store(list,A_LDR,PF_B,highpart,secondref);
          { reg := reg or (highpart shl 8) }
          shifterop_reset(shifter);
          shifter.shiftmode:=SM_LSL;
          shifter.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,highpart,shifter));
        end
      else
        result:=handle_load_store(list,A_LDR,oppostfix,reg,ref);
    end;
  { Copies reg1 to reg2, converting from "fromsize" to "tosize".
    For differing sizes the value is zero- or sign-extended according to the
    smaller of the two sizes: with shifts/AND before ARMv6, with the
    UXTB/SXTB/UXTH/SXTH family from ARMv6 on.
    If no conversion code was emitted and the registers differ, a plain MOV
    is emitted and reported to the register allocator as a move. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      shifter : tshifterop;

    { emits "reg2 := reg shifted by shiftimm"; Thumb code goes through the
      generic constant operation, otherwise a MOV with shifter operand is used }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            shifter.shiftmode:=shiftmode;
            shifter.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,shifter));
          end
        else
          begin
            case shiftmode of
              SM_ASR:
                a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
              SM_LSR:
                a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
              SM_LSL:
                a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
              else
                internalerror(2013090301);
            end;
          end;
      end;

    var
      moveinsn: taicpu;
      converted: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(shifter);
          converted:=true;
          { narrowing: the destination size decides how to extend }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      { sign extend to 16 bits only, upper half cleared }
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                { nothing to extend, fall through to the plain move }
                converted:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize=OS_16 then
                    begin
                      shifter.shiftmode:=SM_ROR;
                      shifter.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,shifter));
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                { nothing to extend, fall through to the plain move }
                converted:=false;
            end
        end;

      if (reg1<>reg2) and not converted then
        begin
          { same size, only a register mov required }
          moveinsn:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(moveinsn);
          { Notify the register allocator that a move instruction was written
            so it can try to eliminate it. }
          add_move_instruction(moveinsn);
        end;
    end;
  { Passes the floating point value stored at "ref" as parameter "paraloc".
    The parameter may consist of several locations; they are processed in
    order while "href" walks through the source value, advancing by the size
    of each location. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { allocate the location being filled and read from the current
                  position in the source (both used to refer to the first
                  location / the start of the value regardless of the
                  iteration) }
                paramanager.allocparaloc(list,hloc);
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    { allocate the register of this location, not of the first one }
                    paramanager.allocparaloc(list,hloc);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue with the part of the value belonging to the next location }
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Copies FPU register reg1 to reg2 using MVF; the instruction postfix is
    looked up from "tosize". }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      moveinsn : taicpu;
    begin
      moveinsn:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(moveinsn,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register "reg" from "ref" using LDF with the precision given by
    "fromsize" (32 bit -> S, 64 bit -> D, OS_F80 -> E; anything else raises
    internalerror 200309021). If "tosize" differs, a register-to-register
    conversion is emitted afterwards. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision:toppostfix;
    begin
      case fromsize of
        OS_F80:
          precision:=PF_E;
        OS_64,
        OS_F64:
          precision:=PF_D;
        OS_32,
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref with STF. The postfix of
    the store is derived from tosize; fromsize is not used. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      postfix : toppostfix;
    begin
      if tosize=OS_F32 then
        postfix:=PF_S
      else if tosize=OS_F64 then
        postfix:=PF_D
      else if tosize=OS_F80 then
        postfix:=PF_E
      else
        { only the floating point sizes above can be stored }
        InternalError(200309022);
      handle_load_store(list,A_STF,postfix,reg,ref);
    end;
  { Emits a check of the FPSCR: the register is read, masked with $9f while
    setting the flags, and FPC_THROWFPUEXCEPTION is called unless the masked
    value is zero. Nothing is emitted if the FPU has no VFP extension, if
    needs_check_for_fpu_exceptions is false, or if neither force nor the
    FPUExceptionCheckNeeded flag of the current procedure is set.
    With clear=true that flag is reset afterwards. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipjmp : taicpu;
      skiplabel : TAsmLabel;
      volatileregs : tcpuregisterset;
    begin
      if not((FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) and
             needs_check_for_fpu_exceptions and
             (force or current_procinfo.FPUExceptionCheckNeeded)) then
        exit;

      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { skip the call if none of the masked bits is set }
      current_asmdata.getjumplabel(skiplabel);
      skipjmp:=taicpu.op_sym(A_B,skiplabel);
      skipjmp.condition:=C_EQ;
      skipjmp.is_jmp:=true;
      list.concat(skipjmp);

      volatileregs:=paramanager.get_volatile_registers_int(pocall_default);
      alloccpuregisters(list,R_INTREGISTER,volatileregs);
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,volatileregs);
      a_label(list,skiplabel);

      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1621. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.
    The constant is encoded directly into a CMP if possible, into a CMN
    with the negated constant as second choice, and is loaded into a
    temporary register otherwise. The flags are allocated around the
    compare and the jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      shift : byte;
      fits_cmp : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      if GenerateThumbCode then
        fits_cmp:=is_thumb_imm(a)
      else
        fits_cmp:=is_shifter_const(a,shift);
      if fits_cmp then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,shift) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { constant cannot be encoded: materialize it in a register first }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.
    reverse=true : CLZ, then dst:=31-dst, then dst:=dst and 255.
    reverse=false: RBIT followed by CLZ; if the result equals 32, dst is
                   replaced by $ff.
    srcsize and dstsize are not used. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      { it is decided during the compilation of the system unit if this code is used or not
        so no additional check for rbit is needed }
      if not reverse then
        begin
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { for Thumb-2 an IT instruction is emitted before the conditional move }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
          exit;
        end;

      list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
      list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
      list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags are allocated for the duration of the sequence; size is not
    used. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s: BL when
    generating Thumb code, B otherwise. The instruction is marked as jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jmp : taicpu;
      jmpop : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmp:=taicpu.op_sym(jmpop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jmp.is_jmp:=true;
      list.concat(jmp);
    end;
  { Emits an unconditional jump to label l: BL when generating Thumb code,
    B otherwise. The instruction is marked as jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jmp : taicpu;
      jmpop : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmp:=taicpu.op_sym(jmpop,l);
      jmp.is_jmp:=true;
      list.concat(jmp);
    end;
  { Emits a jump to l that is taken if the flags f are set.
    Outside Thumb code this is a single conditional B. For Thumb code a
    conditional B with the inverted condition skips over an unconditional
    jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jmp : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          jmp:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          jmp.is_jmp:=true;
          list.concat(jmp);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      jmp:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      jmp.is_jmp:=true;
      list.concat(jmp);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materializes the flags f in reg using two conditional moves: reg:=1
    under the condition belonging to f and reg:=0 under the inverse
    condition. size is not used. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      set_one,
      set_zero : taicpu;
    begin
      set_one:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      list.concat(set_one);
      set_zero:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
      list.concat(set_zero);
    end;
  { Emits the profiling call: R14 is pushed and __gnu_mcount_nc is called.
    Only implemented for arm-linux; every other target raises an internal
    error. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201);
      list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
      a_call_name(list,'__gnu_mcount_nc',false);
    end;
  { Emits the procedure prologue.

    Unless nostackframe is set, this
      - determines which FPA or VFP registers have to be preserved,
      - pushes the non-volatile integer registers (one STM for non-Darwin
        targets, up to two STMs for Darwin) and sets up the frame pointer
        where one is used,
      - lowers the stack pointer by the (alignment corrected) local size,
      - stores the floating point registers at floatregstart.
    The stack padding register recorded in tcpuprocinfo is read again by
    g_proc_exit. }
  procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      memref : treference;
      shiftval : byte;
      firstfpureg,lastfpureg,
      sreg : byte;
      vfpregs,
      intregs, pushregs : tcpuregisterset;
      savedbytes, cfaofs,
      r7ofs,
      misalign : pint;
      lowimm, highimm: DWord;
      has_stack_paras,
      separate_fp : Boolean;
    begin
      LocalSize:=align(LocalSize,4);
      has_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      { call instruction does not put anything on the stack }
      savedbytes:=0;
      tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
      lastfpureg:=RS_NO;
      if nostackframe then
        exit;

      { true if a register other than the stack pointer serves as frame pointer }
      separate_fp:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      firstfpureg:=RS_NO;
      vfpregs:=[];
      case current_settings.fputype of
        fpu_none,
        fpu_soft,
        fpu_libgcc:
          ;
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            { save floating point registers? }
            intregs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
            for sreg:=RS_F0 to RS_F7 do
              if sreg in intregs then
                begin
                  if firstfpureg=RS_NO then
                    firstfpureg:=sreg;
                  lastfpureg:=sreg;
                  inc(savedbytes,12);
                end;
          end;
        else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
          { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
            they have numbers>$1f which is not really correct as they should simply have the same numbers
            as the even ones by with a different subtype as it is done on x86 with al/ah }
          vfpregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31]
        else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
          { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
            they have numbers>$1f which is not really correct as they should simply have the same numbers
            as the even ones by with a different subtype as it is done on x86 with al/ah }
          vfpregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15]
        else
          internalerror(2019050924);
      end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if separate_fp then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { save int registers }
      reference_reset(memref,4,[]);
      memref.index:=NR_STACK_POINTER_REG;
      memref.addressmode:=AM_PREINDEXED;
      intregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if target_info.system in systems_darwin then
        begin
          { always save r14 if we use r7 as the framepointer, because
            the parameter offsets are hardcoded in advance and always
            assume that r14 sits on the stack right behind the saved r7
          }
          if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
            include(intregs,RS_FRAME_POINTER_REG);
          if (intregs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(intregs,RS_R14);
          if intregs<>[] then
            begin
              { on Darwin, you first have to save [r4-r7,lr], and then
                [r8,r10,r11] and make r7 point to the previously saved
                r7 so that you can perform a stack crawl based on it
                ([r7] is previous stack frame, [r7+4] is return address
              }
              include(intregs,RS_FRAME_POINTER_REG);
              pushregs:=intregs-[RS_R8,RS_R10,RS_R11];
              r7ofs:=0;
              for sreg:=RS_R0 to RS_R15 do
                if sreg in pushregs then
                  begin
                    inc(savedbytes,4);
                    if sreg<RS_FRAME_POINTER_REG then
                      inc(r7ofs,4);
                  end;
              { save the registers }
              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,pushregs),PF_FD));
              { make r7 point to the saved r7 (regardless of whether this
                frame uses the framepointer, for backtrace purposes) }
              if r7ofs=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13))
              else
                list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7ofs));
              { now save the rest (if any) }
              pushregs:=intregs-pushregs;
              if pushregs<>[] then
                begin
                  for sreg:=RS_R8 to RS_R11 do
                    if sreg in pushregs then
                      inc(savedbytes,4);
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,pushregs),PF_FD));
                end;
            end;
        end
      else
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if separate_fp then
            begin
              a_reg_alloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
              intregs:=intregs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15];
            end
          else if (intregs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(intregs,RS_R14);

          if intregs<>[] then
            begin
              for sreg:=RS_R0 to RS_R15 do
                if sreg in intregs then
                  inc(savedbytes,4);

              { if the stack is not 8 byte aligned, try to add an extra register,
                so we can avoid the extra sub/add ...,#4 later (KB) }
              if (savedbytes mod current_settings.alignment.localalignmax)<>0 then
                for sreg:=RS_R3 downto RS_R0 do
                  if not(sreg in intregs) then
                    begin
                      include(intregs,sreg);
                      inc(savedbytes,4);
                      tcpuprocinfo(current_procinfo).stackpaddingreg:=sreg;
                      break;
                    end;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,intregs),PF_FD));
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,savedbytes);
            end;

          if separate_fp then
            begin
              { describe the position of every pushed register for CFI }
              cfaofs:=-4;
              for sreg:=RS_R15 downto RS_R0 do
                if sreg in intregs then
                  begin
                    current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,sreg,R_SUBWHOLE),cfaofs);
                    dec(cfaofs,4);
                  end;
              { the framepointer now points to the saved R15, so the saved
                framepointer is at R11-12 (for get_caller_frame) }
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
              a_reg_dealloc(list,NR_R12);
              current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
            end;
        end;

      { reserve the local stack area, keeping the stack aligned }
      misalign:=savedbytes mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign;
          if has_stack_paras and (pi_estimatestacksize in current_procinfo.flags) then
            begin
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                internalerror(2014030901)
              else
                localsize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes;
            end;
          if is_shifter_const(localsize,shiftval) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else if split_into_shifter_const(localsize, lowimm, highimm) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,lowimm));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,highimm));
            end
          else
            begin
              { size cannot be encoded in one or two immediates: go via R12 }
              if not separate_fp then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
          if not separate_fp then
            current_asmdata.asmcfi.cfa_def_cfa_offset(list,savedbytes+localsize);
        end;

      { store the floating point registers that have to be preserved }
      if (vfpregs<>[]) or
         (firstfpureg<>RS_NO) then
        begin
          reference_reset(memref,4,[]);
          if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
             (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
            begin
              { compute the address of the save area in R12 }
              if is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shiftval) then
                list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart))
              else
                begin
                  a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end;
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;

          case current_settings.fputype of
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfpureg,R_SUBWHOLE),
                lastfpureg-firstfpureg+1,memref));
            else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
              begin
                memref.index:=memref.base;
                memref.base:=NR_NO;
                if vfpregs<>[] then
                  list.concat(taicpu.op_ref_regset(A_VSTM,memref,R_MMREGISTER,R_SUBFD,vfpregs));
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                memref.index:=memref.base;
                memref.base:=NR_NO;
                if vfpregs<>[] then
                  list.concat(taicpu.op_ref_regset(A_VSTM,memref,R_MMREGISTER,R_SUBFS,vfpregs));
              end
            else
              internalerror(2019050923);
          end;
        end;
    end;
  { Emits the procedure epilogue, the counterpart of g_proc_entry.

    With nostackframe set only the return instruction is emitted (BX R14 if
    the CPU supports BX, MOV PC,R14 otherwise). Otherwise this
      - reloads the preserved FPA/VFP registers from floatregstart,
      - determines the set of integer registers to pop, including the stack
        padding register recorded by g_proc_entry,
      - releases the local stack area where the stack pointer (or Darwin)
        layout is used, and
      - pops the integer registers; R15 replaces R14 in that set so that
        the load also returns to the caller.
    parasize is not used. }
  procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      LocalSize : longint;
      firstfloatreg,lastfloatreg,
      r,
      shift : byte;
      mmregs,
      saveregs,
      regs : tcpuregisterset;
      registerarea,
      stackmisalignment: pint;
      paddingreg: TSuperRegister;
      imm1, imm2: DWord;
    begin
      if not(nostackframe) then
        begin
          registerarea:=0;
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          mmregs:=[];
          saveregs:=[];
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { restore floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { floating point register space is already included in
                        localsize below by calc_stackframe_size
                        inc(registerarea,12);
                      }
                    end;
              end;
            { restore vfp registers?
              The set restored here has to be exactly the set stored by
              g_proc_entry, so the same masks are applied as there. }
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { the *[0..15] is the same hack for FPUs without the
                  FPUARM_HAS_32REGS capability, see g_proc_entry }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050908);
          end;

          if (firstfloatreg<>RS_NO) or
             (mmregs<>[]) then
            begin
              reference_reset(ref,4,[]);
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  { compute the address of the save area in R12 }
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050921);
              end;
            end;

          { determine the integer registers to pop }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or
             (regs<>[]) or
             ((target_info.system in systems_darwin) and
              (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
            begin
              { load the saved return address directly into the PC }
              exclude(regs,RS_R14);
              include(regs,RS_R15);
              if (target_info.system in systems_darwin) then
                include(regs,RS_FRAME_POINTER_REG);
            end;

          if not(target_info.system in systems_darwin) then
            begin
              { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                The saved PC came after that but is discarded, since we restore
                the stack pointer }
              if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
            end
          else
            begin
              { restore R8-R11 already if necessary (they've been stored
                before the others) }
              saveregs:=regs*[RS_R8,RS_R10,RS_R11];
              if saveregs<>[] then
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  for r:=RS_R8 to RS_R11 do
                    if r in saveregs then
                      inc(registerarea,4);
                  regs:=regs-saveregs;
                end;
            end;
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);

          { reapply the stack padding reg, in case there was one, see the complimentary
            comment in g_proc_entry() (KB) }
          paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
          if paddingreg < RS_R4 then
            begin
              if paddingreg in regs then
                internalerror(201306190)
              else
                begin
                  regs:=regs+[paddingreg];
                  inc(registerarea,4);
                end;
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin) then
            begin
              { release the local stack area }
              LocalSize:=current_procinfo.calc_stackframe_size;
              if (LocalSize<>0) or
                 ((stackmisalignment<>0) and
                  ((pi_do_call in current_procinfo.flags) or
                   (po_assembler in current_procinfo.procdef.procoptions))) then
                begin
                  if pi_estimatestacksize in current_procinfo.flags then
                    LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                  else
                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                  if is_shifter_const(LocalSize,shift) then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                  else if split_into_shifter_const(localsize, imm1, imm2) then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                    end
                  else
                    begin
                      { size cannot be encoded in one or two immediates: go via R12 }
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end;
                end;

              { ref was set up for the stack pointer above when saveregs was filled }
              if (target_info.system in systems_darwin) and
                 (saveregs<>[]) then
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));

              if regs=[] then
                begin
                  { nothing to pop: plain return }
                  if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                  else
                    list.concat(taicpu.op_reg(A_BX,NR_R14))
                end
              else
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;
            end
          else
            begin
              { restore int registers and return }
              reference_reset(ref,4,[]);
              ref.index:=NR_FRAME_POINTER_REG;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
            end;
        end
      else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14))
    end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got. Only done when PIC code is generated, the
    procedure needs the GOT and the target uses a GOT for PIC. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      lab : TAsmLabel;
      usedregs : tcpuregisterset;
      sreg: byte;
    begin
      if not((cs_create_pic in current_settings.moduleswitches) and
             (pi_needs_got in current_procinfo.flags) and
             (tf_pic_uses_got in target_info.flags)) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before GOT initialization code, allocate registers used for procedure parameters
        to prevent usage of these registers for temp operations in later stages of code
        generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sreg:=RS_R0 to RS_R3 do
        if sreg in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { load the pc relative constant stored in the local data of the procedure }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(lab);
      cg.a_label(current_procinfo.aktlocaldata,lab);
      poolref.symbol:=lab;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { the constant is _GLOBAL_OFFSET_TABLE_ relative to the label placed
        in front of the add below, with an offset of -8 }
      current_asmdata.getaddrlabel(lab);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,lab,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,lab);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sreg:=RS_R3 downto RS_R0 do
        if sreg in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));
    end;
  { Loads the address described by ref into register r.

    Only references with addressmode AM_OFFSET are accepted. References
    with a symbol, or with an offset that is not encodable (neither the
    offset nor its negation is a shifter constant), are first simplified by
    fixref. The address is then computed from base, index and offset;
    shifted index registers are not supported and raise an internal error. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      tmpref:=ref;
      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;

      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);

      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);

      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          if tmpref.signindex<0 then
            { a_op_reg_reg_reg computes dst:=src2 op src1, so the index has
              to be passed as src1 to get r:=base-index }
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            { neither base nor index: the address is the plain offset }
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                { tell the register allocator about the move so it can try
                  to eliminate it }
                add_move_instruction(instr);
              end;
        end;
    end;
  2302. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2303. var
  2304. tmpreg, tmpreg2 : tregister;
  2305. tmpref : treference;
  2306. l, piclabel : tasmlabel;
  2307. indirection_done : boolean;
  2308. begin
  2309. { absolute symbols can't be handled directly, we've to store the symbol reference
  2310. in the text segment and access it pc relative
  2311. For now, we assume that references where base or index equals to PC are already
  2312. relative, all other references are assumed to be absolute and thus they need
  2313. to be handled extra.
  2314. A proper solution would be to change refoptions to a set and store the information
  2315. if the symbol is absolute or relative there.
  2316. }
  2317. { create consts entry }
  2318. reference_reset(tmpref,4,[]);
  2319. current_asmdata.getjumplabel(l);
  2320. cg.a_label(current_procinfo.aktlocaldata,l);
  2321. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2322. piclabel:=nil;
  2323. tmpreg:=NR_NO;
  2324. indirection_done:=false;
  2325. if assigned(ref.symbol) then
  2326. begin
  2327. if (target_info.system=system_arm_ios) and
  2328. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2329. begin
  2330. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2331. if ref.offset<>0 then
  2332. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2333. indirection_done:=true;
  2334. end
  2335. else if ref.refaddr=addr_gottpoff then
  2336. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2337. else if ref.refaddr=addr_tlsgd then
  2338. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2339. else if ref.refaddr=addr_tlsdesc then
  2340. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2341. else if ref.refaddr=addr_tpoff then
  2342. begin
  2343. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2344. Internalerror(2019092804);
  2345. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2346. end
  2347. else if (cs_create_pic in current_settings.moduleswitches) then
  2348. if (tf_pic_uses_got in target_info.flags) then
  2349. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2350. else
  2351. begin
  2352. { ideally, we would want to generate
  2353. ldr r1, LPICConstPool
  2354. LPICLocal:
  2355. ldr/str r2,[pc,r1]
  2356. ...
  2357. LPICConstPool:
  2358. .long _globsym-(LPICLocal+8)
  2359. However, we cannot be sure that the ldr/str will follow
  2360. right after the call to fixref, so we have to load the
  2361. complete address already in a register.
  2362. }
  2363. current_asmdata.getaddrlabel(piclabel);
  2364. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2365. end
  2366. else
  2367. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2368. end
  2369. else
  2370. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2371. { load consts entry }
  2372. if not indirection_done then
  2373. begin
  2374. tmpreg:=getintregister(list,OS_INT);
  2375. tmpref.symbol:=l;
  2376. tmpref.base:=NR_PC;
  2377. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2378. if (cs_create_pic in current_settings.moduleswitches) and
  2379. (tf_pic_uses_got in target_info.flags) and
  2380. assigned(ref.symbol) then
  2381. begin
  2382. {$ifdef EXTDEBUG}
  2383. if not (pi_needs_got in current_procinfo.flags) then
  2384. Comment(V_warning,'pi_needs_got not included');
  2385. {$endif EXTDEBUG}
  2386. Include(current_procinfo.flags,pi_needs_got);
  2387. reference_reset(tmpref,4,[]);
  2388. tmpref.base:=current_procinfo.got;
  2389. tmpref.index:=tmpreg;
  2390. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2391. if ref.offset<>0 then
  2392. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2393. end;
  2394. end;
  2395. if assigned(piclabel) then
  2396. begin
  2397. cg.a_label(list,piclabel);
  2398. tmpreg2:=getaddressregister(list);
  2399. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2400. tmpreg:=tmpreg2
  2401. end;
  2402. { This routine can be called with PC as base/index in case the offset
  2403. was too large to encode in a load/store. In that case, the entire
  2404. absolute expression has been re-encoded in a new constpool entry, and
  2405. we have to remove the use of PC from the original reference (the code
  2406. above made everything relative to the value loaded from the new
  2407. constpool entry) }
  2408. if is_pc(ref.base) then
  2409. ref.base:=NR_NO;
  2410. if is_pc(ref.index) then
  2411. ref.index:=NR_NO;
  2412. if (ref.base<>NR_NO) then
  2413. begin
  2414. if ref.index<>NR_NO then
  2415. begin
  2416. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2417. ref.base:=tmpreg;
  2418. end
  2419. else
  2420. if ref.base<>NR_PC then
  2421. begin
  2422. ref.index:=tmpreg;
  2423. ref.shiftimm:=0;
  2424. ref.signindex:=1;
  2425. ref.shiftmode:=SM_None;
  2426. end
  2427. else
  2428. ref.base:=tmpreg;
  2429. end
  2430. else
  2431. ref.base:=tmpreg;
  2432. ref.offset:=0;
  2433. ref.symbol:=nil;
  2434. end;
  { Emits code that copies len bytes from source to dest.

    list    - instruction list the generated code is appended to
    source  - reference to read from
    dest    - reference to write to
    len     - number of bytes to copy; nothing is emitted for len=0
    aligned - true if 16/32 bit accesses may be used on both references

    Strategy, as implemented below:
      * aligned and len=4 or len=2: a single load/store pair
      * aligned and len<=helpsize: unrolled 32 bit loads into up to
        maxtmpreg temporaries, then the matching stores, then a 32/16/8 bit
        tail
      * not aligned and len<=4: byte by byte, reusing the references
        returned by a_internal_load_ref_reg/a_internal_load_reg_ref
      * everything else: a byte copy loop (genloop or genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if it might conflict with reserved ones, also +2 is used because of regs required for referencing}
      maxtmpreg_thumb = 5;

    type
      ttmpregisters = array[1..maxtmpreg_arm] of tregister;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:ttmpregisters;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Emits a post-indexed copy loop (ARM mode) moving count div size
      elements of size bytes each, followed by code for the remaining
      count mod size bytes.
      Relies on srcref/dstref being based on srcreg/destreg and on countreg
      being allocated by the caller.
      The only call in this routine passes size=1, in which case
      count mod size is 0 and no tail code is emitted.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);

      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        { post-indexed addressing advances the base registers by size }
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { SUBS sets the flags tested by the conditional jump below }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining bytes }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimate whether creating a separate ref is needed or
      if we can keep the original reference while copying.
      Reads the current value of the enclosing len.
      NOTE(review): the call for dest happens after len has already been
      reduced by the unrolled load loop, so the range test for dest sees
      the remaining length only - confirm this is intended }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Thumb variant of genloop: no post-indexed addressing is used, the
      base registers are advanced with explicit ADD instructions instead.
      NOTE(review): the main loop always advances the bases by 1
      regardless of size; the only call in this routine passes size=1.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);

      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        refincofs(srcref);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining bytes }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      tmpregisters:=Default(ttmpregisters);
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest length that is still copied with unrolled code }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { load as many 32 bit words as fit into the temporaries }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;

          { store the words loaded above in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy whatever is left, shrinking the access size as needed }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the returned reference is reused for the following bytes }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { srcref describes the source, so it takes all its properties
                (including temppos) from source, as in the unrolled path }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with its aligned flag cleared. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    as soon as either reference reports an alignment of 1 or 3, otherwise
    as aligned. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      treat_as_aligned : boolean;
    begin
      treat_as_aligned:=not((source.alignment in [1,3]) or
                            (dest.alignment in [1,3]));
      g_concatcopy_internal(list,source,dest,len,treat_as_aligned);
    end;
  { Overflow check without an explicit overflow location: passes a
    location whose loc field is LOC_VOID on to g_overflowCheck_loc. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      voidloc : tlocation;
    begin
      { only the loc field is initialised here }
      voidloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,voidloc);
    end;
  { Emits an overflow check: a conditional branch that skips a call to
    FPC_OVERFLOW when no overflow happened. Nothing is emitted unless
    cs_check_overflow is set in the local switches.

    ovloc.loc=LOC_VOID : the branch condition is derived from def and,
                         for pointer/unsigned/boolean/char types, from the
                         opcode of the last instruction in the list
    ovloc.loc=LOC_FLAGS: the inverse of ovloc.resflags is used and the
                         flags register is released afterwards
    any other location raises internalerror 200409281 }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      branch : TAiCpu;
      okflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        LOC_VOID:
          begin
            branch:=taicpu.op_sym(A_B,skiplabel);
            branch.is_jmp:=true;
            { these types are checked through the carry flag, everything
              else through the overflow flag }
            carrybased:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if not carrybased then
              branch.SetCondition(C_VC)
            else if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
              { subtract-type instructions use the opposite carry sense }
              branch.SetCondition(C_CS)
            else
              branch.SetCondition(C_CC);
            list.concat(branch);
          end;
        else
          internalerror(200409281);
      end;
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { nothing to do here: g_proc_entry takes care of saving the registers }
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { nothing to do here: g_proc_exit takes care of restoring the registers }
    end;
  { Emits a jump to l that is taken when the comparison condition cond
    holds.
    Outside Thumb mode this is a single conditional branch. In Thumb mode
    a conditional branch with the inverted condition skips over an
    unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Looks up the opcode for a scalar mm register transfer between two
    floating point sizes. Only the OS_F32/OS_F64 combinations have an
    entry: A_VMOV for equal sizes, A_VCVT for differing ones. Any other
    combination raises internalerror 200312205. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      scalar_ops : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    var
      op : tasmop;
    begin
      op:=scalar_ops[fromsize,tosize];
      result:=op;
      if op=A_NONE then
        internalerror(200312205);
    end;
  { Looks up the instruction postfix belonging to a scalar mm register
    transfer between two floating point sizes. Combinations other than
    OS_F32/OS_F64 yield PF_None; no error is raised here. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      scalar_postfixes : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,    PF_F32F64, PF_None, PF_None, PF_None),
        (PF_F64F32, PF_F64,    PF_None, PF_None, PF_None),
        (PF_None,   PF_None,   PF_None, PF_None, PF_None),
        (PF_None,   PF_None,   PF_None, PF_None, PF_None),
        (PF_None,   PF_None,   PF_None, PF_None, PF_None));
    begin
      get_scalar_mm_prefix:=scalar_postfixes[fromsize,tosize];
    end;
  { Moves or converts the scalar value in mm register reg1 into reg2.
    Opcode and postfix come from get_scalar_mm_op/get_scalar_mm_prefix,
    both called with (tosize,fromsize). Non-scalar shuffles raise
    internalerror 2009112407. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407);
      movinstr:=taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1);
      movinstr:=setoppostfix(movinstr,get_scalar_mm_prefix(tosize,fromsize));
      list.concat(movinstr);
      if movinstr.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so no check is needed;
          the instruction is only registered as a move }
        add_move_instruction(movinstr)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar value from memory reference ref into mm register reg.

    Integer source sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as raw
    OS_F32/OS_F64 data and must then match tosize exactly. When fromsize
    and tosize differ, the value is first loaded into a temporary mm
    register and converted with a_loadmm_reg_reg afterwards. References
    with an alignment of 1 or 2 are loaded through integer registers
    instead of VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpreg,
      loadreg : tregister;
      gpreg64 : tregister64;
    begin
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      { normalise the source size to a floating point size }
      case fromsize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112901);
          end;
        else
          internalerror(2019050920);
      end;

      { load straight into reg unless a conversion has to follow }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        begin
          { insufficiently aligned: go through integer registers }
          case fromsize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_load_ref_reg(list,OS_32,OS_32,ref,gpreg);
                a_loadmm_intreg_reg(list,OS_32,OS_F32,gpreg,loadreg,mms_movescalar);
              end;
            OS_F64:
              begin
                gpreg64.reglo:=getintregister(list,OS_32);
                gpreg64.reghi:=getintregister(list,OS_32);
                cg64.a_load64_ref_reg(list,ref,gpreg64);
                cg64.a_loadmm_intreg64_reg(list,OS_F64,gpreg64,loadreg);
              end;
            else
              internalerror(2009112412);
          end;
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar value in mm register reg to memory reference ref.

    Integer destination sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as
    raw OS_F32/OS_F64 data and must then match fromsize exactly. When
    fromsize and tosize differ, the value is converted into a temporary
    mm register first. References with an alignment of 1 or 2 are written
    through integer registers instead of VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpreg,
      storereg : tregister;
      gpreg64 : tregister64;
    begin
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      { normalise the destination size to a floating point size }
      case tosize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing raw integer data, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112802);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing raw integer data, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112902);
          end;
        else
          internalerror(2019050919);
      end;

      { convert into a temporary register first if the sizes differ }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        begin
          { insufficiently aligned: go through integer registers }
          case tosize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpreg,shuffle);
                a_load_reg_ref(list,OS_32,OS_32,gpreg,ref);
              end;
            OS_F64:
              begin
                gpreg64.reglo:=getintregister(list,OS_32);
                gpreg64.reghi:=getintregister(list,OS_32);
                cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gpreg64);
                cg64.a_load64_reg_ref(list,gpreg64,ref);
              end;
            else
              internalerror(2009112417);
          end;
        end;
      { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
    end;
  { Transfers the raw 32 bit contents of integer register intreg into mm
    register mmreg with a VMOV. No conversion is performed: tosize has to
    be OS_F32, fromsize OS_32 or OS_S32, and shuffle nil or scalar;
    otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { raw data transfer only, so the sizes are fixed }
      if not(tosize=OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112516);
      { VMOV cannot generate an FPU exception, so no check follows }
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of mm register mmreg into integer
    register intreg with a VMOV. No conversion is performed: fromsize has
    to be OS_F32, tosize OS_32 or OS_S32, and shuffle nil or scalar;
    otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { raw data transfer only, so the sizes are fixed }
      if not(fromsize=OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112409);
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112514);
      { VMOV cannot generate an FPU exception, so no check follows }
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Performs an operation on mm registers; only OP_XOR is supported, any
    other op raises internalerror 2009112906.

    With NEON available and size=OS_F64 a VEOR dst,dst,src is emitted.
    Otherwise src and dst have to be the same register and dst is simply
    set to zero by moving a zeroed integer register into it. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op<>OP_XOR then
        internalerror(2009112906);

      if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
        begin
          if (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2019081301);
          list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
          exit;
        end;

      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);
      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      if size=OS_F32 then
        list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
      else if size=OS_F64 then
        { both halves of the double register get the zero register }
        list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
      else
        internalerror(2009112908);
    end;
  { After an operation whose result may exceed an 8 or 16 bit operand
    size (OP_MUL, OP_SHL, OP_ADD, OP_SUB, OP_NEG), reloads dst from
    OS_32 to size in place. Does nothing for other operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      ops_needing_adjust = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      subword_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in ops_needing_adjust) then
        exit;
      if not(size in subword_sizes) then
        exit;
      a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits an MLA op1,op2,op3,op4. Any operand that is R15, or R13 when
    generating Thumb or Thumb-2 code, is first copied into a freshly
    allocated integer register which is then used in its place. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new integer register if reg is one of
      the registers described above }
    procedure substitute_if_needed(var reg : TRegister);
      var
        copyreg : TRegister;
        needscopy : boolean;
      begin
        needscopy:=((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
                   (getsupreg(reg)=RS_R15);
        if not needscopy then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      substitute_if_needed(op1);
      substitute_if_needed(op2);
      substitute_if_needed(op3);
      substitute_if_needed(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { If the current procedure is flagged with pi_needs_tls, calls
    fpc_read_tp and copies R0 into current_procinfo.tlsoffset. R0 is
    marked as allocated around the call and the copy. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation regdst := op(regsrc[,regdst]).
    OP_NEG is emitted as RSBS on the low words followed by RSC on the
    high words, OP_NOT as two independent 32 bit NOTs; every other
    operation is forwarded to a_op64_reg_reg_reg with regdst as second
    source and destination. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { In-place 64 bit operation with a constant: forwards to
    a_op64_const_reg_reg using reg as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      { reg is read and overwritten }
      a_op64_const_reg_reg(list,op,size,value,{regsrc} reg,{regdst} reg);
    end;
  { 64 bit operation with a constant without overflow tracking: forwards
    to a_op64_const_reg_reg_checkoverflow with setflags=false and
    discards the overflow location it reports. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      ignoredloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,ignoredloc);
    end;
  { Three operand 64 bit operation without overflow tracking: forwards to
    a_op64_reg_reg_reg_checkoverflow with setflags=false and discards the
    overflow location it reports. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      ignoredloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,ignoredloc);
    end;
  { Transfers the raw contents of the integer register pair intreg
    (reglo, reghi) into mm register mmreg with a single VMOV. No
    conversion is performed; an mmsize other than OS_F64 raises
    internalerror 2009112405. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { raw data transfer only, so the size is fixed }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      { VMOV cannot generate an FPU exception, so no check follows }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers the raw contents of mm register mmreg into the integer
    register pair intreg (reglo, reghi) with a single VMOV. No conversion
    is performed; an mmsize other than OS_F64 raises
    internalerror 2009112406. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { raw data transfer only, so the size is fixed }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      { VMOV cannot generate an FPU exception, so no check follows }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
    end;
  3154. procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
  3155. var
  3156. tmpreg : tregister;
  3157. b : byte;
  3158. begin
  3159. ovloc.loc:=LOC_VOID;
  3160. case op of
  3161. OP_NEG,
  3162. OP_NOT :
  3163. internalerror(2012022501);
  3164. else
  3165. ;
  3166. end;
  3167. if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
  3168. begin
  3169. case op of
  3170. OP_ADD:
  3171. begin
  3172. if is_shifter_const(lo(value),b) then
  3173. begin
  3174. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3175. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3176. end
  3177. else
  3178. begin
  3179. tmpreg:=cg.getintregister(list,OS_32);
  3180. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3181. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3182. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3183. end;
  3184. if is_shifter_const(hi(value),b) then
  3185. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
  3186. else
  3187. begin
  3188. tmpreg:=cg.getintregister(list,OS_32);
  3189. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3190. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3191. end;
  3192. end;
  3193. OP_SUB:
  3194. begin
  3195. if is_shifter_const(lo(value),b) then
  3196. begin
  3197. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3198. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3199. end
  3200. else
  3201. begin
  3202. tmpreg:=cg.getintregister(list,OS_32);
  3203. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3204. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3205. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3206. end;
  3207. if is_shifter_const(hi(value),b) then
  3208. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
  3209. else
  3210. begin
  3211. tmpreg:=cg.getintregister(list,OS_32);
  3212. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3213. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3214. end;
  3215. end;
  3216. else
  3217. internalerror(200502131);
  3218. end;
  3219. if size=OS_64 then
  3220. begin
  3221. { the arm has an weired opinion how flags for SUB/ADD are handled }
  3222. ovloc.loc:=LOC_FLAGS;
  3223. case op of
  3224. OP_ADD:
  3225. ovloc.resflags:=F_CS;
  3226. OP_SUB:
  3227. ovloc.resflags:=F_CC;
  3228. else
  3229. internalerror(2019050918);
  3230. end;
  3231. end;
  3232. end
  3233. else
  3234. begin
  3235. case op of
  3236. OP_AND,OP_OR,OP_XOR:
  3237. begin
  3238. cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
  3239. cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
  3240. end;
  3241. OP_ADD:
  3242. begin
  3243. if is_shifter_const(aint(lo(value)),b) then
  3244. begin
  3245. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3246. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3247. end
  3248. else
  3249. begin
  3250. tmpreg:=cg.getintregister(list,OS_32);
  3251. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3252. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3253. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3254. end;
  3255. if is_shifter_const(aint(hi(value)),b) then
  3256. list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3257. else
  3258. begin
  3259. tmpreg:=cg.getintregister(list,OS_32);
  3260. cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
  3261. list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
  3262. end;
  3263. end;
  3264. OP_SUB:
  3265. begin
  3266. if is_shifter_const(aint(lo(value)),b) then
  3267. begin
  3268. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3269. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3270. end
  3271. else
  3272. begin
  3273. tmpreg:=cg.getintregister(list,OS_32);
  3274. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3275. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3276. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3277. end;
  3278. if is_shifter_const(aint(hi(value)),b) then
  3279. list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3280. else
  3281. begin
  3282. tmpreg:=cg.getintregister(list,OS_32);
  3283. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3284. list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
  3285. end;
  3286. end;
  3287. else
  3288. internalerror(2003083101);
  3289. end;
  3290. end;
  3291. end;
  { Emits a 64 bit register/register operation on register pairs.

    OP_ADD computes regdst:=regsrc1+regsrc2, OP_SUB computes
    regdst:=regsrc2-regsrc1; OP_AND/OP_OR/OP_XOR are forwarded to the 32 bit
    code generator, once per half. OP_NEG and OP_NOT as well as every other
    operation end in an internal error.

    If setflags or the code generator's cgsetflags is set and the operation
    is an addition or subtraction, the high word instruction also updates
    the flags and ovloc is set to LOC_FLAGS with the condition that signals
    the overflow; otherwise ovloc stays LOC_VOID and the flags are released
    again after the high word instruction. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      wantflags : boolean;

    { low word with S postfix, then high word consuming the carry; the flags
      stay allocated only if the caller wants to inspect them afterwards }
    procedure emit_carry_chain(oplo,ophi : tasmop;const left,right : tregister64);
      begin
        cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(setoppostfix(taicpu.op_reg_reg_reg(oplo,regdst.reglo,left.reglo,right.reglo),PF_S));
        if wantflags then
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(ophi,regdst.reghi,left.reghi,right.reghi),PF_S))
        else
          begin
            list.concat(taicpu.op_reg_reg_reg(ophi,regdst.reghi,left.reghi,right.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
      end;

    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      { flags are only ever requested for additions and subtractions }
      wantflags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);

      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
          end;
        OP_ADD:
          emit_carry_chain(A_ADD,A_ADC,regsrc1,regsrc2);
        OP_SUB:
          { note the swapped operands: regsrc1 is subtracted from regsrc2 }
          emit_carry_chain(A_SUB,A_SBC,regsrc2,regsrc1);
        else
          internalerror(2003083104);
      end;

      if wantflags then
        begin
          ovloc.loc:=LOC_FLAGS;
          if size<>OS_64 then
            ovloc.resflags:=F_VS
          { ARM has a weird opinion how flags for SUB/ADD are handled:
            carry set after ADD, carry clear after SUB }
          else if op=OP_ADD then
            ovloc.resflags:=F_CS
          else
            ovloc.resflags:=F_CC;
        end;
    end;
  { Creates the integer register allocator for Thumb code: R0..R7 are
    handed to the allocator unless R7 is the frame pointer of the current
    procedure, in which case only R0..R6 are made available. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or
         (current_procinfo.framepointer<>NR_R7) then
        { R7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 holds the frame pointer, keep it away from the allocator }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocators and then lets the
    inherited implementation finish the clean-up. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;

      inherited done_register_allocators;
    end;
  { Generates the Thumb procedure prologue.

    list         - instruction list the prologue is appended to
    localsize    - size of the local variables in bytes; rounded up to a
                   multiple of 4 and then adjusted for the stack alignment
    nostackframe - if true, no prologue code is generated at all

    The prologue pushes the used non-volatile integer registers together
    with R14 (and R7 if a frame pointer other than the stack pointer is
    used), emits the matching CFI information, lowers the stack pointer by
    the frame size and finally copies the stack pointer into the frame
    pointer if one is used.

    The stack pointer is lowered with a single SUB for frames of up to and
    including 508 bytes (the same limit g_proc_exit uses when it releases
    the frame), with two SUBs up to 1016 bytes, and through R4 for anything
    larger. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);

          { save int registers }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

          { disabled in the original code for the frame pointer case:
            //!!!! a_reg_alloc(list,NR_R12);
            //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); }

          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);

          { safely estimate stack size: a frame that might not fit into a
            single SUB needs R4 as scratch register below, so make sure it
            gets saved }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;

          registerarea:=0;
          { do not save integer registers if the procedure does not return }
          if po_noreturn in current_procinfo.procdef.procoptions then
            regs:=[];
          if regs<>[] then
            begin
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
              { describe the slot of every pushed register for the unwinder
                and sum up the size of the register save area on the way }
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  begin
                    inc(registerarea,4);
                    current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
                  end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

              { 508 is the largest immediate a single SUB SP can take, so it
                is included in the one-instruction case }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4 }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  include(regs,RS_R4);
                end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;

          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
              current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
            end;
        end;
    end;
  { Generates the Thumb procedure epilogue.

    Nothing is emitted for procedures marked as not returning. Without a
    stack frame only a plain return (BX R14, or MOV PC,R14 on CPUs without
    BX) is generated. Otherwise the frame size is recomputed the same way
    the prologue does it, the stack pointer is raised by that amount and
    the saved registers are popped with R15 in the register list, which
    performs the return.

    NOTE(review): if a frame pointer other than the stack pointer is used
    on a non-Darwin target, no code at all is emitted after the frame size
    has been computed. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      regidx : byte;
      popregs : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      usesstackparas : Boolean;

    { return through the link register }
    procedure emit_plain_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          emit_plain_return;
          exit;
        end;

      usesstackparas:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: everything non-volatile that was used, the
        frame pointer if there is one, and R15 to return }
      popregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(popregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(popregs,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for regidx:=RS_R0 to RS_R15 do
        if regidx in popregs then
          inc(savedbytes,4);
      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if usesstackparas then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local frame; a zero sized frame needs no instruction,
        no matter whether the stack had to be realigned or not }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if popregs=[] then
        emit_plain_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,popregs));
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.

    If the source is at least as large as the destination, the load is done
    with the destination size. References whose alignment (1 or 2) is
    smaller than the size to load are read in byte or halfword pieces,
    starting with the least significant one, which are shifted and or'ed
    together in reg; all other loads go through handle_load_store. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      step : integer;

    { sets partref up for the piecewise loads: only complicated references
      need an extra loadaddr into a fresh register }
    procedure setup_partref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { moves partref on by advance bytes, loads the piece found there into
      partreg and merges it, shifted left by shift bits, into reg }
    procedure merge_part(advance : integer;partsize : tcgsize;shift : longint);
      begin
        inc(partref.offset,advance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        list.concat(taicpu.op_reg_const(A_LSL,partreg,shift));
        list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,partreg),PF_S));
      end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      { postfix of the load instruction for the given size }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { walk from the least to the most significant piece }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                setup_partref;
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { the upper byte carries the sign of a signed halfword }
                if FromSize=OS_16 then
                  merge_part(step,OS_8,8)
                else
                  merge_part(step,OS_S8,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                setup_partref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_part(step*2,OS_16,16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    merge_part(step,OS_8,8);
                    merge_part(step,OS_8,16);
                    merge_part(step,OS_8,24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg.

    Values accepted by is_thumb_imm are loaded with a flag setting MOV;
    everything else is stored as a 32 bit constant in the local data of the
    current procedure and loaded with a PC relative LDR. Sizes other than
    the 8, 16 and 32 bit integer sizes raise an internal error. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { place the value behind a fresh label in the local data ... }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);

      { ... and reference it relative to the program counter }
      reference_reset(poolref,4,[]);
      poolref.symbol:=poollabel;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      poolref.base:=NR_PC;

      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the "self" parameter of procdef, at every
    caller side location the parameter occupies (register or memory).

    Offsets accepted by is_thumb_imm are subtracted directly. Any other
    offset is placed as a 32 bit constant in the local data of the current
    procedure and loaded into R4, which is pushed before and popped after
    the subtraction. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      paraloc : Pcgparalocation;

    { saves R4 and loads ioffset into it from a literal in the local data }
    procedure load_offset_into_r4;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    { restores the R4 saved by load_offset_into_r4 }
    procedure restore_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(2003052504);

      paraloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while paraloc<>nil do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register)
                else
                  begin
                    load_offset_into_r4;
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R4,paraloc^.register);
                    restore_r4;
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,paraloc^.reference.index,paraloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,selfref)
                else
                  begin
                    load_offset_into_r4;
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R4,selfref);
                    restore_r4;
                  end;
              end
            else
              internalerror(2003091804);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  { Thumb front end of the inherited handle_load_store.

    If the combination of instruction, postfix and reference matches one of
    the cases checked below, the address of ref is first loaded into a new
    register and a plain [register] reference is passed on instead;
    otherwise ref is passed on unchanged. Returns whatever the inherited
    handle_load_store returns. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      fixedref : treference;
      addrreg : TRegister;
      wordaccess,
      byteaccess,
      halfaccess,
      usessp,
      needaddr : boolean;
    begin
      fixedref:=ref;

      { classify the access }
      wordaccess:=(oppostfix=PF_None) and ((op=A_LDR) or (op=A_STR));
      byteaccess:=((oppostfix=PF_B) and ((op=A_LDR) or (op=A_STR))) or
                  ((oppostfix=PF_None) and ((op=A_LDRB) or (op=A_STRB)));
      halfaccess:=((oppostfix=PF_H) and ((op=A_LDR) or (op=A_STR))) or
                  ((oppostfix=PF_None) and ((op=A_LDRH) or (op=A_STRH)));
      usessp:=(ref.base=NR_STACK_POINTER_REG) or
              (ref.index=NR_STACK_POINTER_REG);

      needaddr:=
        { LDR/STR limitations }
        (wordaccess and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byteaccess and
         (usessp or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (halfaccess and
         (usessp or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word load with a too large offset }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any other load with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if needaddr then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(fixedref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list,op,oppostfix,reg,fixedref);
    end;
  { Emits the two operand Thumb instruction for "dst := dst op src"
    (for OP_NEG and OP_NOT: "dst := op src").

    list - instruction list the code is appended to
    op   - operation to perform; OP_DIV and OP_IDIV raise an internal error
    size - operand size; OP_ROL is only supported for OS_32/OS_S32 and
           raises an internal error otherwise
    src  - source register
    dst  - destination (and first operand) register

    The result is finally passed to maybeadjustresult. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { there is no rotate left: simulate it by rotating right by
              32-src, i.e. tmpreg:=32; tmpreg:=tmpreg-src; dst:=dst ror tmpreg }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { the rotate amount is the computed complement, not src itself }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := dst op a" for the constant a.

    An addition/subtraction whose negated constant passes is_thumb_imm is
    first turned into the opposite operation. Additions and subtractions
    with a constant accepted by is_thumb_imm are then emitted directly; a
    few trivial multiplications, divisions and AND masks are special cased,
    shifts use the constant as shift count, and everything else loads the
    constant into a temporary register and falls back to a_op_reg_reg.
    The result is finally passed to maybeadjustresult.

    The sections guarded by DUMMY are not compiled unless that symbol is
    defined. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        begin
          { the negated constant fits: swap ADD and SUB }
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
          { overflow reporting is not wired up here yet: for
            cgsetflags {!!! or setflags } and size in [OS_8,OS_16,OS_32]
            this is meant to become
              //!!! ovloc.loc:=LOC_FLAGS;
              //!!! ovloc.resflags:=F_CS;   (OP_ADD)
              //!!! ovloc.resflags:=F_CC;   (OP_SUB) }
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(2003082903);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(2012051802);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op=OP_AND) and (dword(a)=0) then
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
          { x := x AND $FFFFFFFF leaves the register as it is }
          else if (op=OP_AND) and (not(dword(a))=0) then
            // do nothing
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if op in [OP_SHL,OP_SHR,OP_SAR] then
            list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a))
          else
            begin
              { generic case: constant into a register, then register op }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg(list,op,size,constreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src op a".

    An addition of a positive multiple of 4 up to 1020 to R13, with a
    destination other than R13, is emitted as a single three operand ADD;
    every other case is left to the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_relative_add : boolean;
    begin
      sp_relative_add:=(op=OP_ADD) and
                       (src=NR_R13) and
                       (dst<>NR_R13) and
                       (a>0) and
                       (a<=1020) and
                       ((a mod 4)=0);
      if not(sp_relative_add) then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition described by f in reg: reg becomes 1 if the
    condition holds and 0 otherwise. Implemented with a conditional branch
    around two flag setting MOVs; the flags register is released at the
    end of the sequence. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truelabel,
      donelabel : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(truelabel);
      current_asmdata.getjumplabel(donelabel);

      { condition holds -> skip to the "1" case }
      condjump:=taicpu.op_sym(A_B,truelabel);
      condjump:=setcondition(condjump,flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition does not hold }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,donelabel));

      { condition holds }
      cg.a_label(list,truelabel);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,donelabel);
    end;
  { Creates the register allocators for Thumb-2 code: the integer allocator
    (without R9 on iOS), the FPU allocator if the selected FPU has FPA
    registers, and the MM allocator via init_mmregister_allocator. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and MM register allocators and then lets the
    inherited implementation finish the clean-up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;

      inherited done_register_allocators;
    end;
  { Calls the routine whose address is in reg with BLX and records in the
    flags of the current procedure that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX,reg);
      list.concat(callinstr);
      {
        the compiler does not properly set pi_do_call anymore in pass 1,
        and for now we only need it after pass 2 (I hope) (JM), so the
        flag is set here instead of being verified:
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Load the constant a into integer register reg. The forms are tried in
    this order: MOV with an immediate accepted by is_thumb32_imm, MVN with the
    inverted immediate, MOVW for values that fit into the low 16 bits, and
    finally a PC relative LDR of a 32 bit literal that is appended to
    current_procinfo.aktlocaldata.
    Only 8, 16 and 32 bit integer sizes are accepted. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      literallabel : tasmlabel;
      literalref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value in the local data of the
        procedure behind a fresh label and load it relative to the PC }
      reference_reset(literalref,4,[]);
      current_asmdata.getjumplabel(literallabel);
      cg.a_label(current_procinfo.aktlocaldata,literallabel);
      { the label just added is the last entry of the local data list }
      literalref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      literalref.symbol:=literallabel;
      literalref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,literalref));
    end;
  { Load a value of size fromsize from memory reference Ref into integer
    register reg.
    Accesses whose reference alignment is sufficient are emitted as a single
    LDR with a size postfix through handle_load_store. A 16 or 32 bit access
    through a reference with alignment 1 or 2 that is smaller than the access
    size is assembled from narrower loads which are merged with ORR and an
    LSL shifter operand. }
  4061. procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  4062. var
  4063. oppostfix:toppostfix;
  4064. usedtmpref: treference;
  4065. tmpreg,tmpreg2 : tregister;
  4066. so : tshifterop;
  4067. dir : integer;
  4068. begin
  { never read more bytes than the destination size asks for }
  4069. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  4070. FromSize := ToSize;
  4071. case FromSize of
  4072. { select the LDR postfix matching the width and signedness of the access }
  4073. OS_8:
  4074. oppostfix:=PF_B;
  4075. OS_S8:
  4076. oppostfix:=PF_SB;
  4077. OS_16:
  4078. oppostfix:=PF_H;
  4079. OS_S16:
  4080. oppostfix:=PF_SH;
  4081. OS_32,
  4082. OS_S32:
  4083. oppostfix:=PF_None;
  4084. else
  4085. InternalError(2003082913);
  4086. end;
  { unaligned path: the reference is less aligned than the access is wide }
  4087. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  4088. begin
  { dir is the address step from the least significant part to the next more
    significant one: backwards on big endian targets, forwards otherwise }
  4089. if target_info.endian=endian_big then
  4090. dir:=-1
  4091. else
  4092. dir:=1;
  4093. case FromSize of
  4094. OS_16,OS_S16:
  4095. begin
  4096. { only complicated references need an extra loadaddr }
  4097. if assigned(ref.symbol) or
  4098. (ref.index<>NR_NO) or
  4099. (ref.offset<-255) or
  4100. (ref.offset>4094) or
  4101. { sometimes the compiler reused registers }
  4102. (reg=ref.index) or
  4103. (reg=ref.base) then
  4104. begin
  4105. tmpreg2:=getintregister(list,OS_INT);
  4106. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4107. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4108. end
  4109. else
  4110. usedtmpref:=ref;
  { on big endian the least significant byte is the second one }
  4111. if target_info.endian=endian_big then
  4112. inc(usedtmpref.offset,1);
  4113. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  4114. tmpreg:=getintregister(list,OS_INT);
  { low byte goes straight into reg, always unsigned }
  4115. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4116. inc(usedtmpref.offset,dir);
  { the high byte carries the sign for OS_S16, so it is loaded signed then }
  4117. if FromSize=OS_16 then
  4118. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  4119. else
  4120. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  4121. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4122. end;
  4123. OS_32,OS_S32:
  4124. begin
  4125. tmpreg:=getintregister(list,OS_INT);
  4126. { only complicated references need an extra loadaddr }
  4127. if assigned(ref.symbol) or
  4128. (ref.index<>NR_NO) or
  4129. (ref.offset<-255) or
  4130. (ref.offset>4092) or
  4131. { sometimes the compiler reused registers }
  4132. (reg=ref.index) or
  4133. (reg=ref.base) then
  4134. begin
  4135. tmpreg2:=getintregister(list,OS_INT);
  4136. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4137. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4138. end
  4139. else
  4140. usedtmpref:=ref;
  4141. shifterop_reset(so);so.shiftmode:=SM_LSL;
  4142. if ref.alignment=2 then
  4143. begin
  { halfword aligned: combine two 16 bit loads, low halfword first }
  4144. if target_info.endian=endian_big then
  4145. inc(usedtmpref.offset,2);
  4146. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  4147. inc(usedtmpref.offset,dir*2);
  4148. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  4149. so.shiftimm:=16;
  4150. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4151. end
  4152. else
  4153. begin
  { byte aligned: combine four byte loads, least significant byte first,
    shifting each following byte 8 bits further to the left }
  4154. if target_info.endian=endian_big then
  4155. inc(usedtmpref.offset,3);
  4156. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4157. inc(usedtmpref.offset,dir);
  4158. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4159. so.shiftimm:=8;
  4160. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4161. inc(usedtmpref.offset,dir);
  4162. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4163. so.shiftimm:=16;
  4164. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4165. inc(usedtmpref.offset,dir);
  4166. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4167. so.shiftimm:=24;
  4168. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4169. end;
  4170. end
  4171. else
  { remaining sizes need no splitting }
  4172. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  4173. end;
  4174. end
  4175. else
  { sufficiently aligned: one LDR with the postfix chosen above }
  4176. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { a signed byte widened to an unsigned 16 bit value gets an additional
    OS_16 to OS_32 register conversion; presumably this clears the upper
    halfword left by the sign extending load - confirm in a_load_reg_reg }
  4177. if (fromsize=OS_S8) and (tosize = OS_16) then
  4178. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  4179. end;
  { Two operand register operation dst := dst op src. Only OP_NOT is handled
    here: it is emitted as MVN followed, for 8 and 16 bit sizes, by the
    matching zero or sign extension of the result. Every other operation is
    delegated to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { MVN inverts all 32 bits, so narrow results are extended again }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
        OS_32,
        OS_S32:
          { full width result, nothing to adjust }
          ;
        else
          internalerror(2019050916);
      end;
    end;
  { Emit dst := src op a for the constant a.
    setflags (or the cgsetflags field) requests that the instruction updates
    the condition flags. ovloc is set to LOC_VOID, or for flag setting
    operations on unsigned sizes to LOC_FLAGS together with the flag to test.
    Constants usable as shifter operand are encoded directly; all other
    constants are either strength reduced (multiplications, AND masks) or
    loaded into a temporary register first. }
  4200. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4201. var
  4202. shift, width : byte;
  4203. tmpreg : tregister;
  4204. so : tshifterop;
  4205. l1 : longint;
  4206. begin
  4207. ovloc.loc:=LOC_VOID;
  { if the negated constant is a shifter constant, swap ADD and SUB and use
    the negated value; the smallest 32 bit value is excluded from negation }
  4208. if (a<>-2147483648) and is_shifter_const(-a,shift) then
  4209. case op of
  4210. OP_ADD:
  4211. begin
  4212. op:=OP_SUB;
  4213. a:=aint(dword(-a));
  4214. end;
  4215. OP_SUB:
  4216. begin
  4217. op:=OP_ADD;
  4218. a:=aint(dword(-a));
  4219. end
  4220. else
  4221. ;
  4222. end;
  { first main branch: the constant is a shifter constant and the operation
    is not a multiplication }
  4223. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4224. case op of
  4225. OP_NEG,OP_NOT,
  4226. OP_DIV,OP_IDIV:
  4227. internalerror(200308285);
  { shifts and rotates by a constant become MOV dst,src with a shifter
    operand; a count of zero degenerates to a plain MOV }
  4228. OP_SHL:
  4229. begin
  4230. if a>32 then
  4231. internalerror(2014020703);
  4232. if a<>0 then
  4233. begin
  4234. shifterop_reset(so);
  4235. so.shiftmode:=SM_LSL;
  4236. so.shiftimm:=a;
  4237. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4238. end
  4239. else
  4240. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4241. end;
  4242. OP_ROL:
  4243. begin
  4244. if a>32 then
  4245. internalerror(2014020704);
  4246. if a<>0 then
  4247. begin
  { rotate left by a is emitted as rotate right by 32-a }
  4248. shifterop_reset(so);
  4249. so.shiftmode:=SM_ROR;
  4250. so.shiftimm:=32-a;
  4251. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4252. end
  4253. else
  4254. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4255. end;
  4256. OP_ROR:
  4257. begin
  4258. if a>32 then
  4259. internalerror(2014020705);
  4260. if a<>0 then
  4261. begin
  4262. shifterop_reset(so);
  4263. so.shiftmode:=SM_ROR;
  4264. so.shiftimm:=a;
  4265. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4266. end
  4267. else
  4268. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4269. end;
  4270. OP_SHR:
  4271. begin
  4272. if a>32 then
  4273. internalerror(200308292);
  4274. shifterop_reset(so);
  4275. if a<>0 then
  4276. begin
  4277. so.shiftmode:=SM_LSR;
  4278. so.shiftimm:=a;
  4279. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4280. end
  4281. else
  4282. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4283. end;
  4284. OP_SAR:
  4285. begin
  4286. if a>32 then
  4287. internalerror(200308295);
  4288. if a<>0 then
  4289. begin
  4290. shifterop_reset(so);
  4291. so.shiftmode:=SM_ASR;
  4292. so.shiftimm:=a;
  4293. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4294. end
  4295. else
  4296. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4297. end;
  4298. else
  { all remaining operations: ADD and SUB with a constant outside 0..4095 go
    through a temporary register, everything else uses the immediate form.
    The opcode comes from op_reg_reg_opcg2asmop and gets the S postfix when
    the flags are requested. }
  4299. if (op in [OP_SUB, OP_ADD]) and
  4300. ((a < 0) or
  4301. (a > 4095)) then
  4302. begin
  4303. tmpreg:=getintregister(list,size);
  4304. a_load_const_reg(list, size, a, tmpreg);
  4305. if cgsetflags or setflags then
  4306. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4307. list.concat(setoppostfix(
  4308. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4309. end
  4310. else
  4311. begin
  4312. if cgsetflags or setflags then
  4313. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4314. list.concat(setoppostfix(
  4315. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4316. end;
  { for unsigned sizes the overflow is reported through the flags: F_CS
    after an addition, F_CC after a subtraction }
  4317. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4318. begin
  4319. ovloc.loc:=LOC_FLAGS;
  4320. case op of
  4321. OP_ADD:
  4322. ovloc.resflags:=F_CS;
  4323. OP_SUB:
  4324. ovloc.resflags:=F_CC;
  4325. else
  4326. ;
  4327. end;
  4328. end;
  4329. end
  4330. else
  { second main branch: multiplications and constants that are no shifter
    constant; the alternatives below are tried in order }
  4331. begin
  4332. { there could be added some more sophisticated optimizations }
  4333. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4334. a_load_reg_reg(list,size,size,src,dst)
  4335. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4336. a_load_const_reg(list,size,0,dst)
  4337. else if (op in [OP_IMUL]) and (a=-1) then
  4338. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4339. { we do this here instead in the peephole optimizer because
  4340. it saves us a register }
  4341. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4342. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4343. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4344. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4345. begin
  4346. if l1>32 then{roozbeh does this ever happen?}
  4347. internalerror(2003082911);
  4348. shifterop_reset(so);
  4349. so.shiftmode:=SM_LSL;
  4350. so.shiftimm:=l1;
  4351. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4352. end
  4353. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4354. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4355. begin
  4356. if l1>32 then{does this ever happen?}
  4357. internalerror(2012051803);
  4358. shifterop_reset(so);
  4359. so.shiftmode:=SM_LSL;
  4360. so.shiftimm:=l1;
  4361. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4362. end
  4363. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4364. begin
  4365. { nothing to do on success }
  4366. end
  4367. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4368. Just using mov x, #0 might allow some easier optimizations down the line. }
  4369. else if (op = OP_AND) and (dword(a)=0) then
  4370. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4371. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4372. else if (op = OP_AND) and (not(dword(a))=0) then
  4373. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4374. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4375. broader range of shifterconstants.}
  4376. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4377. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  4378. else if (op = OP_AND) and is_thumb32_imm(a) then
  4379. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  { masking with $FFFF is emitted as UXTH }
  4380. else if (op = OP_AND) and (a = $FFFF) then
  4381. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  4382. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4383. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  { the bits to clear form one contiguous run: copy src and clear the run
    with BFC at position shift and length width }
  4384. else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
  4385. begin
  4386. a_load_reg_reg(list,size,size,src,dst);
  4387. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4388. end
  4389. else
  { generic fallback: materialise the constant in a register and use the
    register/register variant, which also fills in ovloc }
  4390. begin
  4391. tmpreg:=getintregister(list,size);
  4392. a_load_const_reg(list,size,a,tmpreg);
  4393. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4394. end;
  4395. end;
  4396. maybeadjustresult(list,op,size,dst);
  4397. end;
  { Thumb-2 opcode emitted for each TOpCG by the default branch of
    tthumb2cgarm.a_op_reg_reg_reg_checkoverflow. The table is positional: one
    entry per TOpCG value in declaration order. A_NONE presumably marks the
    operations that have no direct three register instruction - confirm
    against the declaration of TOpCG. }
  4398. const
  4399. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4400. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4401. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Emit the three operand register operation dst := src2 op src1.

    list     - assembler list the instructions are appended to
    op       - operation to perform; OP_NEG and OP_NOT are rejected with an
               internal error
    size     - operand size; rotates accept only OS_32 and OS_S32
    src1     - second operand (shift or rotate count for shifts and rotates)
    src2     - first operand
    dst      - destination register
    setflags - the instruction shall update the condition flags (the
               cgsetflags field has the same effect)
    ovloc    - receives LOC_VOID, or for flag setting multiplications
               LOC_FLAGS with F_NE signalling an overflow

    The source registers are never modified. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the adjusted count is built in
              a scratch register so that the caller's src1 stays intact }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { overflow checking multiply: compute the full 64 bit product
                  with SMULL/UMULL, the upper half goes to overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: copy
                          the operand into a scratch register first }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow if the upper half equals the sign
                      extension of the lower half (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow if the upper half is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: copy
                          the operand into a scratch register first }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { plain three register instruction taken from the Thumb-2 opcode
              table, with the S postfix when the flags are requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialise the condition described by f in reg: an ITE block selects
    MOV reg,1 when the condition holds and MOV reg,0 otherwise. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      setone,setzero : taicpu;
    begin
      { "then" instruction, executed when the condition is true }
      setone:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      { "else" instruction, executed under the inverse condition }
      setzero:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));

      list.concat(taicpu.op_cond(A_ITE, flags_to_cond(f)));
      list.concat(setone);
      list.concat(setzero);
    end;
  { Emit the procedure prologue.
    Unless nostackframe is set this saves the used non-volatile integer
    registers (plus R14 and, if one is used, the frame pointer), sets up the
    frame pointer, lowers the stack pointer by the aligned local size and
    stores the used non-volatile FPA registers. With nostackframe nothing is
    emitted. }
  4517. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4518. var
  4519. ref : treference;
  4520. shift : byte;
  4521. firstfloatreg,lastfloatreg,
  4522. r : byte;
  4523. regs : tcpuregisterset;
  4524. stackmisalignment: pint;
  4525. begin
  4526. LocalSize:=align(LocalSize,4);
  4527. { call instruction does not put anything on the stack }
  4528. stackmisalignment:=0;
  4529. if not(nostackframe) then
  4530. begin
  4531. firstfloatreg:=RS_NO;
  4532. lastfloatreg:=RS_NO;
  4533. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4534. begin
  4535. { save floating point registers? }
  { find the first and last used non-volatile FPA register; each one found
    accounts for 12 bytes in stackmisalignment }
  4536. for r:=RS_F0 to RS_F7 do
  4537. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4538. begin
  4539. if firstfloatreg=RS_NO then
  4540. firstfloatreg:=r;
  4541. lastfloatreg:=r;
  4542. inc(stackmisalignment,12);
  4543. end;
  4544. end;
  4545. a_reg_alloc(list,NR_STACK_POINTER_REG);
  { with a separate frame pointer, remember the incoming SP in R12 before
    anything is pushed }
  4546. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4547. begin
  4548. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4549. a_reg_alloc(list,NR_R12);
  4550. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4551. end;
  4552. { save int registers }
  4553. reference_reset(ref,4,[]);
  4554. ref.index:=NR_STACK_POINTER_REG;
  4555. ref.addressmode:=AM_PREINDEXED;
  { registers to save: the used non-volatile ones, plus frame pointer and
    R14 when a frame pointer is used, or R14 alone when anything is saved or
    the procedure performs a call }
  4556. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4557. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4558. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4559. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4560. include(regs,RS_R14);
  4561. if regs<>[] then
  4562. begin
  { every pushed register occupies 4 bytes }
  4563. for r:=RS_R0 to RS_R15 do
  4564. if (r in regs) then
  4565. inc(stackmisalignment,4);
  { STM with the FD postfix through the pre-indexed SP reference }
  4566. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4567. end;
  4568. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4569. begin
  4570. { the framepointer now points to the saved R15, so the saved
  4571. framepointer is at R11-12 (for get_caller_frame) }
  4572. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4573. a_reg_dealloc(list,NR_R12);
  4574. end;
  { only the remainder with respect to the maximum local alignment matters }
  4575. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4576. if (LocalSize<>0) or
  4577. ((stackmisalignment<>0) and
  4578. ((pi_do_call in current_procinfo.flags) or
  4579. (po_assembler in current_procinfo.procdef.procoptions))) then
  4580. begin
  { grow localsize so that saved bytes plus locals are a multiple of
    localalignmax }
  4581. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4582. if not(is_shifter_const(localsize,shift)) then
  4583. begin
  { the size cannot be encoded as an immediate: load it into R12 first }
  4584. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4585. a_reg_alloc(list,NR_R12);
  4586. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4587. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4588. a_reg_dealloc(list,NR_R12);
  4589. end
  4590. else
  4591. begin
  { NOTE(review): R12 is deallocated here although this branch never
    allocated it - confirm that this is intended }
  4592. a_reg_dealloc(list,NR_R12);
  4593. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4594. end;
  4595. end;
  4596. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4597. begin
  4598. if firstfloatreg<>RS_NO then
  4599. begin
  { store the FPA registers firstfloatreg..lastfloatreg with SFM at
    floatregstart relative to the frame pointer; large distances are
    computed into R12 instead of being used as offset }
  4600. reference_reset(ref,4,[]);
  4601. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4602. begin
  4603. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4604. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4605. ref.base:=NR_R12;
  4606. end
  4607. else
  4608. begin
  4609. ref.base:=current_procinfo.framepointer;
  4610. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4611. end;
  4612. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4613. lastfloatreg-firstfloatreg+1,ref));
  4614. end;
  4615. end;
  4616. end;
  4617. end;
  { Emit the procedure epilogue.
    Nothing is emitted for procedures declared noreturn. With nostackframe
    only BX R14 is emitted. Otherwise the used non-volatile FPA registers are
    reloaded, the local stack area is released and the saved integer
    registers are restored with LDM, which returns by loading R15; if no
    register has to be restored the return is a BX R14. }
  4618. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4619. var
  4620. ref : treference;
  4621. firstfloatreg,lastfloatreg,
  4622. r : byte;
  4623. shift : byte;
  4624. regs : tcpuregisterset;
  4625. LocalSize : longint;
  4626. stackmisalignment: pint;
  4627. begin
  4628. { a routine not returning needs no exit code,
  4629. we trust this directive as arm thumb is normally used if small code shall be generated }
  4630. if po_noreturn in current_procinfo.procdef.procoptions then
  4631. exit;
  4632. if not(nostackframe) then
  4633. begin
  4634. stackmisalignment:=0;
  4635. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4636. begin
  4637. { restore floating point register }
  4638. firstfloatreg:=RS_NO;
  4639. lastfloatreg:=RS_NO;
  4640. { save floating point registers? }
  { find the first and last used non-volatile FPA register }
  4641. for r:=RS_F0 to RS_F7 do
  4642. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4643. begin
  4644. if firstfloatreg=RS_NO then
  4645. firstfloatreg:=r;
  4646. lastfloatreg:=r;
  4647. { floating point register space is already included in
  4648. localsize below by calc_stackframe_size
  4649. inc(stackmisalignment,12);
  4650. }
  4651. end;
  4652. if firstfloatreg<>RS_NO then
  4653. begin
  { reload the FPA registers with LFM from floatregstart relative to the
    frame pointer; large distances are computed into R12 instead of being
    used as offset }
  4654. reference_reset(ref,4,[]);
  4655. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4656. begin
  4657. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4658. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4659. ref.base:=NR_R12;
  4660. end
  4661. else
  4662. begin
  4663. ref.base:=current_procinfo.framepointer;
  4664. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4665. end;
  4666. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4667. lastfloatreg-firstfloatreg+1,ref));
  4668. end;
  4669. end;
  { registers to restore: the used non-volatile ones; where the prologue
    saved R14 the epilogue loads R15 instead, so that the LDM returns }
  4670. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4671. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4672. begin
  4673. exclude(regs,RS_R14);
  4674. include(regs,RS_R15);
  4675. end;
  4676. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4677. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
  { every restored register occupies 4 bytes on the stack }
  4678. for r:=RS_R0 to RS_R15 do
  4679. if (r in regs) then
  4680. inc(stackmisalignment,4);
  4681. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4682. LocalSize:=current_procinfo.calc_stackframe_size;
  4683. if (LocalSize<>0) or
  4684. ((stackmisalignment<>0) and
  4685. ((pi_do_call in current_procinfo.flags) or
  4686. (po_assembler in current_procinfo.procdef.procoptions))) then
  4687. begin
  { same size computation as in g_proc_entry, so SP is raised by exactly the
    amount it was lowered by }
  4688. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4689. if not(is_shifter_const(LocalSize,shift)) then
  4690. begin
  { the size cannot be encoded as an immediate: load it into R12 first }
  4691. a_reg_alloc(list,NR_R12);
  4692. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4693. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4694. a_reg_dealloc(list,NR_R12);
  4695. end
  4696. else
  4697. begin
  { NOTE(review): R12 is deallocated here although this branch never
    allocated it - confirm that this is intended }
  4698. a_reg_dealloc(list,NR_R12);
  4699. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4700. end;
  4701. end;
  4702. if regs=[] then
  { nothing was saved, return through the link register }
  4703. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4704. else
  4705. begin
  { LDM with the FD postfix through the pre-indexed SP reference; regs
    contains R15 here, so this also performs the return }
  4706. reference_reset(ref,4,[]);
  4707. ref.index:=NR_STACK_POINTER_REG;
  4708. ref.addressmode:=AM_PREINDEXED;
  4709. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4710. end;
  4711. end
  4712. else
  4713. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4714. end;
  { Emit the load or store instruction op with postfix oppostfix for register
    reg and memory reference ref.
    ref is a by-value copy that is first rewritten into a form the
    instruction can encode: symbols and out-of-range offsets are moved into a
    temporary register, a base+index+offset combination is reduced, and
    floating point/VFP accesses are reduced to a plain base register.
    Returns the rewritten reference that was used in the emitted
    instruction. }
  4715. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4716. var
  4717. tmpreg : tregister;
  4718. tmpref : treference;
  4719. l : tasmlabel;
  4720. begin
  { NR_NO means that no temporary register has been allocated so far }
  4721. tmpreg:=NR_NO;
  4722. { Be sure to have a base register }
  4723. if (ref.base=NR_NO) then
  4724. begin
  4725. if ref.shiftmode<>SM_None then
  4726. internalerror(2014020706);
  4727. ref.base:=ref.index;
  4728. ref.index:=NR_NO;
  4729. end;
  4730. { absolute symbols can't be handled directly, we've to store the symbol reference
  4731. in the text segment and access it pc relative
  4732. For now, we assume that references where base or index equals to PC are already
  4733. relative, all other references are assumed to be absolute and thus they need
  4734. to be handled extra.
  4735. A proper solution would be to change refoptions to a set and store the information
  4736. if the symbol is absolute or relative there.
  4737. }
  { the reference needs a temporary register if it names a non PC relative
    symbol, has neither base nor index, has an offset outside -255..4095
    (-255..255 for the SB, H and SH postfixes), or is a floating point/VFP
    access with a symbol or an offset that is outside -1020..1020 or not a
    multiple of 4 }
  4738. if (assigned(ref.symbol) and
  4739. not(is_pc(ref.base)) and
  4740. not(is_pc(ref.index))
  4741. ) or
  4742. { [#xxx] isn't a valid address operand }
  4743. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4744. //(ref.offset<-4095) or
  4745. (ref.offset<-255) or
  4746. (ref.offset>4095) or
  4747. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4748. ((ref.offset<-255) or
  4749. (ref.offset>255)
  4750. )
  4751. ) or
  4752. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4753. ((ref.offset<-1020) or
  4754. (ref.offset>1020) or
  4755. ((abs(ref.offset) mod 4)<>0) or
  4756. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4757. assigned(ref.symbol)
  4758. )
  4759. ) then
  4760. begin
  4761. reference_reset(tmpref,4,[]);
  4762. { load symbol }
  4763. tmpreg:=getintregister(list,OS_INT);
  4764. if assigned(ref.symbol) then
  4765. begin
  { emit a literal holding symbol+offset into the local data of the
    procedure; the kind of constant depends on ref.refaddr }
  4766. current_asmdata.getjumplabel(l);
  4767. cg.a_label(current_procinfo.aktlocaldata,l);
  4768. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4769. if ref.refaddr=addr_gottpoff then
  4770. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  4771. else if ref.refaddr=addr_tlsgd then
  4772. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  4773. else if ref.refaddr=addr_tlsdesc then
  4774. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  4775. else if ref.refaddr=addr_tpoff then
  4776. begin
  4777. if assigned(ref.relsymbol) or (ref.offset<>0) then
  4778. Internalerror(2019092807);
  4779. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  4780. end
  4781. else
  4782. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4783. { load consts entry }
  4784. tmpref.symbol:=l;
  4785. tmpref.base:=NR_R15;
  4786. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4787. { in case of LDF/STF, we got rid of the NR_R15 }
  4788. if is_pc(ref.base) then
  4789. ref.base:=NR_NO;
  4790. if is_pc(ref.index) then
  4791. ref.index:=NR_NO;
  4792. end
  4793. else
  { no symbol: only the offset has to be materialised in tmpreg }
  4794. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
  { merge tmpreg into the reference: added to the base if an index is
    already present, used as index if only a base exists, or used as the
    base itself }
  4795. if (ref.base<>NR_NO) then
  4796. begin
  4797. if ref.index<>NR_NO then
  4798. begin
  4799. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4800. ref.base:=tmpreg;
  4801. end
  4802. else
  4803. begin
  4804. ref.index:=tmpreg;
  4805. ref.shiftimm:=0;
  4806. ref.signindex:=1;
  4807. ref.shiftmode:=SM_None;
  4808. end;
  4809. end
  4810. else
  4811. ref.base:=tmpreg;
  { symbol and offset are now contained in tmpreg }
  4812. ref.offset:=0;
  4813. ref.symbol:=nil;
  4814. end;
  { base, index and offset cannot be combined: fold the offset into a
    register }
  4815. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4816. begin
  4817. if tmpreg<>NR_NO then
  4818. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4819. else
  4820. begin
  4821. tmpreg:=getintregister(list,OS_ADDR);
  4822. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4823. ref.base:=tmpreg;
  4824. end;
  4825. ref.offset:=0;
  4826. end;
  4827. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  4828. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4829. begin
  4830. tmpreg:=getintregister(list,OS_ADDR);
  4831. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4832. ref.base := tmpreg;
  4833. end;
  4834. { floating point operations have only limited references
  4835. we expect here, that a base is already set }
  4836. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4837. begin
  4838. if ref.shiftmode<>SM_none then
  4839. internalerror(2003091202);
  { fold the index into the base so that only a base register remains,
    reusing tmpreg if one has been allocated above }
  4840. if tmpreg<>NR_NO then
  4841. begin
  4842. if ref.base=tmpreg then
  4843. begin
  4844. if ref.signindex<0 then
  4845. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4846. else
  4847. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4848. ref.index:=NR_NO;
  4849. end
  4850. else
  4851. begin
  4852. if ref.index<>tmpreg then
  4853. internalerror(2004031602);
  4854. if ref.signindex<0 then
  4855. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4856. else
  4857. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4858. ref.base:=tmpreg;
  4859. ref.index:=NR_NO;
  4860. end;
  4861. end
  4862. else
  4863. begin
  4864. tmpreg:=getintregister(list,OS_ADDR);
  4865. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4866. ref.base:=tmpreg;
  4867. ref.index:=NR_NO;
  4868. end;
  4869. end;
  { finally emit the instruction itself with the rewritten reference }
  4870. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4871. Result := ref;
  4872. end;
  { Copy MM register reg1 to reg2.
    Only the single precision case (OS_F32 to OS_F32) emits code: a VMOV.F32
    that is also registered as a move instruction. Every other size
    combination currently emits nothing. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if (fromsize=OS_F32) and (tosize=OS_F32) then
        begin
          movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
          list.Concat(movinstr);
          add_move_instruction(movinstr);
          { VMOV cannot generate an FPU exception, so we do not need a check here }
        end;
      { Not implemented, no instruction is generated:
          OS_F64 to OS_F64 - earlier attempts used one or two VMOV.F32:
            //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
            //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
          OS_F32 to OS_F64 - earlier attempt used VCVT:
            //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32)) }
    end;
  { Loads MM register reg from the memory reference ref.

    The work is delegated to handle_load_store using the VLDR opcode without a
    postfix.  fromsize, tosize and shuffle are not used by this implementation
    and the value returned by handle_load_store is discarded. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VLDR,
        PF_None,
        reg,
        ref);
    end;
  { Stores MM register reg to the memory reference ref.

    The work is delegated to handle_load_store using the VSTR opcode without a
    postfix.  fromsize, tosize and shuffle are not used by this implementation
    and the value returned by handle_load_store is discarded. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VSTR,
        PF_None,
        reg,
        ref);
      { VSTR cannot generate an FPU exception, so we do not need a check here }
    end;
  { Transfers the integer register intreg into the MM register mmreg.

    Only a destination size of OS_F32 is handled, by a single VMOV; any other
    tosize triggers internalerror 2012100813.  fromsize is ignored and the
    shuffle check is disabled (it was commented out in the original code). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the MM register mmreg into the integer register intreg.

    Only a source size of OS_F32 is handled, by a single VMOV; any other
    fromsize triggers internalerror 2012100814.  tosize is ignored and the
    shuffle check is disabled (it was commented out in the original code). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
          { VMOV cannot generate an FPU exception, so we do not need a check here }
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is implemented here; every other operation is forwarded
    unchanged to the inherited implementation.

    The negation is emitted as
        RSBS  dst.lo, src.lo, #0
        MOV   tmp, #0
        SBC   dst.hi, tmp, src.hi
    with NR_DEFAULTFLAGS allocated around the whole sequence. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: 0 - src.lo, with the S postfix so the flags are updated }
      list.concat(
        setoppostfix(
          taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),
          PF_S));
      { high word: a scratch register holding 0 is the minuend of the SBC }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation for Thumb: regdst := regdst <op> regsrc
    (for OP_NEG/OP_NOT: regdst := <op> regsrc).

    Supported operations:
      OP_NEG            - dst is zeroed, then src is subtracted with SUB/SBC
      OP_NOT            - 32 bit OP_NOT applied to each half via cg
      OP_AND/OR/XOR     - 32 bit operation applied to each half via cg
      OP_ADD            - ADD on the low words, ADC on the high words
      OP_SUB            - SUB on the low words, SBC on the high words
    Any other operation raises internalerror 2003083105.

    The size parameter is not inspected.

    Every path that allocates NR_DEFAULTFLAGS for the carry chain also
    releases it again once the high word has been processed. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { NOTE(review): regdst is cleared before regsrc is read, so this
              sequence only yields the negation when regdst and regsrc do not
              share registers - confirm that callers guarantee this }
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations have no carry, the halves are independent }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { the carry has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { the borrow has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg <op> value.

    Supported operations:
      OP_AND/OR/XOR - 32 bit operation with lo(value)/hi(value) on each half
      OP_ADD        - ADD on the low word, ADC on the high word
      OP_SUB        - SUB on the low word, SBC on the high word
    Any other operation raises internalerror 2003083106.

    The size parameter is not inspected. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);

    { Emits the two-instruction carry chain shared by OP_ADD and OP_SUB.
      op_lo is applied to the low word (A_ADD or A_SUB), op_hi to the high
      word (A_ADC or A_SBC).  NR_DEFAULTFLAGS is allocated just before the
      low-word instruction and released after the high-word instruction. }
    procedure emit_addsub_const(op_lo, op_hi: tasmop);
      var
        tmpreg : tregister;
      begin
        if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
          begin
            { a low word in 0..255 is used directly as immediate operand }
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_const(op_lo,reg.reglo,aint(lo(value))));
          end
        else
          begin
            { otherwise materialise the low word in a scratch register first,
              before the flags become live }
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(op_lo,reg.reglo,tmpreg));
          end;

        { NOTE(review): the high word is loaded while the carry of the
          low-word instruction is live; this presumably relies on
          a_load_const_reg leaving the carry flag untouched - confirm }
        tmpreg:=cg.getintregister(list,OS_32);
        { hi() is cast to aint for both operations so the value is passed
          as a signed 32 bit constant, exactly like the low word }
        cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
        list.concat(setoppostfix(taicpu.op_reg_reg(op_hi,reg.reghi,tmpreg),PF_S));
        { the carry/borrow has been consumed, release the flags again }
        cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
      end;

    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations have no carry, the halves are independent }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          emit_addsub_const(A_ADD,A_ADC);
        OP_SUB:
          emit_addsub_const(A_SUB,A_SBC);
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects for the selected
    instruction set.

    GenerateThumb2Code is tested first, then GenerateThumbCode; if neither is
    set the plain ARM generators are created.  Each variant assigns cg and
    cg64.  casmoptimizer is assigned for the Thumb-2 and ARM variants only;
    the Thumb variant leaves it untouched (its assignment is commented
    out). }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { default: classic ARM instruction set }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5052. end.