cgcpu.pas 224 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  28. { tbasecgarm is shared between all arm architectures }
  29. tbasecgarm = class(tcg)
  30. { true, if the next arithmetic operation should modify the flags }
  31. cgsetflags : boolean;
  32. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  33. protected
  34. procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  { creates rg[R_MMREGISTER] depending on the capabilities of the selected fpu type;
    called from the init_register_allocators of the descendants }
  35. procedure init_mmregister_allocator;
  36. public
  37. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
  38. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  39. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  40. { move instructions }
  41. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  42. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
  { internal load/store helpers which return a treference; tcgarm.a_op_const_ref
    passes the reference returned by the load on to the store that follows it }
  43. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  44. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  45. { fpu move instructions }
  46. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  47. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  48. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  49. procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
  50. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  51. { comparison operations }
  52. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  53. l : tasmlabel);override;
  54. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  55. procedure a_jmp_name(list : TAsmList;const s : string); override;
  56. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  57. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  58. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  59. procedure g_profilecode(list : TAsmList); override;
  60. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  61. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  62. procedure g_maybe_got_init(list : TAsmList); override;
  63. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
  64. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  65. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
  { the following two copy routines are not overrides, they are introduced by this class }
  66. procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  67. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  68. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  69. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  70. procedure g_save_registers(list : TAsmList);override;
  71. procedure g_restore_registers(list : TAsmList);override;
  72. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  73. procedure fixref(list : TAsmList;var ref : treference);
  { virtual so that the thumb and thumb-2 code generators can provide their own version }
  74. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
  75. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  76. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  77. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  78. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  79. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  80. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  81. { Transform unsupported methods into Internal errors }
  82. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
  83. { try to generate an optimized 32 Bit multiplication, returns true if it was generated successfully }
  84. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  85. { clear out potential overflow bits from 8 or 16 bit operations, i.e.
  86. the upper 24/16 bits of a register after an operation }
  87. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  88. { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
  89. procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  90. procedure g_maybe_tls_init(list : TAsmList); override;
  91. end;
  92. { tcgarm is shared between normal arm and thumb-2 }
  93. tcgarm = class(tbasecgarm)
  { integer operations; in this unit the non-checking variants forward to the
    *_checkoverflow methods with setflags=false }
  94. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  95. procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
  96. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  97. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  98. size: tcgsize; a: tcgint; src, dst: tregister); override;
  99. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  100. size: tcgsize; src1, src2, dst: tregister); override;
  101. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  102. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  { loads of constants and memory operands into integer registers }
  103. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  104. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  { subtracts ioffset from the self parameter of procdef }
  105. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  106. {Multiply two 32-bit registers into lo and hi 32-bit registers}
  107. procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  108. end;
  109. { normal arm cg; it only adds the set up and tear down of the register allocators }
  110. tarmcgarm = class(tcgarm)
  111. procedure init_register_allocators;override;
  112. procedure done_register_allocators;override;
  113. end;
  114. { 64 bit cg for all arm flavours; common ancestor which adds nothing to tcg64f32 itself }
  115. tbasecg64farm = class(tcg64f32)
  116. end;
  117. { tcg64farm is shared between normal arm and thumb-2 }
  118. tcg64farm = class(tbasecg64farm)
  { 64 bit integer operations on register pairs (tregister64) }
  119. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  120. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  121. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  122. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
  123. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  124. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  { moves between a 64 bit integer register pair and an mm register }
  125. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  126. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  127. end;
  { 64 bit counterpart of tarmcgarm; declares nothing beyond what tcg64farm provides }
  128. tarmcg64farm = class(tcg64farm)
  129. end;
  { tthumbcgarm derives directly from tbasecgarm (not from tcgarm, which is shared
    between normal arm and thumb-2), so it declares its own integer operations and loads }
  130. tthumbcgarm = class(tbasecgarm)
  131. procedure init_register_allocators;override;
  132. procedure done_register_allocators;override;
  133. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  134. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  135. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
  136. procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
  137. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
  138. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  139. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
  140. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  141. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  { replaces the virtual tbasecgarm.handle_load_store }
  142. function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  143. end;
  { 64 bit counterpart of tthumbcgarm; only the two in-place 64 bit operations are overridden here }
  144. tthumbcg64farm = class(tbasecg64farm)
  145. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  146. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  147. end;
  { thumb-2 cg: derives from tcgarm (the part shared with normal arm) and overrides
    the methods listed below }
  148. tthumb2cgarm = class(tcgarm)
  149. procedure init_register_allocators;override;
  150. procedure done_register_allocators;override;
  151. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  152. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  153. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  154. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  155. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  156. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  157. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  158. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  159. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  { replaces the virtual tbasecgarm.handle_load_store }
  160. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
  { mm register moves }
  161. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  162. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  163. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  164. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  165. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  166. end;
  { 64 bit counterpart of tthumb2cgarm; only a_op64_reg_reg differs from tcg64farm }
  167. tthumb2cg64farm = class(tcg64farm)
  168. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  169. end;
  170. const
  { lookup table giving the assembler condition code for every topcmp value;
    the first entry is C_NONE }
  171. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  172. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
  { NOTE(review): presumably the size in bytes of a stack page on Windows targets;
    its users are outside this part of the file -- confirm }
  173. winstackpagesize = 4096;
  174. function get_fpu_postfix(def : tdef) : toppostfix;
  175. procedure create_codegen;
  176. implementation
  177. uses
  178. globals,verbose,systems,cutils,
  179. aopt,aoptcpu,
  180. fmodule,
  181. symconst,symsym,symtable,
  182. tgobj,
  183. procinfo,cpupi,
  184. paramgr;
  185. { Range check must be disabled explicitly as conversions between signed and unsigned
  186. 32-bit values are done without explicit typecasts }
  187. {$R-}
  { Returns the opcode postfix that belongs to a floating point definition:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E
    Anything that is not a floatdef triggers internalerror 200401271, any
    other float type triggers internalerror 200401272. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        { only float definitions have an fpu postfix }
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators of the normal arm code generator.
    The order of the registers inside the sets passed to the allocators is
    significant and therefore kept exactly as it was. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we always save R14, so it is handed to the allocator in all cases }
      if target_info.system=system_arm_ios then
        { Darwin: r7 is never available because it always serves as frame
          pointer (backtrace support, gcc/clang do the same), so R11 can be
          used instead. r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure: it is free for allocation }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { R11 is (or may be) the frame pointer: leave it out }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { the FPA registers only get an allocator if the fpu type provides them }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, fpu and mm register allocators stored in rg[] and then
    calls the inherited clean up.
    NOTE(review): the fpu and mm allocators are only created for some fpu types
    (see init_register_allocators / init_mmregister_allocator); this relies on
    the unused rg[] entries being nil so that free is a no-op -- confirm }
  233. procedure tarmcgarm.done_register_allocators;
  234. begin
  235. rg[R_INTREGISTER].free;
  236. rg[R_FPUREGISTER].free;
  237. rg[R_MMREGISTER].free;
  238. inherited done_register_allocators;
  239. end;
  { Loads the constant a into the integer register reg.
    The cheapest possible sequence is chosen, tried in this order:
      1. a single mov       (a is a shifter constant)
      2. a single mvn       (not(a) is a shifter constant)
      3. mov followed by orr (a can be split into two shifter constants)
      4. mvn followed by bic (not(a) can be split into two shifter constants)
      5. a pc relative ldr from a 32 bit literal appended to the local data
         of the current procedure
    Only sizes up to 32 bit are accepted, otherwise internalerror 2002090907. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      shift : byte;
      literallabel : tasmlabel;
      literalref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      { one instruction: mov }
      if is_shifter_const(a,shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { one instruction: mvn }
      if is_shifter_const(not(a),shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { two instructions: mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { two instructions: mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: place the value behind a fresh label in the local data
        of the procedure and load it relative to the pc }
      current_asmdata.getjumplabel(literallabel);
      cg.a_label(current_procinfo.aktlocaldata,literallabel);
      reference_reset(literalref,4,[]);
      literalref.symbol:=literallabel;
      literalref.base:=NR_PC;
      { must be taken while the label is still the last entry of the local data }
      literalref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      list.concat(taicpu.op_reg_ref(A_LDR,reg,literalref));
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.
    - If fromsize is at least as large as tosize, the load is done with tosize.
    - Accesses whose reference alignment (1 or 2) is smaller than the access
      size, and halfword accesses on cpus without CPUARM_HAS_ALL_MEM, are
      assembled from several smaller loads which are combined with
      "orr ... lsl #n".
    - On cpus without CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned and
      sign extended afterwards.
    Any size other than 8, 16 or 32 bit results in internalerror 200308297. }
  277. procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  278. var
  279. oppostfix:toppostfix;
  280. usedtmpref: treference;
  281. tmpreg,tmpreg2 : tregister;
  282. so : tshifterop;
  283. dir : integer;
  284. begin
  { never load more than the destination size needs }
  285. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  286. FromSize := ToSize;
  287. case FromSize of
  288. { integer loads, signed and unsigned: select the load postfix from the source size }
  289. OS_8:
  290. oppostfix:=PF_B;
  291. OS_S8:
  292. oppostfix:=PF_SB;
  293. OS_16:
  294. oppostfix:=PF_H;
  295. OS_S16:
  296. oppostfix:=PF_SH;
  297. OS_32,
  298. OS_S32:
  299. oppostfix:=PF_None;
  300. else
  301. InternalError(200308297);
  302. end;
  { without CPUARM_HAS_ALL_MEM the signed byte is loaded as an unsigned byte;
    the sign extension is done at the end of this procedure }
  303. if (fromsize=OS_S8) and
  304. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  305. oppostfix:=PF_B;
  { slow path: insufficiently aligned reference, or a halfword access on a cpu
    without CPUARM_HAS_ALL_MEM }
  306. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
  307. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  308. (oppostfix in [PF_SH,PF_H])) then
  309. begin
  { dir is the offset step from the least significant part towards the most
    significant part: backwards on big endian, forwards otherwise }
  310. if target_info.endian=endian_big then
  311. dir:=-1
  312. else
  313. dir:=1;
  314. case FromSize of
  315. OS_16,OS_S16:
  316. begin
  317. { only complicated references need an extra loadaddr }
  318. if assigned(ref.symbol) or
  319. (ref.index<>NR_NO) or
  320. (ref.offset<-4095) or
  321. (ref.offset>4094) or
  322. { sometimes the compiler reused registers }
  323. (reg=ref.index) or
  324. (reg=ref.base) then
  325. begin
  326. tmpreg2:=getintregister(list,OS_INT);
  327. a_loadaddr_ref_reg(list,ref,tmpreg2);
  328. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  329. end
  330. else
  331. usedtmpref:=ref;
  { on big endian the least significant byte is the second one }
  332. if target_info.endian=endian_big then
  333. inc(usedtmpref.offset,1);
  334. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  335. tmpreg:=getintregister(list,OS_INT);
  { low byte goes directly into reg }
  336. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  337. inc(usedtmpref.offset,dir);
  { the high byte is loaded signed for OS_S16, unsigned for OS_16 }
  338. if FromSize=OS_16 then
  339. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  340. else
  341. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  342. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  343. end;
  344. OS_32,OS_S32:
  345. begin
  346. tmpreg:=getintregister(list,OS_INT);
  347. { only complicated references need an extra loadaddr }
  348. if assigned(ref.symbol) or
  349. (ref.index<>NR_NO) or
  350. (ref.offset<-4095) or
  351. (ref.offset>4092) or
  352. { sometimes the compiler reused registers }
  353. (reg=ref.index) or
  354. (reg=ref.base) then
  355. begin
  356. tmpreg2:=getintregister(list,OS_INT);
  357. a_loadaddr_ref_reg(list,ref,tmpreg2);
  358. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  359. end
  360. else
  361. usedtmpref:=ref;
  362. shifterop_reset(so);so.shiftmode:=SM_LSL;
  { alignment 2: two halfword loads, otherwise four byte loads }
  363. if ref.alignment=2 then
  364. begin
  365. if target_info.endian=endian_big then
  366. inc(usedtmpref.offset,2);
  367. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  368. inc(usedtmpref.offset,dir*2);
  369. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  370. so.shiftimm:=16;
  371. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  372. end
  373. else
  374. begin
  { note: tmpreg2 is allocated again here, a value obtained for the loadaddr
    above is not needed any more because usedtmpref already holds that register }
  375. tmpreg2:=getintregister(list,OS_INT);
  376. if target_info.endian=endian_big then
  377. inc(usedtmpref.offset,3);
  { bytes 0..2 (counted from the least significant one) go into reg, tmpreg and tmpreg2 }
  378. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  379. inc(usedtmpref.offset,dir);
  380. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  381. inc(usedtmpref.offset,dir);
  382. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  383. so.shiftimm:=8;
  384. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  { tmpreg has been merged, reuse it for the most significant byte }
  385. inc(usedtmpref.offset,dir);
  386. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  387. so.shiftimm:=16;
  388. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  389. so.shiftimm:=24;
  390. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  391. end;
  392. end
  393. else
  { byte sized accesses need no special treatment }
  394. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  395. end;
  396. end
  397. else
  { normal case: a single ldr with the postfix selected above }
  398. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { sign extend the byte that had to be loaded unsigned (see above); otherwise,
    for a signed byte loaded into a 16 bit destination, extend reg from 16 to 32 bit }
  399. if (fromsize=OS_S8) and
  400. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  401. a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
  402. else if (fromsize=OS_S8) and (tosize = OS_16) then
  403. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  404. end;
  { Subtracts ioffset from the self parameter of procdef.
    The caller side parameter locations of 'self' are walked one by one:
    register locations are adjusted in place, stack locations are adjusted in
    memory. If ioffset is not a shifter constant it is first loaded into R12.
    internalerror 2003052503 is raised if no 'self' parameter symbol exists,
    internalerror 2003091803 for a location that is neither a register nor a
    reference. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
      dummyshift : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(2003052503);
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              if is_shifter_const(ioffset,dummyshift) then
                a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
              else
                begin
                  a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                  a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,dummyshift) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,selfref);
                  end;
              end
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as parameter paraloc.
    paraloc must be a simple location. A register location receives the
    constant directly, a stack location is written through a reference built
    from the index register and offset of the location. Every other kind of
    location triggers internalerror 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackref: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      if paraloc.location^.loc in [LOC_REGISTER,LOC_CREGISTER] then
        a_load_const_reg(list,size,a,paraloc.location^.register)
      else if paraloc.location^.loc=LOC_REFERENCE then
        begin
          reference_reset(stackref,paraloc.alignment,[]);
          stackref.base:=paraloc.location^.reference.index;
          stackref.offset:=paraloc.location^.reference.offset;
          a_load_const_ref(list,size,a,stackref);
        end
      else
        internalerror(2002081101);
    end;
  { Copies (a part of) the value at ref to the stack location paralocref of a parameter.
    Doubles in softemu mode have a strange order of registers and references,
    so a 32 bit location belonging to an OS_F64 parameter is copied as a
    plain 4 byte block. Everything else is handled by the inherited method. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    var
      halfofdouble : boolean;
    begin
      halfofdouble:=(cgpara.size=OS_F64) and
                    (location^.size=OS_32);
      if halfofdouble then
        g_concatcopy(list,ref,paralocref,4)
      else
        inherited;
    end;
  { Creates the allocator for the mm registers according to the capabilities
    of the selected fpu type:
      32 registers with double support -> D0..D31, sub register type R_SUBFD
      32 registers without double      -> S0..S31, sub register type R_SUBFS
      otherwise, with the VFP extension -> D0..D15, sub register type R_SUBFD
    No allocator is created if none of these capabilities is present.

    The register allocator currently cannot deal with multiple
    non-overlapping subregs per register, so only half of the single
    precision registers can be used for now (as sub registers of the double
    precision ones). }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
                [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                 RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
                 RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                 RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                ],first_mm_imreg,[])
          else
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
                [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
                 RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,
                 RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
                 RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
                ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Passes the address of r as parameter paraloc.
    paraloc must be a simple location. For a register location the address is
    calculated directly into that register; for a stack location it is
    calculated into a temporary register which is then stored.
    Every other kind of location triggers internalerror 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      stackref: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      if paraloc.location^.loc in [LOC_REGISTER,LOC_CREGISTER] then
        a_loadaddr_ref_reg(list,r,paraloc.location^.register)
      else if paraloc.location^.loc=LOC_REFERENCE then
        begin
          { compute the address first ... }
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,r,addrreg);
          { ... then write it to the stack slot of the parameter }
          reference_reset(stackref,paraloc.alignment,[]);
          stackref.base:=paraloc.location^.reference.index;
          stackref.offset:=paraloc.location^.reference.offset;
          a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,stackref);
        end
      else
        internalerror(2002080701);
    end;
  { Emits a call to the routine with the assembler name s.
    weak selects a weak reference to the symbol. The reference is marked
    addr_pic if the target uses the GOT for PIC and PIC code is being
    created, addr_full otherwise. pi_do_call is recorded in the flags of the
    current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callref : treference;
      target : TAsmSymbol;
    begin
      if weak then
        target:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        target:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(callref,target,0,sizeof(pint),[]);
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_pic
      else
        callref.refaddr:=addr_full;
      { use always BL as newer binutils do not translate blx apparently;
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,callref));
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg.
    With CPUARM_HAS_BLX a single blx is generated, otherwise the pc is copied
    to r14 and reg is moved into the pc. pi_do_call is recorded in the flags
    of the current procedure. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          { no blx available: set up the return address by hand, then jump }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Performs "reg := reg op a" by forwarding to the three operand form with reg
    as both source and destination }
  579. procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
  580. begin
  581. a_op_const_reg_reg(list,op,size,a,reg,reg);
  582. end;
  { Applies "op a" to the memory operand ref: the value is loaded into a
    temporary register, the operation is carried out into a second temporary
    and the result is stored back. The store goes through the reference
    returned by the internal load instead of ref itself. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      loadedref : treference;
    begin
      { keep the allocation order: value register first, result register second }
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { Performs "dst := dst op src"; for the unary operations OP_NEG and OP_NOT
    "dst := op src".
      OP_NEG : rsb dst,src,#0 followed by maybeadjustresult
      OP_NOT : plain mvn for 32 bit sizes; for 8/16 bit sizes the operand is
               shifted to the top of the register while it is inverted and
               shifted back afterwards (arithmetically for signed sizes)
      others : forwarded to a_op_reg_reg_reg }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
            list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
          else
            begin
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              if size in [OS_8, OS_S8] then
                shiftop.shiftimm:=24
              else
                shiftop.shiftimm:=16;
              list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
              {Using a shift here allows this to be folded into another instruction}
              if size in [OS_S8, OS_S16] then
                shiftop.shiftmode:=SM_ASR
              else
                shiftop.shiftmode:=SM_LSR;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
            end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  { Lookup tables indexed by TOpCG; each has one entry per TOpCG value, in
    declaration order of that type. A_NONE marks operations without a directly
    corresponding opcode in the respective table.
    NOTE(review): the users of these tables are outside this part of the file;
    the table names suggest reg/reg operations, single register operations and
    thumb postfixes respectively -- confirm against the callers }
  627. const
  { the shift and rotate entries are A_NONE in this table ... }
  628. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  629. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  630. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
  { ... while this one contains A_ASR/A_LSL/A_LSR/A_ROR at those positions }
  631. op_reg_opcg2asmop: array[TOpCG] of tasmop =
  632. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  633. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { PF_S for the operations which get the flag setting postfix, PF_None otherwise }
  634. op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
  635. (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
  636. PF_None,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { Performs "dst := src op a" without overflow checking: forwards to the
    checkoverflow variant with setflags=false. ovloc is only a dummy needed
    for the var parameter; it is not initialised and not read afterwards. }
  637. procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  638. size: tcgsize; a: tcgint; src, dst: tregister);
  639. var
  640. ovloc : tlocation;
  641. begin
  642. a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,ovloc);
  643. end;
  { Three register operation without overflow checking: forwards to the
    checkoverflow variant with setflags=false. ovloc is only a dummy needed
    for the var parameter; it is not initialised and not read afterwards. }
  644. procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  645. size: tcgsize; src1, src2, dst: tregister);
  646. var
  647. ovloc : tlocation;
  648. begin
  649. a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,ovloc);
  650. end;
  { Translates a generic shift/rotate operation into the shifter operand mode
    that implements it. Any operation that is not a shift or rotate triggers
    internalerror(2012070501). }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        { a rotate left yields SM_ROR as well; the callers adjust the
          rotation amount to 32-n themselves }
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace the 32 bit multiplication dst := src * a by a short sequence
    of MOV/ADD/SUB/RSB instructions that use shifted operands.

    list  instruction list receiving the generated code
    a     constant factor
    src   register holding the other factor
    dst   register receiving the product

    Returns true if a replacement sequence was emitted. Returns false, without
    emitting anything, when no sequence fits into the effort budget (maxeffort)
    computed below. }
  662. function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  663. var
  664. multiplier : dword;
  665. power : longint;
  666. shifterop : tshifterop;
  667. bitsset : byte;
  668. negative : boolean;
  669. first, doshiftadd: boolean;
  670. b,
  671. cycles : byte;
  672. maxeffort : byte;
  673. leftmostbit,i,shiftvalue: DWord;
  674. begin
  675. result:=true;
  676. cycles:=0;
  677. negative:=a<0;
  678. shifterop.rs:=NR_NO;
  679. shifterop.shiftmode:=SM_LSL;
  { a negative factor costs one extra instruction: the final RSB dst,dst,#0 }
  680. if negative then
  681. inc(cycles);
  { from here on the code works on the magnitude of a }
  682. multiplier:=dword(abs(a));
  683. { heuristics to estimate how much instructions are reasonable to replace the mul,
  684. this is currently based on XScale timings }
  685. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  686. actual multiplication, this requires min. 1+4 cycles
  687. because the first shift imm. might cause a stall and because we need more instructions
  688. when replacing the mul we generate max. 3 instructions to replace this mul }
  689. maxeffort:=3;
  690. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  691. a ldr, so generating one more operation to replace this is beneficial }
  692. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  693. inc(maxeffort);
  694. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  695. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  696. dec(maxeffort);
  697. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  698. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  699. dec(maxeffort);
  700. { "symmetric" bit pattern like $10101010 where
  701. res:=a*$10101010 can be simplified into
  702. temp:=a*$1010
  703. res:=temp+temp shl 16
  704. }
  705. doshiftadd:=false;
  706. leftmostbit:=BsrDWord(multiplier);
  707. shiftvalue:=0;
  708. if (maxeffort>1) and (leftmostbit>2) then
  709. begin
  { take the first i for which the bits above position i equal the low i bits;
    the final "dst := dst + dst shl i" then costs one unit of the budget }
  710. for i:=2 to 31 do
  711. if (multiplier shr i)=(multiplier and ($ffffffff shr (32-i))) then
  712. begin
  713. doshiftadd:=true;
  714. shiftvalue:=i;
  715. dec(maxeffort);
  716. multiplier:=multiplier shr shiftvalue;
  717. break;
  718. end;
  719. end;
  { number of set bits, not counting bit 0 }
  720. bitsset:=popcnt(multiplier and $fffffffe);
  721. { most simple cases }
  722. if a=1 then
  723. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  724. else if a=0 then
  725. a_load_const_reg(list,OS_32,0,dst)
  726. else if a=-1 then
  727. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  728. { add up ?
  729. basically, one add is needed for each bit being set in the constant factor
  730. however, the least significant bit is for free, it can be hidden in the initial
  731. instruction
  732. }
  733. else if (bitsset+cycles<=maxeffort) and
  734. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  735. begin
  736. first:=true;
  { consume the set bits from the most significant one downwards }
  737. while multiplier<>0 do
  738. begin
  739. shifterop.shiftimm:=BsrDWord(multiplier);
  { bit 0 set: start with dst := src + src shl n, which covers bit 0 and the
    top bit at once; bit 0 is cleared here, so this can only hit in the first pass }
  740. if odd(multiplier) then
  741. begin
  742. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  743. dec(multiplier);
  744. end
  745. else
  746. if first then
  747. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  748. else
  749. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  750. first:=false;
  { clear the bit that has just been handled }
  751. dec(multiplier,1 shl shifterop.shiftimm);
  752. end;
  { finish the symmetric pattern: dst := dst + dst shl shiftvalue }
  753. if doshiftadd then
  754. begin
  755. shifterop.shiftimm:=shiftvalue;
  756. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
  757. end;
  { negative factor: dst := 0 - dst }
  758. if negative then
  759. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  760. end
  761. { subtract from the next greater power of two? }
  762. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  763. begin
  764. first:=true;
  765. while multiplier<>0 do
  766. begin
  { first pass: continue with the difference (1 shl power)-multiplier and start
    from src shl power; later passes subtract the remaining bits of that difference }
  767. if first then
  768. begin
  769. multiplier:=(1 shl power)-multiplier;
  770. shifterop.shiftimm:=power;
  771. end
  772. else
  773. shifterop.shiftimm:=BsrDWord(multiplier);
  { odd difference: dst := (src shl power) - src handles bit 0 together with
    the initial shift }
  774. if odd(multiplier) then
  775. begin
  776. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  777. dec(multiplier);
  778. end
  779. else
  780. if first then
  781. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  782. else
  783. begin
  784. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  785. dec(multiplier,1 shl shifterop.shiftimm);
  786. end;
  787. first:=false;
  788. end;
  789. if doshiftadd then
  790. begin
  791. shifterop.shiftimm:=shiftvalue;
  792. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
  793. end;
  794. if negative then
  795. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  796. end
  797. else
  { no sequence within the budget: the caller has to emit a real multiplication }
  798. result:=false;
  799. end;
  { Emits code for dst := src <op> a with a constant operand a, picking the
    first applicable encoding from a list of special cases.

    list      instruction list receiving the generated code
    op, a     operation and constant; both are first passed through optimize_op_const
    size      operand size
    setflags  request a flag-setting instruction (the field cgsetflags has the
              same effect)
    ovloc     set to LOC_VOID, except that a flag-setting ADD/SUB with an
              immediate operand and size OS_8/OS_16/OS_32 reports LOC_FLAGS with
              F_CS (ADD) or F_CC (SUB); the generic fallback at the end passes
              ovloc on to a_op_reg_reg_reg_checkoverflow

    OP_NONE and OP_MOVE return early and leave ovloc untouched. All other
    paths end in maybeadjustresult. }
  800. procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  801. var
  802. shift, lsb, width : byte;
  803. tmpreg : tregister;
  804. so : tshifterop;
  805. l1 : longint;
  806. imm1, imm2: DWord;
  807. begin
  808. optimize_op_const(size, op, a);
  809. case op of
  810. OP_NONE:
  811. begin
  812. if src <> dst then
  813. a_load_reg_reg(list, size, size, src, dst);
  814. exit;
  815. end;
  816. OP_MOVE:
  817. begin
  818. a_load_const_reg(list, size, a, dst);
  819. exit;
  820. end;
  821. else
  822. ;
  823. end;
  824. ovloc.loc:=LOC_VOID;
  { if -a can be encoded as immediate, turn ADD a into SUB -a and vice versa;
    skipped when setflags is requested and for a=-2147483648 }
  825. if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
  826. case op of
  827. OP_ADD:
  828. begin
  829. op:=OP_SUB;
  830. a:=aint(dword(-a));
  831. end;
  832. OP_SUB:
  833. begin
  834. op:=OP_ADD;
  835. a:=aint(dword(-a));
  836. end
  837. else
  838. ;
  839. end;
  { case 1: the constant fits an immediate operand (multiplications are handled
    further down) }
  840. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  841. case op of
  842. OP_NEG,OP_NOT:
  843. internalerror(200308281);
  844. OP_SHL,
  845. OP_SHR,
  846. OP_ROL,
  847. OP_ROR,
  848. OP_SAR:
  849. begin
  850. if a>32 then
  851. internalerror(200308294);
  852. shifterop_reset(so);
  853. so.shiftmode:=opshift2shiftmode(op);
  { a rotate left by a is emitted as a rotate right by 32-a }
  854. if op = OP_ROL then
  855. so.shiftimm:=32-a
  856. else
  857. so.shiftimm:=a;
  858. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  859. end;
  860. else
  861. {if (op in [OP_SUB, OP_ADD]) and
  862. ((a < 0) or
  863. (a > 4095)) then
  864. begin
  865. tmpreg:=getintregister(list,size);
  866. list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
  867. list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
  868. list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
  869. ));
  870. end
  871. else}
  872. begin
  873. if cgsetflags or setflags then
  874. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression yields PF_S when flags are requested and the postfix
    with ordinal 0 otherwise }
  875. list.concat(setoppostfix(
  876. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  877. end;
  { still part of the case-else branch: tell the caller which flag holds the
    overflow condition; only ADD and SUB are supported here }
  878. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  879. begin
  880. ovloc.loc:=LOC_FLAGS;
  881. case op of
  882. OP_ADD:
  883. ovloc.resflags:=F_CS;
  884. OP_SUB:
  885. ovloc.resflags:=F_CC;
  886. else
  887. internalerror(2019050922);
  888. end;
  889. end;
  890. end
  891. else
  892. begin
  893. { there could be added some more sophisticated optimizations }
  894. if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  895. a_op_reg_reg(list,OP_NEG,size,src,dst)
  896. { we do this here instead in the peephole optimizer because
  897. it saves us a register }
  898. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  899. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  900. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  901. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  902. begin
  903. if l1>32 then{roozbeh does this ever happen?}
  904. internalerror(200308296);
  905. shifterop_reset(so);
  906. so.shiftmode:=SM_LSL;
  907. so.shiftimm:=l1;
  908. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  909. end
  910. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  911. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  912. begin
  913. if l1>32 then{does this ever happen?}
  914. internalerror(201205181);
  915. shifterop_reset(so);
  916. so.shiftmode:=SM_LSL;
  917. so.shiftimm:=l1;
  918. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  919. end
  920. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  921. begin
  922. { nothing to do on success }
  923. end
  924. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  925. broader range of shifterconstants.}
  926. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  927. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  928. { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
  929. into the following instruction}
  930. else if (op = OP_AND) and
  931. is_continuous_mask(aword(a), lsb, width) and
  932. ((lsb = 0) or ((lsb + width) = 32)) then
  933. begin
  934. shifterop_reset(so);
  { masks of the low 16 or 8 bits map to UXTH/UXTB from ARMv6 on }
  935. if (width = 16) and
  936. (lsb = 0) and
  937. (current_settings.cputype >= cpu_armv6) then
  938. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  939. else if (width = 8) and
  940. (lsb = 0) and
  941. (current_settings.cputype >= cpu_armv6) then
  942. list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
  943. else if lsb = 0 then
  944. begin
  { mask anchored at bit 0: shift the unwanted upper bits out to the left and
    shift back with zero fill }
  945. so.shiftmode:=SM_LSL;
  946. so.shiftimm:=32-width;
  947. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  948. so.shiftmode:=SM_LSR;
  949. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  950. end
  951. else
  952. begin
  { mask reaching up to bit 31: shift the unwanted lower bits out to the right
    and shift back }
  953. so.shiftmode:=SM_LSR;
  954. so.shiftimm:=lsb;
  955. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  956. so.shiftmode:=SM_LSL;
  957. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  958. end;
  959. end
  { AND whose inverted mask splits into two immediates: clear the bits with two BICs }
  960. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  961. begin
  962. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
  963. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  964. end
  { apply the operation twice, once per half of the split constant; only done
    when no flags are requested }
  965. else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
  966. not(cgsetflags or setflags) and
  967. split_into_shifter_const(a, imm1, imm2) then
  968. begin
  969. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
  970. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  971. end
  972. else
  973. begin
  { generic fallback: load the constant into a temporary register and use the
    register form }
  974. tmpreg:=getintregister(list,size);
  975. a_load_const_reg(list,size,a,tmpreg);
  976. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  977. end;
  978. end;
  979. maybeadjustresult(list,op,size,dst);
  980. end;
  { Emits code computing dst := src2 <op> src1.

    list      instruction list receiving the generated code
    op        operation; OP_NEG, OP_NOT, OP_DIV and OP_IDIV are rejected with
              internalerror(200308283)
    size      operand size; OP_ROR and OP_ROL only accept OS_32/OS_S32
    setflags  request a flag-setting form (the field cgsetflags has the same effect)
    ovloc     set to LOC_VOID on entry; for OP_MUL/OP_IMUL with flags requested
              on a CPU providing UMULL it becomes LOC_FLAGS/F_NE, where "not
              equal" means that the product did not fit into 32 bits

    The result is finally passed to maybeadjustresult. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      mulrm,mulrs : tregister;
      asmop : tasmop;
      widemul : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { dst := src2 shifted by the amount held in src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            { with flags requested and a long multiply available, the upper
              half of the 64 bit product is used to detect overflow }
            widemul:=(cgsetflags or setflags) and
              (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]);
            overflowreg:=NR_NO;
            if widemul then
              overflowreg:=getintregister(list,size);
            { the arm doesn't allow that rd and rm are the same, so pick the
              operand order accordingly }
            if dst<>src2 then
              begin
                mulrm:=src2;
                mulrs:=src1;
              end
            else if dst<>src1 then
              begin
                mulrm:=src1;
                mulrs:=src2;
              end
            else
              begin
                { all three registers are identical: multiply a copy of the
                  source. The copy has to be made into tmpreg; copying into
                  dst would be a no-op and leave tmpreg undefined }
                tmpreg:=getintregister(list,size);
                a_load_reg_reg(list,size,size,src2,tmpreg);
                mulrm:=tmpreg;
                mulrs:=src1;
              end;
            if widemul then
              begin
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,mulrm,mulrs));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper half must equal the sign extension of
                      the lower half }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper half must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,mulrm,mulrs));
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { the postfix expression yields PF_S when flags are requested and
              the postfix with ordinal 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the product in the register pair
    dstlo/dsthi.

    With a long multiply available, UMULL (OS_32) or SMULL (OS_S32) is emitted;
    any other size triggers InternalError(2014060802). Without it, only the
    lower half can be produced: a plain MUL is emitted when dsthi is NR_NO,
    otherwise internalerror(2015083022) is raised.

    The caller may pass NR_NO for a half it is not interested in; a scratch
    register is then allocated for it. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      wideop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          { no 32x32=64 bit multiplication on this cpu }
          if dsthi<>NR_NO then
            internalerror(2015083022);
          if dstlo=NR_NO then
            dstlo:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_S32:
          wideop:=A_SMULL;
        OS_32:
          wideop:=A_UMULL;
        else
          InternalError(2014060802);
      end;
      { the instruction needs valid registers for both halves, even for a half
        the caller omitted. For dsthi = NR_NO a 32x32=32 bit multiplication
        would be a possible alternative. }
      if dstlo=NR_NO then
        dstlo:=getintregister(list,size);
      if dsthi=NR_NO then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(wideop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load/store instruction op (with postfix oppostfix) that transfers
    register reg from/to the memory reference ref. Before that, the local copy
    of ref is rewritten until the instruction can encode it.

    list       instruction list receiving the generated code
    op         load/store opcode
    oppostfix  size/sign postfix; it also decides which offset limits apply
    reg        register to load or store
    ref        memory reference, passed by value and modified locally

    Returns the reference that was finally used by the instruction, so that a
    caller can address neighbouring memory relative to it. }
  1115. function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  1116. var
  1117. tmpreg1,tmpreg2 : tregister;
  1118. begin
  { tmpreg1 remembers the last temporary register introduced below; NR_NO means none yet }
  1119. tmpreg1:=NR_NO;
  1120. { Be sure to have a base register }
  1121. if (ref.base=NR_NO) then
  1122. begin
  1123. if ref.shiftmode<>SM_None then
  1124. internalerror(2014020707);
  1125. ref.base:=ref.index;
  1126. ref.index:=NR_NO;
  1127. end;
  1128. { absolute symbols can't be handled directly, we've to store the symbol reference
  1129. in the text segment and access it pc relative
  1130. For now, we assume that references where base or index equals to PC are already
  1131. relative, all other references are assumed to be absolute and thus they need
  1132. to be handled extra.
  1133. A proper solution would be to change refoptions to a set and store the information
  1134. if the symbol is absolute or relative there.
  1135. }
  { the reference is passed to fixref if it has a non pc-relative symbol, has no
    register at all, or carries an offset outside the limits tested here for
    this opcode/postfix (separate limits for the float/vfp opcodes and for
    Thumb code) }
  1136. if (assigned(ref.symbol) and
  1137. not(is_pc(ref.base)) and
  1138. not(is_pc(ref.index))
  1139. ) or
  1140. { [#xxx] isn't a valid address operand }
  1141. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  1142. (ref.offset<-4095) or
  1143. (ref.offset>4095) or
  1144. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  1145. ((ref.offset<-255) or
  1146. (ref.offset>255)
  1147. )
  1148. ) or
  1149. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1150. ((ref.offset<-1020) or
  1151. (ref.offset>1020) or
  1152. ((abs(ref.offset) mod 4)<>0)
  1153. )
  1154. ) or
  1155. ((GenerateThumbCode) and
  1156. (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
  1157. ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
  1158. ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
  1159. ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
  1160. ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
  1161. )
  1162. ) then
  1163. begin
  1164. fixref(list,ref);
  1165. end;
  1166. if GenerateThumbCode then
  1167. begin
  1168. { certain thumb load require base and index }
  1169. if (oppostfix in [PF_SB,PF_SH]) and
  1170. (ref.base<>NR_NO) and (ref.index=NR_NO) then
  1171. begin
  { supply an index register holding 0 }
  1172. tmpreg1:=getintregister(list,OS_ADDR);
  1173. a_load_const_reg(list,OS_ADDR,0,tmpreg1);
  1174. ref.index:=tmpreg1;
  1175. end;
  1176. { "hi" registers cannot be used as base or index }
  1177. if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
  1178. ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
  1179. begin
  1180. tmpreg1:=getintregister(list,OS_ADDR);
  1181. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
  1182. ref.base:=tmpreg1;
  1183. end;
  1184. if getsupreg(ref.index) in [RS_R8..RS_R14] then
  1185. begin
  1186. tmpreg1:=getintregister(list,OS_ADDR);
  1187. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
  1188. ref.index:=tmpreg1;
  1189. end;
  1190. end;
  1191. { fold if there is base, index and offset, however, don't fold
  1192. for vfp memory instructions because we later fold the index }
  1193. if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1194. (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  1195. begin
  { NOTE(review): in the first branch the sum is written to tmpreg2 and only
    tmpreg1 is updated; ref.base/ref.index are not redirected to tmpreg2 although
    ref.offset is cleared afterwards - verify that the offset is not lost here }
  1196. if tmpreg1<>NR_NO then
  1197. begin
  1198. tmpreg2:=getintregister(list,OS_ADDR);
  1199. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
  1200. tmpreg1:=tmpreg2;
  1201. end
  1202. else
  1203. begin
  1204. tmpreg1:=getintregister(list,OS_ADDR);
  1205. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
  1206. ref.base:=tmpreg1;
  1207. end;
  1208. ref.offset:=0;
  1209. end;
  1210. { floating point operations have only limited references
  1211. we expect here, that a base is already set }
  1212. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  1213. begin
  1214. if ref.shiftmode<>SM_none then
  1215. internalerror(200309121);
  { fold the index into the base register, reusing tmpreg1 if a temporary
    already exists; a negative signindex turns the addition into a subtraction }
  1216. if tmpreg1<>NR_NO then
  1217. begin
  1218. if ref.base=tmpreg1 then
  1219. begin
  1220. if ref.signindex<0 then
  1221. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
  1222. else
  1223. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
  1224. ref.index:=NR_NO;
  1225. end
  1226. else
  1227. begin
  1228. if ref.index<>tmpreg1 then
  1229. internalerror(200403161);
  1230. if ref.signindex<0 then
  1231. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
  1232. else
  1233. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
  1234. ref.base:=tmpreg1;
  1235. ref.index:=NR_NO;
  1236. end;
  1237. end
  1238. else
  1239. begin
  { NOTE(review): unlike the two branches above, this one always adds and does
    not look at ref.signindex - confirm that a negative index cannot reach it }
  1240. tmpreg1:=getintregister(list,OS_ADDR);
  1241. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
  1242. ref.base:=tmpreg1;
  1243. ref.index:=NR_NO;
  1244. end;
  1245. end;
  { finally emit the instruction itself and hand the reference used back to the caller }
  1246. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  1247. Result := ref;
  1248. end;
  { Stores register reg into the memory reference ref using tosize as the
    access size.

    A single STR is emitted unless the access has to be split, which happens
    if the reference is aligned to 1 or 2 bytes only and that is less than the
    access size, or if a halfword store is requested on a cpu without
    CPUARM_HAS_ALL_MEM. Split 16 and 32 bit stores are written piecewise
    (bytes, or halfwords for a 2 byte aligned 32 bit value), honouring the
    target endianness. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step,i : integer;
      bigendian,split : boolean;
    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;
      case ToSize of
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          postfix:=PF_None;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_8,
        OS_S8:
          postfix:=PF_B;
        else
          InternalError(2003082912);
      end;

      split:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((postfix=PF_H) and
         not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]));
      { everything that need not or cannot be split is a single store }
      if not(split and (FromSize in [OS_16,OS_S16,OS_32,OS_S32])) then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { the least significant part is written first; on big endian targets it
        lives at the highest address, so the address then walks downwards }
      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;
      shifted:=getintregister(list,OS_INT);
      partref:=ref;

      if FromSize in [OS_16,OS_S16] then
        begin
          { two single bytes }
          if bigendian then
            inc(partref.offset,1);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          inc(partref.offset,step);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
        end
      else if ref.alignment=2 then
        begin
          { 32 bit value as two halfwords }
          if bigendian then
            inc(partref.offset,2);
          partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
          inc(partref.offset,step*2);
          a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
        end
      else
        begin
          { 32 bit value as four single bytes }
          if bigendian then
            inc(partref.offset,3);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          for i:=1 to 2 do
            begin
              a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
              inc(partref.offset,step);
              a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
            end;
        end;
    end;
  { Stores reg to ref with the access size given by tosize and returns the
    reference handle_load_store reported for the first store emitted.

    On a cpu without CPUARM_HAS_ALL_MEM a 16 bit store is emitted as two byte
    stores: the low byte at the returned reference and reg shr 8 one byte
    behind it. Sizes other than 8/16/32 bit integers trigger
    InternalError(2003082910). }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      hibyteref : treference;
      hibytereg : TRegister;
    begin
      case ToSize of
        OS_32,
        OS_S32:
          postfix:=PF_None;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_8,
        OS_S8:
          postfix:=PF_B;
        else
          InternalError(2003082910);
      end;

      { the common case: one store does the job }
      if not(tosize in [OS_S16,OS_16]) or
         (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        begin
          result:=handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { halfword store not available: write the two bytes separately }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      hibytereg:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibytereg);
      hibyteref:=result;
      inc(hibyteref.offset);
      handle_load_store(list,A_STR,PF_B,hibytereg,hibyteref);
    end;
  { Loads reg from Ref and returns the reference handle_load_store reported
    for the first load emitted. The load postfix follows fromsize; sizes
    other than 8/16/32 bit integers trigger InternalError(200308291).

    On a cpu without CPUARM_HAS_ALL_MEM two cases are handled by hand:
    a load to OS_S8 becomes a byte load followed by a register conversion
    from OS_S8 to OS_32, and a load to OS_16/OS_S16 becomes two byte loads
    combined with ORR reg,reg,tmp lsl 8. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shiftop : tshifterop;
      hibytereg : TRegister;
      hibyteref : treference;
    begin
      case FromSize of
        OS_32,
        OS_S32:
          postfix:=PF_None;
        OS_S16:
          postfix:=PF_SH;
        OS_16:
          postfix:=PF_H;
        OS_S8:
          postfix:=PF_SB;
        OS_8:
          postfix:=PF_B;
        else
          InternalError(200308291);
      end;

      { the common case: one load does the job }
      if (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) or
         not(tosize in [OS_S8,OS_S16,OS_16]) then
        begin
          result:=handle_load_store(list,A_LDR,postfix,reg,ref);
          exit;
        end;

      { both special cases start with an unsigned byte load }
      result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
      if tosize=OS_S8 then
        begin
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
          exit;
        end;

      { 16 bit: fetch the byte behind the first one and merge it in as bits 8..15 }
      hibytereg:=getintregister(list,OS_INT);
      hibyteref:=result;
      inc(hibyteref.offset);
      handle_load_store(list,A_LDR,PF_B,hibytereg,hibyteref);
      shifterop_reset(shiftop);
      shiftop.shiftmode:=SM_LSL;
      shiftop.shiftimm:=8;
      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibytereg,shiftop));
    end;
  { Moves reg1 to reg2, converting between integer sizes where required.

    If the sizes differ, the effective source size is the smaller of the two
    and the value is zero or sign extended accordingly: with shifts (or an
    AND for OS_8) before ARMv6, with UXTB/SXTB/UXTH/SXTH/SXTB16 from ARMv6 on.
    If no conversion instruction was emitted and the registers differ, a plain
    MOV is generated and reported to the register allocator.
    Invalid sizes trigger internalerror(2002090901). }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { reg2 := reg shifted by the immediate shiftimm; Thumb code goes through
      a_op_const_reg_reg, otherwise a MOV with shifter operand is emitted }
    procedure emit_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
            exit;
          end;
        case shiftmode of
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    var
      mov: taicpu;
      plain_move,pre_v6: boolean;
    begin
      if (fromsize=OS_NO) or (tosize=OS_NO) or
         (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) then
        internalerror(2002090901);

      plain_move:=true;
      if fromsize<>tosize then
        begin
          shifterop_reset(so);
          plain_move:=false;
          { the narrower of both sizes decides how to extend }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          pre_v6:=current_settings.cputype<cpu_armv6;
          case fromsize of
            OS_8:
              begin
                if not GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff))
                else if pre_v6 then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1));
              end;
            OS_S8:
              begin
                if pre_v6 then
                  begin
                    emit_shift(SM_LSL,24,reg1);
                    if tosize=OS_16 then
                      begin
                        emit_shift(SM_ASR,8,reg2);
                        emit_shift(SM_LSR,16,reg2);
                      end
                    else
                      emit_shift(SM_ASR,24,reg2);
                  end
                else if tosize=OS_16 then
                  begin
                    so.shiftmode:=SM_ROR;
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                    emit_shift(SM_LSR,16,reg2);
                  end
                else
                  list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
              end;
            OS_16:
              begin
                if pre_v6 then
                  begin
                    emit_shift(SM_LSL,16,reg1);
                    emit_shift(SM_LSR,16,reg2);
                  end
                else
                  list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              end;
            OS_S16:
              begin
                if pre_v6 then
                  begin
                    emit_shift(SM_LSL,16,reg1);
                    emit_shift(SM_ASR,16,reg2);
                  end
                else
                  list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              end;
            else
              { nothing to extend, a plain move suffices }
              plain_move:=true;
          end;
        end;

      if plain_move and (reg1<>reg2) then
        begin
          { only a register mov required }
          mov:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(mov);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(mov);
        end;
    end;
  { Loads the floating point value stored at ref into the parameter described
    by paraloc, walking through all locations of the parameter.

    list     instruction list receiving the generated code
    size     size of the floating point value
    ref      memory reference of the value
    paraloc  parameter to fill; each location is served according to its
             kind (fpu register, integer register or memory)

    href starts as a copy of ref and is advanced by the size of each location
    handled, so every location reads its own part of the value. Location
    kinds other than the ones handled trigger internalerror(200408241). }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { NOTE(review): allocparaloc is always called with the first
                  location (paraloc.location), not with hloc - confirm this
                  is intended for parameters with several locations }
                paramanager.allocparaloc(list,paraloc.location);
                { read through href like all other branches do; the
                  unadvanced ref would re-read the start of the value for
                  every location after the first one }
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue behind the part consumed by this location }
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Copies the fpu register reg1 to reg2 with an MVF instruction whose
    postfix is looked up from tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads the fpu register reg from ref with an LDF instruction whose
    postfix is selected by fromsize; if tosize differs, the loaded value is
    additionally converted in place via a_loadfpu_reg_reg.
    Unsupported source sizes trigger InternalError(200309021). }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      postfix : toppostfix;
    begin
      case fromsize of
        OS_F80:
          postfix:=PF_E;
        OS_64,
        OS_F64:
          postfix:=PF_D;
        OS_32,
        OS_F32:
          postfix:=PF_S;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,postfix,reg,ref);
      if tosize=fromsize then
        exit;
      a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  1580. procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  1581. var
  1582. oppostfix:toppostfix;
  1583. begin
  1584. case tosize of
  1585. OS_F32:
  1586. oppostfix:=PF_S;
  1587. OS_F64:
  1588. oppostfix:=PF_D;
  1589. OS_F80:
  1590. oppostfix:=PF_E;
  1591. else
  1592. InternalError(200309022);
  1593. end;
  1594. handle_load_store(list,A_STF,oppostfix,reg,ref);
  1595. end;
  1596. procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
  1597. var
  1598. r : TRegister;
  1599. ai: taicpu;
  1600. l: TAsmLabel;
  1601. begin
  1602. if ((cs_check_fpu_exceptions in current_settings.localswitches) and
  1603. not(FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype]) and
  1604. (force or current_procinfo.FPUExceptionCheckNeeded)) then
  1605. begin
  1606. r:=getintregister(list,OS_INT);
  1607. list.concat(taicpu.op_reg_reg(A_FMRX,r,NR_FPSCR));
  1608. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,r,r,$9f),PF_S));
  1609. current_asmdata.getjumplabel(l);
  1610. ai:=taicpu.op_sym(A_B,l);
  1611. ai.is_jmp:=true;
  1612. ai.condition:=C_EQ;
  1613. list.concat(ai);
  1614. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  1615. cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
  1616. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  1617. a_label(list,l);
  1618. if clear then
  1619. current_procinfo.FPUExceptionCheckNeeded:=false;
  1620. end;
  1621. end;
  1622. { comparison operations }
  1623. procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  1624. l : tasmlabel);
  1625. var
  1626. tmpreg : tregister;
  1627. b : byte;
  1628. begin
  1629. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1630. if (not(GenerateThumbCode) and is_shifter_const(a,b)) or
  1631. ((GenerateThumbCode) and is_thumb_imm(a)) then
  1632. list.concat(taicpu.op_reg_const(A_CMP,reg,a))
  1633. { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
  1634. and CMP reg,$7fffffff regarding the flags according to the ARM manual }
  1635. else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,b) then
  1636. list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
  1637. else
  1638. begin
  1639. tmpreg:=getintregister(list,size);
  1640. a_load_const_reg(list,size,a,tmpreg);
  1641. list.concat(taicpu.op_reg_reg(A_CMP,reg,tmpreg));
  1642. end;
  1643. a_jmp_cond(list,cmp_op,l);
  1644. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  1645. end;
  1646. procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
  1647. begin
  1648. if reverse then
  1649. begin
  1650. list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
  1651. list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
  1652. list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
  1653. end
  1654. { it is decided during the compilation of the system unit if this code is used or not
  1655. so no additional check for rbit is needed }
  1656. else
  1657. begin
  1658. list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
  1659. list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
  1660. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1661. list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
  1662. if GenerateThumb2Code then
  1663. list.Concat(taicpu.op_cond(A_IT, C_EQ));
  1664. list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
  1665. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  1666. end;
  1667. end;
  1668. procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
  1669. begin
  1670. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1671. list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
  1672. a_jmp_cond(list,cmp_op,l);
  1673. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  1674. end;
  1675. procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
  1676. var
  1677. ai : taicpu;
  1678. begin
  1679. { generate far jump, leave it to the optimizer to get rid of it }
  1680. if GenerateThumbCode then
  1681. ai:=taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s,AT_FUNCTION))
  1682. else
  1683. ai:=taicpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
  1684. ai.is_jmp:=true;
  1685. list.concat(ai);
  1686. end;
  1687. procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
  1688. var
  1689. ai : taicpu;
  1690. begin
  1691. { generate far jump, leave it to the optimizer to get rid of it }
  1692. if GenerateThumbCode then
  1693. ai:=taicpu.op_sym(A_BL,l)
  1694. else
  1695. ai:=taicpu.op_sym(A_B,l);
  1696. ai.is_jmp:=true;
  1697. list.concat(ai);
  1698. end;
  1699. procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
  1700. var
  1701. ai : taicpu;
  1702. inv_flags : TResFlags;
  1703. hlabel : TAsmLabel;
  1704. begin
  1705. if GenerateThumbCode then
  1706. begin
  1707. inv_flags:=f;
  1708. inverse_flags(inv_flags);
  1709. { the optimizer has to fix this if jump range is sufficient short }
  1710. current_asmdata.getjumplabel(hlabel);
  1711. ai:=setcondition(taicpu.op_sym(A_B,hlabel),flags_to_cond(inv_flags));
  1712. ai.is_jmp:=true;
  1713. list.concat(ai);
  1714. a_jmp_always(list,l);
  1715. a_label(list,hlabel);
  1716. end
  1717. else
  1718. begin
  1719. ai:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
  1720. ai.is_jmp:=true;
  1721. list.concat(ai);
  1722. end;
  1723. end;
  1724. procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
  1725. begin
  1726. list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f)));
  1727. list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f))));
  1728. end;
  1729. procedure tbasecgarm.g_profilecode(list : TAsmList);
  1730. begin
  1731. if target_info.system = system_arm_linux then
  1732. begin
  1733. list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
  1734. a_call_name(list,'__gnu_mcount_nc',false);
  1735. end
  1736. else
  1737. internalerror(2014091201);
  1738. end;
    { Emits the entry code (prologue) of the current procedure into list.

      list         : assembler list the instructions are appended to
      localsize    : requested size of the local area; rounded up to a
                     multiple of 4 here and adjusted further below
      nostackframe : if true, no instructions are emitted at all

      With a stack frame the following is generated, in this order:
        1. the callee saved integer registers are stored with STM (FD
           postfix); the non-Darwin and the Darwin layout differ
        2. the frame pointer is set up if one is used
        3. the stack pointer is lowered by the size of the local area
        4. the callee saved FPA or VFP registers are stored at
           floatregstart

      The number of bytes pushed by the STM instructions is tracked in
      registerarea. A register which is only pushed to keep the stack
      aligned is recorded in tcpuprocinfo.stackpaddingreg so g_proc_exit can
      pop it again. }
    procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
      var
         ref : treference;
         shift : byte;
         firstfloatreg,lastfloatreg,
         r : byte;
         mmregs,
         regs, saveregs : tcpuregisterset;
         registerarea, offset,
         r7offset,
         stackmisalignment : pint;
         imm1, imm2: DWord;
         stack_parameters : Boolean;
      begin
        LocalSize:=align(LocalSize,4);
        stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
        { call instruction does not put anything on the stack }
        registerarea:=0;
        { High(TSuperRegister) means: no padding register in use }
        tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
        lastfloatreg:=RS_NO;
        if not(nostackframe) then
          begin
            { determine which floating point registers have to be saved:
              firstfloatreg/lastfloatreg describe the FPA range, mmregs the
              set of VFP registers }
            firstfloatreg:=RS_NO;
            mmregs:=[];
            case current_settings.fputype of
              fpu_none,
              fpu_soft,
              fpu_libgcc:
                ;
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                begin
                  { save floating point registers? }
                  regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                  for r:=RS_F0 to RS_F7 do
                    if r in regs then
                      begin
                        if firstfloatreg=RS_NO then
                          firstfloatreg:=r;
                        lastfloatreg:=r;
                        { 12 bytes are accounted for each FPA register }
                        inc(registerarea,12);
                      end;
                end;
              else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
                begin;
                  { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                    they have numbers>$1f which is not really correct as they should simply have the same numbers
                    as the even ones by with a different subtype as it is done on x86 with al/ah }
                  mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
                end
              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                begin;
                  { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
                    they have numbers>$1f which is not really correct as they should simply have the same numbers
                    as the even ones by with a different subtype as it is done on x86 with al/ah }
                  mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
                end
              else
                internalerror(2019050924);
            end;
            a_reg_alloc(list,NR_STACK_POINTER_REG);
            if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
              a_reg_alloc(list,NR_FRAME_POINTER_REG);
            { save int registers }
            { ref describes a pre-indexed access through the stack pointer,
              it is used by all STM instructions below }
            reference_reset(ref,4,[]);
            ref.index:=NR_STACK_POINTER_REG;
            ref.addressmode:=AM_PREINDEXED;
            regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
            if not(target_info.system in systems_darwin) then
              begin
                a_reg_alloc(list,NR_STACK_POINTER_REG);
                if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                  begin
                    { keep a copy of the stack pointer on entry in R12 }
                    a_reg_alloc(list,NR_R12);
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
                  end;
                { the (old) ARM APCS requires saving both the stack pointer (to
                  crawl the stack) and the PC (to identify the function this
                  stack frame belongs to) -> also save R12 (= copy of R13 on entry)
                  and R15 -- still needs updating for EABI and Darwin, they don't
                  need that }
                if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                  regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
                else
                  if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                    include(regs,RS_R14);
                if regs<>[] then
                  begin
                    { 4 bytes for every integer register which gets pushed }
                    for r:=RS_R0 to RS_R15 do
                      if r in regs then
                        inc(registerarea,4);
                    { if the stack is not 8 byte aligned, try to add an extra register,
                      so we can avoid the extra sub/add ...,#4 later (KB) }
                    if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
                      for r:=RS_R3 downto RS_R0 do
                        if not(r in regs) then
                          begin
                            regs:=regs+[r];
                            inc(registerarea,4);
                            { remember the register so g_proc_exit pops it as well }
                            tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
                            break;
                          end;
                    list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                    current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
                  end;
                if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                  begin
                    { describe the position of every pushed register for the
                      call frame information, starting at offset -4 and going
                      down by 4 per register from R15 to R0 }
                    offset:=-4;
                    for r:=RS_R15 downto RS_R0 do
                      if r in regs then
                        begin
                          current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),offset);
                          dec(offset,4);
                        end;
                    { the framepointer now points to the saved R15, so the saved
                      framepointer is at R11-12 (for get_caller_frame) }
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                    a_reg_dealloc(list,NR_R12);
                    current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
                    current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
                  end;
              end
            else
              begin
                { always save r14 if we use r7 as the framepointer, because
                  the parameter offsets are hardcoded in advance and always
                  assume that r14 sits on the stack right behind the saved r7
                }
                if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
                  include(regs,RS_FRAME_POINTER_REG);
                if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                  include(regs,RS_R14);
                if regs<>[] then
                  begin
                    { on Darwin, you first have to save [r4-r7,lr], and then
                      [r8,r10,r11] and make r7 point to the previously saved
                      r7 so that you can perform a stack crawl based on it
                      ([r7] is previous stack frame, [r7+4] is return address
                    }
                    include(regs,RS_FRAME_POINTER_REG);
                    saveregs:=regs-[RS_R8,RS_R10,RS_R11];
                    { r7offset counts the bytes occupied by the registers with
                      a number below the frame pointer register, i.e. the
                      distance from the new stack pointer to the saved r7 }
                    r7offset:=0;
                    for r:=RS_R0 to RS_R15 do
                      if r in saveregs then
                        begin
                          inc(registerarea,4);
                          if r<RS_FRAME_POINTER_REG then
                            inc(r7offset,4);
                        end;
                    { save the registers }
                    list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                    { make r7 point to the saved r7 (regardless of whether this
                      frame uses the framepointer, for backtrace purposes) }
                    if r7offset<>0 then
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                    else
                      list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                    { now save the rest (if any) }
                    saveregs:=regs-saveregs;
                    if saveregs<>[] then
                      begin
                        for r:=RS_R8 to RS_R11 do
                          if r in saveregs then
                            inc(registerarea,4);
                        list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                      end;
                  end;
              end;
            { allocate the local area; the stack pointer is also adjusted
              without locals if the pushed registers leave the stack
              misaligned and the procedure does calls or is an assembler
              procedure }
            stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
            if (LocalSize<>0) or
               ((stackmisalignment<>0) and
                ((pi_do_call in current_procinfo.flags) or
                 (po_assembler in current_procinfo.procdef.procoptions))) then
              begin
                { enlarge the local area so that register area plus local
                  area is a multiple of the maximum local alignment }
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
                  begin
                    { the frame size was fixed in advance: the real size must
                      not exceed it, the local area is whatever the pushed
                      registers leave over }
                    if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                      internalerror(2014030901)
                    else
                      localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                  end;
                { subtract the size with one instruction, with two
                  instructions, or through R12 as temporary register }
                if is_shifter_const(localsize,shift) then
                  begin
                    a_reg_dealloc(list,NR_R12);
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                  end
                else if split_into_shifter_const(localsize, imm1, imm2) then
                  begin
                    a_reg_dealloc(list,NR_R12);
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                  end
                else
                  begin
                    if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                      a_reg_alloc(list,NR_R12);
                    a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                    a_reg_dealloc(list,NR_R12);
                  end;
                if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
              end;
            { store the floating point registers at floatregstart }
            if (mmregs<>[]) or
               (firstfloatreg<>RS_NO) then
              begin
                reference_reset(ref,4,[]);
                { for VFP, or if the offset is too large, the address of the
                  save area is calculated in R12; otherwise it is addressed
                  as frame pointer plus offset }
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                   (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                  begin
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                      begin
                        a_reg_alloc(list,NR_R12);
                        a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                        list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                        a_reg_dealloc(list,NR_R12);
                      end
                    else
                      list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                    ref.base:=NR_R12;
                  end
                else
                  begin
                    ref.base:=current_procinfo.framepointer;
                    ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                  end;
                case current_settings.fputype of
                  fpu_fpa,
                  fpu_fpa10,
                  fpu_fpa11:
                    begin
                      { SFM stores lastfloatreg-firstfloatreg+1 registers
                        starting with firstfloatreg }
                      list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                        lastfloatreg-firstfloatreg+1,ref));
                    end;
                  else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                    begin
                      { VSTM gets the address register in the index field of
                        the reference }
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                    end
                  else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                    begin
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                    end
                  else
                    internalerror(2019050923);
                end;
              end;
          end;
      end;
    { Emits the exit code (epilogue) of the current procedure into list.

      list         : assembler list the instructions are appended to
      parasize     : not evaluated here
      nostackframe : if true, only the return instruction is emitted
                     (BX R14 if the CPU supports BX, MOV PC,R14 otherwise)

      With a stack frame the following is generated, in this order:
        1. the callee saved FPA or VFP registers are reloaded from
           floatregstart
        2. the set of integer registers to pop is determined, including
           the padding register recorded by g_proc_entry
        3. without frame pointer (and always on Darwin) the local area is
           released by adding its size to the stack pointer and the
           registers are popped with LDM (FD postfix); with a frame pointer
           on other targets a single LDM (EA postfix) relative to the frame
           pointer restores the registers
      Whenever registers are popped, R15 is part of the set instead of R14,
      so the LDM performs the return as well. }
    procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
      var
         ref : treference;
         LocalSize : longint;
         firstfloatreg,lastfloatreg,
         r,
         shift : byte;
         mmregs,
         saveregs,
         regs : tcpuregisterset;
         registerarea,
         stackmisalignment: pint;
         paddingreg: TSuperRegister;
         imm1, imm2: DWord;
      begin
        if not(nostackframe) then
          begin
            registerarea:=0;
            firstfloatreg:=RS_NO;
            lastfloatreg:=RS_NO;
            mmregs:=[];
            saveregs:=[];
            { determine which floating point registers have to be restored:
              firstfloatreg/lastfloatreg describe the FPA range, mmregs the
              set of VFP registers }
            case current_settings.fputype of
              fpu_none,
              fpu_soft,
              fpu_libgcc:
                ;
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                begin
                  { restore floating point registers? }
                  regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                  for r:=RS_F0 to RS_F7 do
                    if r in regs then
                      begin
                        if firstfloatreg=RS_NO then
                          firstfloatreg:=r;
                        lastfloatreg:=r;
                        { floating point register space is already included in
                          localsize below by calc_stackframe_size
                          inc(registerarea,12);
                        }
                      end;
                end;
              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                begin;
                  { restore vfp registers? }
                  { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                    they have numbers>$1f which is not really correct as they should simply have the same numbers
                    as the even ones by with a different subtype as it is done on x86 with al/ah }
                  mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
                end
              else
                internalerror(2019050908);
            end;
            { reload the floating point registers from floatregstart }
            if (firstfloatreg<>RS_NO) or
               (mmregs<>[]) then
              begin
                reference_reset(ref,4,[]);
                { for VFP, or if the offset is too large, the address of the
                  save area is calculated in R12; otherwise it is addressed
                  as frame pointer plus offset }
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                   (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                  begin
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                      begin
                        a_reg_alloc(list,NR_R12);
                        a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                        list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                        a_reg_dealloc(list,NR_R12);
                      end
                    else
                      list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                    ref.base:=NR_R12;
                  end
                else
                  begin
                    ref.base:=current_procinfo.framepointer;
                    ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                  end;
                case current_settings.fputype of
                  fpu_fpa,
                  fpu_fpa10,
                  fpu_fpa11:
                    begin
                      { LFM loads lastfloatreg-firstfloatreg+1 registers
                        starting with firstfloatreg }
                      list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                        lastfloatreg-firstfloatreg+1,ref));
                    end;
                  else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                    begin
                      { VLDM gets the address register in the index field of
                        the reference }
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                    end
                  else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                    begin
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                    end
                  else
                    internalerror(2019050921);
                end;
              end;
            { determine the integer registers to pop }
            regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
            if (pi_do_call in current_procinfo.flags) or
               (regs<>[]) or
               ((target_info.system in systems_darwin) and
                (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
              begin
                { R15 takes the place of R14 in the register set, so loading
                  the registers also performs the return }
                exclude(regs,RS_R14);
                include(regs,RS_R15);
                if (target_info.system in systems_darwin) then
                  include(regs,RS_FRAME_POINTER_REG);
              end;
            if not(target_info.system in systems_darwin) then
              begin
                { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                  The saved PC came after that but is discarded, since we restore
                  the stack pointer }
                if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                  regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
              end
            else
              begin
                { restore R8-R11 already if necessary (they've been stored
                  before the others) }
                { only the reference and the register set are prepared here,
                  the LDM itself is emitted further below after the stack
                  pointer has been adjusted }
                saveregs:=regs*[RS_R8,RS_R10,RS_R11];
                if saveregs<>[] then
                  begin
                    reference_reset(ref,4,[]);
                    ref.index:=NR_STACK_POINTER_REG;
                    ref.addressmode:=AM_PREINDEXED;
                    for r:=RS_R8 to RS_R11 do
                      if r in saveregs then
                        inc(registerarea,4);
                    regs:=regs-saveregs;
                  end;
              end;
            { 4 bytes for every integer register which gets popped }
            for r:=RS_R0 to RS_R15 do
              if r in regs then
                inc(registerarea,4);
            { reapply the stack padding reg, in case there was one, see the complimentary
              comment in g_proc_entry() (KB) }
            paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
            { g_proc_entry only selects R0..R3 as padding register and stores
              High(TSuperRegister) if there is none }
            if paddingreg < RS_R4 then
              if paddingreg in regs then
                internalerror(201306190)
              else
                begin
                  regs:=regs+[paddingreg];
                  inc(registerarea,4);
                end;
            stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
               (target_info.system in systems_darwin) then
              begin
                { release the local area; the condition and the size
                  calculation correspond to the ones in g_proc_entry }
                LocalSize:=current_procinfo.calc_stackframe_size;
                if (LocalSize<>0) or
                   ((stackmisalignment<>0) and
                    ((pi_do_call in current_procinfo.flags) or
                     (po_assembler in current_procinfo.procdef.procoptions))) then
                  begin
                    if pi_estimatestacksize in current_procinfo.flags then
                      LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                    else
                      localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                    { add the size with one instruction, with two
                      instructions, or through R12 as temporary register }
                    if is_shifter_const(LocalSize,shift) then
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                    else if split_into_shifter_const(localsize, imm1, imm2) then
                      begin
                        list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                        list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                      end
                    else
                      begin
                        a_reg_alloc(list,NR_R12);
                        a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                        list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                        a_reg_dealloc(list,NR_R12);
                      end;
                  end;
                { Darwin: pop the registers of the second STM of the entry
                  code first, ref has been set up above }
                if (target_info.system in systems_darwin) and
                   (saveregs<>[]) then
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                if regs=[] then
                  begin
                    { nothing to pop: plain return through R14 }
                    if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                      list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                    else
                      list.concat(taicpu.op_reg(A_BX,NR_R14))
                  end
                else
                  begin
                    reference_reset(ref,4,[]);
                    ref.index:=NR_STACK_POINTER_REG;
                    ref.addressmode:=AM_PREINDEXED;
                    list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                  end;
              end
            else
              begin
                { restore int registers and return }
                reference_reset(ref,4,[]);
                ref.index:=NR_FRAME_POINTER_REG;
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
              end;
          end
        else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
        else
          list.concat(taicpu.op_reg(A_BX,NR_R14))
      end;
  2209. procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
  2210. var
  2211. ref : treference;
  2212. l : TAsmLabel;
  2213. regs : tcpuregisterset;
  2214. r: byte;
  2215. begin
  2216. if (cs_create_pic in current_settings.moduleswitches) and
  2217. (pi_needs_got in current_procinfo.flags) and
  2218. (tf_pic_uses_got in target_info.flags) then
  2219. begin
  2220. { Procedure parametrs are not initialized at this stage.
  2221. Before GOT initialization code, allocate registers used for procedure parameters
  2222. to prevent usage of these registers for temp operations in later stages of code
  2223. generation. }
  2224. regs:=rg[R_INTREGISTER].used_in_proc;
  2225. for r:=RS_R0 to RS_R3 do
  2226. if r in regs then
  2227. a_reg_alloc(list, newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2228. { Allocate scratch register R12 and use it for GOT calculations directly.
  2229. Otherwise the init code can be distorted in later stages of code generation. }
  2230. a_reg_alloc(list,NR_R12);
  2231. reference_reset(ref,4,[]);
  2232. current_asmdata.getglobaldatalabel(l);
  2233. cg.a_label(current_procinfo.aktlocaldata,l);
  2234. ref.symbol:=l;
  2235. ref.base:=NR_PC;
  2236. ref.symboldata:=current_procinfo.aktlocaldata.last;
  2237. list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,ref));
  2238. current_asmdata.getaddrlabel(l);
  2239. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,l,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
  2240. cg.a_label(list,l);
  2241. list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
  2242. list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));
  2243. { Deallocate registers }
  2244. a_reg_dealloc(list,NR_R12);
  2245. for r:=RS_R3 downto RS_R0 do
  2246. if r in regs then
  2247. a_reg_dealloc(list, newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2248. end;
  2249. end;
  2250. procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
  2251. var
  2252. b : byte;
  2253. tmpref : treference;
  2254. instr : taicpu;
  2255. begin
  2256. if ref.addressmode<>AM_OFFSET then
  2257. internalerror(200309071);
  2258. tmpref:=ref;
  2259. { Be sure to have a base register }
  2260. if (tmpref.base=NR_NO) then
  2261. begin
  2262. if tmpref.shiftmode<>SM_None then
  2263. internalerror(2014020702);
  2264. if tmpref.signindex<0 then
  2265. internalerror(200312023);
  2266. tmpref.base:=tmpref.index;
  2267. tmpref.index:=NR_NO;
  2268. end;
  2269. if assigned(tmpref.symbol) or
  2270. not((is_shifter_const(tmpref.offset,b)) or
  2271. (is_shifter_const(-tmpref.offset,b))
  2272. ) then
  2273. fixref(list,tmpref);
  2274. { expect a base here if there is an index }
  2275. if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
  2276. internalerror(200312022);
  2277. if tmpref.index<>NR_NO then
  2278. begin
  2279. if tmpref.shiftmode<>SM_None then
  2280. internalerror(200312021);
  2281. if tmpref.signindex<0 then
  2282. a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.base,tmpref.index,r)
  2283. else
  2284. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
  2285. if tmpref.offset<>0 then
  2286. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
  2287. end
  2288. else
  2289. begin
  2290. if tmpref.base=NR_NO then
  2291. a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
  2292. else
  2293. if tmpref.offset<>0 then
  2294. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
  2295. else
  2296. begin
  2297. instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
  2298. list.concat(instr);
  2299. add_move_instruction(instr);
  2300. end;
  2301. end;
  2302. end;
  2303. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2304. var
  2305. tmpreg, tmpreg2 : tregister;
  2306. tmpref : treference;
  2307. l, piclabel : tasmlabel;
  2308. indirection_done : boolean;
  2309. begin
  2310. { absolute symbols can't be handled directly, we've to store the symbol reference
  2311. in the text segment and access it pc relative
  2312. For now, we assume that references where base or index equals to PC are already
  2313. relative, all other references are assumed to be absolute and thus they need
  2314. to be handled extra.
  2315. A proper solution would be to change refoptions to a set and store the information
  2316. if the symbol is absolute or relative there.
  2317. }
  2318. { create consts entry }
  2319. reference_reset(tmpref,4,[]);
  2320. current_asmdata.getjumplabel(l);
  2321. cg.a_label(current_procinfo.aktlocaldata,l);
  2322. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2323. piclabel:=nil;
  2324. tmpreg:=NR_NO;
  2325. indirection_done:=false;
  2326. if assigned(ref.symbol) then
  2327. begin
  2328. if (target_info.system=system_arm_ios) and
  2329. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2330. begin
  2331. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2332. if ref.offset<>0 then
  2333. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2334. indirection_done:=true;
  2335. end
  2336. else if ref.refaddr=addr_gottpoff then
  2337. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2338. else if ref.refaddr=addr_tlsgd then
  2339. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2340. else if ref.refaddr=addr_tlsdesc then
  2341. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2342. else if ref.refaddr=addr_tpoff then
  2343. begin
  2344. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2345. Internalerror(2019092804);
  2346. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2347. end
  2348. else if (cs_create_pic in current_settings.moduleswitches) then
  2349. if (tf_pic_uses_got in target_info.flags) then
  2350. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2351. else
  2352. begin
  2353. { ideally, we would want to generate
  2354. ldr r1, LPICConstPool
  2355. LPICLocal:
  2356. ldr/str r2,[pc,r1]
  2357. ...
  2358. LPICConstPool:
  2359. .long _globsym-(LPICLocal+8)
  2360. However, we cannot be sure that the ldr/str will follow
  2361. right after the call to fixref, so we have to load the
  2362. complete address already in a register.
  2363. }
  2364. current_asmdata.getaddrlabel(piclabel);
  2365. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2366. end
  2367. else
  2368. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2369. end
  2370. else
  2371. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2372. { load consts entry }
  2373. if not indirection_done then
  2374. begin
  2375. tmpreg:=getintregister(list,OS_INT);
  2376. tmpref.symbol:=l;
  2377. tmpref.base:=NR_PC;
  2378. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2379. if (cs_create_pic in current_settings.moduleswitches) and
  2380. (tf_pic_uses_got in target_info.flags) and
  2381. assigned(ref.symbol) then
  2382. begin
  2383. {$ifdef EXTDEBUG}
  2384. if not (pi_needs_got in current_procinfo.flags) then
  2385. Comment(V_warning,'pi_needs_got not included');
  2386. {$endif EXTDEBUG}
  2387. Include(current_procinfo.flags,pi_needs_got);
  2388. reference_reset(tmpref,4,[]);
  2389. tmpref.base:=current_procinfo.got;
  2390. tmpref.index:=tmpreg;
  2391. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2392. if ref.offset<>0 then
  2393. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2394. end;
  2395. end;
  2396. if assigned(piclabel) then
  2397. begin
  2398. cg.a_label(list,piclabel);
  2399. tmpreg2:=getaddressregister(list);
  2400. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2401. tmpreg:=tmpreg2
  2402. end;
  2403. { This routine can be called with PC as base/index in case the offset
  2404. was too large to encode in a load/store. In that case, the entire
  2405. absolute expression has been re-encoded in a new constpool entry, and
  2406. we have to remove the use of PC from the original reference (the code
  2407. above made everything relative to the value loaded from the new
  2408. constpool entry) }
  2409. if is_pc(ref.base) then
  2410. ref.base:=NR_NO;
  2411. if is_pc(ref.index) then
  2412. ref.index:=NR_NO;
  2413. if (ref.base<>NR_NO) then
  2414. begin
  2415. if ref.index<>NR_NO then
  2416. begin
  2417. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2418. ref.base:=tmpreg;
  2419. end
  2420. else
  2421. if ref.base<>NR_PC then
  2422. begin
  2423. ref.index:=tmpreg;
  2424. ref.shiftimm:=0;
  2425. ref.signindex:=1;
  2426. ref.shiftmode:=SM_None;
  2427. end
  2428. else
  2429. ref.base:=tmpreg;
  2430. end
  2431. else
  2432. ref.base:=tmpreg;
  2433. ref.offset:=0;
  2434. ref.symbol:=nil;
  2435. end;
  2436. procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  2437. var
  2438. paraloc1,paraloc2,paraloc3 : TCGPara;
  2439. pd : tprocdef;
  2440. begin
  2441. pd:=search_system_proc('MOVE');
  2442. paraloc1.init;
  2443. paraloc2.init;
  2444. paraloc3.init;
  2445. paramanager.getcgtempparaloc(list,pd,1,paraloc1);
  2446. paramanager.getcgtempparaloc(list,pd,2,paraloc2);
  2447. paramanager.getcgtempparaloc(list,pd,3,paraloc3);
  2448. a_load_const_cgpara(list,OS_SINT,len,paraloc3);
  2449. a_loadaddr_ref_cgpara(list,dest,paraloc2);
  2450. a_loadaddr_ref_cgpara(list,source,paraloc1);
  2451. paramanager.freecgpara(list,paraloc3);
  2452. paramanager.freecgpara(list,paraloc2);
  2453. paramanager.freecgpara(list,paraloc1);
  2454. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2455. alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2456. a_call_name(list,'FPC_MOVE',false);
  2457. dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2458. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2459. paraloc3.done;
  2460. paraloc2.done;
  2461. paraloc1.done;
  2462. end;
  2463. procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  2464. const
  2465. maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if might conflick with reserved ones,also +2 is used becouse of regs required for referencing}
  2466. maxtmpreg_thumb = 5;
  2467. var
  2468. srcref,dstref,usedtmpref,usedtmpref2:treference;
  2469. srcreg,destreg,countreg,r,tmpreg:tregister;
  2470. helpsize:aint;
  2471. copysize:byte;
  2472. cgsize:Tcgsize;
  2473. tmpregisters:array[1..maxtmpreg_arm] of tregister;
  2474. maxtmpreg,
  2475. tmpregi,tmpregi2:byte;
  2476. { will never be called with count<=4 }
  2477. procedure genloop(count : aword;size : byte);
  2478. const
  2479. size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  2480. var
  2481. l : tasmlabel;
  2482. begin
  2483. current_asmdata.getjumplabel(l);
  2484. if count<size then size:=1;
  2485. a_load_const_reg(list,OS_INT,count div size,countreg);
  2486. cg.a_label(list,l);
  2487. srcref.addressmode:=AM_POSTINDEXED;
  2488. dstref.addressmode:=AM_POSTINDEXED;
  2489. srcref.offset:=size;
  2490. dstref.offset:=size;
  2491. r:=getintregister(list,size2opsize[size]);
  2492. a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
  2493. a_reg_alloc(list,NR_DEFAULTFLAGS);
  2494. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
  2495. a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
  2496. a_jmp_flags(list,F_NE,l);
  2497. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2498. srcref.offset:=1;
  2499. dstref.offset:=1;
  2500. case count mod size of
  2501. 1:
  2502. begin
  2503. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2504. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2505. end;
  2506. 2:
  2507. if aligned then
  2508. begin
  2509. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2510. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2511. end
  2512. else
  2513. begin
  2514. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2515. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2516. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2517. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2518. end;
  2519. 3:
  2520. if aligned then
  2521. begin
  2522. srcref.offset:=2;
  2523. dstref.offset:=2;
  2524. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2525. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2526. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2527. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2528. end
  2529. else
  2530. begin
  2531. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2532. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2533. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2534. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2535. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2536. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2537. end;
  2538. end;
  2539. { keep the registers alive }
  2540. list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
  2541. list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
  2542. list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
  2543. end;
  2544. { save estimation, if a creating a separate ref is needed or
  2545. if we can keep the original reference while copying }
  2546. function SimpleRef(const ref : treference) : boolean;
  2547. begin
  2548. result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
  2549. ((ref.symbol=nil) and
  2550. (ref.addressmode=AM_OFFSET) and
  2551. (((ref.offset>=0) and (ref.offset+len<=31)) or
  2552. (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
  2553. { ldrh has a limited offset range }
  2554. (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
  2555. )
  2556. );
  2557. end;
  2558. { will never be called with count<=4 }
  2559. procedure genloop_thumb(count : aword;size : byte);
  2560. procedure refincofs(const ref : treference;const value : longint = 1);
  2561. begin
  2562. a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
  2563. end;
  2564. const
  2565. size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  2566. var
  2567. l : tasmlabel;
  2568. begin
  2569. current_asmdata.getjumplabel(l);
  2570. if count<size then size:=1;
  2571. a_load_const_reg(list,OS_INT,count div size,countreg);
  2572. cg.a_label(list,l);
  2573. r:=getintregister(list,size2opsize[size]);
  2574. a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
  2575. refincofs(srcref);
  2576. a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
  2577. refincofs(dstref);
  2578. a_reg_alloc(list,NR_DEFAULTFLAGS);
  2579. list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
  2580. a_jmp_flags(list,F_NE,l);
  2581. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2582. case count mod size of
  2583. 1:
  2584. begin
  2585. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2586. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2587. end;
  2588. 2:
  2589. if aligned then
  2590. begin
  2591. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2592. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2593. end
  2594. else
  2595. begin
  2596. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2597. refincofs(srcref);
  2598. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2599. refincofs(dstref);
  2600. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2601. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2602. end;
  2603. 3:
  2604. if aligned then
  2605. begin
  2606. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2607. refincofs(srcref,2);
  2608. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2609. refincofs(dstref,2);
  2610. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2611. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2612. end
  2613. else
  2614. begin
  2615. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2616. refincofs(srcref);
  2617. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2618. refincofs(dstref);
  2619. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2620. refincofs(srcref);
  2621. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2622. refincofs(dstref);
  2623. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2624. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2625. end;
  2626. end;
  2627. { keep the registers alive }
  2628. list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
  2629. list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
  2630. list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
  2631. end;
  2632. begin
  2633. if len=0 then
  2634. exit;
  2635. if GenerateThumbCode then
  2636. maxtmpreg:=maxtmpreg_thumb
  2637. else
  2638. maxtmpreg:=maxtmpreg_arm;
  2639. helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
  2640. dstref:=dest;
  2641. srcref:=source;
  2642. if cs_opt_size in current_settings.optimizerswitches then
  2643. helpsize:=8;
  2644. if aligned and (len=4) then
  2645. begin
  2646. tmpreg:=getintregister(list,OS_32);
  2647. a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
  2648. a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
  2649. end
  2650. else if aligned and (len=2) then
  2651. begin
  2652. tmpreg:=getintregister(list,OS_16);
  2653. a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
  2654. a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
  2655. end
  2656. else if (len<=helpsize) and aligned then
  2657. begin
  2658. tmpregi:=0;
  2659. { loading address in a separate register needed? }
  2660. if SimpleRef(source) then
  2661. begin
  2662. { ... then we don't need a loadaddr }
  2663. srcref:=source;
  2664. end
  2665. else
  2666. begin
  2667. srcreg:=getintregister(list,OS_ADDR);
  2668. a_loadaddr_ref_reg(list,source,srcreg);
  2669. reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
  2670. end;
  2671. while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
  2672. begin
  2673. inc(tmpregi);
  2674. tmpregisters[tmpregi]:=getintregister(list,OS_32);
  2675. a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
  2676. inc(srcref.offset,4);
  2677. dec(len,4);
  2678. end;
  2679. { loading address in a separate register needed? }
  2680. if SimpleRef(dest) then
  2681. dstref:=dest
  2682. else
  2683. begin
  2684. destreg:=getintregister(list,OS_ADDR);
  2685. a_loadaddr_ref_reg(list,dest,destreg);
  2686. reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
  2687. end;
  2688. tmpregi2:=1;
  2689. while (tmpregi2<=tmpregi) do
  2690. begin
  2691. a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
  2692. inc(dstref.offset,4);
  2693. inc(tmpregi2);
  2694. end;
  2695. copysize:=4;
  2696. cgsize:=OS_32;
  2697. while len<>0 do
  2698. begin
  2699. if len<2 then
  2700. begin
  2701. copysize:=1;
  2702. cgsize:=OS_8;
  2703. end
  2704. else if len<4 then
  2705. begin
  2706. copysize:=2;
  2707. cgsize:=OS_16;
  2708. end;
  2709. dec(len,copysize);
  2710. r:=getintregister(list,cgsize);
  2711. a_load_ref_reg(list,cgsize,cgsize,srcref,r);
  2712. a_load_reg_ref(list,cgsize,cgsize,r,dstref);
  2713. inc(srcref.offset,copysize);
  2714. inc(dstref.offset,copysize);
  2715. end;{end of while}
  2716. end
  2717. else
  2718. begin
  2719. cgsize:=OS_32;
  2720. if (len<=4) then{len<=4 and not aligned}
  2721. begin
  2722. r:=getintregister(list,cgsize);
  2723. usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2724. if Len=1 then
  2725. a_load_reg_ref(list,OS_8,OS_8,r,dstref)
  2726. else
  2727. begin
  2728. tmpreg:=getintregister(list,cgsize);
  2729. usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2730. inc(usedtmpref.offset,1);
  2731. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2732. inc(usedtmpref2.offset,1);
  2733. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2734. if len>2 then
  2735. begin
  2736. inc(usedtmpref.offset,1);
  2737. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2738. inc(usedtmpref2.offset,1);
  2739. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2740. if len>3 then
  2741. begin
  2742. inc(usedtmpref.offset,1);
  2743. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2744. inc(usedtmpref2.offset,1);
  2745. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2746. end;
  2747. end;
  2748. end;
  2749. end{end of if len<=4}
  2750. else
  2751. begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
  2752. destreg:=getintregister(list,OS_ADDR);
  2753. a_loadaddr_ref_reg(list,dest,destreg);
  2754. reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
  2755. srcreg:=getintregister(list,OS_ADDR);
  2756. a_loadaddr_ref_reg(list,source,srcreg);
  2757. reference_reset_base(srcref,srcreg,0,dest.temppos,source.alignment,source.volatility);
  2758. countreg:=getintregister(list,OS_32);
  2759. // if cs_opt_size in current_settings.optimizerswitches then
  2760. { roozbeh : it seems loading 1 byte is faster becouse of caching/fetching(?) }
  2761. {if aligned then
  2762. genloop(len,4)
  2763. else}
  2764. if GenerateThumbCode then
  2765. genloop_thumb(len,1)
  2766. else
  2767. genloop(len,1);
  2768. end;
  2769. end;
  2770. end;
  2771. procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
  2772. begin
  2773. g_concatcopy_internal(list,source,dest,len,false);
  2774. end;
  2775. procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
  2776. begin
  2777. if (source.alignment in [1,3]) or
  2778. (dest.alignment in [1,3]) then
  2779. g_concatcopy_internal(list,source,dest,len,false)
  2780. else
  2781. g_concatcopy_internal(list,source,dest,len,true);
  2782. end;
  2783. procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
  2784. var
  2785. ovloc : tlocation;
  2786. begin
  2787. ovloc.loc:=LOC_VOID;
  2788. g_overflowCheck_loc(list,l,def,ovloc);
  2789. end;
  2790. procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
  2791. var
  2792. hl : tasmlabel;
  2793. ai:TAiCpu;
  2794. hflags : tresflags;
  2795. begin
  2796. if not(cs_check_overflow in current_settings.localswitches) then
  2797. exit;
  2798. current_asmdata.getjumplabel(hl);
  2799. case ovloc.loc of
  2800. LOC_VOID:
  2801. begin
  2802. ai:=taicpu.op_sym(A_B,hl);
  2803. ai.is_jmp:=true;
  2804. if not((def.typ=pointerdef) or
  2805. ((def.typ=orddef) and
  2806. (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
  2807. pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]))) then
  2808. ai.SetCondition(C_VC)
  2809. else
  2810. if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
  2811. ai.SetCondition(C_CS)
  2812. else
  2813. ai.SetCondition(C_CC);
  2814. list.concat(ai);
  2815. end;
  2816. LOC_FLAGS:
  2817. begin
  2818. hflags:=ovloc.resflags;
  2819. inverse_flags(hflags);
  2820. cg.a_jmp_flags(list,hflags,hl);
  2821. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2822. end;
  2823. else
  2824. internalerror(200409281);
  2825. end;
  2826. a_call_name(list,'FPC_OVERFLOW',false);
  2827. a_label(list,hl);
  2828. end;
  2829. procedure tbasecgarm.g_save_registers(list : TAsmList);
  2830. begin
  2831. { this work is done in g_proc_entry }
  2832. end;
  2833. procedure tbasecgarm.g_restore_registers(list : TAsmList);
  2834. begin
  2835. { this work is done in g_proc_exit }
  2836. end;
  2837. procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  2838. var
  2839. ai : taicpu;
  2840. hlabel : TAsmLabel;
  2841. begin
  2842. if GenerateThumbCode then
  2843. begin
  2844. { the optimizer has to fix this if jump range is sufficient short }
  2845. current_asmdata.getjumplabel(hlabel);
  2846. ai:=Taicpu.Op_sym(A_B,hlabel);
  2847. ai.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
  2848. ai.is_jmp:=true;
  2849. list.concat(ai);
  2850. a_jmp_always(list,l);
  2851. a_label(list,hlabel);
  2852. end
  2853. else
  2854. begin
  2855. ai:=Taicpu.Op_sym(A_B,l);
  2856. ai.SetCondition(OpCmp2AsmCond[cond]);
  2857. ai.is_jmp:=true;
  2858. list.concat(ai);
  2859. end;
  2860. end;
  2861. function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
  2862. const
  2863. convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
  2864. (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
  2865. (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
  2866. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2867. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2868. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
  2869. begin
  2870. result:=convertop[fromsize,tosize];
  2871. if result=A_NONE then
  2872. internalerror(200312205);
  2873. end;
  2874. function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
  2875. const
  2876. convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
  2877. (PF_F32, PF_F32F64,PF_None,PF_None,PF_None),
  2878. (PF_F64F32,PF_F64, PF_None,PF_None,PF_None),
  2879. (PF_None, PF_None, PF_None,PF_None,PF_None),
  2880. (PF_None, PF_None, PF_None,PF_None,PF_None),
  2881. (PF_None, PF_None, PF_None,PF_None,PF_None));
  2882. begin
  2883. result:=convertop[fromsize,tosize];
  2884. end;
  2885. procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
  2886. var
  2887. instr: taicpu;
  2888. begin
  2889. if (shuffle=nil) or shufflescalar(shuffle) then
  2890. instr:=setoppostfix(taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),get_scalar_mm_prefix(tosize,fromsize))
  2891. else
  2892. internalerror(2009112407);
  2893. list.concat(instr);
  2894. case instr.opcode of
  2895. A_VMOV:
  2896. { VMOV cannot generate an FPU exception, so we do not need a check here }
  2897. add_move_instruction(instr);
  2898. else
  2899. { VCVT can generate an exception }
  2900. maybe_check_for_fpu_exception(list);
  2901. end;
  2902. end;
  2903. procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  2904. var
  2905. intreg,
  2906. tmpmmreg : tregister;
  2907. reg64 : tregister64;
  2908. begin
  2909. if assigned(shuffle) and
  2910. not(shufflescalar(shuffle)) then
  2911. internalerror(2009112413);
  2912. case fromsize of
  2913. OS_32,OS_S32:
  2914. begin
  2915. fromsize:=OS_F32;
  2916. { since we are loading an integer, no conversion may be required }
  2917. if (fromsize<>tosize) then
  2918. internalerror(2009112801);
  2919. end;
  2920. OS_64,OS_S64:
  2921. begin
  2922. fromsize:=OS_F64;
  2923. { since we are loading an integer, no conversion may be required }
  2924. if (fromsize<>tosize) then
  2925. internalerror(2009112901);
  2926. end;
  2927. OS_F32,OS_F64:
  2928. ;
  2929. else
  2930. internalerror(2019050920);
  2931. end;
  2932. if (fromsize<>tosize) then
  2933. tmpmmreg:=getmmregister(list,fromsize)
  2934. else
  2935. tmpmmreg:=reg;
  2936. if (ref.alignment in [1,2]) then
  2937. begin
  2938. case fromsize of
  2939. OS_F32:
  2940. begin
  2941. intreg:=getintregister(list,OS_32);
  2942. a_load_ref_reg(list,OS_32,OS_32,ref,intreg);
  2943. a_loadmm_intreg_reg(list,OS_32,OS_F32,intreg,tmpmmreg,mms_movescalar);
  2944. end;
  2945. OS_F64:
  2946. begin
  2947. reg64.reglo:=getintregister(list,OS_32);
  2948. reg64.reghi:=getintregister(list,OS_32);
  2949. cg64.a_load64_ref_reg(list,ref,reg64);
  2950. cg64.a_loadmm_intreg64_reg(list,OS_F64,reg64,tmpmmreg);
  2951. end;
  2952. else
  2953. internalerror(2009112412);
  2954. end;
  2955. end
  2956. else
  2957. handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
  2958. if (tmpmmreg<>reg) then
  2959. a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
  2960. end;
  2961. procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  2962. var
  2963. intreg,
  2964. tmpmmreg : tregister;
  2965. reg64 : tregister64;
  2966. begin
  2967. if assigned(shuffle) and
  2968. not(shufflescalar(shuffle)) then
  2969. internalerror(2009112416);
  2970. case tosize of
  2971. OS_32,OS_S32:
  2972. begin
  2973. tosize:=OS_F32;
  2974. { since we are loading an integer, no conversion may be required }
  2975. if (fromsize<>tosize) then
  2976. internalerror(2009112802);
  2977. end;
  2978. OS_64,OS_S64:
  2979. begin
  2980. tosize:=OS_F64;
  2981. { since we are loading an integer, no conversion may be required }
  2982. if (fromsize<>tosize) then
  2983. internalerror(2009112902);
  2984. end;
  2985. OS_F32,OS_F64:
  2986. ;
  2987. else
  2988. internalerror(2019050919);
  2989. end;
  2990. if (fromsize<>tosize) then
  2991. begin
  2992. tmpmmreg:=getmmregister(list,tosize);
  2993. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpmmreg,shuffle);
  2994. end
  2995. else
  2996. tmpmmreg:=reg;
  2997. if (ref.alignment in [1,2]) then
  2998. begin
  2999. case tosize of
  3000. OS_F32:
  3001. begin
  3002. intreg:=getintregister(list,OS_32);
  3003. a_loadmm_reg_intreg(list,OS_F32,OS_32,tmpmmreg,intreg,shuffle);
  3004. a_load_reg_ref(list,OS_32,OS_32,intreg,ref);
  3005. end;
  3006. OS_F64:
  3007. begin
  3008. reg64.reglo:=getintregister(list,OS_32);
  3009. reg64.reghi:=getintregister(list,OS_32);
  3010. cg64.a_loadmm_reg_intreg64(list,OS_F64,tmpmmreg,reg64);
  3011. cg64.a_load64_reg_ref(list,reg64,ref);
  3012. end;
  3013. else
  3014. internalerror(2009112417);
  3015. end;
  3016. end
  3017. else
  3018. handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
  3019. { VSTR cannot generate an FPU exception, VCVT is handled seperately, so we do not need a check here }
  3020. end;
  3021. procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  3022. begin
  3023. { this code can only be used to transfer raw data, not to perform
  3024. conversions }
  3025. if (tosize<>OS_F32) then
  3026. internalerror(2009112419);
  3027. if not(fromsize in [OS_32,OS_S32]) then
  3028. internalerror(2009112420);
  3029. if assigned(shuffle) and
  3030. not shufflescalar(shuffle) then
  3031. internalerror(2009112516);
  3032. list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
  3033. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3034. end;
  3035. procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
  3036. begin
  3037. { this code can only be used to transfer raw data, not to perform
  3038. conversions }
  3039. if (fromsize<>OS_F32) then
  3040. internalerror(2009112430);
  3041. if not(tosize in [OS_32,OS_S32]) then
  3042. internalerror(2009112409);
  3043. if assigned(shuffle) and
  3044. not shufflescalar(shuffle) then
  3045. internalerror(2009112514);
  3046. list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
  3047. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3048. end;
  3049. procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  3050. var
  3051. tmpreg: tregister;
  3052. begin
  3053. { the vfp doesn't support xor nor any other logical operation, but
  3054. this routine is used to initialise global mm regvars. We can
  3055. easily initialise an mm reg with 0 though. }
  3056. case op of
  3057. OP_XOR:
  3058. begin
  3059. if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
  3060. begin
  3061. if (reg_cgsize(src)<>size) or
  3062. assigned(shuffle) then
  3063. internalerror(2019081301);
  3064. list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
  3065. end
  3066. else
  3067. begin
  3068. if (src<>dst) or
  3069. (reg_cgsize(src)<>size) or
  3070. assigned(shuffle) then
  3071. internalerror(2009112907);
  3072. tmpreg:=getintregister(list,OS_32);
  3073. a_load_const_reg(list,OS_32,0,tmpreg);
  3074. case size of
  3075. OS_F32:
  3076. list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
  3077. OS_F64:
  3078. list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
  3079. else
  3080. internalerror(2009112908);
  3081. end;
  3082. end;
  3083. end
  3084. else
  3085. internalerror(2009112906);
  3086. end;
  3087. end;
  3088. procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  3089. const
  3090. overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
  3091. begin
  3092. if (op in overflowops) and
  3093. (size in [OS_8,OS_S8,OS_16,OS_S16]) then
  3094. a_load_reg_reg(list,OS_32,size,dst,dst);
  3095. end;
  3096. procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);
  3097. procedure checkreg(var reg : TRegister);
  3098. var
  3099. tmpreg : TRegister;
  3100. begin
  3101. if ((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
  3102. (getsupreg(reg)=RS_R15) then
  3103. begin
  3104. tmpreg:=getintregister(list,OS_INT);
  3105. a_load_reg_reg(list,OS_INT,OS_INT,reg,tmpreg);
  3106. reg:=tmpreg;
  3107. end;
  3108. end;
  3109. begin
  3110. checkreg(op1);
  3111. checkreg(op2);
  3112. checkreg(op3);
  3113. checkreg(op4);
  3114. list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
  3115. end;
  3116. procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
  3117. begin
  3118. if pi_needs_tls in current_procinfo.flags then
  3119. begin
  3120. list.concat(tai_regalloc.alloc(NR_R0,nil));
  3121. a_call_name(list,'fpc_read_tp',false);
  3122. a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
  3123. list.concat(tai_regalloc.dealloc(NR_R0,nil));
  3124. end;
  3125. end;
  3126. procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
  3127. begin
  3128. case op of
  3129. OP_NEG:
  3130. begin
  3131. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3132. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
  3133. list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
  3134. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  3135. end;
  3136. OP_NOT:
  3137. begin
  3138. cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
  3139. cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
  3140. end;
  3141. else
  3142. a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
  3143. end;
  3144. end;
  3145. procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
  3146. begin
  3147. a_op64_const_reg_reg(list,op,size,value,reg,reg);
  3148. end;
  3149. procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
  3150. var
  3151. ovloc : tlocation;
  3152. begin
  3153. a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,ovloc);
  3154. end;
  3155. procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
  3156. var
  3157. ovloc : tlocation;
  3158. begin
  3159. a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,ovloc);
  3160. end;
  3161. procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
  3162. begin
  3163. { this code can only be used to transfer raw data, not to perform
  3164. conversions }
  3165. if (mmsize<>OS_F64) then
  3166. internalerror(2009112405);
  3167. list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
  3168. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3169. end;
  3170. procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
  3171. begin
  3172. { this code can only be used to transfer raw data, not to perform
  3173. conversions }
  3174. if (mmsize<>OS_F64) then
  3175. internalerror(2009112406);
  3176. list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
  3177. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3178. end;
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    { Emits regdst := regsrc <op> value for a 64 bit register pair.

      list     - instruction list the code is appended to
      op       - operation; OP_NEG/OP_NOT are rejected with an internal error
      size     - operand size, only consulted to decide whether ovloc is filled
      value    - 64 bit constant operand, split into its low and high dword
      setflags - request flag-setting ADD/SUB (also triggered by
                 tbasecgarm(cg).cgsetflags)
      ovloc    - reset to LOC_VOID; set to LOC_FLAGS/F_CS (ADD) or F_CC (SUB)
                 when flags were requested and size=OS_64 }

    { Emits the carry chain "lo_op with S postfix" on the low words followed
      by hi_op (with postfix hi_postfix) on the high words. Constants which
      are no valid shifter constants are first loaded into a scratch
      register. NR_DEFAULTFLAGS is allocated right before the low word
      instruction; releasing it is left to the caller. }
    procedure emit_carry_chain(lo_op,hi_op : tasmop;hi_postfix : toppostfix);
      var
        tmpreg : tregister;
        b : byte;
        lo_val,hi_val : aint;
      begin
        { explicit casts keep the bit pattern and avoid range check errors
          for dwords with the top bit set }
        lo_val:=aint(lo(value));
        hi_val:=aint(hi(value));

        if is_shifter_const(lo_val,b) then
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_const(lo_op,regdst.reglo,regsrc.reglo,lo_val),PF_S));
          end
        else
          begin
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,lo_val,tmpreg);
            { the flags become live only after the constant has been loaded }
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(lo_op,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
          end;

        if is_shifter_const(hi_val,b) then
          list.concat(setoppostfix(taicpu.op_reg_reg_const(hi_op,regdst.reghi,regsrc.reghi,hi_val),hi_postfix))
        else
          begin
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hi_val,tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(hi_op,regdst.reghi,regsrc.reghi,tmpreg),hi_postfix));
          end;
      end;

    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
        else
          ;
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag setting variant: the high word instruction gets the S
            postfix as well and the flags stay allocated for the caller }
          case op of
            OP_ADD:
              emit_carry_chain(A_ADD,A_ADC,PF_S);
            OP_SUB:
              emit_carry_chain(A_SUB,A_SBC,PF_S);
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the arm has a weird opinion how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { bitwise operations work independently on both halves }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                emit_carry_chain(A_ADD,A_ADC,PF_None);
                { the carry has been consumed by ADC, so the flags are dead:
                  release them like the register/register variant does }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                emit_carry_chain(A_SUB,A_SBC,PF_None);
                { see OP_ADD: the flags are no longer needed after SBC }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    { Emits a 64 bit three-register operation on register pairs. For OP_SUB
      the generated code computes regsrc2-regsrc1. When flag setting code is
      requested and size=OS_64, ovloc reports the flag the code leaves
      behind (F_CS for OP_ADD, F_CC for OP_SUB), otherwise it is LOC_VOID. }
    var
      want_flags : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      { unary operations are not handled by this routine }
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      want_flags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);
      if want_flags then
        begin
          { both instructions carry the S postfix; the flags stay allocated }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          if op=OP_ADD then
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
            end
          else
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
            end;
          if size=OS_64 then
            begin
              { ADD and SUB report through opposite carry conditions }
              ovloc.loc:=LOC_FLAGS;
              if op=OP_ADD then
                ovloc.resflags:=F_CS
              else
                ovloc.resflags:=F_CC;
            end;
        end
      else
        case op of
          OP_AND,OP_OR,OP_XOR:
            begin
              { no carry between the halves: handle them as two 32 bit ops }
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
            end;
          OP_ADD,OP_SUB:
            begin
              { only the low word instruction sets the flags; they are
                released again once the high word has consumed the carry }
              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
              if op=OP_ADD then
                begin
                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                  list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
                end
              else
                begin
                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                  list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
                end;
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          else
            internalerror(2003083104);
        end;
    end;
  procedure tthumbcgarm.init_register_allocators;
    { Creates the integer register allocator for Thumb code after running
      the inherited initialisation. }
    begin
      inherited init_register_allocators;
      { R7 is handed to the allocator unless the current procedure uses it
        as frame pointer }
      if not(assigned(current_procinfo)) or
         (current_procinfo.framepointer<>NR_R7) then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  procedure tthumbcgarm.done_register_allocators;
    { Releases the register allocator objects of all three register
      classes and then runs the inherited cleanup. }
    begin
      { integer registers }
      rg[R_INTREGISTER].free;
      { FPU registers }
      rg[R_FPUREGISTER].free;
      { multimedia/vector registers }
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    { Generates the Thumb procedure prologue: pushes the registers that have
      to be preserved, reserves localsize bytes of stack (adjusted for
      alignment or for the pre-computed frame size) and sets up the frame
      pointer. Emits the matching CFI information. Nothing is generated when
      nostackframe is set. }
    var
      ref : treference;
      supreg : byte;
      saved : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { the call instruction itself does not put anything on the stack }
      stackmisalignment:=0;
      if nostackframe then
        exit;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { pre-indexed reference based on the stack pointer }
      reference_reset(ref,4,[]);
      ref.index:=NR_STACK_POINTER_REG;
      ref.addressmode:=AM_PREINDEXED;

      { registers to save: everything used by the procedure that is not
        volatile according to the calling convention }
      saved:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

      { note: copying the stack pointer into R12 is currently disabled:
          //!!!! a_reg_alloc(list,NR_R12);
          //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); }

      { R14 is always saved; R7 is saved in addition when a separate frame
        pointer is used }
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        saved:=saved+[RS_R7,RS_R14]
      else
        // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(saved,RS_R14);

      { conservative stack size estimate: larger frames get R4 marked as
        used and saved }
      if localsize+current_settings.alignment.localalignmax+4>508 then
        begin
          include(rg[R_INTREGISTER].used_in_proc,RS_R4);
          include(saved,RS_R4);
        end;

      registerarea:=0;
      { a procedure that never returns does not need its registers saved }
      if po_noreturn in current_procinfo.procdef.procoptions then
        saved:=[];

      if saved<>[] then
        begin
          for supreg:=RS_R0 to RS_R15 do
            if supreg in saved then
              inc(registerarea,4);
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,saved));
          { second pass over the set: the CFI offsets are emitted after the
            push, counting the register area up again from zero }
          registerarea:=0;
          for supreg:=RS_R0 to RS_R15 do
            if supreg in saved then
              begin
                inc(registerarea,4);
                current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,supreg,R_SUBWHOLE),-registerarea);
              end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
        end;

      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
      if stack_parameters or (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          if stack_parameters then
            begin
              { stack parameters are accessed, so the frame size estimated
                earlier has to be used }
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                begin
                  writeln(localsize);
                  writeln(tcpuprocinfo(current_procinfo).stackframesize);
                  internalerror(2013040601);
                end
              else
                localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
            end
          else
            localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

          { small frames use one or two immediate SUBs, larger ones load
            the negated size into R4 and add it to the stack pointer }
          if localsize<508 then
            list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if localsize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
            end
          else
            begin
              a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
              include(saved,RS_R4);
            end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
        end;

      { finally let the frame pointer mirror the adjusted stack pointer }
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
          current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
        end;
    end;
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    { Generates the Thumb procedure epilogue: releases the local stack area
      and pops the saved registers (including the PC). Without a stack frame
      only a plain return through R14 is emitted; procedures marked as
      noreturn get no exit code at all. }
    var
      framesize : longint;
      supreg: byte;
      restored : tcpuregisterset;
      registerarea : DWord;
      stackmisalignment: pint;
      stack_parameters : Boolean;

    { returns through the link register, using BX when the CPU has it }
    procedure emit_return_via_lr;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          emit_return_via_lr;
          exit;
        end;

      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { the registers to pop: the non-volatile ones used by the procedure,
        the PC and, if present, the separate frame pointer }
      restored:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(restored,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(restored,getsupreg(current_procinfo.framepointer));

      registerarea:=0;
      for supreg:=RS_R0 to RS_R15 do
        if supreg in restored then
          inc(registerarea,4);
      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;

      { recompute the size of the local area }
      framesize:=current_procinfo.calc_stackframe_size;
      if stack_parameters then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
      else
        framesize:=align(framesize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

      if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
         (target_info.system in systems_darwin) then
        begin
          if (framesize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { nothing has to be released for an empty local area }
              if framesize<>0 then
                begin
                  if framesize<=508 then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
                  else if framesize<=1016 then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
                    end
                  else
                    begin
                      { too large for immediates: go through R3 }
                      a_reg_alloc(list,NR_R3);
                      a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
                      a_reg_dealloc(list,NR_R3);
                    end;
                end;
            end;

          if restored=[] then
            emit_return_via_lr
          else
            list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,restored));
        end;
    end;
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    { Loads a value of size fromsize from Ref into reg. References whose
      alignment (1 or 2) is smaller than the access size are read in
      several smaller pieces which are shifted and OR-ed together; all
      other loads are passed to handle_load_store. }
    var
      oppostfix:toppostfix;
      partref: treference;
      partreg,addrreg : tregister;
      step,k : integer;

    { Initialises partref: complicated references (symbol, index register,
      offset outside -124..124, or reg doubling as base/index) are first
      resolved into a fresh address register, simple ones are used as is. }
    procedure init_partref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { Advances partref by advance bytes, loads a piece of size partsize
      into partreg, shifts it left by shift bits and ORs it into reg. }
    procedure merge_next_part(advance : integer;partsize : tcgsize;shift : longint);
      begin
        inc(partref.offset,advance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        list.concat(taicpu.op_reg_const(A_LSL,partreg,shift));
        list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,partreg),PF_S));
      end;

    begin
      { never read more than the destination can take }
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      { select the load postfix matching the source size }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;
      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { direction in which the reference is walked, starting at the
            least significant piece }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref;
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                { low byte first, then the high byte (sign extended for
                  OS_S16) shifted into place }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                if FromSize=OS_16 then
                  merge_next_part(step,OS_8,8)
                else
                  merge_next_part(step,OS_S8,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref;
                if ref.alignment=2 then
                  begin
                    { two halfword pieces }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_next_part(step*2,OS_16,16);
                  end
                else
                  begin
                    { four byte pieces, shifted by 8, 16 and 24 bits }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for k:=1 to 3 do
                      merge_next_part(step,OS_8,8*k);
                  end;
              end
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,oppostfix,reg,ref);

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    { Loads the constant a into reg: either with a flag setting MOV when a
      is accepted by is_thumb_imm, or with a PC relative LDR from a 32 bit
      entry added to the local data of the current procedure. }
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      { immediate form }
      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { constant pool form: emit "label: .long a" into the local data ... }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      reference_reset(poolref,4,[]);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      { ... remembering the label entry in the reference before the
        constant itself is appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    { Subtracts ioffset from the 'self' parameter of procdef, wherever the
      caller-side parameter location places it (register or memory). }
    var
      selfsym : tsym;
      selfref : treference;
      locentry : Pcgparalocation;

    { adds ioffset as an aligned 32 bit entry to the local data and emits a
      PC relative LDR of that entry into R4 }
    procedure load_ioffset_into_r4;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    begin
      { the parameter locations have to be known before 'self' is looked up }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(2003052504);

      { walk all locations the parameter is made of }
      locentry:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(locentry) do
        begin
          case locentry^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,locentry^.size,ioffset,locentry^.register)
                else
                  begin
                    { R4 serves as scratch register and is preserved
                      around the subtraction }
                    list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    load_ioffset_into_r4;
                    a_op_reg_reg(list,OP_SUB,locentry^.size,NR_R4,locentry^.register);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,locentry^.reference.index,locentry^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,locentry^.size,ioffset,selfref)
                else
                  begin
                    list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    load_ioffset_into_r4;
                    a_op_reg_ref(list,OP_SUB,locentry^.size,NR_R4,selfref);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end
            else
              internalerror(2003091804);
          end;
          locentry:=locentry^.next;
        end;
    end;
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    { Wrapper around the inherited handle_load_store: references the
      checks below reject for the requested access are first turned into a
      plain [register] reference by loading their address into a new
      register. }
    var
      href : treference;
      addrreg : TRegister;
      word_access,
      byte_access,
      half_access,
      sp_involved,
      need_address : boolean;
    begin
      { classify the access; it can be spelled as LDR/STR plus postfix or
        as a dedicated opcode without postfix }
      word_access:=((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      byte_access:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_B)) or
                   (((op=A_LDRB) or (op=A_STRB)) and (oppostfix=PF_None));
      half_access:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_H)) or
                   (((op=A_LDRH) or (op=A_STRH)) and (oppostfix=PF_None));
      sp_involved:=(ref.base=NR_STACK_POINTER_REG) or
                   (ref.index=NR_STACK_POINTER_REG);

      need_address:=
        { LDR/STR limitations }
        (word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         (sp_involved or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (sp_involved or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word loads with a large offset }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any other LDR with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      href:=ref;
      if need_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(href,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;
      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    { Emits the two-operand Thumb instruction for "dst := dst <op> src"
      (for OP_NEG/OP_NOT: "dst := <op> src") and lets maybeadjustresult
      post-process dst afterwards.

      OP_DIV/OP_IDIV are rejected with an internal error; OP_ROL is only
      supported for 32 bit operands. }
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { there is no rotate left: simulate ROL by rotating right by
              32-src, which is calculated into a temporary register }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { the rotate count is the computed 32-src in tmpreg, not src:
              rotating by src itself would perform a ROR instead of a ROL }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            { all remaining operations map directly onto one instruction;
              opcode and postfix come from the lookup tables }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    { Emits "dst := dst <op> a" for Thumb. ADD/SUB with a constant accepted
      by is_thumb_imm become a single instruction, a few trivial
      multiplications/divisions and ANDs are special-cased, shifts use the
      immediate form and everything else goes through a temporary register.
      The code between DUMMY conditionals is not compiled unless DUMMY is
      defined. }
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { an addition/subtraction of a constant whose negation is a valid
        immediate is turned into the opposite operation }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
          { reporting an overflow location is still disabled here:
              //!!! ovloc.loc:=LOC_FLAGS;
              //!!! OP_ADD: ovloc.resflags:=F_CS;
              //!!! OP_SUB: ovloc.resflags:=F_CC;
            would apply when cgsetflags is set and size is OS_8, OS_16 or
            OS_32 }
        end
      { there could be added some more sophisticated optimizations }
      else if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
        a_load_reg_reg(list,size,size,dst,dst)
      else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
        a_load_const_reg(list,size,0,dst)
      else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
        a_op_reg_reg(list,OP_NEG,size,dst,dst)
      { we do this here instead in the peephole optimizer because
        it saves us a register }
{$ifdef DUMMY}
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
        a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
      { for example : b=a*5 -> b=a*4+a with add instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{roozbeh does this ever happen?}
            internalerror(2003082903);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
        end
      { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{does this ever happen?}
            internalerror(2012051802);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
        end
      else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
        begin
          { nothing to do on success }
        end
{$endif DUMMY}
      { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
        Just using mov x, #0 might allow some easier optimizations down the line. }
      else if (op = OP_AND) and (dword(a)=0) then
        list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
      { x := x AND $FFFFFFFF leaves the register unchanged, so no
        instruction is emitted for it }
      else if (op = OP_AND) and (not(dword(a))=0) then
        // do nothing
      { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
        broader range of shifterconstants.}
{$ifdef DUMMY}
      else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
        list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
      else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
        end
      else if (op in [OP_ADD, OP_SUB, OP_OR]) and
              not(cgsetflags or setflags) and
              split_into_shifter_const(a, imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
        end
{$endif DUMMY}
      else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
        { shifts take the count as immediate operand }
        list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a))
      else
        begin
          { generic fallback: materialise the constant in a register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg(list,op,size,constreg,dst);
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    { Emits "dst := src <op> a". Adding a positive multiple of four of at
      most 1020 to R13 with a destination other than R13 is emitted as one
      three-operand ADD; every other case is left to the inherited
      implementation. }
    var
      sp_relative_add : boolean;
    begin
      sp_relative_add:=(op=OP_ADD) and
                       (src=NR_R13) and
                       (dst<>NR_R13) and
                       ((a mod 4)=0) and
                       (a>0) and
                       (a<=1020);
      if not(sp_relative_add) then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    { Converts the flag condition f into a value in reg using branches:
      reg receives 1 when the condition holds and 0 otherwise. The flags
      register is released once the value has been produced. }
    var
      lbl_true,lbl_done : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(lbl_true);
      current_asmdata.getjumplabel(lbl_done);

      { conditional branch to the "condition holds" part }
      condjump:=setcondition(taicpu.op_sym(A_B,lbl_true),flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition does not hold: reg:=0, then skip the other assignment }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,lbl_done));

      { condition holds: reg:=1 }
      cg.a_label(list,lbl_true);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lbl_done);
    end;
  procedure tthumb2cgarm.init_register_allocators;
    { Creates the Thumb-2 register allocators: integer registers, FPA
      registers when the selected FPU type has them, and the multimedia
      registers through init_mmregister_allocator. }
    begin
      inherited init_register_allocators;

      { R14 is part of both sets: it is currently always saved, so it can
        be used like any other register }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      { an FPU register allocator is only created for FPA capable FPU types }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and multimedia register allocators and then lets the
    inherited implementation do its own clean-up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { release in the order: integer, floating point, multimedia }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call (BLX) through reg and records in the current procedure
    info that the procedure performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinstr);
      { A former sanity check raised internalerror(2003060703) when pi_do_call
        was not yet set at this point. It is disabled because the compiler does
        not properly set this flag anymore in pass 1, and for now we only need
        it after pass 2 (I hope) (JM). The flag is simply set here instead. }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg.

    Strategies, tried in this order:
      1. MOV  reg,#a       if a passes is_thumb32_imm
      2. MVN  reg,#not(a)  if not(a) passes is_thumb32_imm
      3. MOVW reg,#a       if a fits into the lower 16 bits
      4. LDR  reg,<label>  with the 32 bit value stored in the procedure's local data

    Only 8, 16 and 32 bit integer sizes are accepted; anything else raises
    internalerror(2002090909). }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value in the local data of the
        procedure and load it relative to the program counter }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.

    If the source is at least as large as the destination, only tosize is loaded.
    References whose alignment (1 or 2) is smaller than the size to load are read
    in smaller pieces which are combined with shifted ORR instructions; the piece
    order depends on the target endianness. All other loads are passed to
    handle_load_store with the matching LDR postfix. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      ldrpostfix : toppostfix;
      pieceref : treference;
      piecereg,addrreg : tregister;
      lslop : tshifterop;
      step,
      piece : integer;
      complexref : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix for the size that is really read }
      case FromSize of
        OS_8:
          ldrpostfix:=PF_B;
        OS_S8:
          ldrpostfix:=PF_SB;
        OS_16:
          ldrpostfix:=PF_H;
        OS_S16:
          ldrpostfix:=PF_SH;
        OS_32,
        OS_S32:
          ldrpostfix:=PF_None;
        else
          InternalError(2003082913);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,ldrpostfix,reg,ref)
      else
        begin
          { direction in which the offset moves from the least significant piece
            towards the most significant one }
          if target_info.endian<>endian_big then
            step:=1
          else
            step:=-1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr;
                  sometimes the compiler reused registers, so a reference using
                  reg itself is treated as complicated as well }
                complexref:=
                  assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-255) or
                  (ref.offset>4094) or
                  (reg=ref.index) or
                  (reg=ref.base);
                if complexref then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(pieceref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  pieceref:=ref;
                { on big endian targets the low byte is stored last }
                if target_info.endian=endian_big then
                  inc(pieceref.offset,1);
                shifterop_reset(lslop);
                lslop.shiftmode:=SM_LSL;
                lslop.shiftimm:=8;
                piecereg:=getintregister(list,OS_INT);
                { low byte directly into the destination }
                a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                inc(pieceref.offset,step);
                { high byte; loaded sign extended for a signed 16 bit value }
                if FromSize<>OS_16 then
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,pieceref,piecereg)
                else
                  a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,piecereg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,piecereg,lslop));
              end;
            OS_32,OS_S32:
              begin
                piecereg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr;
                  sometimes the compiler reused registers, so a reference using
                  reg itself is treated as complicated as well }
                complexref:=
                  assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-255) or
                  (ref.offset>4092) or
                  (reg=ref.index) or
                  (reg=ref.base);
                if complexref then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(pieceref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  pieceref:=ref;
                shifterop_reset(lslop);
                lslop.shiftmode:=SM_LSL;
                if ref.alignment<>2 then
                  begin
                    { byte aligned: four byte loads }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                    { bytes 1..3 are merged at bit positions 8, 16 and 24 }
                    for piece:=1 to 3 do
                      begin
                        inc(pieceref.offset,step);
                        a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,piecereg);
                        lslop.shiftimm:=8*piece;
                        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,piecereg,lslop));
                      end;
                  end
                else
                  begin
                    { halfword aligned: two 16 bit loads }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,reg);
                    inc(pieceref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,piecereg);
                    lslop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,piecereg,lslop));
                  end;
              end
            else
              handle_load_store(list,A_LDR,ldrpostfix,reg,ref);
          end;
        end;

      { a signed byte that ends up in an unsigned 16 bit destination needs an
        additional 16 -> 32 bit register move }
      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Two-operand register operation: dst := dst <op> src.

    Only OP_NOT is handled here: an MVN is emitted and, for 8 and 16 bit sizes,
    the result is zero or sign extended again with UXTB/SXTB/UXTH/SXTH.
    All other operations go to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op <> OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { bring the upper bits back in line with the operand size }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
        OS_32,
        OS_S32:
          { full register width, nothing to adjust }
          ;
        else
          internalerror(2019050916);
      end;
    end;
  { Three-operand operation with a constant: dst := src <op> a.

    list     - assembler list the instructions are appended to
    op       - operation to perform
    size     - operand size; used for temporaries and the final result adjustment
    a        - constant operand
    src, dst - source and destination register
    setflags - if true (or if cgsetflags is set) ADD/SUB are emitted with the
               S postfix so that the flags are updated
    ovloc    - receives LOC_FLAGS plus the flag to test for unsigned sizes when
               the flags were set by an ADD/SUB, LOC_VOID otherwise

    An ADD/SUB whose negated constant is a shifter constant is turned into the
    opposite operation. This is not done when overflow checking (setflags) is
    requested: ovloc.resflags is chosen from the operation that is finally
    emitted, and the carry condition that signals an unsigned overflow of the
    original operation is the opposite of the one selected for the swapped one. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
    begin
      ovloc.loc:=LOC_VOID;
      { swap ADD <-> SUB if the negated constant can be encoded; never when the
        caller wants to check the flags afterwards, see the header comment }
      if (a<>-2147483648) and not(setflags) and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          { shifts and rotates by a constant become a MOV with a shifter operand;
            a count of 0 is a plain register move }
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              if a<>0 then
                begin
                  shifterop_reset(so);
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              if a<>0 then
                begin
                  { rotate left by a = rotate right by 32-a }
                  shifterop_reset(so);
                  so.shiftmode:=SM_ROR;
                  so.shiftimm:=32-a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              if a<>0 then
                begin
                  shifterop_reset(so);
                  so.shiftmode:=SM_ROR;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              shifterop_reset(so);
              if a<>0 then
                begin
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              if a<>0 then
                begin
                  shifterop_reset(so);
                  so.shiftmode:=SM_ASR;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          else
            { ADD/SUB constants outside 0..4095 are loaded into a register first,
              everything else uses the constant directly }
            if (op in [OP_SUB, OP_ADD]) and
               ((a < 0) or
                (a > 4095)) then
              begin
                tmpreg:=getintregister(list,size);
                a_load_const_reg(list, size, a, tmpreg);
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end
            else
              begin
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end;
            { for unsigned sizes report which flag the caller has to test }
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                  else
                    ;
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(2003082911);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(2012051803);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
          else if (op = OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
          else if (op = OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          else if (op = OP_AND) and (a = $FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { the bits to clear form one continuous run: copy and clear them with BFC }
          else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
            begin
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
            end
          else
            begin
              { generic fallback: constant into a register, then register operation }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 opcode emitted for a generic code generator operation, indexed by TOpCG.
      A_NONE marks operations without an entry in this table.
      NOTE(review): the per-entry labels assume the declaration order of TOpCG
      in cgbase - confirm against that unit. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop = (
      A_NONE,   { OP_NONE }
      A_MOV,    { OP_MOVE }
      A_ADD,    { OP_ADD  }
      A_AND,    { OP_AND  }
      A_UDIV,   { OP_DIV  }
      A_SDIV,   { OP_IDIV }
      A_MUL,    { OP_IMUL }
      A_MUL,    { OP_MUL  }
      A_NONE,   { OP_NEG  }
      A_MVN,    { OP_NOT  }
      A_ORR,    { OP_OR   }
      A_ASR,    { OP_SAR  }
      A_LSL,    { OP_SHL  }
      A_LSR,    { OP_SHR  }
      A_SUB,    { OP_SUB  }
      A_EOR,    { OP_XOR  }
      A_NONE,   { OP_ROL  }
      A_ROR     { OP_ROR  }
    );
  { Three-register operation: dst := src2 <op> src1.

    list       - assembler list the instructions are appended to
    op         - operation; OP_NEG and OP_NOT are rejected with an internal error
    size       - operand size; rotates accept 32 bit sizes only
    src1, src2 - source registers; neither of them is modified
    dst        - destination register
    setflags   - if true (or if cgsetflags is set) the instruction updates the
                 flags; multiplications then use SMULL/UMULL and compare the
                 upper half of the product to detect an overflow
    ovloc      - LOC_FLAGS/F_NE after an overflow checked multiplication,
                 LOC_VOID otherwise }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
         OP_NEG,OP_NOT:
           internalerror(200308286);
         OP_ROL:
           begin
             if not(size in [OS_32,OS_S32]) then
               internalerror(2008072806);
             { simulate ROL by ror'ing 32-value; the rotate count is calculated
               in a temporary register so that src1 keeps its value }
             tmpreg:=getintregister(list,OS_32);
             list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
             list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
             list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
           end;
         OP_ROR:
           begin
             if not(size in [OS_32,OS_S32]) then
               internalerror(2008072802);
             list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
           end;
         OP_IMUL,
         OP_MUL:
           begin
             if cgsetflags or setflags then
               begin
                 { 32x32 -> 64 bit multiplication, upper half in overflowreg }
                 overflowreg:=getintregister(list,size);
                 if op=OP_IMUL then
                   asmop:=A_SMULL
                 else
                   asmop:=A_UMULL;
                 { the arm doesn't allow that rd and rm are the same }
                 if dst=src2 then
                   begin
                     if dst<>src1 then
                       list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                     else
                       begin
                         { all three registers are the same: multiply a copy }
                         tmpreg:=getintregister(list,size);
                         a_load_reg_reg(list,size,size,src2,tmpreg);
                         list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                       end;
                   end
                 else
                   list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                 a_reg_alloc(list,NR_DEFAULTFLAGS);
                 if op=OP_IMUL then
                   begin
                     { signed: the upper half must equal the sign extension of the lower half }
                     shifterop_reset(so);
                     so.shiftmode:=SM_ASR;
                     so.shiftimm:=31;
                     list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                   end
                 else
                   { unsigned: the upper half must be zero }
                   list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                 ovloc.loc:=LOC_FLAGS;
                 ovloc.resflags:=F_NE;
               end
             else
               begin
                 { the arm doesn't allow that rd and rm are the same }
                 if dst=src2 then
                   begin
                     if dst<>src1 then
                       list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                     else
                       begin
                         { all three registers are the same: multiply a copy }
                         tmpreg:=getintregister(list,size);
                         a_load_reg_reg(list,size,size,src2,tmpreg);
                         list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                       end;
                   end
                 else
                   list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
               end;
           end;
         else
           begin
             if cgsetflags or setflags then
               a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
             { R13 is not allowed for certain instruction operands }
             if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
               begin
                 if getsupreg(dst)=RS_R13 then
                   begin
                     tmpreg:=getintregister(list,OS_INT);
                     a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                     dst:=tmpreg;
                   end;
                 if getsupreg(src1)=RS_R13 then
                   begin
                     tmpreg:=getintregister(list,OS_INT);
                     a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                     src1:=tmpreg;
                   end;
               end;
{$endif}
             { plain three-register instruction taken from the Thumb-2 opcode table }
             list.concat(setoppostfix(
               taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
           end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Turns the condition described by f into a 0/1 value in reg with an
    if-then-else block:
        ITE       <cond>
        MOV<cond> reg,#1
        MOV<inverse cond> reg,#0 }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      itblock,
      storeone,
      storezero : taicpu;
    begin
      itblock:=taicpu.op_cond(A_ITE, flags_to_cond(f));
      list.concat(itblock);

      storeone:=taicpu.op_reg_const(A_MOV,reg,1);
      list.concat(setcondition(storeone,flags_to_cond(f)));

      storezero:=taicpu.op_reg_const(A_MOV,reg,0);
      list.concat(setcondition(storezero,inverse_cond(flags_to_cond(f))));
    end;
  { Generates the procedure entry code.

    Nothing is emitted when nostackframe is set. Otherwise the code
      1. copies the stack pointer to R12 if a separate frame pointer is used,
      2. pushes the used non-volatile integer registers (plus R14 and the frame
         pointer where required) with STMFD,
      3. sets up the frame pointer from R12,
      4. reserves the local stack area, padded to the maximum local alignment,
      5. stores the used non-volatile FPA registers with SFM if the FPU has FPA. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      saveref : treference;
      shiftdummy : byte;
      firstfpureg,lastfpureg,
      regno : byte;
      savedregs : tcpuregisterset;
      misalignment : pint;
      separate_fp,
      has_fpa : boolean;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      misalignment:=0;
      if nostackframe then
        exit;

      separate_fp:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;
      has_fpa:=FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype];

      firstfpureg:=RS_NO;
      lastfpureg:=RS_NO;
      if has_fpa then
        begin
          { determine the range of floating point registers to save;
            every saved register accounts for 12 bytes }
          for regno:=RS_F0 to RS_F7 do
            if regno in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfpureg=RS_NO then
                  firstfpureg:=regno;
                lastfpureg:=regno;
                inc(misalignment,12);
              end;
        end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if separate_fp then
        begin
          { keep the incoming stack pointer in R12 }
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(saveref,4,[]);
      saveref.index:=NR_STACK_POINTER_REG;
      saveref.addressmode:=AM_PREINDEXED;
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if separate_fp then
        savedregs:=savedregs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (savedregs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(savedregs,RS_R14);
      if savedregs<>[] then
        begin
          { every pushed register takes 4 bytes }
          for regno:=RS_R0 to RS_R15 do
            if (regno in savedregs) then
              inc(misalignment,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,saveref,R_INTREGISTER,R_SUBWHOLE,savedregs),PF_FD));
        end;

      if separate_fp then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      misalignment:=misalignment mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((misalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { pad the locals so that the stack pointer ends up aligned }
          localsize:=align(localsize+misalignment,current_settings.alignment.localalignmax)-misalignment;
          if is_shifter_const(localsize,shiftdummy) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size not encodable as immediate: go through R12 }
              if not(separate_fp) then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if has_fpa and (firstfpureg<>RS_NO) then
        begin
          { store the floating point registers at floatregstart relative to
            the frame pointer; large distances are computed in R12 first }
          reference_reset(saveref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              saveref.base:=NR_R12;
            end
          else
            begin
              saveref.base:=current_procinfo.framepointer;
              saveref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfpureg,R_SUBWHOLE),
            lastfpureg-firstfpureg+1,saveref));
        end;
    end;
  { Generates the procedure exit code.

    No code is emitted for procedures marked as noreturn. Without a stack frame
    a plain "BX R14" is generated. Otherwise the code
      1. reloads the used non-volatile FPA registers with LFM if the FPU has FPA,
      2. releases the local stack area,
      3. pops the saved integer registers with LDMFD (R15 takes the place of
         R14 in the register set), or emits "BX R14" if there is nothing to pop. }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      restoreref : treference;
      firstfpureg,lastfpureg,
      regno : byte;
      shiftdummy : byte;
      restoredregs : tcpuregisterset;
      LocalSize : longint;
      misalignment : pint;
    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      misalignment:=0;
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        begin
          { find the range of floating point registers that was saved }
          firstfpureg:=RS_NO;
          lastfpureg:=RS_NO;
          for regno:=RS_F0 to RS_F7 do
            if regno in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfpureg=RS_NO then
                  firstfpureg:=regno;
                lastfpureg:=regno;
                { floating point register space is already included in
                  localsize below by calc_stackframe_size, so the 12 bytes per
                  register are not added to the misalignment here }
              end;

          if firstfpureg<>RS_NO then
            begin
              { restore floating point registers }
              reference_reset(restoreref,4,[]);
              if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  restoreref.base:=NR_R12;
                end
              else
                begin
                  restoreref.base:=current_procinfo.framepointer;
                  restoreref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfpureg,R_SUBWHOLE),
                lastfpureg-firstfpureg+1,restoreref));
            end;
        end;

      { registers to pop; the slot of R14 is loaded into R15 }
      restoredregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (restoredregs<>[]) then
        begin
          exclude(restoredregs,RS_R14);
          include(restoredregs,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        restoredregs:=restoredregs+[RS_FRAME_POINTER_REG,RS_R15];

      { every popped register takes 4 bytes }
      for regno:=RS_R0 to RS_R15 do
        if (regno in restoredregs) then
          inc(misalignment,4);
      misalignment:=misalignment mod current_settings.alignment.localalignmax;

      LocalSize:=current_procinfo.calc_stackframe_size;
      if (LocalSize<>0) or
         ((misalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+misalignment,current_settings.alignment.localalignmax)-misalignment;
          if is_shifter_const(LocalSize,shiftdummy) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size not encodable as immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if restoredregs<>[] then
        begin
          reference_reset(restoreref,4,[]);
          restoreref.index:=NR_STACK_POINTER_REG;
          restoreref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,restoreref,R_INTREGISTER,R_SUBWHOLE,restoredregs),PF_FD));
        end
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14));
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" and first rewrites
    ref into a form the instruction can address.

    Rewrites done here:
      - a reference with only an index gets that index as base,
      - symbols that are not PC relative, out of range offsets and references
        without any register are resolved through a temporary register,
      - base + index + offset is reduced to two components,
      - a PC base combined with a shifted index is replaced by a copy of PC,
      - floating point/VFP accesses get their index folded into the base.

    Returns the reference that was finally used in the instruction. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      scratch : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      is_fpu_op,
      needs_fixup : boolean;
    begin
      scratch:=NR_NO;
      is_fpu_op:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      needs_fixup:=
        (assigned(ref.symbol) and
         not(is_pc(ref.base)) and
         not(is_pc(ref.index))
        ) or
        { [#xxx] isn't a valid address operand }
        ((ref.base=NR_NO) and (ref.index=NR_NO)) or
        { a lower bound of -4095 was used here once; it is -255 now }
        (ref.offset<-255) or
        (ref.offset>4095) or
        ((oppostfix in [PF_SB,PF_H,PF_SH]) and
         ((ref.offset<-255) or
          (ref.offset>255)
         )
        ) or
        (is_fpu_op and
         ((ref.offset<-1020) or
          (ref.offset>1020) or
          ((abs(ref.offset) mod 4)<>0) or
          { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
          assigned(ref.symbol)
         )
        );

      if needs_fixup then
        begin
          reference_reset(poolref,4,[]);

          { load symbol }
          scratch:=getintregister(list,OS_INT);
          if not(assigned(ref.symbol)) then
            a_load_const_reg(list,OS_ADDR,ref.offset,scratch)
          else
            begin
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;

              { the kind of constant depends on the addressing type of the reference }
              case ref.refaddr of
                addr_gottpoff:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsgd:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsdesc:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset));
                addr_tpoff:
                  begin
                    if assigned(ref.relsymbol) or (ref.offset<>0) then
                      Internalerror(2019092807);
                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
                  end;
                else
                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              end;

              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,scratch,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end;

          { merge the temporary register into the reference }
          if (ref.base=NR_NO) then
            ref.base:=scratch
          else if ref.index=NR_NO then
            begin
              ref.index:=scratch;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end
          else
            begin
              list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
              ref.base:=scratch;
            end;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base, index and offset together cannot be encoded: fold the offset into the base }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if scratch=NR_NO then
            begin
              scratch:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,scratch);
              ref.base:=scratch;
            end
          else
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,scratch,scratch);
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          scratch:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, scratch, NR_R15));
          ref.base := scratch;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if is_fpu_op and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(2003091202);
          if scratch=NR_NO then
            begin
              scratch:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,ref.index));
              ref.base:=scratch;
              ref.index:=NR_NO;
            end
          else if ref.base=scratch then
            begin
              { the temporary is the base: add or subtract the index in place }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,scratch,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,scratch,ref.index));
              ref.index:=NR_NO;
            end
          else
            begin
              { otherwise the temporary has to be the index }
              if ref.index<>scratch then
                internalerror(2004031602);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,ref.base,scratch))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
              ref.base:=scratch;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Emits a move between two multimedia registers.

    list     - instruction list that receives the generated code
    fromsize - size of the value held in reg1
    tosize   - size the value should have in reg2
    reg1     - source register
    reg2     - destination register
    shuffle  - not inspected by this implementation

    Only the OS_F32 -> OS_F32 combination produces code: a VMOV with the F32
    postfix, which is also handed to add_move_instruction.  Every other size
    combination (including OS_F64 -> OS_F64 and OS_F32 -> OS_F64) emits
    nothing at all. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      { NOTE(review): anything that is not a single -> single move is silently
        dropped here; confirm that callers never depend on those cases }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1),PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads the multimedia register reg from the memory location ref.

    The work is delegated to handle_load_store using the VLDR opcode without
    a postfix; the reference value it returns is discarded.  fromsize, tosize
    and shuffle are not inspected by this implementation. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VLDR,
        PF_None,
        reg,
        ref);
    end;
  { Stores the multimedia register reg to the memory location ref.

    The work is delegated to handle_load_store using the VSTR opcode without
    a postfix; the reference value it returns is discarded.  fromsize, tosize
    and shuffle are not inspected by this implementation. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      { VSTR cannot generate an FPU exception, so we do not need a check here }
      handle_load_store(
        list,
        A_VSTR,
        PF_None,
        reg,
        ref);
    end;
  { Copies the integer register intreg into the multimedia register mmreg.

    Only a destination size of OS_F32 is supported, in which case a single
    VMOV mmreg,intreg is emitted; any other tosize raises internal error
    2012100813.  fromsize and shuffle are not inspected. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { a check for shuffle=nil is intentionally not performed }
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Copies the multimedia register mmreg into the integer register intreg.

    Only a source size of OS_F32 is supported, in which case a single
    VMOV intreg,mmreg is emitted; any other fromsize raises internal error
    2012100814.  tosize and shuffle are not inspected. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      { a check for shuffle=nil is intentionally not performed }
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          { VMOV cannot generate an FPU exception, so we do not need a check here }
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    list   - instruction list that receives the generated code
    op     - operation to perform
    size   - operand size, only forwarded to the inherited implementation
    regsrc - source register pair
    regdst - destination register pair

    OP_NEG is handled locally: the low word is computed with a flag setting
    RSB from zero, then the high word is computed with SBC from a temporary
    register that holds zero.  All other operations are passed on to the
    inherited a_op64_reg_reg. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst)
      else
        begin
          { the flags carry the borrow from the low word to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          { SBC is emitted in its register form here, so the zero operand
            is materialised in a temporary register first }
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end;
    end;
  { 64 bit register/register operation for Thumb.

    list   - instruction list that receives the generated code
    op     - operation to perform; OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR,
             OP_ADD and OP_SUB are supported, anything else raises internal
             error 2003083105
    size   - not inspected by this implementation
    regsrc - source register pair
    regdst - destination register pair (also the left operand for the
             two-operand forms)

    For the operations that propagate a carry/borrow from the low to the
    high word (OP_NEG, OP_ADD, OP_SUB) the default flags register is marked
    allocated for the whole sequence and released again afterwards.

    Note for OP_NEG: regdst is zeroed before regsrc is read, so the two
    register pairs must not overlap. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { the MOVs below carry the S postfix, so the flags have to be
              marked as in use before they are emitted }
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            { dst := 0 - src, the borrow travels through the flags }
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { purely bitwise: both halves are independent of each other }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, as the OP_NEG case does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, as the OP_NEG case does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg op value.

    list  - instruction list that receives the generated code
    op    - operation to perform; OP_AND, OP_OR, OP_XOR, OP_ADD and OP_SUB
            are supported, anything else raises internal error 2003083106
    size  - not inspected by this implementation
    value - 64 bit constant operand, split into its low and high 32 bit
            halves
    reg   - register pair that is both left operand and destination

    For OP_ADD/OP_SUB the low half is applied directly as an immediate when
    it lies in 0..255, otherwise it is loaded into a temporary register
    first.  The high half is always loaded into a temporary register and
    applied with ADC/SBC.

    The high half constant is loaded BEFORE the low word operation so that
    no constant-loading code sits between the instruction producing the
    carry and the ADC/SBC consuming it.
    NOTE(review): a_load_const_reg is not visible from here; the assumption
    is that it may emit flag-changing instructions on Thumb - confirm. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      loreg, hireg : tregister;
      lovalue, hivalue : aint;
    begin
      { explicit casts keep both halves in the signed range expected by the
        code generator helpers }
      lovalue:=aint(lo(value));
      hivalue:=aint(hi(value));
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { purely bitwise: both halves are independent of each other }
            cg.a_op_const_reg(list,op,OS_32,lovalue,reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,hivalue,reg.reghi);
          end;
        OP_ADD:
          begin
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hivalue,hireg);
            if (lovalue>=0) and (lovalue<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,lovalue));
              end
            else
              begin
                loreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,lovalue,loreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,loreg));
              end;
            { consumes the carry of the low word addition }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,reg.reghi,hireg),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hivalue,hireg);
            if (lovalue>=0) and (lovalue<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,lovalue));
              end
            else
              begin
                loreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,lovalue,loreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,loreg));
              end;
            { consumes the borrow of the low word subtraction }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,reg.reghi,hireg),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the code generator objects matching the selected
    instruction set and stores them in cg and cg64.

    GenerateThumb2Code is tested first, then GenerateThumbCode; if neither
    holds the plain ARM generators are created.  casmoptimizer is assigned
    for the Thumb-2 and ARM variants only; the Thumb variant leaves it
    untouched. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { neither Thumb variant selected: classic ARM code generation }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5075. end.