cgcpu.pas 224 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm is shared between all arm architectures.

    It derives from tcg and declares the overrides of the generic code
    generator interface plus a number of ARM specific helpers.  tcgarm and
    tthumbcgarm (both declared in this unit) derive from it. }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;
    { loads the constant a into the (simple) parameter location paraloc }
    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  protected
    { copies (part of) ref into the parameter location reference paralocref;
      handles 64 bit floats split into 32 bit locations specially }
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
    { creates rg[R_MMREGISTER] depending on the capabilities of the selected fpu }
    procedure init_mmregister_allocator;
  public
    { loads the address of r into the (simple) parameter location paraloc }
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
    { calls: by symbol name and through a register }
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;
    { move instructions }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    { internal variants of the load/store routines which return a reference;
      tcgarm.a_op_const_ref reuses the reference returned by the load for
      the store of the result }
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
    { comparison operations }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    { jumps }
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
    { procedure entry/exit and related helpers }
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;
    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
    { block copies; g_concatcopy_move and g_concatcopy_internal are ARM
      specific helpers (not overrides) }
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    { overflow checking }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;
    { ARM specific helpers (not overrides) }
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    { virtual: overridden by tthumbcgarm and tthumb2cgarm }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
    { multimedia (vfp/neon) register moves and operations }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
    { try to generate an optimized 32 bit multiplication by the constant a,
      returns true if the optimized code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    { clear out potential overflow bits which an 8 or 16 bit operation
      left in the upper 24/16 bits of the register }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
    procedure g_maybe_tls_init(list : TAsmList); override;
  end;
  { tcgarm is shared between normal arm and thumb-2.

    It adds the integer arithmetic and constant/memory load routines on top
    of tbasecgarm; tarmcgarm and tthumb2cgarm derive from it. }
  tcgarm = class(tbasecgarm)
    { two operand forms; a_op_const_reg forwards to a_op_const_reg_reg }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    { three operand forms; both forward to the corresponding
      *_checkoverflow variant with setflags=false }
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    { loads of constants and memory operands into integer registers }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
    { subtracts ioffset from the self parameter of procdef }
    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
    {Multiply two 32-bit registers into lo and hi 32-bit registers}
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { normal arm cg: only adds the set up and tear down of the register
    allocators to tcgarm }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { 64 bit cg for all arm flavours; declares no members of its own and only
    serves as common ancestor of tcg64farm and tthumbcg64farm }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm is shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { 64 bit arithmetic on register pairs }
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    { variants which additionally take setflags and report the overflow location in ovloc }
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    { moves between a 64 bit integer register pair and a multimedia register }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { 64 bit cg for normal arm; adds nothing to tcg64farm }
  tarmcg64farm = class(tcg64farm)
  end;
  { code generator for thumb; derives directly from tbasecgarm (not from
    tcgarm) and therefore supplies its own arithmetic and load routines }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
    { replaces the virtual tbasecgarm.handle_load_store }
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { 64 bit cg for thumb; overrides only the two operand 64 bit operations }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { code generator for thumb-2; derives from tcgarm and overrides the
    routines listed below }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    { replaces the virtual tbasecgarm.handle_load_store }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
    { multimedia register moves }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { 64 bit cg for thumb-2; overrides only a_op64_reg_reg of tcg64farm }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { maps a generic comparison (topcmp) to the ARM condition code used for it;
      the entries are listed in the declaration order of topcmp }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
    C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
    { NOTE(review): presumably the size in bytes of a stack page on windows
      targets; its users are not visible here - confirm }
    winstackpagesize = 4096;
  { returns the instruction postfix matching the float type described by def }
  function get_fpu_postfix(def : tdef) : toppostfix;
  { NOTE(review): presumably instantiates the code generator objects for the
    selected instruction set; the implementation is not visible here }
  procedure create_codegen;
  176. implementation
  177. uses
  178. globals,verbose,systems,cutils,
  179. aopt,aoptcpu,
  180. fmodule,
  181. symconst,symsym,symtable,
  182. tgobj,
  183. procinfo,cpupi,
  184. paramgr;
  185. { Range check must be disabled explicitly as conversions between signed and unsigned
  186. 32-bit values are done without explicit typecasts }
  187. {$R-}
  { Returns the instruction postfix which belongs to the float type
    described by def:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E
    Any other float type and any def which is not a floatdef triggers an
    internal error. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real: result:=PF_S;
          s64real: result:=PF_D;
          s80real: result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators for the normal arm code generator.

    The order of the registers inside the sets passed to the allocators is
    kept exactly as it was, only the selection logic is written as a flat
    if/else if chain. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we always save R14, so we can use it }
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure, so it can be
          handed to the allocator as well }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { same set without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { the FPA registers get an allocator only if the fpu has them }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, fpu and multimedia register allocators and then
    calls the inherited clean up.

    NOTE(review): init_register_allocators creates rg[R_FPUREGISTER] only
    for fpus with FPUARM_HAS_FPA, while it is freed here unconditionally;
    this presumably relies on the entry being nil otherwise - confirm. }
  procedure tarmcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;
      inherited done_register_allocators;
    end;
  { Loads the constant a into the integer register reg.

    The cheapest applicable encoding is used, tried in this order:
      1. mov reg,#a            (a is a shifter constant)
      2. mvn reg,#not(a)       (not(a) is a shifter constant)
      3. mov + orr             (a splits into two shifter constants)
      4. mvn + bic             (not(a) splits into two shifter constants)
      5. ldr reg,[pc,...]      (constant stored in the local data of the
                                current procedure)
    Sizes other than the 8, 16 and 32 bit integer sizes cause an internal
    error. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      poollabel : tasmlabel;
      poolref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      { single mov }
      if is_shifter_const(a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { single mvn }
      if is_shifter_const(not(a),rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: place the value behind a fresh label in the local data
        of the procedure and load it pc relative }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { the label just added is the last entry of aktlocaldata }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from the memory reference Ref into the
    integer register reg.

    Normally a single ldr with the matching postfix is emitted through
    handle_load_store.  If the reference is aligned to only 1 or 2 bytes and
    this is less than the size of the access, or if a halfword is loaded on
    a cpu without CPUARM_HAS_ALL_MEM, the value is assembled from several
    narrower loads which are combined with orr and an lsl shifter operand.

    list     - assembler list receiving the instructions
    fromsize - size of the value in memory; reduced to tosize if it is not
               smaller than tosize
    tosize   - requested size of the value in reg
    Ref      - memory location to load from
    reg      - destination register }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg,tmpreg2 : tregister;
      so : tshifterop;
      dir : integer;
    begin
      { never load more than the destination needs }
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        { select the ldr postfix for the (possibly reduced) source size }
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308297);
      end;
      { without CPUARM_HAS_ALL_MEM a signed byte is loaded like an unsigned
        one; the sign extension is done at the end of this routine }
      if (fromsize=OS_S8) and
        (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
        oppostfix:=PF_B;
      { insufficiently aligned reference or halfword access on a cpu without
        CPUARM_HAS_ALL_MEM: compose the value from narrower loads }
      if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
        ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
        (oppostfix in [PF_SH,PF_H])) then
        begin
          { dir is the offset step from one part to the next: the parts are
            read starting with the least significant one, which is stored
            last on big endian targets }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-4095) or
                  (ref.offset>4094) or
                  { sometimes the compiler reused registers }
                  (reg=ref.index) or
                  (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;
                { start at the least significant byte }
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
                tmpreg:=getintregister(list,OS_INT);
                { low byte directly into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { high byte into tmpreg; loaded as signed byte for OS_S16 so
                  that the combined value is sign extended }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                { reg:=reg or (tmpreg shl 8) }
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-4095) or
                  (ref.offset>4092) or
                  { sometimes the compiler reused registers }
                  (reg=ref.index) or
                  (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;
                shifterop_reset(so);so.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfword loads: low half into reg, high half into
                      tmpreg, then reg:=reg or (tmpreg shl 16) }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end
                else
                  begin
                    { four byte loads: byte 0 goes into reg, bytes 1 and 2
                      into tmpreg and tmpreg2; tmpreg is reused for byte 3
                      once byte 1 has been merged into reg }
                    tmpreg2:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
                    so.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
                    so.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end;
              end
            else
              { remaining sizes are loaded with a single instruction }
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,oppostfix,reg,ref);
      { fix up signed bytes: do the sign extension the load could not do on
        cpus without CPUARM_HAS_ALL_MEM, or cut a sign extended byte down to
        16 bit if the destination is OS_16 }
      if (fromsize=OS_S8) and
        (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
        a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
      else if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Subtracts ioffset from the self parameter of procdef.

    The caller side parameter locations of the 'self' parameter are walked
    and every register or stack location is decremented by ioffset.  If
    ioffset cannot be encoded as shifter constant, it is loaded into R12
    first.  A missing 'self' parameter or a location which is neither a
    register nor a reference causes an internal error. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      rot : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(2003052503);
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,rot) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,rot) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as parameter.

    paraloc must be a simple location.  For a register location the constant
    is loaded into the parameter register, for a reference location it is
    stored at base register + offset taken from the parameter location.  Any
    other kind of location causes an internal error. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackref: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { build the reference of the stack slot from the paraloc }
            reference_reset(stackref,paraloc.alignment,[]);
            stackref.base:=paraloc.location^.reference.index;
            stackref.offset:=paraloc.location^.reference.offset;
            a_load_const_ref(list,size,a,stackref);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);
        else
          internalerror(2002081101);
      end;
    end;
  { Copies the value at ref into the parameter location reference
    paralocref.

    If a 64 bit float parameter (cgpara.size=OS_F64) is passed in pieces of
    32 bit (location^.size=OS_32), exactly 4 bytes are copied with
    g_concatcopy; every other case is left to the inherited implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      { doubles in softemu mode have a strange order of registers and references }
      if (cgpara.size=OS_F64) and
        (location^.size=OS_32) then
        begin
          g_concatcopy(list,ref,paralocref,4)
        end
      else
        inherited;
    end;
  { Creates the allocator for the multimedia registers, rg[R_MMREGISTER],
    depending on the capabilities of the selected fpu:
      - 32 registers with double support: D0-D31 as R_SUBFD
      - 32 registers without double support: S0-S31 as R_SUBFS
      - otherwise, with FPUARM_HAS_VFP_EXTENSION: D0-D15 as R_SUBFD
    If none of the conditions holds, rg[R_MMREGISTER] is not assigned here.

    The order of the registers inside the sets is the order in which they
    are handed to the allocator; in the 32 register variants registers 8-15
    are listed last.
    NOTE(review): presumably because 8-15 have to be preserved by the callee
    and are therefore more expensive to use - confirm. }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) and
        (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
        [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
        RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
        RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
        ],first_mm_imreg,[])
      { 32 registers, but no double precision support: single precision
        registers }
      else if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
        [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
        RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
        RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
        ],first_mm_imreg,[])
      { plain vfp: 16 double precision registers }
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
        [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
        RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
        ],first_mm_imreg,[]);
    end;
  { Passes the address of r as parameter.

    paraloc must be a simple location.  For a register location the address
    is calculated directly in the parameter register; for a reference
    location it is calculated in a temporary register which is then stored
    into the stack slot.  Any other kind of location causes an internal
    error. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      stackref: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { build the reference of the stack slot from the paraloc }
            reference_reset(stackref,paraloc.alignment,[]);
            stackref.base:=paraloc.location^.reference.index;
            stackref.offset:=paraloc.location^.reference.offset;
            { address -> temporary register -> stack slot }
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,stackref);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call to the routine with the assembler name s.

    list - assembler list receiving the instruction
    s    - name of the symbol to call
    weak - if true, the symbol is referenced as weak symbol

    The reference is marked as pic reference if the target uses a got for
    pic and pic code is being created, otherwise as full address.
    pi_do_call is added to the flags of the current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(target,callee,0,sizeof(pint),[]);

      target.refaddr:=addr_full;
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        target.refaddr:=addr_pic;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,target));

      { the compiler does not properly set pi_do_call anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); that is why the
        flag is set here instead of raising internalerror(2003060703) when
        it is missing }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg.

    If the cpu has blx, a single blx reg is generated.  Otherwise the return
    address is set up with mov r14,pc followed by the jump mov pc,reg.
    pi_do_call is added to the flags of the current procedure. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set pi_do_call anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); that is why the
        flag is set here instead of raising internalerror(2003060703) when
        it is missing }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Carries out reg := reg op a by forwarding to the three operand form
    a_op_const_reg_reg with reg as source and as destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  { Applies op with the constant a to the value stored at ref.

    The value is loaded into a temporary register, the result is calculated
    in a second temporary register and then stored back.  The store uses the
    reference returned by a_internal_load_ref_reg instead of ref itself. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      loadedref : treference;
    begin
      { both temporaries are allocated before any code is emitted }
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      { load }
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      { operate }
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      { store }
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { Two operand form of the integer operations: dst := dst op src, or
    dst := op src for the unary operations.

    OP_NEG - rsb dst,src,#0 followed by maybeadjustresult
    OP_NOT - mvn; for 8 and 16 bit sizes the operand is shifted to the top
             of the register first and the result is shifted back, using an
             arithmetic shift for the signed sizes
    other  - forwarded to a_op_reg_reg_reg with dst as second source }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if size in [OS_8, OS_16, OS_S8, OS_S16] then
              begin
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                { 8 bit values are moved up by 24 bits, 16 bit values by 16 }
                if size in [OS_8, OS_S8] then
                  shiftop.shiftimm:=24
                else
                  shiftop.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8, OS_S16] then
                  shiftop.shiftmode:=SM_ASR
                else
                  shiftop.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
              end
            else
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  const
    { The three tables below are indexed by TOpCG and list their entries in
      the declaration order of that type.  Judging from op_reg_opcg2asmop,
      entries 12-14 (counting from one) are the shifts asr, lsl and lsr and
      the last entry is ror. }

    { opcode for operations whose second operand is a plain register; A_NONE
      marks operations without a direct equivalent in this table (including
      all shifts and rotates) }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);

    { same as above, but with the shifts and ror available as opcodes of
      their own }
    op_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);

    { postfix to attach to the opcodes of op_reg_opcg2asmop when generating
      thumb code.  The 16 bit thumb encodings of the shifts and rotates
      exist only in the flag setting form, so asr needs PF_S exactly like
      lsl, lsr and ror (its entry used to be PF_None, unlike its siblings). }
    op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
      (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
       PF_S,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { Carries out dst := src op a without overflow detection.

    Forwards to a_op_const_reg_reg_checkoverflow with setflags=false.  ovloc
    only exists because that routine takes a var parameter; it is neither
    initialised nor read here. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,ovloc);
    end;
  { Carries out the three register form of op (operands src1 and src2,
    result in dst) without overflow detection.

    Forwards to a_op_reg_reg_reg_checkoverflow with setflags=false.  ovloc
    only exists because that routine takes a var parameter; it is neither
    initialised nor read here. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,ovloc);
    end;
  { Maps a shift/rotate TOpCg onto the shifter operand mode used to encode it.
    OP_ROL yields SM_ROR as well; the callers in this file compensate by
    rotating right by 32 minus the requested amount.
    Any other operation triggers internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace the 32 bit multiplication dst:=src*a by a short sequence
    of mov/add/rsb/sub instructions using shifted operands.

    list     : instruction list the replacement sequence is appended to
    a        : constant factor
    src, dst : source and destination register

    Returns true if a replacement sequence has been emitted. Returns false if
    the effort estimate says the sequence is not worthwhile; nothing is
    emitted in that case and the caller has to generate the multiplication
    itself. }
  662. function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  663. var
  664. multiplier : dword;
  665. power : longint;
  666. shifterop : tshifterop;
  667. bitsset : byte;
  668. negative : boolean;
  669. first, doshiftadd: boolean;
  670. b,
  671. cycles : byte;
  672. maxeffort : byte;
  673. leftmostbit,i,shiftvalue: DWord;
  674. begin
  675. result:=true;
  676. cycles:=0;
  677. negative:=a<0;
  678. shifterop.rs:=NR_NO;
  679. shifterop.shiftmode:=SM_LSL;
  { a negative factor costs one extra instruction: the final rsb dst,dst,#0 }
  680. if negative then
  681. inc(cycles);
  { the sequence is built for abs(a); the sign is applied at the very end }
  682. multiplier:=dword(abs(a));
  683. { heuristics to estimate how much instructions are reasonable to replace the mul,
  684. this is currently based on XScale timings }
  685. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  686. actual multiplication, this requires min. 1+4 cycles
  687. because the first shift imm. might cause a stall and because we need more instructions
  688. when replacing the mul we generate max. 3 instructions to replace this mul }
  689. maxeffort:=3;
  690. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  691. a ldr, so generating one more operation to replace this is beneficial }
  692. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  693. inc(maxeffort);
  694. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  695. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  696. dec(maxeffort);
  697. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  698. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  699. dec(maxeffort);
  700. { "symmetric" bit pattern like $10101010 where
  701. res:=a*$10101010 can be simplified into
  702. temp:=a*$1010
  703. res:=temp+temp shl 16
  704. }
  705. doshiftadd:=false;
  706. leftmostbit:=BsrDWord(multiplier);
  707. shiftvalue:=0;
  708. if (maxeffort>1) and (leftmostbit>2) then
  709. begin
  { look for the smallest i for which the bits above position i equal the
    lowest i bits; if found, only the upper part is multiplied here and the
    final add dst,dst,dst shl i (costing one unit of effort) restores the
    full factor }
  710. for i:=2 to 31 do
  711. if (multiplier shr i)=(multiplier and ($ffffffff shr (32-i))) then
  712. begin
  713. doshiftadd:=true;
  714. shiftvalue:=i;
  715. dec(maxeffort);
  716. multiplier:=multiplier shr shiftvalue;
  717. break;
  718. end;
  719. end;
  { number of set bits not counting bit 0 }
  720. bitsset:=popcnt(multiplier and $fffffffe);
  721. { most simple cases }
  722. if a=1 then
  723. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  724. else if a=0 then
  725. a_load_const_reg(list,OS_32,0,dst)
  726. else if a=-1 then
  727. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  728. { add up ?
  729. basically, one add is needed for each bit being set in the constant factor
  730. however, the least significant bit is for free, it can be hidden in the initial
  731. instruction
  732. }
  733. else if (bitsset+cycles<=maxeffort) and
  734. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  735. begin
  736. first:=true;
  { consume the multiplier from its most significant set bit downwards;
    an odd multiplier has its bit 0 merged into the first instruction
    (add dst,src,src shl n).
    NOTE(review): if the multiplier is exactly 1 when this loop is entered
    (looks reachable after the symmetric pattern reduction above), the odd
    branch decrements it to 0 and the unconditional dec below then wraps
    around - confirm that this case cannot occur or guard against it. }
  737. while multiplier<>0 do
  738. begin
  739. shifterop.shiftimm:=BsrDWord(multiplier);
  740. if odd(multiplier) then
  741. begin
  742. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  743. dec(multiplier);
  744. end
  745. else
  746. if first then
  747. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  748. else
  749. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  750. first:=false;
  { clear the bit that has just been handled }
  751. dec(multiplier,1 shl shifterop.shiftimm);
  752. end;
  { undo the symmetric pattern reduction: dst:=dst+(dst shl shiftvalue) }
  753. if doshiftadd then
  754. begin
  755. shifterop.shiftimm:=shiftvalue;
  756. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
  757. end;
  { apply the sign: dst:=0-dst }
  758. if negative then
  759. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  760. end
  761. { subtract from the next greater power of two? }
  762. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  763. begin
  764. first:=true;
  765. while multiplier<>0 do
  766. begin
  { on the first pass the multiplier is replaced by its distance to
    1 shl power and the first instruction shifts by power; later passes
    subtract the remaining bits from the most significant one downwards }
  767. if first then
  768. begin
  769. multiplier:=(1 shl power)-multiplier;
  770. shifterop.shiftimm:=power;
  771. end
  772. else
  773. shifterop.shiftimm:=BsrDWord(multiplier);
  774. if odd(multiplier) then
  775. begin
  776. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  777. dec(multiplier);
  778. end
  779. else
  780. if first then
  781. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  782. else
  783. begin
  784. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  785. dec(multiplier,1 shl shifterop.shiftimm);
  786. end;
  787. first:=false;
  788. end;
  { undo the symmetric pattern reduction: dst:=dst+(dst shl shiftvalue) }
  789. if doshiftadd then
  790. begin
  791. shifterop.shiftimm:=shiftvalue;
  792. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
  793. end;
  { apply the sign: dst:=0-dst }
  794. if negative then
  795. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  796. end
  797. else
  { too expensive: nothing has been emitted, report failure to the caller }
  798. result:=false;
  799. end;
  { Emits code for dst := src op a, with a being a constant.

    list     : instruction list the code is appended to
    op, size : operation and operand size
    a        : constant operand
    src, dst : source and destination register
    setflags : request a flag setting instruction (in addition to the
               cgsetflags field of the code generator)
    ovloc    : receives the location of the overflow information; set to
               LOC_VOID unless the immediate add/sub path below or the
               register fallback stores something else. It is not written
               at all on the early OP_NONE/OP_MOVE exits.

    The routine tries a number of cheaper encodings (immediate operand,
    shifts, shift+add, BIC, split immediates) before it falls back to
    loading the constant into a scratch register. }
  800. procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  801. var
  802. shift, lsb, width : byte;
  803. tmpreg : tregister;
  804. so : tshifterop;
  805. l1 : longint;
  806. imm1, imm2: DWord;
  807. begin
  { NOTE(review): optimize_op_const is not visible here; it presumably may
    rewrite op and a (e.g. into OP_NONE or OP_MOVE, which are handled next) }
  808. optimize_op_const(size, op, a);
  809. case op of
  810. OP_NONE:
  811. begin
  812. if src <> dst then
  813. a_load_reg_reg(list, size, size, src, dst);
  814. exit;
  815. end;
  816. OP_MOVE:
  817. begin
  818. a_load_const_reg(list, size, a, dst);
  819. exit;
  820. end;
  821. else
  822. ;
  823. end;
  824. ovloc.loc:=LOC_VOID;
  { if no flags are needed and -a can be encoded as shifter constant, turn
    add into sub (and vice versa) with the negated constant }
  825. if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
  826. case op of
  827. OP_ADD:
  828. begin
  829. op:=OP_SUB;
  830. a:=aint(dword(-a));
  831. end;
  832. OP_SUB:
  833. begin
  834. op:=OP_ADD;
  835. a:=aint(dword(-a));
  836. end
  837. else
  838. ;
  839. end;
  { the constant fits into an immediate operand (multiplications are
    excluded and handled in the else branch further down) }
  840. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  841. case op of
  842. OP_NEG,OP_NOT:
  843. internalerror(200308281);
  844. OP_SHL,
  845. OP_SHR,
  846. OP_ROL,
  847. OP_ROR,
  848. OP_SAR:
  849. begin
  850. if a>32 then
  851. internalerror(200308294);
  { shifts become a mov with a shifted operand; a rotate left by a is
    emitted as rotate right by 32-a }
  852. shifterop_reset(so);
  853. so.shiftmode:=opshift2shiftmode(op);
  854. if op = OP_ROL then
  855. so.shiftimm:=32-a
  856. else
  857. so.shiftimm:=a;
  858. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  859. end;
  860. else
  861. {if (op in [OP_SUB, OP_ADD]) and
  862. ((a < 0) or
  863. (a > 4095)) then
  864. begin
  865. tmpreg:=getintregister(list,size);
  866. list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
  867. list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
  868. list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
  869. ));
  870. end
  871. else}
  872. begin
  { plain "op dst,src,#a"; the flags register is allocated and the S postfix
    is added when flags were requested }
  873. if cgsetflags or setflags then
  874. a_reg_alloc(list,NR_DEFAULTFLAGS);
  875. list.concat(setoppostfix(
  876. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  877. end;
  { report where the overflow information can be found: carry set after an
    add, carry clear after a sub; only done for OS_8, OS_16 and OS_32 }
  878. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  879. begin
  880. ovloc.loc:=LOC_FLAGS;
  881. case op of
  882. OP_ADD:
  883. ovloc.resflags:=F_CS;
  884. OP_SUB:
  885. ovloc.resflags:=F_CC;
  886. else
  887. internalerror(2019050922);
  888. end;
  889. end;
  890. end
  891. else
  892. begin
  893. { there could be added some more sophisticated optimizations }
  894. if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  895. a_op_reg_reg(list,OP_NEG,size,src,dst)
  896. { we do this here instead in the peephole optimizer because
  897. it saves us a register }
  898. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  899. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  900. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  901. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  902. begin
  903. if l1>32 then{roozbeh does this ever happen?}
  904. internalerror(200308296);
  905. shifterop_reset(so);
  906. so.shiftmode:=SM_LSL;
  907. so.shiftimm:=l1;
  908. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  909. end
  910. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  911. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  912. begin
  913. if l1>32 then{does this ever happen?}
  914. internalerror(201205181);
  915. shifterop_reset(so);
  916. so.shiftmode:=SM_LSL;
  917. so.shiftimm:=l1;
  918. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  919. end
  920. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  921. begin
  922. { nothing to do on success }
  923. end
  924. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  925. broader range of shifterconstants.}
  926. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  927. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  928. { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
  929. into the following instruction}
  930. else if (op = OP_AND) and
  931. is_continuous_mask(aword(a), lsb, width) and
  932. ((lsb = 0) or ((lsb + width) = 32)) then
  933. begin
  934. shifterop_reset(so);
  { masks keeping the low 16 or 8 bits map onto uxth/uxtb on ARMv6 and later }
  935. if (width = 16) and
  936. (lsb = 0) and
  937. (current_settings.cputype >= cpu_armv6) then
  938. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  939. else if (width = 8) and
  940. (lsb = 0) and
  941. (current_settings.cputype >= cpu_armv6) then
  942. list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
  943. else if lsb = 0 then
  944. begin
  { keep the low width bits: shift left by 32-width, then back right }
  945. so.shiftmode:=SM_LSL;
  946. so.shiftimm:=32-width;
  947. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  948. so.shiftmode:=SM_LSR;
  949. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  950. end
  951. else
  952. begin
  { keep the bits from lsb upwards: shift right by lsb, then back left }
  953. so.shiftmode:=SM_LSR;
  954. so.shiftimm:=lsb;
  955. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  956. so.shiftmode:=SM_LSL;
  957. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  958. end;
  959. end
  { and with a mask whose complement splits into two immediates: two bics }
  960. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  961. begin
  962. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
  963. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  964. end
  { add/sub/or/xor with a constant that splits into two immediates: apply the
    operation twice; only done when no flags are requested }
  965. else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
  966. not(cgsetflags or setflags) and
  967. split_into_shifter_const(a, imm1, imm2) then
  968. begin
  969. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
  970. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  971. end
  972. else
  973. begin
  { generic fallback: load the constant into a scratch register and use the
    three register variant, which also fills in ovloc }
  974. tmpreg:=getintregister(list,size);
  975. a_load_const_reg(list,size,a,tmpreg);
  976. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  977. end;
  978. end;
  979. maybeadjustresult(list,op,size,dst);
  980. end;
  { Emits code for dst := src2 op src1 (for shifts and rotates: src2 shifted
    by the amount held in src1).

    list           : instruction list the code is appended to
    op, size       : operation and operand size
    src1,src2,dst  : operand and destination registers
    setflags       : request flag setting/overflow detection (in addition to
                     the cgsetflags field of the code generator)
    ovloc          : receives the location of the overflow information;
                     LOC_VOID unless the multiplication path stores
                     LOC_FLAGS/F_NE

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported here and raise
    internalerror 200308283. Rotates are only accepted for 32 bit sizes. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { overflow checking multiplication: compute the full 64 bit
                  product, the upper half goes into overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: the
                          operand has to be copied into the scratch register
                          which is then used as rm. (The copy used to target
                          dst, which left tmpreg undefined.) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow if the upper half equals the sign
                      extension of the lower half (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow if the upper half is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { all three registers are identical: copy the operand
                          into the scratch register and use that as rm. (The
                          copy used to target dst, which left tmpreg
                          undefined.) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { everything else maps directly onto "op dst,src2,src1"; the S
              postfix is added when flags were requested }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the product in the register pair
    dstlo/dsthi.
    On CPUs with UMULL support a umull (OS_32) or smull (OS_S32) is emitted;
    missing destination registers (NR_NO) are replaced by freshly allocated
    ones. Without UMULL support only the low word can be produced: a plain
    mul is emitted if dsthi is NR_NO, otherwise internalerror 2015083022 is
    raised. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          { no long multiply available }
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_S32:
          mulop:=A_SMULL;
        OS_32:
          mulop:=A_UMULL;
        else
          InternalError(2014060802);
      end;
      { The caller might omit dstlo or dsthi when it is not interested in it,
        but the instruction still needs valid registers everywhere. In case of
        dsthi = NR_NO we could fall back to a 32x32=32 bit multiplication }
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load/store instruction op (with postfix oppostfix) for register
    reg and memory reference ref, after rewriting the reference into a form
    the instruction can encode.

    list      : instruction list the code is appended to
    op        : load/store opcode
    oppostfix : size/sign postfix of the instruction
    reg       : register being loaded or stored
    ref       : memory reference; passed by value, so the adjustments made
                here do not affect the caller's copy

    Returns the reference that was finally used in the emitted instruction. }
  1115. function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  1116. var
  1117. tmpreg1,tmpreg2 : tregister;
  1118. begin
  { tmpreg1 remembers the scratch register introduced below, if any }
  1119. tmpreg1:=NR_NO;
  1120. { Be sure to have a base register }
  1121. if (ref.base=NR_NO) then
  1122. begin
  1123. if ref.shiftmode<>SM_None then
  1124. internalerror(2014020707);
  1125. ref.base:=ref.index;
  1126. ref.index:=NR_NO;
  1127. end;
  1128. { absolute symbols can't be handled directly, we've to store the symbol reference
  1129. in the text segment and access it pc relative
  1130. For now, we assume that references where base or index equals to PC are already
  1131. relative, all other references are assumed to be absolute and thus they need
  1132. to be handled extra.
  1133. A proper solution would be to change refoptions to a set and store the information
  1134. if the symbol is absolute or relative there.
  1135. }
  { The condition below lists everything that cannot be encoded directly:
    a non pc-relative symbol, a reference without any register, an offset
    outside -4095..4095, an offset outside -255..255 for the SB/H/SH
    postfixes, an offset outside -1020..1020 or not divisible by 4 for the
    floating point/VFP opcodes, and the tighter offset limits checked when
    generating Thumb code.
    NOTE(review): fixref is not visible here; it presumably rewrites ref into
    an encodable form - confirm. }
  1136. if (assigned(ref.symbol) and
  1137. not(is_pc(ref.base)) and
  1138. not(is_pc(ref.index))
  1139. ) or
  1140. { [#xxx] isn't a valid address operand }
  1141. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  1142. (ref.offset<-4095) or
  1143. (ref.offset>4095) or
  1144. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  1145. ((ref.offset<-255) or
  1146. (ref.offset>255)
  1147. )
  1148. ) or
  1149. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1150. ((ref.offset<-1020) or
  1151. (ref.offset>1020) or
  1152. ((abs(ref.offset) mod 4)<>0)
  1153. )
  1154. ) or
  1155. ((GenerateThumbCode) and
  1156. (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
  1157. ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
  1158. ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
  1159. ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
  1160. ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
  1161. )
  1162. ) then
  1163. begin
  1164. fixref(list,ref);
  1165. end;
  1166. if GenerateThumbCode then
  1167. begin
  1168. { certain thumb load require base and index }
  1169. if (oppostfix in [PF_SB,PF_SH]) and
  1170. (ref.base<>NR_NO) and (ref.index=NR_NO) then
  1171. begin
  { supply an index register holding zero }
  1172. tmpreg1:=getintregister(list,OS_ADDR);
  1173. a_load_const_reg(list,OS_ADDR,0,tmpreg1);
  1174. ref.index:=tmpreg1;
  1175. end;
  1176. { "hi" registers cannot be used as base or index }
  1177. if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
  1178. ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
  1179. begin
  { copy the base into a freshly allocated register }
  1180. tmpreg1:=getintregister(list,OS_ADDR);
  1181. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
  1182. ref.base:=tmpreg1;
  1183. end;
  1184. if getsupreg(ref.index) in [RS_R8..RS_R14] then
  1185. begin
  { same for the index register }
  1186. tmpreg1:=getintregister(list,OS_ADDR);
  1187. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
  1188. ref.index:=tmpreg1;
  1189. end;
  1190. end;
  1191. { fold if there is base, index and offset, however, don't fold
  1192. for vfp memory instructions because we later fold the index }
  1193. if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1194. (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  1195. begin
  1196. if tmpreg1<>NR_NO then
  1197. begin
  { a scratch register is already in use: add the offset to it into a
    second scratch register }
  1198. tmpreg2:=getintregister(list,OS_ADDR);
  1199. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
  1200. tmpreg1:=tmpreg2;
  1201. end
  1202. else
  1203. begin
  { no scratch register yet: base+offset becomes the new base }
  1204. tmpreg1:=getintregister(list,OS_ADDR);
  1205. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
  1206. ref.base:=tmpreg1;
  1207. end;
  1208. ref.offset:=0;
  1209. end;
  1210. { floating point operations have only limited references
  1211. we expect here, that a base is already set }
  1212. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  1213. begin
  1214. if ref.shiftmode<>SM_none then
  1215. internalerror(200309121);
  { fold the index register into the base (subtracting it if signindex is
    negative) so that no index register remains }
  1216. if tmpreg1<>NR_NO then
  1217. begin
  1218. if ref.base=tmpreg1 then
  1219. begin
  1220. if ref.signindex<0 then
  1221. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
  1222. else
  1223. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
  1224. ref.index:=NR_NO;
  1225. end
  1226. else
  1227. begin
  { the scratch register has to be the index in this case }
  1228. if ref.index<>tmpreg1 then
  1229. internalerror(200403161);
  1230. if ref.signindex<0 then
  1231. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
  1232. else
  1233. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
  1234. ref.base:=tmpreg1;
  1235. ref.index:=NR_NO;
  1236. end;
  1237. end
  1238. else
  1239. begin
  { NOTE(review): unlike the branches above, this path always adds the index
    and does not look at ref.signindex - confirm this is intended }
  1240. tmpreg1:=getintregister(list,OS_ADDR);
  1241. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
  1242. ref.base:=tmpreg1;
  1243. ref.index:=NR_NO;
  1244. end;
  1245. end;
  { finally emit the instruction itself and hand the adjusted reference back }
  1246. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  1247. Result := ref;
  1248. end;
  { Stores register reg to memory reference ref using tosize.
    If the reference is less aligned than the store requires (alignment 1 or
    2 and smaller than the size of tosize), or if a halfword store is needed
    on a CPU without CPUARM_HAS_ALL_MEM, 16 and 32 bit values are written in
    smaller pieces, honouring the target endianness. Everything else is
    stored with a single str instruction. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      storepostfix : toppostfix;
      partref : treference;
      shiftedreg : tregister;
      step : integer;
      piecewise : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        OS_8,
        OS_S8:
          storepostfix:=PF_B;
        OS_16,
        OS_S16:
          storepostfix:=PF_H;
        { OS_F32: vfp value stored in an integer register }
        OS_32,
        OS_S32,
        OS_F32:
          storepostfix:=PF_None;
        else
          InternalError(2003082912);
      end;

      piecewise:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
                 ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
                  (storepostfix=PF_H));

      { the common case: a single store instruction does the job }
      if not(piecewise) or not(FromSize in [OS_16,OS_S16,OS_32,OS_S32]) then
        begin
          handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;

      { direction in which the address advances from the least significant
        piece to the most significant one }
      if target_info.endian=endian_big then
        step:=-1
      else
        step:=1;

      shiftedreg:=getintregister(list,OS_INT);
      partref:=ref;
      if FromSize in [OS_16,OS_S16] then
        begin
          { two byte stores }
          if target_info.endian=endian_big then
            inc(partref.offset,1);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          inc(partref.offset,step);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shiftedreg);
          a_internal_load_reg_ref(list,OS_8,OS_8,shiftedreg,partref);
        end
      else if ref.alignment=2 then
        begin
          { 32 bit value, halfword aligned: two halfword stores }
          if target_info.endian=endian_big then
            inc(partref.offset,2);
          partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shiftedreg);
          inc(partref.offset,step*2);
          a_internal_load_reg_ref(list,OS_16,OS_16,shiftedreg,partref);
        end
      else
        begin
          { 32 bit value, byte aligned: four byte stores }
          if target_info.endian=endian_big then
            inc(partref.offset,3);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shiftedreg);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shiftedreg,partref);
          a_op_const_reg(list,OP_SHR,OS_INT,8,shiftedreg);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shiftedreg,partref);
          a_op_const_reg(list,OP_SHR,OS_INT,8,shiftedreg);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shiftedreg,partref);
        end;
    end;
  { Stores reg to ref with the width given by tosize and returns the
    reference handle_load_store actually used for the (first) store.
    On CPUs without CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte
    stores: the low byte at the reference and reg shr 8 at offset+1. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepostfix : toppostfix;
      highref : treference;
      highbyte : TRegister;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          storepostfix:=PF_B;
        OS_16,
        OS_S16:
          storepostfix:=PF_H;
        OS_32,
        OS_S32:
          storepostfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      { a single store is enough unless a halfword has to be written on a
        CPU lacking the corresponding instruction }
      if not(tosize in [OS_S16,OS_16]) or
         (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        begin
          result:=handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;

      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      highbyte:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,highbyte);
      highref:=result;
      inc(highref.offset);
      handle_load_store(list,A_STR,PF_B,highbyte,highref);
    end;
  { Loads reg from Ref and returns the reference handle_load_store actually
    used for the (first) load. The instruction postfix is derived from
    fromsize.
    On CPUs without CPUARM_HAS_ALL_MEM two cases are emulated:
      - tosize=OS_S8: unsigned byte load followed by an OS_S8 to OS_32
        conversion of reg
      - tosize 16 bit: two byte loads combined with orr reg,reg,tmp lsl 8 }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpostfix : toppostfix;
      shifter : tshifterop;
      highbyte : TRegister;
      highref : treference;
      limitedmem : boolean;
    begin
      case FromSize of
        OS_8:
          loadpostfix:=PF_B;
        OS_S8:
          loadpostfix:=PF_SB;
        OS_16:
          loadpostfix:=PF_H;
        OS_S16:
          loadpostfix:=PF_SH;
        OS_32,
        OS_S32:
          loadpostfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limitedmem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if limitedmem and (tosize=OS_S8) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limitedmem and (tosize in [OS_S16,OS_16]) then
        begin
          { low byte first, then the byte at offset+1 shifted into bits 8..15 }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          highbyte:=getintregister(list,OS_INT);
          highref:=result;
          inc(highref.offset);
          handle_load_store(list,A_LDR,PF_B,highbyte,highref);
          shifterop_reset(shifter);
          shifter.shiftmode:=SM_LSL;
          shifter.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,highbyte,shifter));
        end
      else
        result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref);
    end;
  { Moves reg1 into reg2, converting from fromsize to tosize.
    If the sizes differ, the value is zero or sign extended according to the
    effective source size (tosize when it is not larger than fromsize),
    using shift pairs before ARMv6 and the uxt*/sxt* instructions from ARMv6
    on. If no conversion instruction was emitted and the registers differ, a
    plain mov is generated and reported to the register allocator. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      shifter : tshifterop;

    { Writes srcreg shifted by amount (using mode) into reg2 }
    procedure do_shift(mode : tshiftmode; amount : byte; srcreg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            shifter.shiftmode:=mode;
            shifter.shiftimm:=amount;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,srcreg,shifter));
            exit;
          end;
        { Thumb: go through the generic constant shift code }
        case mode of
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,amount,srcreg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,amount,srcreg,reg2);
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,amount,srcreg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    var
      movinstr: taicpu;
      converted: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(shifter);
          converted:=true;
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype>=cpu_armv6 then
            begin
              { ARMv6 and later: dedicated extension instructions }
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    if tosize=OS_16 then
                      begin
                        shifter.shiftmode:=SM_ROR;
                        shifter.shiftimm:=16;
                        list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,shifter));
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                  end;
                OS_16:
                  list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
                OS_S16:
                  list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
                else
                  converted:=false;
              end;
            end
          else
            begin
              { before ARMv6: mask or shift up and back down }
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    do_shift(SM_LSL,24,reg1);
                    if tosize=OS_16 then
                      begin
                        do_shift(SM_ASR,8,reg2);
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      do_shift(SM_ASR,24,reg2);
                  end;
                OS_16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_LSR,16,reg2);
                  end;
                OS_S16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_ASR,16,reg2)
                  end;
                else
                  converted:=false;
              end;
            end;
        end;

      if (reg1<>reg2) and not converted then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move
            instruction so it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.
    Walks over all locations of the parameter and loads or copies the data
    according to the kind of each location; the source reference advances by
    the size of every location processed.
    NOTE(review): the FPU register case reads from ref rather than from the
    advancing copy, and the allocparaloc calls are given paraloc.location
    rather than the current location - both are kept as found; confirm this
    is intended for parameters made up of several locations. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      srcref,dstref : treference;
      curloc : pcgparalocation;
    begin
      srcref:=ref;
      curloc:=paraloc.location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REFERENCE :
              begin
                reference_reset_base(dstref,curloc^.reference.index,curloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,srcref,dstref,tcgsize2size[curloc^.size]);
              end;
            LOC_REGISTER :
              case curloc^.size of
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,srcref,paraloc);
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,srcref,curloc^.register);
                  end;
                else
                  a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
              end;
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,paraloc.location);
                a_loadfpu_ref_reg(list,size,size,ref,curloc^.register);
              end;
            else
              internalerror(200408241);
          end;
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Copies FPU register reg1 into reg2 with an mvf instruction whose
    postfix is selected by tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      moveinstr : taicpu;
    begin
      moveinstr:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(moveinstr,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register reg from ref with an ldf instruction whose precision
    postfix is chosen by fromsize. If tosize differs, a register to register
    conversion of reg onto itself follows. Sizes other than 32/64 bit and
    OS_F80 raise internalerror 200309021. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_F80:
          precision:=PF_E;
        OS_64,
        OS_F64:
          precision:=PF_D;
        OS_32,
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register "reg" to memory with STF.

    The postfix of the store is chosen from tosize (OS_F32 -> S, OS_F64 -> D,
    OS_F80 -> E); any other size raises InternalError(200309022). fromsize is
    not used. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      storepostfix : toppostfix;
    begin
      case tosize of
        OS_F32:
          storepostfix:=PF_S;
        OS_F64:
          storepostfix:=PF_D;
        OS_F80:
          storepostfix:=PF_E;
        else
          InternalError(200309022);
      end;

      handle_load_store(list,A_STF,storepostfix,reg,ref);
    end;
  { Generates an explicit check of the FPSCR exception bits.

    Code is only emitted when fpu exception checking is enabled in the local
    switches, the selected fpu type does not have FPUARM_HAS_EXCEPTION_TRAPPING
    and either "force" is set or the current procedure has flagged that a
    check is needed.

    The generated sequence reads FPSCR, masks it with $9f while setting the
    flags, and calls FPC_THROWFPUEXCEPTION unless the result is zero. With
    "clear" set, the pending-check flag of the procedure is reset afterwards. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipcall : taicpu;
      nofault : TAsmLabel;
    begin
      if not(cs_check_fpu_exceptions in current_settings.localswitches) then
        exit;
      if FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype] then
        exit;
      if not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { fetch the status word and isolate the bits of interest }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { nothing set -> jump over the call }
      current_asmdata.getjumplabel(nofault);
      skipcall:=taicpu.op_sym(A_B,nofault);
      skipcall.is_jmp:=true;
      skipcall.condition:=C_EQ;
      list.concat(skipcall);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      a_label(list,nofault);
      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1622. { comparison operations }
  { Compares "reg" with the constant "a" and jumps to "l" if cmp_op holds.

    Three encodings are tried in this order:
      1. CMP reg,#a   if the constant fits the immediate field (shifter
                      constant in ARM mode, thumb immediate in Thumb mode)
      2. CMN reg,#-a  ARM mode only, if -a is a shifter constant
      3. the constant is loaded into a temporary register and compared
         register against register

    The flags register is allocated around the compare and the jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      fitscmp : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);

      if GenerateThumbCode then
        fitscmp:=is_thumb_imm(a)
      else
        fitscmp:=is_shifter_const(a,rot);

      if fitscmp then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      else
        begin
          { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
            and CMP reg,$7fffffff regarding the flags according to the ARM manual,
            hence the two excluded values }
          if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,rot) then
            list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
          else
            begin
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
            end;
        end;

      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Generates a bit scan of "src" into "dst".

    Forward scan (reverse=false): RBIT followed by CLZ; if the result is 32
    (compare against 32, conditional move) $ff is stored instead. In Thumb-2
    mode the conditional move is preceded by an IT block.

    Reverse scan (reverse=true): CLZ, then 31 minus that value, masked to the
    low byte. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { whether this code is used at all is decided while the system unit
            is compiled, so availability of rbit is not checked again here }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to "l" for cmp_op.
    The flags register is marked allocated for the duration of the pair. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named "s".
    In Thumb mode BL is used, otherwise B; the instruction is flagged as a
    jump in both cases. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      branchop : tasmop;
      jump : taicpu;
    begin
      { a far jump is generated, the optimizer is expected to shorten it }
      if GenerateThumbCode then
        branchop:=A_BL
      else
        branchop:=A_B;
      jump:=taicpu.op_sym(branchop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to label "l".
    In Thumb mode BL is used, otherwise B; the instruction is flagged as a
    jump in both cases. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      branchop : tasmop;
      jump : taicpu;
    begin
      { a far jump is generated, the optimizer is expected to shorten it }
      if GenerateThumbCode then
        branchop:=A_BL
      else
        branchop:=A_B;
      jump:=taicpu.op_sym(branchop,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Jumps to "l" when the condition described by the flags "f" is met.

    ARM mode: a single conditional branch.
    Thumb mode: a conditional branch with the inverted condition skips over
    an unconditional jump (a_jmp_always) to "l". }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to simplify this when the jump range is short enough }
      current_asmdata.getjumplabel(skiplabel);
      branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materialises the flags "f" as 1 or 0 in "reg": a conditional MOV #1 under
    the condition matching "f", followed by a conditional MOV #0 under the
    inverse condition. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      setcond : tasmcond;
    begin
      setcond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),setcond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(setcond)));
    end;
  { Emits the profiling hook: R14 is pushed and __gnu_mcount_nc is called.
    Only system_arm_linux is supported, every other target raises
    internalerror(2014091201). }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Generates the procedure prologue.

      list         - assembler list that receives the prologue
      localsize    - size of the local area, rounded up to a multiple of 4 here
      nostackframe - when set, no prologue code is generated at all

    The work is done in this order:
      1. determine which floating point / vfp registers have to be saved
      2. push the non-volatile integer registers (two different layouts: the
         generic one and the Darwin one) and emit the matching CFI
      3. lower the stack pointer for the locals, keeping the stack aligned
      4. store the floating point / vfp registers at floatregstart

    The padding register chosen in step 2 (if any) is recorded in
    tcpuprocinfo.stackpaddingreg for g_proc_exit. }
  procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      rot : byte;
      firstfpareg,lastfpareg,
      sr : byte;
      vfpregs,
      regset, pushregs : tcpuregisterset;
      registerarea, cfaoffset,
      r7offset,
      stackmisalignment : pint;
      imm1, imm2: DWord;
      stack_parameters : Boolean;
    begin
      localsize:=align(localsize,4);
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { the call instruction itself leaves nothing on the stack }
      registerarea:=0;
      tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
      lastfpareg:=RS_NO;

      if not(nostackframe) then
        begin
          firstfpareg:=RS_NO;
          vfpregs:=[];
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { find the range of fpa registers that must be preserved }
                regset:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for sr:=RS_F0 to RS_F7 do
                  if sr in regset then
                    begin
                      if firstfpareg=RS_NO then
                        firstfpareg:=sr;
                      lastfpareg:=sr;
                      inc(registerarea,12);
                    end;
              end;
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin
                { the *[0..31] is a hack that keeps the compiler from saving odd
                  single-type registers; those have numbers>$1f, which is not
                  really correct: they should share the number of the even ones
                  and differ in the subtype, like al/ah on x86 }
                vfpregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { same hack as above, limited to [0..15] }
                vfpregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050924);
          end;

          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);

          { reference used by the STMFD instructions that push the int registers }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regset:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

          if not(target_info.system in systems_darwin) then
            begin
              a_reg_alloc(list,NR_STACK_POINTER_REG);
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { keep a copy of the entry stack pointer in R12 }
                  a_reg_alloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
                end;
              { the (old) ARM APCS requires saving both the stack pointer (to
                crawl the stack) and the PC (to identify the function the
                stack frame belongs to), so R12 (= copy of R13 on entry) and
                R15 are saved as well -- EABI and Darwin do not need this,
                this still has to be updated for them }
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                regset:=regset+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
              else
                if (regset<>[]) or (pi_do_call in current_procinfo.flags) then
                  include(regset,RS_R14);

              if regset<>[] then
                begin
                  for sr:=RS_R0 to RS_R15 do
                    if sr in regset then
                      inc(registerarea,4);
                  { if the register area is not a multiple of the local
                    alignment, try to push one extra register of R3..R0 so
                    that the sub/add ...,#4 can be avoided later (KB) }
                  if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
                    for sr:=RS_R3 downto RS_R0 do
                      if not(sr in regset) then
                        begin
                          include(regset,sr);
                          inc(registerarea,4);
                          tcpuprocinfo(current_procinfo).stackpaddingreg:=sr;
                          break;
                        end;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regset),PF_FD));
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
                end;

              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { describe the slot of every pushed register, highest register first }
                  cfaoffset:=-4;
                  for sr:=RS_R15 downto RS_R0 do
                    if sr in regset then
                      begin
                        current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,sr,R_SUBWHOLE),cfaoffset);
                        dec(cfaoffset,4);
                      end;
                  { the framepointer now points to the saved R15, so the saved
                    framepointer is at R11-12 (for get_caller_frame) }
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                  a_reg_dealloc(list,NR_R12);
                  current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
                end;
            end
          else
            begin
              { r14 is always saved when r7 is the framepointer: the parameter
                offsets are hardcoded in advance and assume that r14 sits on
                the stack right behind the saved r7 }
              if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
                include(regset,RS_FRAME_POINTER_REG);
              if (regset<>[]) or (pi_do_call in current_procinfo.flags) then
                include(regset,RS_R14);
              if regset<>[] then
                begin
                  { on Darwin, [r4-r7,lr] have to be saved first and then
                    [r8,r10,r11]; r7 is made to point to the previously saved
                    r7 so that a stack crawl based on it is possible
                    ([r7] is the previous stack frame, [r7+4] the return address) }
                  include(regset,RS_FRAME_POINTER_REG);
                  pushregs:=regset-[RS_R8,RS_R10,RS_R11];
                  r7offset:=0;
                  for sr:=RS_R0 to RS_R15 do
                    if sr in pushregs then
                      begin
                        inc(registerarea,4);
                        if sr<RS_FRAME_POINTER_REG then
                          inc(r7offset,4);
                      end;
                  { first batch }
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,pushregs),PF_FD));
                  { make r7 point to the saved r7 (regardless of whether this
                    frame uses the framepointer, for backtrace purposes) }
                  if r7offset<>0 then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                  else
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                  { second batch: whatever was left out above }
                  pushregs:=regset-pushregs;
                  if pushregs<>[] then
                    begin
                      for sr:=RS_R8 to RS_R11 do
                        if sr in pushregs then
                          inc(registerarea,4);
                      list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,pushregs),PF_FD));
                    end;
                end;
            end;

          { reserve the local area }
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (localsize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    internalerror(2014030901)
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end;
              if is_shifter_const(localsize,rot) then
                begin
                  { one immediate is enough }
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize));
                end
              else if split_into_shifter_const(localsize, imm1, imm2) then
                begin
                  { two immediates }
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                end
              else
                begin
                  { the size goes through R12 }
                  if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                    a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,localsize,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end;
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;

          { store the floating point / vfp registers }
          if (vfpregs<>[]) or
             (firstfpareg<>RS_NO) then
            begin
              reference_reset(ref,4,[]);
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  { compute the address of the save area in R12 }
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,rot) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  { address the save area directly off the framepointer }
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;

              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfpareg,R_SUBWHOLE),
                    lastfpareg-firstfpareg+1,ref));
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { the register-set store takes its address register from ref.index }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if vfpregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,vfpregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if vfpregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,vfpregs));
                  end
                else
                  internalerror(2019050923);
              end;
            end;
        end;
    end;
  { Generates the procedure epilogue, i.e. the counterpart of g_proc_entry.

      list         - assembler list that receives the epilogue
      parasize     - not used by this implementation
      nostackframe - when set, only the return instruction is generated

    The work is done in this order:
      1. reload the floating point / vfp registers from floatregstart
      2. determine the integer registers to pop, including the padding
         register recorded by g_proc_entry
      3. without a separate framepointer (or on Darwin): release the local
         area, then pop the registers or return through R14
         with a framepointer: restore everything with a single LDMEA based
         on the framepointer

    The set of vfp registers restored here is computed exactly as in
    g_proc_entry ([0..31] only with FPUARM_HAS_32REGS, [0..15] otherwise), so
    the VLDM never names more registers than the VSTM of the prologue stored. }
  procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      LocalSize : longint;
      firstfloatreg,lastfloatreg,
      r,
      shift : byte;
      mmregs,
      saveregs,
      regs : tcpuregisterset;
      registerarea,
      stackmisalignment: pint;
      paddingreg: TSuperRegister;
      imm1, imm2: DWord;
    begin
      if not(nostackframe) then
        begin
          registerarea:=0;
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          mmregs:=[];
          saveregs:=[];
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { restore floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { floating point register space is already included in
                        localsize below by calc_stackframe_size
                      inc(registerarea,12);
                      }
                    end;
              end;
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin
                { restore vfp registers? }
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { same hack as above; the mask has to be [0..15] here because
                  g_proc_entry saves with that mask for this kind of fpu }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050908);
          end;

          if (firstfloatreg<>RS_NO) or
             (mmregs<>[]) then
            begin
              reference_reset(ref,4,[]);
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  { compute the address of the save area in R12 }
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { the register-set load takes its address register from ref.index }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050921);
              end;
            end;

          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or
             (regs<>[]) or
             ((target_info.system in systems_darwin) and
              (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
            begin
              { the saved return address is popped straight into the PC }
              exclude(regs,RS_R14);
              include(regs,RS_R15);
              if (target_info.system in systems_darwin) then
                include(regs,RS_FRAME_POINTER_REG);
            end;

          if not(target_info.system in systems_darwin) then
            begin
              { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                The saved PC came after that but is discarded, since we restore
                the stack pointer }
              if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
            end
          else
            begin
              { restore R8-R11 already if necessary (they've been stored
                before the others) }
              saveregs:=regs*[RS_R8,RS_R10,RS_R11];
              if saveregs<>[] then
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  for r:=RS_R8 to RS_R11 do
                    if r in saveregs then
                      inc(registerarea,4);
                  regs:=regs-saveregs;
                end;
            end;

          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);

          { reapply the stack padding reg, in case there was one, see the
            complementary comment in g_proc_entry() (KB) }
          paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
          if paddingreg < RS_R4 then
            if paddingreg in regs then
              internalerror(201306190)
            else
              begin
                regs:=regs+[paddingreg];
                inc(registerarea,4);
              end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin) then
            begin
              { release the local area first }
              LocalSize:=current_procinfo.calc_stackframe_size;
              if (LocalSize<>0) or
                 ((stackmisalignment<>0) and
                  ((pi_do_call in current_procinfo.flags) or
                   (po_assembler in current_procinfo.procdef.procoptions))) then
                begin
                  if pi_estimatestacksize in current_procinfo.flags then
                    LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                  else
                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                  if is_shifter_const(LocalSize,shift) then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                  else if split_into_shifter_const(localsize, imm1, imm2) then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                    end
                  else
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end;
                end;

              { Darwin: pop the second batch pushed by the prologue; ref was
                set up above when saveregs was computed }
              if (target_info.system in systems_darwin) and
                 (saveregs<>[]) then
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));

              if regs=[] then
                begin
                  { nothing to pop: plain return through R14 }
                  if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                  else
                    list.concat(taicpu.op_reg(A_BX,NR_R14))
                end
              else
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;
            end
          else
            begin
              { restore int registers and return }
              reference_reset(ref,4,[]);
              ref.index:=NR_FRAME_POINTER_REG;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
            end;
        end
      else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14));
    end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.

    Nothing is generated unless PIC code is being created, the current
    procedure needs the GOT and the target uses a GOT for PIC.

    The sequence is: load a pc-relative displacement from the local data pool
    into R12, add PC to it at a local label and move the result into the GOT
    register. The pool entry is '_GLOBAL_OFFSET_TABLE_' relative to that
    label with an offset of -8. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poolentry,
      pcanchor : TAsmLabel;
      usedregs : tcpuregisterset;
      argreg : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) then
        exit;
      if not(pi_needs_got in current_procinfo.flags) then
        exit;
      if not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage. The registers
        that carry them are allocated before the GOT code so that later stages
        of code generation do not use them for temporary operations. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for argreg:=RS_R0 to RS_R3 do
        if argreg in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,argreg,R_SUBWHOLE));

      { The scratch register R12 is allocated and used directly for the GOT
        computation, otherwise later stages could distort the init code. }
      a_reg_alloc(list,NR_R12);

      { pool entry and the pc-relative load of it }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poolentry);
      cg.a_label(current_procinfo.aktlocaldata,poolentry);
      poolref.symbol:=poolentry;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { displacement of the GOT relative to the label placed in front of the add }
      current_asmdata.getaddrlabel(pcanchor);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,pcanchor,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,pcanchor);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { release everything in reverse order }
      a_reg_dealloc(list,NR_R12);
      for argreg:=RS_R3 downto RS_R0 do
        if argreg in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,argreg,R_SUBWHOLE));
    end;
  { Loads the effective address described by "ref" into register "r".

      list - assembler list that receives the generated code
      ref  - reference whose address is taken; only AM_OFFSET addressing is
             accepted
      r    - destination register

    References with a symbol, or with an offset that is not encodable as a
    shifter constant (neither the offset nor its negation), are first
    simplified by fixref. The address is then formed as

      base +/- index [+ offset]   if an index register is present
      base + offset, base, or the plain constant otherwise

    Internal errors: 200309071 (addressing mode), 2014020702 / 200312021
    (shifted index), 200312023 (negative index without base), 200312022
    (index without base after fixref). }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      tmpref:=ref;

      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;

      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);

      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);

      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          if tmpref.signindex<0 then
            { a_op_reg_reg_reg computes dst:=src2 op src1, so the index has to
              be passed as src1 and the base as src2 to obtain base-index }
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                { plain register copy; it is registered as a move so that the
                  register allocator can try to eliminate it }
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                add_move_instruction(instr);
              end;
        end;
    end;
  2303. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2304. var
  2305. tmpreg, tmpreg2 : tregister;
  2306. tmpref : treference;
  2307. l, piclabel : tasmlabel;
  2308. indirection_done : boolean;
  2309. begin
  2310. { absolute symbols can't be handled directly, we've to store the symbol reference
  2311. in the text segment and access it pc relative
  2312. For now, we assume that references where base or index equals to PC are already
  2313. relative, all other references are assumed to be absolute and thus they need
  2314. to be handled extra.
  2315. A proper solution would be to change refoptions to a set and store the information
  2316. if the symbol is absolute or relative there.
  2317. }
  2318. { create consts entry }
  2319. reference_reset(tmpref,4,[]);
  2320. current_asmdata.getjumplabel(l);
  2321. cg.a_label(current_procinfo.aktlocaldata,l);
  2322. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2323. piclabel:=nil;
  2324. tmpreg:=NR_NO;
  2325. indirection_done:=false;
  2326. if assigned(ref.symbol) then
  2327. begin
  2328. if (target_info.system=system_arm_ios) and
  2329. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2330. begin
  2331. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2332. if ref.offset<>0 then
  2333. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2334. indirection_done:=true;
  2335. end
  2336. else if ref.refaddr=addr_gottpoff then
  2337. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2338. else if ref.refaddr=addr_tlsgd then
  2339. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2340. else if ref.refaddr=addr_tlsdesc then
  2341. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2342. else if ref.refaddr=addr_tpoff then
  2343. begin
  2344. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2345. Internalerror(2019092804);
  2346. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2347. end
  2348. else if (cs_create_pic in current_settings.moduleswitches) then
  2349. if (tf_pic_uses_got in target_info.flags) then
  2350. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2351. else
  2352. begin
  2353. { ideally, we would want to generate
  2354. ldr r1, LPICConstPool
  2355. LPICLocal:
  2356. ldr/str r2,[pc,r1]
  2357. ...
  2358. LPICConstPool:
  2359. .long _globsym-(LPICLocal+8)
  2360. However, we cannot be sure that the ldr/str will follow
  2361. right after the call to fixref, so we have to load the
  2362. complete address already in a register.
  2363. }
  2364. current_asmdata.getaddrlabel(piclabel);
  2365. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2366. end
  2367. else
  2368. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2369. end
  2370. else
  2371. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2372. { load consts entry }
  2373. if not indirection_done then
  2374. begin
  2375. tmpreg:=getintregister(list,OS_INT);
  2376. tmpref.symbol:=l;
  2377. tmpref.base:=NR_PC;
  2378. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2379. if (cs_create_pic in current_settings.moduleswitches) and
  2380. (tf_pic_uses_got in target_info.flags) and
  2381. assigned(ref.symbol) then
  2382. begin
  2383. {$ifdef EXTDEBUG}
  2384. if not (pi_needs_got in current_procinfo.flags) then
  2385. Comment(V_warning,'pi_needs_got not included');
  2386. {$endif EXTDEBUG}
  2387. Include(current_procinfo.flags,pi_needs_got);
  2388. reference_reset(tmpref,4,[]);
  2389. tmpref.base:=current_procinfo.got;
  2390. tmpref.index:=tmpreg;
  2391. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2392. if ref.offset<>0 then
  2393. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2394. end;
  2395. end;
  2396. if assigned(piclabel) then
  2397. begin
  2398. cg.a_label(list,piclabel);
  2399. tmpreg2:=getaddressregister(list);
  2400. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2401. tmpreg:=tmpreg2
  2402. end;
  2403. { This routine can be called with PC as base/index in case the offset
  2404. was too large to encode in a load/store. In that case, the entire
  2405. absolute expression has been re-encoded in a new constpool entry, and
  2406. we have to remove the use of PC from the original reference (the code
  2407. above made everything relative to the value loaded from the new
  2408. constpool entry) }
  2409. if is_pc(ref.base) then
  2410. ref.base:=NR_NO;
  2411. if is_pc(ref.index) then
  2412. ref.index:=NR_NO;
  2413. if (ref.base<>NR_NO) then
  2414. begin
  2415. if ref.index<>NR_NO then
  2416. begin
  2417. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2418. ref.base:=tmpreg;
  2419. end
  2420. else
  2421. if ref.base<>NR_PC then
  2422. begin
  2423. ref.index:=tmpreg;
  2424. ref.shiftimm:=0;
  2425. ref.signindex:=1;
  2426. ref.shiftmode:=SM_None;
  2427. end
  2428. else
  2429. ref.base:=tmpreg;
  2430. end
  2431. else
  2432. ref.base:=tmpreg;
  2433. ref.offset:=0;
  2434. ref.symbol:=nil;
  2435. end;
  { Copies len bytes from source to dest by emitting a call to FPC_MOVE
    instead of generating inline load/store code.

    list    assembler list the generated code is appended to
    source  reference of the memory block to read
    dest    reference of the memory block to write
    len     number of bytes, passed to the helper as a constant }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,countpara : TCGPara;
      movedef : tprocdef;
    begin
      { the parameter locations are derived from the declaration found under the name MOVE }
      movedef:=search_system_proc('MOVE');
      srcpara.init;
      dstpara.init;
      countpara.init;
      paramanager.getcgtempparaloc(list,movedef,1,srcpara);
      paramanager.getcgtempparaloc(list,movedef,2,dstpara);
      paramanager.getcgtempparaloc(list,movedef,3,countpara);

      { load the parameters, last one first }
      a_load_const_cgpara(list,OS_SINT,len,countpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,countpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);

      { the volatile integer and fpu registers are allocated around the call
        and released in reverse order afterwards }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      countpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list     assembler list the generated code is appended to
    source   reference of the memory block to read
    dest     reference of the memory block to write
    len      number of bytes to copy; nothing is emitted for len=0
    aligned  true if 16/32 bit accesses may be used, false forces byte accesses

    Strategy, in the order the cases are tested:
      - aligned, len=4 or len=2: a single 32 or 16 bit load/store pair
      - aligned, len<=helpsize: up to maxtmpreg 32 bit loads into temp
        registers followed by the matching stores, then any remainder in
        4/2/1 byte steps
      - not aligned (or too long), len<=4: byte by byte
      - everything else: a byte copy loop (genloop/genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with reserved
        registers; +2 is also used because of the registers required for referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;
    type
      ttmpregisters = array[1..maxtmpreg_arm] of tregister;
    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:ttmpregisters;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop for ARM/Thumb-2 code using post-indexed addressing.
      Copies count div size elements of size bytes in a loop and handles the
      count mod size remaining bytes afterwards. Uses srcref, dstref, srcreg,
      destreg and countreg of the enclosing procedure.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        { post-indexed access: the base registers advance by size per access }
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the flag-setting SUB is placed between the load and the store }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        srcref.offset:=1;
        dstref.offset:=1;
        { copy the bytes which did not fit into a whole element }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether creating a separate ref is needed or
      whether we can keep the original reference while copying.
      Note that this reads the current value of the enclosing len. }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Copy loop for Thumb code: no post-indexed addressing is used, the base
      registers are advanced with explicit ADDs instead.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { advance by the element size, like the post-increment in genloop does;
          stepping by 1 was only correct for size=1 }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { NOTE(review): no PF_S postfix here although the flags are tested right
          after; presumably the Thumb SUB updates the flags implicitly - confirm }
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the bytes which did not fit into a whole element }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      tmpregisters:=Default(ttmpregisters);
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest block copied without a loop; 52 with maxtmpreg=10 }
      helpsize:=12+maxtmpreg*4;
      dstref:=dest;
      srcref:=source;
      { when optimising for size, switch to the loop much earlier }
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { first issue all 32 bit loads (at most maxtmpreg of them) ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;

          { ... then the matching stores }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { whatever is left is copied in the largest chunks that still fit }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the references handed back by the a_internal_* helpers are the
                ones advanced for the following byte accesses }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference must carry the properties of source, not of dest }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                 genloop(len,4)
               else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    as soon as either reference has an alignment of 1 or 3, otherwise the
    aligned code path of g_concatcopy_internal is used. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      is_aligned : boolean;
    begin
      is_aligned:=not((source.alignment in [1,3]) or
                      (dest.alignment in [1,3]));
      g_concatcopy_internal(list,source,dest,len,is_aligned);
    end;
  { Emits an overflow check for the preceding operation when no explicit
    overflow location is available: hands a LOC_VOID location to
    g_overflowCheck_loc. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      noflagsloc : tlocation;
    begin
      { only the loc field is initialised here }
      noflagsloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,noflagsloc);
    end;
  { Emits an overflow check: a conditional branch that skips a call to
    FPC_OVERFLOW when no overflow occurred. Nothing is emitted unless
    cs_check_overflow is active.

    List   assembler list the generated code is appended to
    Loc    not referenced by this implementation
    def    type of the checked result; selects the condition for LOC_VOID
    ovloc  LOC_VOID: derive the condition from def and the last instruction
           LOC_FLAGS: branch on the inverse of ovloc.resflags
           anything else is an internal error }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      branch : TAiCpu;
      okflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        LOC_VOID:
          begin
            branch:=taicpu.op_sym(A_B,skiplabel);
            branch.is_jmp:=true;
            { pointers and the listed ordinal types are checked through the
              carry flag, everything else through the overflow flag }
            carrybased:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if not carrybased then
              branch.SetCondition(C_VC)
            { the carry condition depends on whether the last instruction in
              the list is one of the subtract opcodes }
            else if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
              branch.SetCondition(C_CS)
            else
              branch.SetCondition(C_CC);
            list.concat(branch);
          end;
        else
          internalerror(200409281);
      end;
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { saving the registers is handled by g_proc_entry }
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { restoring the registers is handled by g_proc_exit }
    end;
  { Emits a jump to l which is taken when the comparison condition cond
    holds.

    Outside of Thumb code this is a single conditional branch. For Thumb
    code a conditional branch with the inverse condition jumps over an
    unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Returns the opcode for the (fromsize,tosize) pair of float sizes:
    A_VMOV on the diagonal of the populated part of the table, A_VCVT for
    the two mixed entries. Any pair mapping to A_NONE raises internal
    error 200312205.
    NOTE(review): the populated rows/columns are the first two of the
    OS_F32..OS_F128 range, presumably OS_F32 and OS_F64 - confirm. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      scalar_ops : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    var
      op : tasmop;
    begin
      op:=scalar_ops[fromsize,tosize];
      if op=A_NONE then
        internalerror(200312205);
      get_scalar_mm_op:=op;
    end;
  { Returns the instruction postfix for the (fromsize,tosize) pair of float
    sizes by table lookup. Unlike get_scalar_mm_op no error is raised for
    unpopulated entries; those yield PF_None. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      scalar_postfixes : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      get_scalar_mm_prefix:=scalar_postfixes[fromsize,tosize];
    end;
  { Moves or converts a scalar value from mm register reg1 to mm register
    reg2. Only nil or scalar shuffles are supported, anything else raises
    internal error 2009112407.

    The opcode and postfix tables are indexed with (tosize,fromsize), i.e.
    with the destination size first. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112407);
      movinstr:=setoppostfix(
        taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),
        get_scalar_mm_prefix(tosize,fromsize));
      list.concat(movinstr);
      if movinstr.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so we do not need a check here }
        add_move_instruction(movinstr)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar value from memory (ref) into the mm register reg,
    converting from fromsize to tosize if they differ.

    Integer source sizes (32/64 bit) are treated as the float size of the
    same width and must then match tosize exactly. References with an
    alignment of 1 or 2 are loaded through integer registers and moved to
    the mm register, all others use VLDR. Non-scalar shuffles and
    unsupported sizes raise internal errors. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      rawreg,
      loadreg : tregister;
      pair : tregister64;
    begin
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      case fromsize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { since we are loading an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { since we are loading an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112901);
          end;
        else
          internalerror(2019050920);
      end;

      { a conversion needs an intermediate register of the source size }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        case fromsize of
          OS_F32:
            begin
              rawreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,ref,rawreg);
              a_loadmm_intreg_reg(list,OS_32,OS_F32,rawreg,loadreg,mms_movescalar);
            end;
          OS_F64:
            begin
              pair.reglo:=getintregister(list,OS_32);
              pair.reghi:=getintregister(list,OS_32);
              cg64.a_load64_ref_reg(list,ref,pair);
              cg64.a_loadmm_intreg64_reg(list,OS_F64,pair,loadreg);
            end;
          else
            internalerror(2009112412);
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar value held in mm register reg to memory (ref),
    converting from fromsize to tosize first if they differ.

    Integer destination sizes (32/64 bit) are treated as the float size of
    the same width and must then match fromsize exactly. References with an
    alignment of 1 or 2 are written through integer registers, all others
    use VSTR. Non-scalar shuffles and unsupported sizes raise internal
    errors. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      rawreg,
      storereg : tregister;
      pair : tregister64;
    begin
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      case tosize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112802);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing an integer, no conversion may be required }
            if (fromsize<>tosize) then
              internalerror(2009112902);
          end;
        else
          internalerror(2019050919);
      end;

      { convert into a temporary register first if the sizes differ }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        case tosize of
          OS_F32:
            begin
              rawreg:=getintregister(list,OS_32);
              a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,rawreg,shuffle);
              a_load_reg_ref(list,OS_32,OS_32,rawreg,ref);
            end;
          OS_F64:
            begin
              pair.reglo:=getintregister(list,OS_32);
              pair.reghi:=getintregister(list,OS_32);
              cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,pair);
              cg64.a_load64_reg_ref(list,pair,ref);
            end;
          else
            internalerror(2009112417);
        end;
    end;
  { Transfers the raw 32 bit contents of integer register intreg into the
    mm register mmreg with a VMOV. No conversion is performed: tosize must
    be OS_F32, fromsize must be OS_32 or OS_S32 and shuffle must be nil or
    scalar, otherwise an internal error is raised. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      rawmove : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (tosize<>OS_F32) then
        internalerror(2009112419);
      if not(fromsize in [OS_32,OS_S32]) then
        internalerror(2009112420);
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112516);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      rawmove:=taicpu.op_reg_reg(A_VMOV,mmreg,intreg);
      list.concat(rawmove);
    end;
  { Transfers the raw 32 bit contents of the mm register mmreg into the
    integer register intreg with a VMOV. No conversion is performed:
    fromsize must be OS_F32, tosize must be OS_32 or OS_S32 and shuffle
    must be nil or scalar, otherwise an internal error is raised. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      rawmove : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (fromsize<>OS_F32) then
        internalerror(2009112430);
      if not(tosize in [OS_32,OS_S32]) then
        internalerror(2009112409);
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112514);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      rawmove:=taicpu.op_reg_reg(A_VMOV,intreg,mmreg);
      list.concat(rawmove);
    end;
  { Performs op on mm registers; only OP_XOR is implemented, every other
    operation raises internal error 2009112906.

    With NEON available and size=OS_F64 a VEOR dst,dst,src is emitted.
    Otherwise src and dst must be the same register and the register is
    cleared by moving zero into it from an integer register. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op=OP_XOR then
        begin
          if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
            begin
              if (reg_cgsize(src)<>size) or
                 assigned(shuffle) then
                internalerror(2019081301);
              list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
            end
          else
            begin
              if (src<>dst) or
                 (reg_cgsize(src)<>size) or
                 assigned(shuffle) then
                internalerror(2009112907);
              zeroreg:=getintregister(list,OS_32);
              a_load_const_reg(list,OS_32,0,zeroreg);
              if size=OS_F32 then
                list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
              else if size=OS_F64 then
                { the same zero register supplies both halves }
                list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
              else
                internalerror(2009112908);
            end;
        end
      else
        internalerror(2009112906);
    end;
  { After one of the operations MUL, SHL, ADD, SUB or NEG on an 8 or 16 bit
    operand, reloads dst onto itself with a 32 bit to size conversion
    (presumably to bring the upper bits back in line with size - confirm).
    Does nothing for other operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      ops_needing_adjust = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      small_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in ops_needing_adjust) then
        exit;
      if size in small_sizes then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits MLA op1,op2,op3,op4 after replacing operands the instruction is
    not emitted with directly: R15 always, and R13 as well when generating
    Thumb or Thumb-2 code. Such an operand is copied into a fresh integer
    register which is then used instead. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new register if it is one of the
      registers described above }
    procedure substitute_special(var reg : TRegister);
      var
        copyreg : TRegister;
        is_special : boolean;
      begin
        is_special:=((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
          (getsupreg(reg)=RS_R15);
        if not is_special then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      substitute_special(op1);
      substitute_special(op2);
      substitute_special(op3);
      substitute_special(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { If the current procedure is flagged with pi_needs_tls, emits a call to
    fpc_read_tp and copies R0 into current_procinfo.tlsoffset. R0 is marked
    as allocated around the sequence. Emits nothing otherwise. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit two-operand operation on register pairs.

    OP_NEG: RSB with the S postfix on the low words followed by RSC on the
            high words, both with the constant 0
    OP_NOT: a 32 bit NOT on each half
    other:  forwarded to a_op64_reg_reg_reg with regdst as second source
            and destination }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation with a constant, performed in place: forwards to
    a_op64_const_reg_reg with reg as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,
                           op,size,value,
                           reg,reg);
    end;
  { 64 bit operation regdst := regsrc op value without requesting flags:
    forwards to a_op64_const_reg_reg_checkoverflow with setflags=false and
    discards the overflow location it reports. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      ignoredloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,ignoredloc);
    end;
  { 64 bit three-operand operation without requesting flags: forwards to
    a_op64_reg_reg_reg_checkoverflow with setflags=false and discards the
    overflow location it reports. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      ignoredloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,ignoredloc);
    end;
  { Transfers the raw 64 bit contents of the integer register pair intreg
    into the mm register mmreg with a single VMOV (operands: mmreg, low
    word, high word). mmsize must be OS_F64, otherwise internal error
    2009112405 is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    var
      rawmove : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (mmsize<>OS_F64) then
        internalerror(2009112405);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      rawmove:=taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi);
      list.concat(rawmove);
    end;
  { Transfers the raw 64 bit contents of the mm register mmreg into the
    integer register pair intreg with a single VMOV (operands: low word,
    high word, mmreg). mmsize must be OS_F64, otherwise internal error
    2009112406 is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    var
      rawmove : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (mmsize<>OS_F64) then
        internalerror(2009112406);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      rawmove:=taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg);
      list.concat(rawmove);
    end;
  { Emits regdst := regsrc <op> value for a 64 bit constant, working on the
    two 32 bit halves of the register pairs.

    list     - assembler list the instructions are appended to
    op       - OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT raise
               internalerror 2012022501, anything else 2003083101
    size     - operand size; only OS_64 makes the routine report an overflow
               location
    value    - the constant operand
    setflags - request that the high-word instruction updates the flags too
               (tbasecgarm(cg).cgsetflags has the same effect)
    ovloc    - set to LOC_VOID, or to LOC_FLAGS with F_CS (add) / F_CC (sub)
               when flags were requested and size=OS_64

    When flags are requested they are left allocated for the caller. In the
    plain ADD/SUB path they are only needed to carry from the low to the high
    word, so they are released again after the high-word instruction. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);

    { Emits "lo_op" with the S postfix on the low words followed by "hi_op"
      on the high words. Each half of the constant is used as an immediate
      if is_shifter_const accepts it, otherwise it is loaded into a fresh
      temporary register first. NR_DEFAULTFLAGS is allocated right before
      the low-word instruction; releasing it is left to the caller.
      hi_updates_flags selects whether the high-word instruction also gets
      the S postfix. }
    procedure emit_carry_chain(lo_op,hi_op : tasmop;hi_updates_flags : boolean);
      var
        tmpreg : tregister;
        b : byte;
        lo_part,
        hi_part : aint;
        hi_instr : taicpu;
      begin
        { view both halves as aint once, so that every use below passes the
          same, explicitly typecast value }
        lo_part:=aint(lo(value));
        hi_part:=aint(hi(value));

        if is_shifter_const(lo_part,b) then
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_const(lo_op,regdst.reglo,regsrc.reglo,lo_part),PF_S));
          end
        else
          begin
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,lo_part,tmpreg);
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(lo_op,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
          end;

        if is_shifter_const(hi_part,b) then
          hi_instr:=taicpu.op_reg_reg_const(hi_op,regdst.reghi,regsrc.reghi,hi_part)
        else
          begin
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hi_part,tmpreg);
            hi_instr:=taicpu.op_reg_reg_reg(hi_op,regdst.reghi,regsrc.reghi,tmpreg);
          end;
        if hi_updates_flags then
          hi_instr:=setoppostfix(hi_instr,PF_S);
        list.concat(hi_instr);
      end;

    begin
      ovloc.loc:=LOC_VOID;
      { unary operations cannot be combined with a constant operand }
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022501);

      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          case op of
            OP_ADD:
              emit_carry_chain(A_ADD,A_ADC,true);
            OP_SUB:
              emit_carry_chain(A_SUB,A_SBC,true);
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the carry flag has opposite meanings after ADD and SUB on
                ARM, hence the different overflow conditions }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { bitwise operations act on both halves independently }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                emit_carry_chain(A_ADD,A_ADC,false);
                { nobody consumes the flags after the carry was used }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                emit_carry_chain(A_SUB,A_SBC,false);
                { nobody consumes the flags after the borrow was used }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Emits a 64 bit operation on register pairs:
      OP_ADD            regdst := regsrc1 + regsrc2
      OP_SUB            regdst := regsrc2 - regsrc1
      OP_AND/OR/XOR     applied to the low and the high words separately
    OP_NEG/OP_NOT raise internalerror 2012022502, any other operation
    internalerror 2003083104.

    If setflags or tbasecgarm(cg).cgsetflags is set for an ADD/SUB, the
    high-word instruction updates the flags as well, the flags stay
    allocated, and for size=OS_64 ovloc reports LOC_FLAGS with F_CS (add) or
    F_CC (sub). Otherwise ovloc is LOC_VOID and the flags are released after
    the high-word instruction. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);

    { Allocates the flags and emits "lo_op"+S on the low words of lhs/rhs
      followed by "hi_op" on the high words; the high-word instruction gets
      the S postfix only if hi_updates_flags is set. }
    procedure emit_pair(lo_op,hi_op : tasmop;const lhs,rhs : tregister64;hi_updates_flags : boolean);
      var
        hi_instr : taicpu;
      begin
        cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(setoppostfix(taicpu.op_reg_reg_reg(lo_op,regdst.reglo,lhs.reglo,rhs.reglo),PF_S));
        hi_instr:=taicpu.op_reg_reg_reg(hi_op,regdst.reghi,lhs.reghi,rhs.reghi);
        if hi_updates_flags then
          hi_instr:=setoppostfix(hi_instr,PF_S);
        list.concat(hi_instr);
      end;

    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          case op of
            OP_ADD:
              emit_pair(A_ADD,A_ADC,regsrc1,regsrc2,true);
            OP_SUB:
              { note the swapped operands: src2 - src1 }
              emit_pair(A_SUB,A_SBC,regsrc2,regsrc1,true);
            else
              internalerror(2003083102);
          end;
          if size=OS_64 then
            begin
              { carry set signals overflow after an addition, carry clear
                after a subtraction }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050917);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                emit_pair(A_ADD,A_ADC,regsrc1,regsrc2,false);
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                emit_pair(A_SUB,A_SBC,regsrc2,regsrc1,false);
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083104);
          end;
        end;
    end;
  { Creates the integer register allocator for Thumb code. R0..R6 are always
    available; R7 is added unless the current procedure uses it as frame
    pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or (current_procinfo.framepointer<>NR_R7) then
        { R7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 holds the frame pointer, keep it away from the allocator }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the register allocator objects stored in rg[] for the integer,
    FPU and multimedia register classes and then lets the inherited
    implementation finish the cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].free;
      { NOTE(review): init_register_allocators of this class only creates the
        integer allocator itself; the other two entries are presumably either
        nil or created by the inherited init - confirm }
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;
      inherited done_register_allocators;
    end;
  { Emits the Thumb procedure prologue.

    list         - assembler list the prologue is appended to
    localsize    - size of the local variable area; rounded up to a multiple
                   of 4 and, later on, adjusted for stack alignment
    nostackframe - if set, nothing is emitted at all

    Steps when a stack frame is requested:
      1. PUSH the non-volatile integer registers used by the procedure plus
         R14 (and R7 when a separate frame pointer is used). R4 is added if
         the frame might exceed the range of a single SP adjustment because
         it serves as scratch register for large frames. Nothing is pushed
         for procedures marked noreturn.
      2. Describe the pushed registers to the CFI generator.
      3. Lower SP by the size of the locals: one SUB up to 508 bytes, two
         SUBs up to 1016 bytes, otherwise via a constant loaded into R4.
      4. Copy SP into the frame pointer if one is used.

    Raises internalerror 2013040601 if stack parameters are accessed and
    localsize exceeds the previously estimated stack frame size. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          { NOTE(review): ref is initialised here but not read again in this
            routine }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              //!!!! a_reg_alloc(list,NR_R12);
              //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
            end;
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);
          { safely estimate stack size: R4 is the scratch register of the
            large frame path below, so make sure it gets saved whenever that
            path might be taken }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;
          registerarea:=0;
          { do not save integer registers if the procedure does not return }
          if po_noreturn in current_procinfo.procdef.procoptions then
            regs:=[];
          if regs<>[] then
            begin
              { this first count is discarded again: registerarea is reset
                right after the PUSH }
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
              { we need to run the loop twice to get cfi right }
              registerarea:=0;
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  begin
                    inc(registerarea,4);
                    current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
                  end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
            end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      { print both sizes before aborting to ease diagnosis }
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              { 508 is the largest adjustment emitted as one instruction (it
                is used as such in the next branch and in g_proc_exit), so a
                frame of exactly 508 bytes must take the single instruction
                path as well instead of getting an extra "SUB sp,sp,#0" }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4,
                    which has been saved above }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  include(regs,RS_R4);
                end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { "ADD fp,sp,#0" copies the final stack pointer into the frame
                pointer }
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
              current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
            end;
        end;
    end;
  { Emits the Thumb procedure epilogue.

    list         - assembler list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - if set, only a plain return through R14 is emitted

    Nothing is emitted for procedures marked noreturn. With a stack frame the
    local area is removed from SP and the saved registers are popped with R15
    in the register set, which performs the return. This code is only
    generated when SP is the frame pointer or the target is a Darwin system;
    in the remaining case nothing is emitted. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      regidx : byte;
      popregs : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      has_stack_paras : Boolean;

    { returns through the link register: BX if the cpu has it, otherwise by
      moving R14 into the program counter }
    procedure emit_return_via_lr;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          emit_return_via_lr;
          exit;
        end;

      has_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: the saved non-volatile ones, the PC and, if
        present, the frame pointer }
      popregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(popregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(popregs,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for regidx:=RS_R0 to RS_R15 do
        if regidx in popregs then
          inc(savedbytes,4);

      { redo the frame size computation of the prologue }
      misalign:=savedbytes mod current_settings.alignment.localalignmax;
      framesize:=current_procinfo.calc_stackframe_size;
      if has_stack_paras then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      if (framesize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          if framesize<>0 then
            begin
              if framesize<=508 then
                list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
              else if framesize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
                end
              else
                begin
                  { too large for immediates: go through R3 }
                  a_reg_alloc(list,NR_R3);
                  a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
                  a_reg_dealloc(list,NR_R3);
                end;
            end;
        end;

      { popregs always contains R15 at this point, so the POP branch is the
        one taken }
      if popregs=[] then
        emit_return_via_lr
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,popregs));
    end;
  { Loads a value of size fromsize from memory location Ref into reg,
    extending it to tosize.

    If the source is not smaller than the destination, the load is done with
    the destination size. References whose alignment (1 or 2) is smaller
    than the access size are read in pieces (bytes or halfwords) which are
    merged with LSL/ORR, taking the target endianness into account; all
    other loads go through handle_load_store.
    Sizes other than 8, 16 and 32 bit raise internalerror 200308298. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg,tmpreg2 : tregister;
      dir : integer;
      part : integer;

    { Prepares usedtmpref for piecewise access. Only complicated references
      (symbol, index register, offset outside -124..124) or references using
      the destination register itself need their address loaded into a
      temporary register first; all others are used unchanged. }
    procedure setup_piece_ref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            tmpreg2:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,tmpreg2);
            reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          usedtmpref:=ref;
      end;

    { shifts the piece in tmpreg left by shiftcount bits and ORs it into reg }
    procedure merge_piece(shiftcount : longint);
      begin
        list.concat(taicpu.op_reg_const(A_LSL,tmpreg,shiftcount));
        list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { walk from the least significant piece to the most significant
            one: upwards in memory on little endian, downwards on big endian }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                setup_piece_ref;
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                tmpreg:=getintregister(list,OS_INT);
                { low byte }
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { high byte, sign extended for a signed source }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                merge_piece(8);
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                setup_piece_ref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    merge_piece(16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                    for part:=1 to 3 do
                      begin
                        inc(usedtmpref.offset,dir);
                        a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                        merge_piece(8*part);
                      end;
                  end;
              end
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg.
    Values accepted by is_thumb_imm are loaded with a flag-setting MOV; all
    others are stored as a 32 bit entry in the local data of the current
    procedure and fetched with a PC-relative LDR.
    Only 8, 16 and 32 bit sizes are supported, anything else raises
    internalerror 2002090908. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { place the constant behind a fresh label in the local data ... }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      reference_reset(poolref,4,[]);
      { remember the label entry that was just appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      { ... and load it relative to the program counter }
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef at its caller
    side location(s).
    If ioffset is accepted by is_thumb_imm it is subtracted directly.
    Otherwise R4 is saved on the stack, loaded with ioffset from the local
    data of the current procedure, used for the subtraction and restored
    again.
    Raises internalerror 2003052504 if procdef has no 'self' parameter and
    internalerror 2003091804 for locations other than register or memory. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      loc_iter : Pcgparalocation;
      fits_immediate : boolean;

    { saves R4 and loads ioffset into it through a PC-relative LDR from an
      aligned entry in the local data }
    procedure load_ioffset_into_r4;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    { restores the R4 saved by load_ioffset_into_r4 }
    procedure restore_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(2003052504);

      fits_immediate:=is_thumb_imm(ioffset);
      loc_iter:=tparavarsym(selfsym).paraloc[callerside].location;
      while loc_iter<>nil do
        begin
          case loc_iter^.loc of
            LOC_REGISTER:
              begin
                if fits_immediate then
                  a_op_const_reg(list,OP_SUB,loc_iter^.size,ioffset,loc_iter^.register)
                else
                  begin
                    load_ioffset_into_r4;
                    a_op_reg_reg(list,OP_SUB,loc_iter^.size,NR_R4,loc_iter^.register);
                    restore_r4;
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,loc_iter^.reference.index,loc_iter^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if fits_immediate then
                  a_op_const_ref(list,OP_SUB,loc_iter^.size,ioffset,selfref)
                else
                  begin
                    load_ioffset_into_r4;
                    a_op_reg_ref(list,OP_SUB,loc_iter^.size,NR_R4,selfref);
                    restore_r4;
                  end;
              end
            else
              internalerror(2003091804);
          end;
          loc_iter:=loc_iter^.next;
        end;
    end;
  { Thumb front end of handle_load_store: if the reference cannot be used
    directly by the requested load/store, its address is first loaded into a
    temporary register and the access is done through that register with
    offset 0. The actual instruction is emitted by the inherited
    implementation, whose result is returned.

    The checks applied to the offset and the registers of the reference:
      word LDR/STR    |offset| <= 124 unless the base is SP
      byte access     |offset| <= 31, SP neither base nor index
      halfword access |offset| <= 62 and even, SP neither base nor index
      LDR via SP      |offset| <= 1020
      any LDR         no signed byte/halfword postfix, |offset| <= 124 }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      simpleref : treference;
      addrreg : TRegister;
      is_word,
      is_byte,
      is_half,
      touches_sp,
      need_address : boolean;
    begin
      simpleref:=ref;

      { classify the access; the size may be given by the opcode or by the
        postfix }
      is_word:=((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      is_byte:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_B)) or
               (((op=A_LDRB) or (op=A_STRB)) and (oppostfix=PF_None));
      is_half:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_H)) or
               (((op=A_LDRH) or (op=A_STRH)) and (oppostfix=PF_None));
      touches_sp:=(ref.base=NR_STACK_POINTER_REG) or
                  (ref.index=NR_STACK_POINTER_REG);

      { LDR/STR limitations }
      need_address:=is_word and
                    (ref.base<>NR_STACK_POINTER_REG) and
                    (abs(ref.offset)>124);
      { LDRB/STRB limitations }
      if is_byte and
         (touches_sp or (abs(ref.offset)>31)) then
        need_address:=true;
      { LDRH/STRH limitations }
      if is_half and
         (touches_sp or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0)) then
        need_address:=true;
      { SP-relative word loads }
      if (op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020) then
        need_address:=true;
      { signed loads and any LDR with an offset beyond 124 }
      if (op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)) then
        need_address:=true;

      if need_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(simpleref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, simpleref);
    end;
  { Emits the two operand operation dst := dst <op> src (respectively
    dst := <op> src for OP_NEG and OP_NOT) for Thumb.

    list - assembler list the instructions are appended to
    op   - operation; OP_DIV/OP_IDIV raise internalerror 200308284
    size - operand size; OP_ROL supports 32 bit only and raises
           internalerror 2008072805 otherwise
    src  - source register (the rotate count for OP_ROL)
    dst  - destination register

    The result is finally passed to maybeadjustresult. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { there is no rotate left: rotate right by 32-src instead }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { the rotate count is the computed 32-src in tmpreg; using src
              itself here would rotate right by src, i.e. into the wrong
              direction }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            { all remaining operations map to one instruction taken from
              the opcode and postfix tables }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := dst <op> a for a constant a on Thumb.

    Strategy, in this order:
      - an ADD/SUB whose negated constant is accepted by is_thumb_imm is
        turned into the opposite operation with the negated constant
      - ADD/SUB with a constant accepted by is_thumb_imm becomes a single
        instruction
      - multiplication/division by 1, multiplication by 0, signed
        multiplication/division by -1 and AND with 0 or with all bits set
        are replaced by cheaper sequences
      - shifts use the constant as immediate shift count
      - everything else loads the constant into a temporary register and
        calls a_op_reg_reg
    The result is finally passed to maybeadjustresult.

    The code inside the DUMMY conditionals is not compiled unless DUMMY is
    defined. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      tmpreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { with range checking enabled, -2147483648 is excluded before it gets
        negated }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));

          { placeholder for overflow location handling: with the ovloc
            assignments commented out this block has no effect }
          if (cgsetflags {!!! or setflags }) and (size in [OS_8,OS_16,OS_32]) then
            begin
              //!!! ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  //!!! ovloc.resflags:=F_CS;
                  ;
                OP_SUB:
                  //!!! ovloc.resflags:=F_CC;
                  ;
                else
                  ;
              end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(2003082903);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(2012051802);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
          { x := x AND $FFFFFFFF leaves the register unchanged, so no
            instruction is emitted at all }
          else if (op = OP_AND) and (not(dword(a))=0) then
            // do nothing
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              { shift by the immediate count a }
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { generic fallback: constant into a temporary register, then
                the register/register form }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg(list,op,size,tmpreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src <op> a.
    An addition of a positive multiple of 4 up to 1020 to R13, with a
    destination other than R13, is emitted as one ADD with immediate; every
    other combination is left to the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      is_sp_offset_add : boolean;
    begin
      is_sp_offset_add:=(op=OP_ADD) and
                        (src=NR_R13) and
                        (dst<>NR_R13) and
                        ((a mod 4)=0) and
                        (a>0) and
                        (a<=1020);
      if not(is_sp_offset_add) then
        begin
          inherited a_op_const_reg_reg(list,op,size,a,src,dst);
          exit;
        end;
      list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the flag condition f as 0 or 1 in reg with a branch
    sequence:
          B<cond> set_one
          MOVS    reg,#0
          B       done
      set_one:
          MOVS    reg,#1
      done:
    The flags are released after the last instruction. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      lbl_set_one,
      lbl_done : tasmlabel;
      condbranch : taicpu;
    begin
      current_asmdata.getjumplabel(lbl_set_one);
      current_asmdata.getjumplabel(lbl_done);

      { conditional branch taken when the condition holds }
      condbranch:=taicpu.op_sym(A_B,lbl_set_one);
      condbranch:=setcondition(condbranch,flags_to_cond(f));
      condbranch.is_jmp:=true;
      list.concat(condbranch);

      { condition false }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,lbl_done));

      { condition true }
      cg.a_label(list,lbl_set_one);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lbl_done);
    end;
  { Creates the register allocators for Thumb-2 code: the integer allocator
    (without R9 when targeting iOS), the FPA register allocator if the
    selected FPU has FPA registers, and the multimedia register allocator. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and MM register allocators and then lets the
    inherited implementation finish the clean-up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { free the allocators in the same order as before: int, fpu, mm }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX reg) and records in the current
    procedure info that the procedure performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      list.concat(taicpu.op_reg(A_BLX,reg));
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      current_procinfo.flags:=current_procinfo.flags+[pi_do_call];
    end;
  { Loads the constant a into reg.

    The cheapest form that fits is chosen, in this order:
      1. MOV  when a passes is_thumb32_imm
      2. MVN  when not(a) passes is_thumb32_imm
      3. MOVW when a fits into the low 16 bits
      4. LDR from a 32 bit entry appended to the procedure's local data,
         addressed relative to PC
    Only 8, 16 and 32 bit integer sizes are accepted; any other size
    raises internalerror 2002090909. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value in the local data of the
        procedure and load it from there }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    If the source is at least as large as the destination, the load is done
    with the destination size.  References whose alignment (1 or 2) is
    smaller than the size to load are assembled from several smaller loads
    which are merged with ORR and a left shift; the order in which the
    pieces are read depends on the target endianness.  All other loads are
    passed to handle_load_store. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      partref: treference;
      partreg,addrreg : tregister;
      so : tshifterop;
      dir : integer;
      bigendian,
      unaligned : boolean;

      { true if the reference cannot be used as is for the partial loads:
        it has a symbol or an index, its offset is outside -255..maxoffset,
        or the destination register is part of the address }
      function needs_loadaddr(maxoffset : longint) : boolean;
        begin
          result:=assigned(ref.symbol) or
            (ref.index<>NR_NO) or
            (ref.offset<-255) or
            (ref.offset>maxoffset) or
            { sometimes the compiler reused registers }
            (reg=ref.index) or
            (reg=ref.base);
        end;

      { sets up partref: either a copy of ref or, for complicated
        references, a plain [addrreg] reference to the computed address }
      procedure init_partref(maxoffset : longint);
        begin
          { only complicated references need an extra loadaddr }
          if needs_loadaddr(maxoffset) then
            begin
              addrreg:=getintregister(list,OS_INT);
              a_loadaddr_ref_reg(list,ref,addrreg);
              reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
            end
          else
            partref:=ref;
        end;

      { advances partref by step bytes, loads the next piece into partreg
        and ORs it, shifted left by shiftby bits, into reg }
      procedure load_next_piece(piecesize : tcgsize; step : longint; shiftby : byte);
        begin
          inc(partref.offset,step);
          a_internal_load_ref_reg(list,piecesize,piecesize,partref,partreg);
          so.shiftimm:=shiftby;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,so));
        end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      { select the load postfix for the effective source size }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(2003082913);
      end;

      unaligned:=(ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]);

      if not(unaligned) or not(FromSize in [OS_16,OS_S16,OS_32,OS_S32]) then
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          bigendian:=target_info.endian=endian_big;
          if bigendian then
            dir:=-1
          else
            dir:=1;

          if FromSize in [OS_16,OS_S16] then
            begin
              { two byte loads; the second one is sign extending for OS_S16 }
              init_partref(4094);
              if bigendian then
                inc(partref.offset,1);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              partreg:=getintregister(list,OS_INT);
              a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
              if FromSize=OS_16 then
                load_next_piece(OS_8,dir,8)
              else
                load_next_piece(OS_S8,dir,8);
            end
          else
            begin
              { OS_32,OS_S32 }
              partreg:=getintregister(list,OS_INT);
              init_partref(4092);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              if ref.alignment=2 then
                begin
                  { two halfword loads }
                  if bigendian then
                    inc(partref.offset,2);
                  a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                  load_next_piece(OS_16,dir*2,16);
                end
              else
                begin
                  { four byte loads }
                  if bigendian then
                    inc(partref.offset,3);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                  load_next_piece(OS_8,dir,8);
                  load_next_piece(OS_8,dir,16);
                  load_next_piece(OS_8,dir,24);
                end;
            end;
        end;

      { a signed byte loaded for an unsigned 16 bit destination still needs
        to be adjusted to 16 bit }
      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Emits "dst := op(src, dst)".

    Only OP_NOT is handled here: it becomes an MVN followed, for 8 and 16
    bit sizes, by the matching zero or sign extension of the result.  All
    other operations are passed to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list,op,size,src,dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { bring the inverted value back into the range of the operand size }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
        OS_32,
        OS_S32:
          { full register width, nothing to adjust }
          ;
        else
          internalerror(2019050916);
      end;
    end;
  { Emits "dst := src op a" and reports in ovloc where an overflow
    indication can be found.

    ovloc.loc is LOC_VOID unless flags were requested (cgsetflags or
    setflags) and the code path taken produces them.

    An addition or subtraction whose negated constant is a shifter constant
    is first turned into the opposite operation with the negated constant.
    Shifter constants (except for multiplications) are then encoded
    directly; everything else goes through a set of special cases
    (multiplication by 0, 1, -1, powers of two and neighbours, AND masks)
    before falling back to loading the constant into a temporary register. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      exponent : longint;
      wantflags : boolean;
      flagpostfix : toppostfix;

      { dst:=src shifted/rotated by amount using the given mode; a plain
        register move is emitted when the constant a is zero }
      procedure emit_shifted_move(mode : tshiftmode; amount : tcgint);
        begin
          if a=0 then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          else
            begin
              shifterop_reset(so);
              so.shiftmode:=mode;
              so.shiftimm:=amount;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
        end;

      { dst:=src asmop (src shl amount) }
      procedure emit_shift_combine(asmop : tasmop; amount : longint);
        begin
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=amount;
          list.concat(taicpu.op_reg_reg_reg_shifterop(asmop,dst,src,src,so));
        end;

    begin
      ovloc.loc:=LOC_VOID;

      { x+(-c) -> x-c and x-(-c) -> x+c if c can be encoded }
      if (a<>-2147483648) and is_shifter_const(-a,shift) and (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              emit_shifted_move(SM_LSL,a);
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              { rotate left by a is done as rotate right by 32-a }
              emit_shifted_move(SM_ROR,32-a);
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              emit_shifted_move(SM_ROR,a);
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              emit_shifted_move(SM_LSR,a);
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              emit_shifted_move(SM_ASR,a);
            end;
          else
            begin
              wantflags:=cgsetflags or setflags;
              flagpostfix:=toppostfix(ord(wantflags)*ord(PF_S));
              if (op in [OP_SUB,OP_ADD]) and
                 ((a<0) or (a>4095)) then
                begin
                  { constant goes through a temporary register }
                  tmpreg:=getintregister(list,size);
                  a_load_const_reg(list,size,a,tmpreg);
                  if wantflags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),flagpostfix));
                end
              else
                begin
                  if wantflags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),flagpostfix));
                end;
              { for unsigned sizes the carry flag carries the overflow
                information of an addition or subtraction }
              if wantflags and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      ;
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,exponent) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,exponent,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,exponent) and not(cgsetflags or setflags) then
            begin
              if exponent>32 then{roozbeh does this ever happen?}
                internalerror(2003082911);
              emit_shift_combine(A_ADD,exponent);
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,exponent) and not(cgsetflags or setflags) then
            begin
              if exponent>32 then{does this ever happen?}
                internalerror(2012051803);
              emit_shift_combine(A_RSB,exponent);
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op=OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
          else if (op=OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
          else if (op=OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          else if (op=OP_AND) and (a=$FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          else if (op=OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else if (op=OP_AND) and is_continuous_mask(aword(not(a)),shift,width) then
            begin
              { clear the contiguous run of bits that the mask removes }
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
            end
          else
            begin
              { generic fallback: constant in a register, then reg/reg op }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 opcode used for each generic operation by the default branch
      of a_op_reg_reg_reg_checkoverflow below.  The table is indexed by
      TOpCG; A_NONE marks operations that have no entry here.
      NOTE(review): the entries are presumably in the declaration order of
      TOpCG (cgbase) - verify against that declaration before changing. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
       A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);

  { Emits "dst := src2 op src1" and reports in ovloc where an overflow
    indication can be found.

    list      instruction list the code is appended to
    op        operation to perform; OP_NEG and OP_NOT are not accepted
              here (internalerror 200308286)
    size      operand size; rotates accept only OS_32/OS_S32
    src1,src2 source registers; they are not modified by the generated code
    dst       destination register
    setflags  request flag-setting code (in addition to cgsetflags)
    ovloc     set to LOC_VOID, or to LOC_FLAGS/F_NE for an overflow-checked
              multiplication

    Overflow-checked multiplications use UMULL/SMULL and compare the high
    word against the expected value (0 resp. the sign extension of the low
    word). }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the rotate count is computed
              in a temporary register so that src1 keeps its value for the
              caller (it was overwritten before) }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR,dst,src2,tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR,dst,src2,src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { full 64 bit product: low word in dst, high word in
                  overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand; the
                          copy has to go into tmpreg (it used to be copied
                          onto dst itself, leaving tmpreg undefined) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: high word must equal the sign extension of
                      the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: see the overflow-checking path }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { plain three-register form, with the S postfix when flags are
              requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the result flags f as 0 or 1 in reg using an ITE block:
    a MOV reg,#1 executed under the condition derived from f, followed by
    a MOV reg,#0 executed under the inverse condition. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,truecond));
      { "then" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      { "else" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Generates the procedure prologue.

    Nothing is emitted when nostackframe is set.  Otherwise the prologue
      - optionally sets up R12 as a copy of the stack pointer when a frame
        pointer different from the stack pointer is used,
      - pushes the used non-volatile integer registers (plus R14 and the
        frame pointer where required) with STMFD,
      - establishes the frame pointer,
      - reserves the local area, keeping the stack aligned to
        current_settings.alignment.localalignmax,
      - stores the used non-volatile FPA registers with SFM. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      pushref,
      floatref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment: pint;
      has_fpa,
      separate_fp : boolean;
    begin
      LocalSize:=align(LocalSize,4);
      if nostackframe then
        exit;

      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      firstfloatreg:=RS_NO;
      lastfloatreg:=RS_NO;
      has_fpa:=FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype];
      separate_fp:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      if has_fpa then
        begin
          { save floating point registers? }
          for r:=RS_F0 to RS_F7 do
            if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfloatreg=RS_NO then
                  firstfloatreg:=r;
                lastfloatreg:=r;
                inc(stackmisalignment,12);
              end;
        end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if separate_fp then
        begin
          { remember the incoming stack pointer in R12 }
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(pushref,4,[]);
      pushref.index:=NR_STACK_POINTER_REG;
      pushref.addressmode:=AM_PREINDEXED;
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if separate_fp then
        regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(regs,RS_R14);
      if regs<>[] then
        begin
          { every pushed register moves the stack by 4 bytes }
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(stackmisalignment,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,pushref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
        end;

      if separate_fp then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(localsize,shift) then
            begin
              { size fits into the instruction }
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size has to be loaded into R12 first }
              if not(separate_fp) then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      { store the FPA registers found above }
      if has_fpa and (firstfloatreg<>RS_NO) then
        begin
          reference_reset(floatref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the instruction: compute the address
                in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              floatref.base:=NR_R12;
            end
          else
            begin
              floatref.base:=current_procinfo.framepointer;
              floatref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
            lastfloatreg-firstfloatreg+1,floatref));
        end;
    end;
  { Generates the procedure epilogue.

    No code is generated for procedures marked noreturn.  Without a stack
    frame only "BX R14" is emitted.  Otherwise the used non-volatile FPA
    registers are reloaded with LFM, the local area is released and the
    saved integer registers are popped with LDMFD (which loads R15 instead
    of R14 to return); when no register has to be restored the procedure
    returns with "BX R14". }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      popref,
      floatref : treference;
      firstfloatreg,lastfloatreg,
      r : byte;
      shift : byte;
      regs : tcpuregisterset;
      LocalSize : longint;
      stackmisalignment: pint;
    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      stackmisalignment:=0;
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        begin
          { restore floating point register }
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          { save floating point registers? }
          for r:=RS_F0 to RS_F7 do
            if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfloatreg=RS_NO then
                  firstfloatreg:=r;
                lastfloatreg:=r;
                { floating point register space is already included in
                  localsize below by calc_stackframe_size
                inc(stackmisalignment,12);
                }
              end;

          if firstfloatreg<>RS_NO then
            begin
              reference_reset(floatref,4,[]);
              if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  { offset too large for the instruction: compute the
                    address in R12 }
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  floatref.base:=NR_R12;
                end
              else
                begin
                  floatref.base:=current_procinfo.framepointer;
                  floatref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                lastfloatreg-firstfloatreg+1,floatref));
            end;
        end;

      { registers to pop; the return address is loaded into R15 }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
        begin
          exclude(regs,RS_R14);
          include(regs,RS_R15);
        end;
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];

      { every saved register occupies 4 bytes on the stack }
      for r:=RS_R0 to RS_R15 do
        if r in regs then
          inc(stackmisalignment,4);
      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;

      LocalSize:=current_procinfo.calc_stackframe_size;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(LocalSize,shift) then
            begin
              { size fits into the instruction }
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size has to be loaded into R12 first }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if regs=[] then
        list.concat(taicpu.op_reg(A_BX,NR_R14))
      else
        begin
          reference_reset(popref,4,[]);
          popref.index:=NR_STACK_POINTER_REG;
          popref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,popref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
        end;
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" after rewriting
    ref into a form the instruction accepts.

    Symbols, out-of-range offsets and base+index+offset combinations are
    resolved with the help of a temporary register; for the floating point
    and VFP loads/stores an index register is folded into the base as
    well.  The reference that was finally used for the instruction is
    returned. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      combineop : tasmop;
      is_float_op,
      needs_rewrite : boolean;
    begin
      tmpreg:=NR_NO;
      is_float_op:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if ref.base=NR_NO then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      needs_rewrite:=
        (assigned(ref.symbol) and
         not(is_pc(ref.base)) and
         not(is_pc(ref.index))
        ) or
        { [#xxx] isn't a valid address operand }
        ((ref.base=NR_NO) and (ref.index=NR_NO)) or
        //(ref.offset<-4095) or
        (ref.offset<-255) or
        (ref.offset>4095) or
        ((oppostfix in [PF_SB,PF_H,PF_SH]) and
         ((ref.offset<-255) or
          (ref.offset>255)
         )
        ) or
        (is_float_op and
         ((ref.offset<-1020) or
          (ref.offset>1020) or
          ((abs(ref.offset) mod 4)<>0) or
          { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
          assigned(ref.symbol)
         )
        );

      if needs_rewrite then
        begin
          reference_reset(poolref,4,[]);
          { load symbol }
          tmpreg:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;
              { the kind of constant placed in the local data depends on
                the addressing kind of the reference }
              case ref.refaddr of
                addr_gottpoff:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsgd:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsdesc:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset));
                addr_tpoff:
                  begin
                    if assigned(ref.relsymbol) or (ref.offset<>0) then
                      Internalerror(2019092807);
                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
                  end;
                else
                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              end;

              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);

          { hook the temporary register into the reference }
          if ref.base=NR_NO then
            ref.base:=tmpreg
          else if ref.index<>NR_NO then
            begin
              list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
              ref.base:=tmpreg;
            end
          else
            begin
              ref.index:=tmpreg;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base, index and offset at the same time: fold the offset into a
        register }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if tmpreg<>NR_NO then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
          else
            begin
              tmpreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
              ref.base:=tmpreg;
            end;
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode<>sm_none) then
        begin
          tmpreg:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV,tmpreg,NR_R15));
          ref.base:=tmpreg;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if is_float_op and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(2003091202);
          if tmpreg=NR_NO then
            begin
              tmpreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
              ref.base:=tmpreg;
              ref.index:=NR_NO;
            end
          else
            begin
              { the index is subtracted when its sign is negative }
              if ref.signindex<0 then
                combineop:=A_SUB
              else
                combineop:=A_ADD;
              if ref.base=tmpreg then
                begin
                  list.concat(taicpu.op_reg_reg_reg(combineop,tmpreg,tmpreg,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>tmpreg then
                    internalerror(2004031602);
                  list.concat(taicpu.op_reg_reg_reg(combineop,tmpreg,ref.base,tmpreg));
                  ref.base:=tmpreg;
                  ref.index:=NR_NO;
                end;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result:=ref;
    end;
  { Moves a floating point value from MM register reg1 to MM register reg2.

    Code is generated only when both fromsize and tosize are OS_F32: a
    VMOV with the F32 postfix is appended to list and additionally passed
    to add_move_instruction. For every other size combination (including
    OS_F64 -> OS_F64 and OS_F32 -> OS_F64, whose instructions are still
    disabled below) nothing is emitted.

    list     - assembler list receiving the instruction
    fromsize - size of the value held in reg1
    tosize   - size of the value expected in reg2
    reg1     - source register
    reg2     - destination register
    shuffle  - not referenced by this implementation }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      vmov: taicpu;
    begin
      { Disabled variants, kept for reference:
          OS_F64 -> OS_F64 }
      //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
      //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
      {   OS_F32 -> OS_F64 }
      //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32))
      //list.concat(nil);
      if (fromsize<>OS_F32) or
         (tosize<>OS_F32) then
        exit;

      vmov:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
      vmov:=setoppostfix(vmov,PF_F32);
      list.Concat(vmov);
      add_move_instruction(vmov);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads MM register reg from the memory reference ref.

    The work is delegated to handle_load_store using the A_VLDR opcode
    without a postfix; fromsize, tosize and shuffle are not referenced
    here and the value returned by handle_load_store is discarded. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VLDR,
        PF_None,
        reg,
        ref);
    end;
  { Stores MM register reg to the memory reference ref.

    The work is delegated to handle_load_store using the A_VSTR opcode
    without a postfix; fromsize, tosize and shuffle are not referenced
    here and the value returned by handle_load_store is discarded. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      { VSTR cannot generate an FPU exception, so we do not need a check here }
      handle_load_store(
        list,
        A_VSTR,
        PF_None,
        reg,
        ref);
    end;
  { Transfers the integer register intreg into the MM register mmreg.

    Only a target size of OS_F32 is supported, in which case a single
    VMOV mmreg,intreg is appended to list. Any other tosize raises
    internal error 2012100813. fromsize and shuffle are not checked
    (the shuffle=nil test is disabled). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { disabled additional condition: (shuffle=nil) }
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the MM register mmreg into the integer register intreg.

    Only a source size of OS_F32 is supported, in which case a single
    VMOV intreg,mmreg is appended to list. Any other fromsize raises
    internal error 2012100814. tosize and shuffle are not checked
    (the shuffle=nil test is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      { disabled additional condition: (shuffle=nil) }
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          { VMOV cannot generate an FPU exception, so we do not need a check here }
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is handled here; every other operation is forwarded
    unchanged to the inherited implementation.

    OP_NEG computes regdst := 0 - regsrc in two steps:
      RSBS regdst.reglo, regsrc.reglo, #0   (low word, sets the flags)
      MOV  tmp, #0
      SBC  regdst.reghi, tmp, regsrc.reghi  (high word, consumes the carry)
    NR_DEFAULTFLAGS is allocated around the sequence because the carry
    travels from the first to the last instruction. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
      negate_lo: taicpu;
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);

          { low word: 0 - lo, flag setting }
          negate_lo:=taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0);
          list.concat(setoppostfix(negate_lo,PF_S));

          { high word: 0 - hi - borrow; the zero operand needs a register }
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));

          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
    end;
  { 64 bit register/register operation for Thumb (two operand form).

    Emits code for regdst := regdst op regsrc (OP_ADD, OP_SUB, OP_AND,
    OP_OR, OP_XOR) respectively regdst := op regsrc (OP_NEG, OP_NOT).
    Any other operation raises internal error 2003083105.

    list   - assembler list receiving the instructions
    op     - operation to perform
    size   - not referenced by this implementation
    regsrc - source register pair
    regdst - destination (and, for binary operations, left operand) pair

    For OP_NEG, OP_ADD and OP_SUB the carry/borrow produced by the low
    word instruction is consumed by the high word instruction, therefore
      * both instructions are marked as flag setting (PF_S), and
      * NR_DEFAULTFLAGS is allocated before the pair and released again
        afterwards, so the flags are not reported as live beyond the
        sequence. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { regdst := 0 - regsrc.
              NOTE(review): regdst is cleared before regsrc is read, so this
              presumably relies on regdst and regsrc being different
              registers - confirm against the callers. }
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations have no cross-word dependency }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            { low word produces the carry ... }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo),PF_S));
            { ... which the high word adds in }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            { low word produces the borrow ... }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo),PF_S));
            { ... which the high word subtracts }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg op value.

    Supported operations are OP_AND, OP_OR, OP_XOR, OP_ADD and OP_SUB;
    anything else raises internal error 2003083106.

    list  - assembler list receiving the instructions
    op    - operation to perform
    size  - not referenced by this implementation
    value - 64 bit constant operand, split with lo()/hi() into two words
    reg   - register pair that is both left operand and destination

    OP_ADD/OP_SUB: if the low word of value lies in 0..255 it is used as
    an immediate, otherwise it is loaded into a temporary register first.
    The high word is always loaded into a temporary register. The low
    word instruction produces the carry/borrow consumed by the high word
    ADC/SBC, therefore both are marked as flag setting (PF_S) and
    NR_DEFAULTFLAGS is allocated before the low word instruction and
    released after the high word instruction. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations have no cross-word dependency }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))),PF_S));
              end
            else
              begin
                { constant does not fit the immediate form: go through a register;
                  the flags are allocated only after the constant load }
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg),PF_S));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,reg.reghi,tmpreg),PF_S));
            { the carry has been consumed, the flags are free again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))),PF_S));
              end
            else
              begin
                { constant does not fit the immediate form: go through a register;
                  the flags are allocated only after the constant load }
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg),PF_S));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            { explicit aint() cast, matching the OP_ADD branch }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg),PF_S));
            { the borrow has been consumed, the flags are free again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects for the selected
    instruction set.

    GenerateThumb2Code is tested first, then GenerateThumbCode; if neither
    holds the plain ARM generators are used. In each case cg and cg64 are
    assigned freshly created instances. casmoptimizer is assigned for
    Thumb-2 and ARM only: the Thumb assignment is disabled, so
    casmoptimizer is left untouched in that case. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          { Thumb-2 }
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end
      else
        begin
          if GenerateThumbCode then
            begin
              { Thumb }
              cg:=tthumbcgarm.create;
              cg64:=tthumbcg64farm.create;
              // casmoptimizer:=TCpuThumbAsmOptimizer;
            end
          else
            begin
              { ARM }
              cg:=tarmcgarm.create;
              cg64:=tarmcg64farm.create;
              casmoptimizer:=TCpuAsmOptimizer;
            end;
        end;
    end;
  5078. end.