cgcpu.pas 224 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  28. { tbasecgarm is shared between all arm architectures }
  { Descends from tcg. Both tcgarm and tthumbcgarm derive from this class.
    Only the declaration lives here; short notes are given for the methods
    whose bodies are part of this unit's implementation section. }
  29. tbasecgarm = class(tcg)
  30. { true, if the next arithmetic operation should modify the flags }
  31. cgsetflags : boolean;
  { loads the constant a into a simple parameter location: a register is
    filled through a_load_const_reg, a stack slot through a_load_const_ref }
  32. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  33. protected
  { copies 4 bytes with g_concatcopy when an OS_F64 parameter has an OS_32
    location, every other case is passed on to the inherited method }
  34. procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  { creates rg[R_MMREGISTER] according to the capabilities of the selected fpu type }
  35. procedure init_mmregister_allocator;
  36. public
  { passes the address of r as parameter, either directly in the parameter
    register or through a temporary register that is stored to the stack slot }
  37. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
  { emits a BL to the (optionally weak) symbol s and sets pi_do_call }
  38. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  { emits BLX reg if the cpu has CPUARM_HAS_BLX, otherwise
    "mov r14,pc; mov pc,reg"; sets pi_do_call }
  39. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  40. { move instructions }
  41. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  42. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
  { the two a_internal_load_* functions return a treference; tcgarm.a_op_const_ref
    uses the returned reference for the store back to memory }
  43. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  44. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  45. { fpu move instructions }
  46. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  47. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  48. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  49. procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
  50. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  51. { comparison operations }
  52. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  53. l : tasmlabel);override;
  54. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  55. procedure a_jmp_name(list : TAsmList;const s : string); override;
  56. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  57. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  58. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  59. procedure g_profilecode(list : TAsmList); override;
  60. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  61. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  62. procedure g_maybe_got_init(list : TAsmList); override;
  63. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
  64. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  65. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
  66. procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  67. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  68. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  69. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  70. procedure g_save_registers(list : TAsmList);override;
  71. procedure g_restore_registers(list : TAsmList);override;
  72. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  73. procedure fixref(list : TAsmList;var ref : treference);
  { virtual: tthumbcgarm and tthumb2cgarm both declare an override }
  74. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
  75. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  76. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  77. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  78. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  79. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  80. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  81. { Transform unsupported methods into Internal errors }
  82. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
  83. { try to generate an optimized 32 Bit multiplication, returns true if code was successfully generated }
  84. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  85. { clear out potential overflow bits from 8 or 16 bit operations, i.e.
  86. the upper 24/16 bits of a register after an operation }
  87. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  88. { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
  89. procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  90. procedure g_maybe_tls_init(list : TAsmList); override;
  91. end;
  92. { tcgarm is shared between normal arm and thumb-2 }
  { Both tarmcgarm and tthumb2cgarm derive from this class. }
  93. tcgarm = class(tbasecgarm)
  { forwards to a_op_const_reg_reg with reg as both source and destination }
  94. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  { load from ref, apply the operation into a second register, store back }
  95. procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
  { handles OP_NEG and OP_NOT itself, everything else goes to a_op_reg_reg_reg }
  96. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  { the two methods below forward to their *_checkoverflow counterparts with setflags=false }
  97. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  98. size: tcgsize; a: tcgint; src, dst: tregister); override;
  99. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  100. size: tcgsize; src1, src2, dst: tregister); override;
  101. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  102. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  { loads a constant of at most 32 bit using mov, mvn, mov+orr, mvn+bic or
    a pc relative ldr from a constant emitted into aktlocaldata }
  103. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  { loads from memory, composing the value from smaller loads if the
    reference is insufficiently aligned }
  104. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  { subtracts ioffset from the self parameter of procdef }
  105. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  106. {Multiply two 32-bit registers into lo and hi 32-bit registers}
  107. procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  108. end;
  109. { normal arm cg }
  { Adds only the creation and release of the register allocators on top of tcgarm. }
  110. tarmcgarm = class(tcgarm)
  { creates the integer allocator, the FPA allocator (if FPUARM_HAS_FPA) and the mm allocator }
  111. procedure init_register_allocators;override;
  { frees the three allocators and calls the inherited method }
  112. procedure done_register_allocators;override;
  113. end;
  114. { 64 bit cg for all arm flavours }
  { Declares no members of its own; tcg64farm and tthumbcg64farm derive from it. }
  115. tbasecg64farm = class(tcg64f32)
  116. end;
  117. { tcg64farm is shared between normal arm and thumb-2 }
  { tarmcg64farm and tthumb2cg64farm derive from this class. }
  118. tcg64farm = class(tbasecg64farm)
  { 64 bit operations on tregister64 operands }
  119. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  120. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  121. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  122. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
  { variants taking setflags and an ovloc location
    (NOTE(review): presumably ovloc receives the overflow location - confirm against the implementation) }
  123. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  124. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  { transfers between a 64 bit integer register pair and an mm register }
  125. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  126. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  127. end;
  { 64 bit code generator class derived from tcg64farm without any additional members
    (NOTE(review): presumably the counterpart of tarmcgarm - confirm in create_codegen) }
  128. tarmcg64farm = class(tcg64farm)
  129. end;
  { Code generator class derived directly from tbasecgarm, i.e. it does not
    share the tcgarm implementations used by normal arm and thumb-2.
    NOTE(review): judging by the name this is the 16 bit Thumb code generator - confirm. }
  130. tthumbcgarm = class(tbasecgarm)
  131. procedure init_register_allocators;override;
  132. procedure done_register_allocators;override;
  133. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  134. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  135. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
  136. procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
  137. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
  138. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  139. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
  140. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  141. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  { replaces the virtual tbasecgarm.handle_load_store }
  142. function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  143. end;
  { 64 bit code generator class derived from tbasecg64farm; overrides only the
    two operand forms of the 64 bit operations
    (NOTE(review): presumably paired with tthumbcgarm - confirm in create_codegen) }
  144. tthumbcg64farm = class(tbasecg64farm)
  145. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  146. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  147. end;
  { Code generator class derived from tcgarm (which is documented as shared
    between normal arm and thumb-2); overrides the methods listed below.
    NOTE(review): judging by the name this is the Thumb-2 code generator - confirm. }
  148. tthumb2cgarm = class(tcgarm)
  149. procedure init_register_allocators;override;
  150. procedure done_register_allocators;override;
  151. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  152. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  153. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  154. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  155. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  156. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  157. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  158. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  159. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  { replaces the virtual tbasecgarm.handle_load_store }
  160. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
  { mm register moves }
  161. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  162. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  163. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  164. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  165. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  166. end;
  { 64 bit code generator class derived from tcg64farm; replaces only a_op64_reg_reg
    (NOTE(review): presumably paired with tthumb2cgarm - confirm in create_codegen) }
  167. tthumb2cg64farm = class(tcg64farm)
  168. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  169. end;
  170. const
  { lookup table from a generic comparison (topcmp) to an ARM condition code;
    the entries follow the declaration order of topcmp, which is declared in
    another unit, and the first entry is C_NONE }
  171. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  172. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
  { NOTE(review): presumably the stack page size in bytes assumed for
    Windows targets - confirm at the place where it is used }
  173. winstackpagesize = 4096;
  { returns the fpu opcode postfix (PF_S, PF_D or PF_E) for a float definition }
  174. function get_fpu_postfix(def : tdef) : toppostfix;
  { NOTE(review): presumably instantiates the code generator objects of this unit - body not shown here }
  175. procedure create_codegen;
  176. implementation
  177. uses
  178. globals,verbose,systems,cutils,
  179. aopt,aoptcpu,
  180. fmodule,
  181. symconst,symsym,symtable,
  182. tgobj,
  183. procinfo,cpupi,
  184. paramgr;
  185. { Range check must be disabled explicitly as conversions between signed and unsigned
  186. 32-bit values are done without explicit typecasts }
  187. {$R-}
  { Returns the FPU opcode postfix that belongs to the floating point type
    described by def:
      s32real -> PF_S
      s64real -> PF_D
      s80real -> PF_E
    A def which is not a floatdef raises internalerror 200401271, a float
    type other than the three above raises internalerror 200401272. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Sets up the register allocators of the normal ARM code generator.

    Integer registers:
      - iOS: r7 is left out (it is always the frame pointer on Darwin for
        backtrace support, as in gcc/clang, so R11 can be used); r9 is
        volatile there and is offered early in the list.
      - other targets: R11 is offered only if a procinfo exists and its
        frame pointer is not R11.
    R14 is always offered because it is currently always saved.

    An FPA register allocator is created only if the selected fpu type has
    FPUARM_HAS_FPA; the mm allocator is set up by init_mmregister_allocator. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if target_info.system=system_arm_ios then
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is free for general use }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { R11 is withheld from the allocator }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, fpu and mm register allocators and then runs the
    inherited cleanup. free is called on all three slots, although
    init_register_allocators creates the fpu allocator only if
    FPUARM_HAS_FPA is available. }
  procedure tarmcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg.

    size must be an 8, 16 or 32 bit integer size, anything else raises
    internalerror 2002090907. The strategies are tried in this order:
      1. mov  reg,#a            a is a shifter constant
      2. mvn  reg,#not(a)       not(a) is a shifter constant
      3. mov + orr              a can be split into two shifter constants
      4. mvn + bic              not(a) can be split into two shifter constants
      5. ldr  reg,[pc,label]    a is emitted as 32 bit constant into the
                                local data of the current procedure }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      shift : byte;
      poollabel : tasmlabel;
      poolref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      if is_shifter_const(a,shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_shifter_const(not(a),shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: place the value in the local data of the procedure and
        load it pc relative }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);

      reference_reset(poolref,4,[]);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      { has to be taken while the label is still the last entry, i.e. before
        the constant itself is appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    If the source is at least as large as the destination, only tosize is
    loaded. If the reference is aligned well enough (and the cpu offers the
    required load instruction) a single LDR with the matching postfix is
    emitted through handle_load_store. Otherwise the value is assembled
    from byte or halfword loads which are combined with ORR and a left
    shift; bytes are always fetched starting with the least significant
    one, which is why the offset runs backwards on big endian targets. }
  277. procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  278. var
  279. oppostfix:toppostfix;
  280. usedtmpref: treference;
  281. tmpreg,tmpreg2 : tregister;
  282. so : tshifterop;
  283. dir : integer;
  284. begin
  { never load more than the destination can hold }
  285. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  286. FromSize := ToSize;
  { select the postfix of the load instruction }
  287. case FromSize of
  288. { integer sizes, signed and unsigned }
  289. OS_8:
  290. oppostfix:=PF_B;
  291. OS_S8:
  292. oppostfix:=PF_SB;
  293. OS_16:
  294. oppostfix:=PF_H;
  295. OS_S16:
  296. oppostfix:=PF_SH;
  297. OS_32,
  298. OS_S32:
  299. oppostfix:=PF_None;
  300. else
  301. InternalError(200308297);
  302. end;
  { without CPUARM_HAS_ALL_MEM a signed byte is loaded as unsigned byte;
    the sign extension is done by the a_load_reg_reg at the end }
  303. if (fromsize=OS_S8) and
  304. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  305. oppostfix:=PF_B;
  { the value has to be composed from smaller loads if the reference is
    only 1 or 2 byte aligned and that is less than the access size, or if
    a halfword load is needed on a cpu without CPUARM_HAS_ALL_MEM }
  306. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
  307. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  308. (oppostfix in [PF_SH,PF_H])) then
  309. begin
  { step direction through memory from the least to the most significant part }
  310. if target_info.endian=endian_big then
  311. dir:=-1
  312. else
  313. dir:=1;
  314. case FromSize of
  315. OS_16,OS_S16:
  316. begin
  317. { only complicated references need an extra loadaddr }
  318. if assigned(ref.symbol) or
  319. (ref.index<>NR_NO) or
  320. (ref.offset<-4095) or
  321. (ref.offset>4094) or
  322. { sometimes the compiler reused registers }
  323. (reg=ref.index) or
  324. (reg=ref.base) then
  325. begin
  326. tmpreg2:=getintregister(list,OS_INT);
  327. a_loadaddr_ref_reg(list,ref,tmpreg2);
  328. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  329. end
  330. else
  331. usedtmpref:=ref;
  { on big endian the least significant byte is the second one }
  332. if target_info.endian=endian_big then
  333. inc(usedtmpref.offset,1);
  334. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  335. tmpreg:=getintregister(list,OS_INT);
  { low byte -> reg }
  336. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  337. inc(usedtmpref.offset,dir);
  { high byte -> tmpreg, loaded as signed byte for OS_S16 }
  338. if FromSize=OS_16 then
  339. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  340. else
  341. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg:=reg or (tmpreg shl 8) }
  342. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  343. end;
  344. OS_32,OS_S32:
  345. begin
  346. tmpreg:=getintregister(list,OS_INT);
  347. { only complicated references need an extra loadaddr }
  348. if assigned(ref.symbol) or
  349. (ref.index<>NR_NO) or
  350. (ref.offset<-4095) or
  351. (ref.offset>4092) or
  352. { sometimes the compiler reused registers }
  353. (reg=ref.index) or
  354. (reg=ref.base) then
  355. begin
  356. tmpreg2:=getintregister(list,OS_INT);
  357. a_loadaddr_ref_reg(list,ref,tmpreg2);
  358. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  359. end
  360. else
  361. usedtmpref:=ref;
  362. shifterop_reset(so);so.shiftmode:=SM_LSL;
  363. if ref.alignment=2 then
  364. begin
  { halfword aligned: two halfword loads, reg:=low or (high shl 16) }
  365. if target_info.endian=endian_big then
  366. inc(usedtmpref.offset,2);
  367. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  368. inc(usedtmpref.offset,dir*2);
  369. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  370. so.shiftimm:=16;
  371. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  372. end
  373. else
  374. begin
  { four byte loads: byte 0 -> reg, byte 1 -> tmpreg, byte 2 -> tmpreg2;
    tmpreg is reused for byte 3 after byte 1 has been merged into reg }
  375. tmpreg2:=getintregister(list,OS_INT);
  376. if target_info.endian=endian_big then
  377. inc(usedtmpref.offset,3);
  378. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  379. inc(usedtmpref.offset,dir);
  380. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  381. inc(usedtmpref.offset,dir);
  382. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  383. so.shiftimm:=8;
  384. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  385. inc(usedtmpref.offset,dir);
  386. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  387. so.shiftimm:=16;
  388. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  389. so.shiftimm:=24;
  390. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  391. end;
  392. end
  393. else
  { remaining sizes (bytes) need no composition }
  394. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  395. end;
  396. end
  397. else
  { sufficiently aligned: a single load instruction }
  398. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { fix up the extension which the load above did not perform }
  399. if (fromsize=OS_S8) and
  400. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  401. a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
  402. else if (fromsize=OS_S8) and (tosize = OS_16) then
  403. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  404. end;
  { Subtracts ioffset from the self parameter of procdef.

    The caller side parameter locations of procdef are initialised and the
    'self' parameter is looked up in its parameter symtable; if it is
    missing or not a paravarsym, internalerror 2003052503 is raised. Every
    location of the parameter is adjusted in turn:
      - register: the value is subtracted in place
      - stack reference: the value is subtracted in memory; the offset is
        increased by sizeof(aint) because of the stored return address
    Any other location kind raises internalerror 2003091803. If ioffset is
    not a shifter constant it is loaded into R12 first. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      dummyshift : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
        (selfsym.typ=paravarsym)) then
        internalerror(2003052503);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,dummyshift) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,dummyshift) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a of the given size as parameter.

    paraloc has to be a simple location (check_simple_location). After the
    location has been allocated, the constant is either loaded into the
    parameter register or stored to the parameter's stack slot, which is
    addressed by the index register and offset of the location. Other
    location kinds raise internalerror 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackslot: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      with paraloc.location^ do
        case loc of
          LOC_REGISTER,LOC_CREGISTER:
            a_load_const_reg(list,size,a,register);
          LOC_REFERENCE:
            begin
              reference_reset(stackslot,paraloc.alignment,[]);
              stackslot.offset:=reference.offset;
              stackslot.base:=reference.index;
              a_load_const_ref(list,size,a,stackslot);
            end;
          else
            internalerror(2002081101);
        end;
    end;
  { Copies (part of) the value at ref to the parameter location paralocref.

    Doubles in softemu mode have a strange order of registers and
    references: if the whole parameter is OS_F64 while this location is
    OS_32, exactly 4 bytes are copied with g_concatcopy. All other cases
    are left to the inherited implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Creates the register allocator for the mm registers depending on the
    capabilities of the selected fpu type:
      - FPUARM_HAS_32REGS and FPUARM_HAS_VFP_DOUBLE: D0..D31 (R_SUBFD)
      - FPUARM_HAS_32REGS only:                      S0..S31 (R_SUBFS)
      - FPUARM_HAS_VFP_EXTENSION:                    D0..D15 (R_SUBFD)
    No allocator is created if none of these applies.

    The register allocator currently cannot deal with multiple
    non-overlapping subregs per register, so we can only use half the
    single precision registers for now (as sub registers of the double
    precision ones). }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
                [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                 RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
                 RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                 RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                ],first_mm_imreg,[])
          else
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
                [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
                 RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,
                 RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
                 RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
                ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Passes the address of r as parameter.

    paraloc has to be a simple location. For a register location the
    address is calculated directly into the parameter register. For a stack
    location it is calculated into a temporary integer register, which is
    then stored to the slot addressed by the index register and offset of
    the location. Other location kinds raise internalerror 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      stackslot: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      with paraloc.location^ do
        case loc of
          LOC_REGISTER,LOC_CREGISTER:
            a_loadaddr_ref_reg(list,r,register);
          LOC_REFERENCE:
            begin
              { calculate the address first ... }
              addrreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,r,addrreg);
              { ... then store it to the parameter's stack slot }
              reference_reset(stackslot,paraloc.alignment,[]);
              stackslot.base:=reference.index;
              stackslot.offset:=reference.offset;
              a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,stackslot);
            end;
          else
            internalerror(2002080701);
        end;
    end;
  { Emits a call to the routine with the assembler name s.

    BL is always used, as newer binutils apparently do not translate blx;
    generating BL is also what clang and gcc do by default. If weak is
    set, the symbol is referenced weakly. The reference is marked addr_pic
    if the target uses a GOT for PIC and PIC code is being created,
    otherwise addr_full. Finally pi_do_call is added to the flags of the
    current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callref : treference;
      target : TAsmSymbol;
    begin
      if weak then
        target:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        target:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);

      reference_reset_symbol(callref,target,0,sizeof(pint),[]);
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_pic
      else
        callref.refaddr:=addr_full;

      list.concat(taicpu.op_ref(A_BL,callref));
{
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
}
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg.

    With CPUARM_HAS_BLX a single "blx reg" is generated, otherwise the
    sequence "mov r14,pc; mov pc,reg". Afterwards pi_do_call is added to
    the flags of the current procedure.
    Note: the capability check is not really correct, the mov sequence
    should only be used for non-Thumb cpus. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;
{
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
}
      include(current_procinfo.flags,pi_do_call);
    end;
  { Two operand form "reg := reg op a": handed over to the three operand
    variant with reg used as source and as destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,
        reg,   { source }
        reg);  { destination }
    end;
  { Applies "op a" to the value stored at ref.

    The value is loaded into a temporary register, the result of the
    operation is computed into a second temporary register and written
    back. The store uses the reference returned by a_internal_load_ref_reg
    instead of ref itself. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,resultreg : tregister;
      loadedref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);

      { load }
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      { modify }
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      { store }
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { Two operand operation "dst := dst op src", or "dst := op src" for the
    unary operations.

      OP_NEG: rsb dst,src,#0 followed by maybeadjustresult
      OP_NOT: 32 bit sizes use a plain mvn. For 8/16 bit sizes the operand
              is shifted to the top of the register while it is inverted
              (mvn dst,src,lsl #24 resp. #16) and then shifted back down,
              arithmetically for signed and logically for unsigned sizes.
      others: forwarded to a_op_reg_reg_reg with dst as second source }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
            list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
          else
            begin
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              if size in [OS_8, OS_S8] then
                shiftop.shiftimm:=24
              else
                shiftop.shiftimm:=16;
              list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
              {Using a shift here allows this to be folded into another instruction}
              if size in [OS_S8, OS_S16] then
                shiftop.shiftmode:=SM_ASR
              else
                shiftop.shiftmode:=SM_LSR;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
            end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  627. const
  { The three tables below are indexed by TOpCG, so their entries follow
    the declaration order of TOpCG (declared in another unit).
    NOTE(review): A_NONE presumably marks operations without a directly
    corresponding opcode - confirm at the places where the tables are used. }
  { opcode for the register/register form of an operation; unlike
    op_reg_opcg2asmop it contains no shift or rotate opcodes }
  628. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  629. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  630. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
  { same as above, but with A_ASR, A_LSL, A_LSR and A_ROR filled in }
  631. op_reg_opcg2asmop: array[TOpCG] of tasmop =
  632. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  633. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { opcode postfix per operation for thumb: either PF_None or PF_S }
  634. op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
  635. (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
  636. PF_None,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { "dst := src op a" without overflow checking: forwards to
    a_op_const_reg_reg_checkoverflow with setflags=false. The overflow
    location is passed only because the callee requires it; it is neither
    initialised nor read here. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      ignored_ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,
        false,ignored_ovloc);
    end;
  { Three register operation without overflow checking: forwards to
    a_op_reg_reg_reg_checkoverflow with setflags=false. The overflow
    location is passed only because the callee requires it; it is neither
    initialised nor read here. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      ignored_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,
        false,ignored_ovloc);
    end;
  { Maps a shift/rotate code generator operation onto the shifter operand mode.
    Any other operation raises internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        { OP_ROL is mapped to a rotate right as well; the callers in this unit
          rotate right by "32 - amount" to obtain the rotate left }
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace the 32 bit multiplication "dst := src * a" by a short sequence
    of shift/add/sub instructions.

    list     : instruction list the replacement sequence is appended to
    a        : constant factor
    src, dst : source and destination register

    Returns true if a replacement sequence has been emitted. Returns false, without
    emitting anything, if the estimated effort exceeds the heuristic limit; the
    caller then has to generate a regular multiplication. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      multiplier : dword;
      power : longint;
      shifterop : tshifterop;
      bitsset : byte;
      negative : boolean;
      first, doshiftadd: boolean;
      b,
      cycles : byte;
      maxeffort : byte;
      leftmostbit,i,shiftvalue: DWord;
    begin
      result:=true;
      cycles:=0;
      negative:=a<0;
      shifterop.rs:=NR_NO;
      shifterop.shiftmode:=SM_LSL;
      { a negative factor costs one extra instruction (the final RSB) }
      if negative then
        inc(cycles);
      multiplier:=dword(abs(a));
      { heuristics to estimate how much instructions are reasonable to replace the mul,
        this is currently based on XScale timings }
      { in the simplest case, we need a mov to load the constant and a mul to carry out the
        actual multiplication, this requires min. 1+4 cycles

        because the first shift imm. might cause a stall and because we need more instructions
        when replacing the mul we generate max. 3 instructions to replace this mul }
      maxeffort:=3;

      { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
        a ldr, so generating one more operation to replace this is beneficial }
      if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
        inc(maxeffort);

      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(maxeffort);

      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(maxeffort);

      { "symmetric" bit pattern like $10101010 where
        res:=a*$10101010 can be simplified into
        temp:=a*$1010
        res:=temp+temp shl 16

        The split is only accepted if the reduced multiplier is greater than 1:
        for constants of the form 2^i+1 the reduced multiplier would be 1, which
        the add-up loop below cannot expand (it would emit src+src and then
        decrement the multiplier below zero). Such constants are handled by the
        add-up loop directly with a single "add dst,src,src lsl #i". }
      doshiftadd:=false;
      leftmostbit:=BsrDWord(multiplier);
      shiftvalue:=0;
      if (maxeffort>1) and (leftmostbit>2) then
        begin
          for i:=2 to 31 do
            if ((multiplier shr i)>1) and
               ((multiplier shr i)=(multiplier and ($ffffffff shr (32-i)))) then
              begin
                doshiftadd:=true;
                shiftvalue:=i;
                { the final shift-add consumes one instruction of the budget }
                dec(maxeffort);
                multiplier:=multiplier shr shiftvalue;
                break;
              end;
        end;

      { number of set bits not counting bit 0 }
      bitsset:=popcnt(multiplier and $fffffffe);

      { most simple cases }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { add up ?

        basically, one add is needed for each bit being set in the constant factor
        however, the least significant bit is for free, it can be hidden in the initial
        instruction
      }
      else if (bitsset+cycles<=maxeffort) and
        (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
        begin
          first:=true;
          while multiplier<>0 do
            begin
              shifterop.shiftimm:=BsrDWord(multiplier);
              if odd(multiplier) then
                begin
                  { bit 0 is folded into the first instruction: dst := src + src shl n }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
              first:=false;
              { the topmost set bit has been handled }
              dec(multiplier,1 shl shifterop.shiftimm);
            end;
          if doshiftadd then
            begin
              { dst := dst + dst shl shiftvalue }
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
        begin
          first:=true;
          while multiplier<>0 do
            begin
              if first then
                begin
                  { start from src shl power and subtract the difference }
                  multiplier:=(1 shl power)-multiplier;
                  shifterop.shiftimm:=power;
                end
              else
                shifterop.shiftimm:=BsrDWord(multiplier);

              if odd(multiplier) then
                begin
                  { bit 0 of the difference is folded in: dst := (src shl power) - src }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
                    dec(multiplier,1 shl shifterop.shiftimm);
                  end;
              first:=false;
            end;
          if doshiftadd then
            begin
              { dst := dst + dst shl shiftvalue }
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits "dst := src <op> a" for a constant a.

    setflags : request a flag setting instruction (also done if cgsetflags is set)
    ovloc    : set to LOC_VOID, or to LOC_FLAGS with the flag to test when a flag
               setting ADD/SUB has been emitted for an unsigned 8/16/32 bit size.
               The early OP_NONE/OP_MOVE exits leave ovloc untouched. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shiftbits, masklsb, maskwidth : byte;
      constreg : tregister;
      shiftop : tshifterop;
      log2val : longint;
      part1, part2: DWord;
      wantflags : boolean;
    begin
      optimize_op_const(size, op, a);

      { operations that have been reduced to a plain copy or constant load }
      if op=OP_NONE then
        begin
          if src <> dst then
            a_load_reg_reg(list, size, size, src, dst);
          exit;
        end;
      if op=OP_MOVE then
        begin
          a_load_const_reg(list, size, a, dst);
          exit;
        end;

      ovloc.loc:=LOC_VOID;
      wantflags:=cgsetflags or setflags;

      { if -a fits a shifter constant, turn ADD into SUB and vice versa }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,shiftbits) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_shifter_const(a,shiftbits) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=opshift2shiftmode(op);
              { rotate left is expressed as rotate right by the complement }
              if op = OP_ROL then
                shiftop.shiftimm:=32-a
              else
                shiftop.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
            end;
          else
            begin
              { the constant can be encoded directly in the instruction }
              if wantflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(wantflags)*ord(PF_S))));
              if wantflags and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      internalerror(2019050922);
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,log2val) and not wantflags then
            a_op_const_reg_reg(list,OP_SHL,size,log2val,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,log2val) and not wantflags then
            begin
              if log2val>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,log2val) and not wantflags then
            begin
              if log2val>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not wantflags and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shiftbits) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
            is_continuous_mask(aword(a), masklsb, maskwidth) and
            ((masklsb = 0) or ((masklsb + maskwidth) = 32)) then
            begin
              shifterop_reset(shiftop);
              if (maskwidth = 16) and
                (masklsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (maskwidth = 8) and
                (masklsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if masklsb = 0 then
                begin
                  { mask of the low bits: shift the unwanted upper bits out and back }
                  shiftop.shiftmode:=SM_LSL;
                  shiftop.shiftimm:=32-maskwidth;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                  shiftop.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
                end
              else
                begin
                  { mask of the high bits: shift the unwanted lower bits out and back }
                  shiftop.shiftmode:=SM_LSR;
                  shiftop.shiftimm:=masklsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                  shiftop.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,part2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
            not wantflags and
            split_into_shifter_const(a, part1, part2) then
            begin
              { apply the constant in two encodable halves }
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,part2));
            end
          else
            begin
              { generic fallback: load the constant into a register first }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits a three-register operation with src1 and src2 as sources and dst as
    destination.

    setflags : request a flag setting instruction / overflow detection (also done
               if cgsetflags is set)
    ovloc    : LOC_VOID, or LOC_FLAGS with resflags=F_NE after a long multiply
               whose upper result half has been checked for overflow

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported here and raise
    internalerror 200308283. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg,mulrm,mulrs : tregister;
      asmop : tasmop;

    { Selects the two source operands of a multiplication into dst such that the
      first one (rm) differs from dst: the arm doesn't allow that rd and rm are
      the same. If dst, src1 and src2 are all the same register, the operand is
      copied into a fresh temp register first. }
    procedure select_mul_operands(out rm, rs : tregister);
      begin
        if dst<>src2 then
          begin
            rm:=src2;
            rs:=src1;
          end
        else if dst<>src1 then
          begin
            rm:=src1;
            rs:=src2;
          end
        else
          begin
            tmpreg:=getintregister(list,size);
            { the copy has to go into tmpreg: copying src2 to dst would be a no-op
              here (same register) and leave tmpreg uninitialised }
            a_load_reg_reg(list,size,size,src2,tmpreg);
            rm:=tmpreg;
            rs:=src1;
          end;
      end;

    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { dst := src2 shifted by the amount held in src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { long multiply: the upper half of the result ends up in overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                select_mul_operands(mulrm,mulrs);
                list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,mulrm,mulrs));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper half must equal the sign extension of the lower half }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper half must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                select_mul_operands(mulrm,mulrs);
                list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,mulrm,mulrs));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result in the register pair
    dstlo/dsthi.

    With UMULL support a long multiply is emitted (OS_32: UMULL, OS_S32: SMULL,
    any other size raises internalerror 2014060802). Without it only the low
    half can be produced with MUL, so dsthi has to be NR_NO; otherwise
    internalerror 2015083022 is raised. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          { no long multiply available: only a 32 bit result is possible }
          if dsthi<>NR_NO then
            internalerror(2015083022);
          if (dstlo = NR_NO) then
            dstlo:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_32:
          mulop:=A_UMULL;
        OS_S32:
          mulop:=A_SMULL;
        else
          InternalError(2014060802);
      end;
      { The caller might omit dstlo or dsthi, when he is not interested in it, we still
        need valid registers everywhere. In case of dsthi = NR_NO we could fall back to
        32x32=32 bit multiplication}
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" after rewriting the
    reference into a form the instruction accepts.

    op, oppostfix : memory instruction and its size/sign postfix
    reg           : register being loaded or stored
    ref           : the reference; passed by value and modified locally

    Returns the reference that was finally used in the emitted instruction, so
    that callers can address neighbouring locations relative to it. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          { the reference cannot be encoded as it is }
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if tmpreg1<>NR_NO then
            begin
              { NOTE(review): the sum ends up in tmpreg2, but neither ref.base nor
                ref.index is redirected to it in this branch -- confirm whether the
                offset can get lost here }
              tmpreg2:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
              tmpreg1:=tmpreg2;
            end
          else
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { the temp already is the base: fold the index into it }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { otherwise the temp has to be the index }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              { honour the sign of the index like the branches above do }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores the integer register reg to the memory location ref.

    A plain STR/STRH/STRB is used whenever possible. If the reference is known to
    be aligned to only 1 or 2 bytes and that is less than the store size, or if a
    halfword store is requested on a cpu without CPUARM_HAS_ALL_MEM, 16 and 32 bit
    values are written in smaller pieces, walking through memory in the
    direction given by the target endianness. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step : integer;
      piecewise : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          postfix:=PF_None;
        else
          InternalError(2003082912);
      end;

      piecewise:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
         (postfix =PF_H));

      { the common case: one store instruction does the job }
      if not(piecewise) or not(FromSize in [OS_16,OS_S16,OS_32,OS_S32]) then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { the least significant piece is stored first; on big endian targets it
        lives at the highest address, so the walk goes downwards }
      if target_info.endian=endian_big then
        step:=-1
      else
        step:=1;
      shifted:=getintregister(list,OS_INT);
      partref:=ref;

      if FromSize in [OS_16,OS_S16] then
        begin
          { two byte stores }
          if target_info.endian=endian_big then
            inc(partref.offset,1);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          inc(partref.offset,step);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
        end
      else if ref.alignment=2 then
        begin
          { 32 bit value, halfword aligned: two halfword stores }
          if target_info.endian=endian_big then
            inc(partref.offset,2);
          partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
          inc(partref.offset,step*2);
          a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
        end
      else
        begin
          { 32 bit value, byte aligned: four byte stores }
          if target_info.endian=endian_big then
            inc(partref.offset,3);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
        end;
    end;
  { Stores reg to ref with a store of width tosize and returns the reference
    that was actually used for the (first) store instruction.

    On cpus without CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte
    stores: the low byte at the reference, bits 8 and up at offset+1. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      hiref : treference;
      hibyte : TRegister;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      if not((tosize in [OS_S16,OS_16]) and
             (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]))) then
        result:=handle_load_store(list,A_STR,postfix,reg,ref)
      else
        begin
          { low byte first }
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          { then the remaining bits, one byte further }
          hibyte:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibyte);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_STR,PF_B,hibyte,hiref);
        end;
    end;
  { Loads reg from ref and returns the reference that was actually used for the
    (first) load instruction. The load width and signedness are taken from
    fromsize.

    On cpus without CPUARM_HAS_ALL_MEM two cases are emulated, selected by tosize:
    OS_S8 is loaded as unsigned byte and extended with a_load_reg_reg afterwards,
    16 bit values are assembled from two byte loads. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shiftop : tshifterop;
      hibyte : TRegister;
      hiref : treference;
      limitedmem : boolean;
    begin
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limitedmem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if limitedmem and (tosize=OS_S8) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limitedmem and (tosize in [OS_S16,OS_16]) then
        begin
          { low byte into reg, the byte at offset+1 into a temp ... }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          hibyte:=getintregister(list,OS_INT);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_LDR,PF_B,hibyte,hiref);
          { ... and combine: reg := reg or (hibyte shl 8) }
          shifterop_reset(shiftop);
          shiftop.shiftmode:=SM_LSL;
          shiftop.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibyte,shiftop));
        end
      else
        result:=handle_load_store(list,A_LDR,postfix,reg,ref);
    end;
  { Copies reg1 to reg2, converting from fromsize to tosize.

    If the sizes differ, the value is zero or sign extended according to the
    smaller of both sizes: with shifts / AND before ARMv6, with the UXT*/SXT*
    instructions otherwise. If no conversion instruction was needed and the
    registers differ, a plain MOV is emitted and reported to the register
    allocator as a move. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { emits "reg2 := source shifted by amount" using the given shift mode }
    procedure do_shift(mode : tshiftmode; amount : byte; source : tregister);
      begin
        if not GenerateThumbCode then
          begin
            so.shiftmode:=mode;
            so.shiftimm:=amount;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,source,so));
          end
        else
          begin
            { Thumb: go through the generic constant shift code }
            case mode of
              SM_ASR:
                a_op_const_reg_reg(list,OP_SAR,OS_32,amount,source,reg2);
              SM_LSR:
                a_op_const_reg_reg(list,OP_SHR,OS_32,amount,source,reg2);
              SM_LSL:
                a_op_const_reg_reg(list,OP_SHL,OS_32,amount,source,reg2);
              else
                internalerror(2013090301);
            end;
          end;
      end;

    var
      movinstr: taicpu;
      converted: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          converted:=true;
          { the smaller size decides how the value has to be extended }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      { sign extend to 16 bit, then clear the upper halfword }
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                { no extension necessary, fall back to the plain move below }
                converted:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize=OS_16 then
                    begin
                      { SXTB16 with a rotation of 16 delivers the sign extended low
                        byte in the upper halfword, the shift moves it down again }
                      so.shiftmode:=SM_ROR;
                      so.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                { no extension necessary, fall back to the plain move below }
                converted:=false;
            end
        end;

      if not converted and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter: walks over all
    locations of paraloc and loads or copies the matching part of the value
    into each of them. The source reference is advanced by the size of each
    location handled. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      srcref,dstref : treference;
      curloc : pcgparalocation;
    begin
      srcref:=ref;
      curloc:=paraloc.location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,paraloc.location);
                { NOTE(review): this loads from ref, not from the advancing srcref
                  used by the other branches -- confirm this is intended for
                  parameters made of more than one location }
                a_loadfpu_ref_reg(list,size,size,ref,curloc^.register);
              end;
            LOC_REGISTER :
              case curloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,srcref,curloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,srcref,paraloc);
                else
                  a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(dstref,curloc^.reference.index,curloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,srcref,dstref,tcgsize2size[curloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue with the next part of the value }
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Copies the fpu register reg1 to reg2 with an MVF instruction whose postfix is
    looked up from tosize in cgsize2fpuoppostfix; fromsize is not used. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      movinstr : taicpu;
    begin
      movinstr:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(movinstr,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads the fpu register reg from ref with an LDF instruction; the precision
    postfix is derived from fromsize (any unsupported size raises internalerror
    200309021). If tosize differs, the register is converted in place with
    a_loadfpu_reg_reg afterwards. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_F80:
          precision:=PF_E;
        OS_64,
        OS_F64:
          precision:=PF_D;
        OS_32,
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if fromsize<>tosize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the FPU register reg to memory (ref) using STF. The postfix of the
    store is chosen from tosize only; fromsize is not used.
    An unsupported tosize triggers InternalError(200309022). }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      postfix : toppostfix;
    begin
      { single, double or extended precision store }
      if tosize=OS_F32 then
        postfix:=PF_S
      else if tosize=OS_F64 then
        postfix:=PF_D
      else if tosize=OS_F80 then
        postfix:=PF_E
      else
        InternalError(200309022);
      handle_load_store(list,A_STF,postfix,reg,ref);
    end;
  { Emits code that reads FPSCR, masks it with $9f and calls
    FPC_THROWFPUEXCEPTION when the result is not zero.

    Code is only generated if FPU exception checking is switched on, the
    selected FPU does not have FPUARM_HAS_EXCEPTION_TRAPPING and a check is
    either forced or flagged as needed in the current procedure.

    force - generate the check even if the procedure did not request one
    clear - reset the "check needed" flag of the procedure afterwards }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipjump : taicpu;
      noexception : TAsmLabel;
    begin
      if not(cs_check_fpu_exceptions in current_settings.localswitches) then
        exit;
      if FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype] then
        exit;
      if not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { statusreg:=FPSCR and $9f, setting the flags }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { skip the helper call if none of the masked bits is set }
      current_asmdata.getjumplabel(noexception);
      skipjump:=taicpu.op_sym(A_B,noexception);
      skipjump.is_jmp:=true;
      skipjump.condition:=C_EQ;
      list.concat(skipjump);

      { the call is bracketed by allocating/releasing the volatile registers }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      a_label(list,noexception);
      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1622. { comparison operations }
  { Compares register reg with the constant a and jumps to label l if the
    condition cmp_op holds. Depending on how the constant can be encoded, a
    CMP with immediate, a CMN with the negated immediate or a CMP against a
    temporary register holding the constant is emitted. The flags register is
    allocated around the sequence. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      cmp_encodable : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { can the constant be used directly as immediate of CMP? }
      if GenerateThumbCode then
        cmp_encodable:=is_thumb_imm(a)
      else
        cmp_encodable:=is_shifter_const(a,rot);

      if cmp_encodable then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if not(GenerateThumbCode) and (a<>$7fffffff) and (a<>-1) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { fall back to materialising the constant in a register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.

    reverse=true : CLZ, followed by RSB dst,dst,#31 and AND dst,dst,#255
    reverse=false: RBIT+CLZ; if the count is 32, dst is set to $ff by a
                   conditional MOV (preceded by IT EQ for Thumb-2)

    srcsize and dstsize are not used. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          { the compare and the conditional move need the flags }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to label l for the
    condition cmp_op. The flags register is allocated around the sequence;
    size is not used. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s. BL is used
    when generating Thumb code, B otherwise; the instruction is marked as a
    jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to label l. BL is used when generating Thumb
    code, B otherwise; the instruction is marked as a jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jump : taicpu;
      opcode : tasmop;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to label l that is taken when the flags f are set.

    Outside Thumb mode this is a single conditional B. For Thumb code a
    conditional B with the inverted condition skips over an unconditional
    jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jump : taicpu;
      negated : TResFlags;
      skip : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          jump:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          jump.is_jmp:=true;
          list.concat(jump);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skip);
      jump:=setcondition(taicpu.op_sym(A_B,skip),flags_to_cond(negated));
      jump.is_jmp:=true;
      list.concat(jump);
      a_jmp_always(list,l);
      a_label(list,skip);
    end;
  { Converts the flags f into a value in reg: a conditional MOV loads 1 when
    the condition derived from f holds, a second MOV with the inverse
    condition loads 0 otherwise. size is not used. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Emits the profiling hook: pushes R14 and calls __gnu_mcount_nc.
    Only arm-linux is handled; any other target ends in
    internalerror(2014091201). }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    var
      pushlr : taicpu;
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          pushlr:=taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]);
          list.concat(pushlr);
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Generates the entry code (prologue) of the current procedure.

    list         - assembler list the prologue is appended to
    localsize    - number of bytes needed for locals; rounded up to a
                   multiple of 4 and later adjusted for stack alignment
    nostackframe - if true, no code is generated at all

    The routine
      - determines the non-volatile FPA/VFP registers used in the procedure,
      - saves the non-volatile integer registers (plus the registers needed
        for the frame layout) with STM and emits matching CFI directives,
      - sets up the frame pointer if one is used,
      - lowers the stack pointer by the (alignment corrected) local size,
      - stores the floating point registers at floatregstart.
    g_proc_exit has to undo exactly what is done here. }
  procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      mmregs,
      regs, saveregs : tcpuregisterset;
      registerarea, offset,
      r7offset,
      stackmisalignment : pint;
      imm1, imm2: DWord;
      stack_parameters : Boolean;
    begin
      LocalSize:=align(LocalSize,4);
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      { call instruction does not put anything on the stack }
      registerarea:=0;
      { High(TSuperRegister) means that no padding register is in use;
        g_proc_exit only treats values below RS_R4 as a padding register }
      tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
      lastfloatreg:=RS_NO;
      if not(nostackframe) then
        begin
          firstfloatreg:=RS_NO;
          mmregs:=[];
          { find out which floating point registers have to be saved }
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { save floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                { remember the first and last used register; every register
                  in the set adds 12 bytes to the register area }
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      inc(registerarea,12);
                    end;
              end;
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin;
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin;
                { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050924);
          end;
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          { ref describes a pre-indexed access through the stack pointer; it
            is the memory operand of the STM instructions below }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if not(target_info.system in systems_darwin) then
            begin
              a_reg_alloc(list,NR_STACK_POINTER_REG);
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { keep a copy of the stack pointer on entry in R12 }
                  a_reg_alloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
                end;
              { the (old) ARM APCS requires saving both the stack pointer (to
                crawl the stack) and the PC (to identify the function this
                stack frame belongs to) -> also save R12 (= copy of R13 on entry)
                and R15 -- still needs updating for EABI and Darwin, they don't
                need that }
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
              else
                if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                  include(regs,RS_R14);
              if regs<>[] then
                begin
                  { 4 bytes per saved integer register }
                  for r:=RS_R0 to RS_R15 do
                    if r in regs then
                      inc(registerarea,4);
                  { if the stack is not 8 byte aligned, try to add an extra register,
                    so we can avoid the extra sub/add ...,#4 later (KB) }
                  if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
                    for r:=RS_R3 downto RS_R0 do
                      if not(r in regs) then
                        begin
                          regs:=regs+[r];
                          inc(registerarea,4);
                          { recorded so that g_proc_exit can restore the same set }
                          tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
                          break;
                        end;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
                end;
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { emit a CFI offset for every saved register, starting at -4
                    for the highest register number and going down by 4 }
                  offset:=-4;
                  for r:=RS_R15 downto RS_R0 do
                    if r in regs then
                      begin
                        current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),offset);
                        dec(offset,4);
                      end;
                  { the framepointer now points to the saved R15, so the saved
                    framepointer is at R11-12 (for get_caller_frame) }
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                  a_reg_dealloc(list,NR_R12);
                  current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
                  current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
                end;
            end
          else
            begin
              { always save r14 if we use r7 as the framepointer, because
                the parameter offsets are hardcoded in advance and always
                assume that r14 sits on the stack right behind the saved r7
              }
              if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
                include(regs,RS_FRAME_POINTER_REG);
              if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                include(regs,RS_R14);
              if regs<>[] then
                begin
                  { on Darwin, you first have to save [r4-r7,lr], and then
                    [r8,r10,r11] and make r7 point to the previously saved
                    r7 so that you can perform a stack crawl based on it
                    ([r7] is previous stack frame, [r7+4] is return address
                  }
                  include(regs,RS_FRAME_POINTER_REG);
                  saveregs:=regs-[RS_R8,RS_R10,RS_R11];
                  { r7offset counts the bytes occupied by the saved registers
                    with a number below the frame pointer register }
                  r7offset:=0;
                  for r:=RS_R0 to RS_R15 do
                    if r in saveregs then
                      begin
                        inc(registerarea,4);
                        if r<RS_FRAME_POINTER_REG then
                          inc(r7offset,4);
                      end;
                  { save the registers }
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                  { make r7 point to the saved r7 (regardless of whether this
                    frame uses the framepointer, for backtrace purposes) }
                  if r7offset<>0 then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                  else
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                  { now save the rest (if any) }
                  saveregs:=regs-saveregs;
                  if saveregs<>[] then
                    begin
                      for r:=RS_R8 to RS_R11 do
                        if r in saveregs then
                          inc(registerarea,4);
                      list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                    end;
                end;
            end;
          { reserve the locals; also done without locals if the register area
            leaves the stack misaligned and the procedure performs calls or
            is an assembler procedure }
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
                begin
                  { with an estimated stack size the frame must not be larger
                    than the estimate; the estimated size is used instead }
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    internalerror(2014030901)
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end;
              { subtract the local size with one immediate, two immediates or
                via R12, depending on how the constant can be encoded }
              if is_shifter_const(localsize,shift) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if split_into_shifter_const(localsize, imm1, imm2) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                end
              else
                begin
                  if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                    a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end;
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;
          { store the floating point registers determined above }
          if (mmregs<>[]) or
             (firstfloatreg<>RS_NO) then
            begin
              reference_reset(ref,4,[]);
              { the address of the save area is calculated in R12 for VFP and
                for large offsets; otherwise the frame pointer plus
                floatregstart is used directly }
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    { one SFM stores the range firstfloatreg..lastfloatreg }
                    list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VSTM takes the address register in ref.index }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050923);
              end;
            end;
        end;
    end;
  { Generates the exit code (epilogue) of the current procedure.

    list         - assembler list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - if true, only the return instruction is generated

    The routine reloads the floating point registers stored by g_proc_entry,
    recomputes the set of saved integer registers (including a stack padding
    register, if one was recorded), releases the local stack area and
    restores the integer registers. The return is done by loading PC with
    LDM, or by "MOV PC,R14" / "BX R14" when there is nothing to restore. }
  procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      LocalSize : longint;
      firstfloatreg,lastfloatreg,
      r,
      shift : byte;
      mmregs,
      saveregs,
      regs : tcpuregisterset;
      registerarea,
      stackmisalignment: pint;
      paddingreg: TSuperRegister;
      imm1, imm2: DWord;
    begin
      if not(nostackframe) then
        begin
          registerarea:=0;
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          mmregs:=[];
          saveregs:=[];
          { find out which floating point registers have to be restored }
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { restore floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { floating point register space is already included in
                        localsize below by calc_stackframe_size
                      inc(registerarea,12);
                      }
                    end;
              end;
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { restore vfp registers? }
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else
              internalerror(2019050908);
          end;
          if (firstfloatreg<>RS_NO) or
             (mmregs<>[]) then
            begin
              reference_reset(ref,4,[]);
              { same address calculation as in g_proc_entry: R12 for VFP and
                large offsets, frame pointer plus floatregstart otherwise }
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    { one LFM reloads the range firstfloatreg..lastfloatreg }
                    list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VLDM takes the address register in ref.index }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050921);
              end;
            end;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or
             (regs<>[]) or
             ((target_info.system in systems_darwin) and
              (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
            begin
              { R15 is put into the set in place of R14, so the LDM that
                restores the registers also performs the return }
              exclude(regs,RS_R14);
              include(regs,RS_R15);
              if (target_info.system in systems_darwin) then
                include(regs,RS_FRAME_POINTER_REG);
            end;
          if not(target_info.system in systems_darwin) then
            begin
              { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                The saved PC came after that but is discarded, since we restore
                the stack pointer }
              if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
            end
          else
            begin
              { restore R8-R11 already if necessary (they've been stored
                before the others) }
              saveregs:=regs*[RS_R8,RS_R10,RS_R11];
              if saveregs<>[] then
                begin
                  { ref is set up here and used for the LDM of saveregs that
                    is emitted further below }
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  for r:=RS_R8 to RS_R11 do
                    if r in saveregs then
                      inc(registerarea,4);
                  regs:=regs-saveregs;
                end;
            end;
          { 4 bytes per restored integer register }
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);
          { reapply the stack padding reg, in case there was one, see the complimentary
            comment in g_proc_entry() (KB) }
          paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
          { the padding register must not be part of the register set yet }
          if paddingreg < RS_R4 then
            if paddingreg in regs then
              internalerror(201306190)
            else
              begin
                regs:=regs+[paddingreg];
                inc(registerarea,4);
              end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin) then
            begin
              { release the local stack area; the condition and the size
                calculation mirror the ones used in g_proc_entry }
              LocalSize:=current_procinfo.calc_stackframe_size;
              if (LocalSize<>0) or
                 ((stackmisalignment<>0) and
                  ((pi_do_call in current_procinfo.flags) or
                   (po_assembler in current_procinfo.procdef.procoptions))) then
                begin
                  if pi_estimatestacksize in current_procinfo.flags then
                    LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                  else
                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                  { add the local size with one immediate, two immediates or
                    via R12, depending on how the constant can be encoded }
                  if is_shifter_const(LocalSize,shift) then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                  else if split_into_shifter_const(localsize, imm1, imm2) then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                    end
                  else
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end;
                end;
              { Darwin: reload the registers that were stored last on entry }
              if (target_info.system in systems_darwin) and
                 (saveregs<>[]) then
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
              if regs=[] then
                begin
                  { nothing to restore: plain return through R14 }
                  if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                  else
                    list.concat(taicpu.op_reg(A_BX,NR_R14))
                end
              else
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;
            end
          else
            begin
              { restore int registers and return }
              reference_reset(ref,4,[]);
              ref.index:=NR_FRAME_POINTER_REG;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
            end;
        end
      { no stack frame: just return through R14 }
      else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14))
    end;
  { Emits the code that loads the GOT address into current_procinfo.got.
    This is only done for PIC modules on targets using a GOT, and only if the
    current procedure is flagged as needing the GOT.

    A data label is placed in the local data of the procedure and an entry
    relative to _GLOBAL_OFFSET_TABLE_ is added there; the code loads a word
    PC-relatively through that label, adds PC and moves the result to the
    GOT register. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      gotref : treference;
      lab : TAsmLabel;
      usedregs : tcpuregisterset;
      regnr : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before the GOT initialization code, allocate the registers used for
        procedure parameters to prevent usage of these registers for temp
        operations in later stages of code generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for regnr:=RS_R0 to RS_R3 do
        if regnr in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,regnr,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { PC relative load through a label in the local data }
      reference_reset(gotref,4,[]);
      current_asmdata.getglobaldatalabel(lab);
      cg.a_label(current_procinfo.aktlocaldata,lab);
      gotref.symbol:=lab;
      gotref.base:=NR_PC;
      gotref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,gotref));

      { the data entry is relative to the label placed right before the ADD }
      current_asmdata.getaddrlabel(lab);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,lab,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,lab);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for regnr:=RS_R3 downto RS_R0 do
        if regnr in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,regnr,R_SUBWHOLE));
    end;
  { Loads the address described by ref into register r.

    list - assembler list the generated instructions are appended to
    ref  - reference whose address is taken; only AM_OFFSET addressing is
           accepted, anything else ends in internalerror(200309071)
    r    - destination register

    References with a symbol, or with an offset that cannot be encoded as a
    shifter constant (neither itself nor negated), are simplified by fixref
    first. The address is then computed as base +/- index + offset. Shifted
    indices are not supported and lead to an internal error. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      tmpref:=ref;
      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          { a lone index can only take the place of the base if it is neither
            shifted nor subtracted }
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;
      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);
      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);
      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          if tmpref.signindex<0 then
            { a_op_reg_reg_reg calculates dst:=src2 op src1, so the index has
              to be passed as src1 to get r:=base-index (the operands used to
              be passed the other way round, which resulted in index-base) }
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            { neither base nor index: the address is the plain offset }
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                { plain register copy; registered as a move instruction }
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                add_move_instruction(instr);
              end;
        end;
    end;
  2303. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2304. var
  2305. tmpreg, tmpreg2 : tregister;
  2306. tmpref : treference;
  2307. l, piclabel : tasmlabel;
  2308. indirection_done : boolean;
  2309. begin
  2310. { absolute symbols can't be handled directly, we've to store the symbol reference
  2311. in the text segment and access it pc relative
  2312. For now, we assume that references where base or index equals to PC are already
  2313. relative, all other references are assumed to be absolute and thus they need
  2314. to be handled extra.
  2315. A proper solution would be to change refoptions to a set and store the information
  2316. if the symbol is absolute or relative there.
  2317. }
  2318. { create consts entry }
  2319. reference_reset(tmpref,4,[]);
  2320. current_asmdata.getjumplabel(l);
  2321. cg.a_label(current_procinfo.aktlocaldata,l);
  2322. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2323. piclabel:=nil;
  2324. tmpreg:=NR_NO;
  2325. indirection_done:=false;
  2326. if assigned(ref.symbol) then
  2327. begin
  2328. if (target_info.system=system_arm_ios) and
  2329. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2330. begin
  2331. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2332. if ref.offset<>0 then
  2333. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2334. indirection_done:=true;
  2335. end
  2336. else if ref.refaddr=addr_gottpoff then
  2337. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2338. else if ref.refaddr=addr_tlsgd then
  2339. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2340. else if ref.refaddr=addr_tlsdesc then
  2341. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2342. else if ref.refaddr=addr_tpoff then
  2343. begin
  2344. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2345. Internalerror(2019092804);
  2346. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2347. end
  2348. else if (cs_create_pic in current_settings.moduleswitches) then
  2349. if (tf_pic_uses_got in target_info.flags) then
  2350. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2351. else
  2352. begin
  2353. { ideally, we would want to generate
  2354. ldr r1, LPICConstPool
  2355. LPICLocal:
  2356. ldr/str r2,[pc,r1]
  2357. ...
  2358. LPICConstPool:
  2359. .long _globsym-(LPICLocal+8)
  2360. However, we cannot be sure that the ldr/str will follow
  2361. right after the call to fixref, so we have to load the
  2362. complete address already in a register.
  2363. }
  2364. current_asmdata.getaddrlabel(piclabel);
  2365. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2366. end
  2367. else
  2368. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2369. end
  2370. else
  2371. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2372. { load consts entry }
  2373. if not indirection_done then
  2374. begin
  2375. tmpreg:=getintregister(list,OS_INT);
  2376. tmpref.symbol:=l;
  2377. tmpref.base:=NR_PC;
  2378. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2379. if (cs_create_pic in current_settings.moduleswitches) and
  2380. (tf_pic_uses_got in target_info.flags) and
  2381. assigned(ref.symbol) then
  2382. begin
  2383. {$ifdef EXTDEBUG}
  2384. if not (pi_needs_got in current_procinfo.flags) then
  2385. Comment(V_warning,'pi_needs_got not included');
  2386. {$endif EXTDEBUG}
  2387. Include(current_procinfo.flags,pi_needs_got);
  2388. reference_reset(tmpref,4,[]);
  2389. tmpref.base:=current_procinfo.got;
  2390. tmpref.index:=tmpreg;
  2391. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2392. if ref.offset<>0 then
  2393. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2394. end;
  2395. end;
  2396. if assigned(piclabel) then
  2397. begin
  2398. cg.a_label(list,piclabel);
  2399. tmpreg2:=getaddressregister(list);
  2400. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2401. tmpreg:=tmpreg2
  2402. end;
  2403. { This routine can be called with PC as base/index in case the offset
  2404. was too large to encode in a load/store. In that case, the entire
  2405. absolute expression has been re-encoded in a new constpool entry, and
  2406. we have to remove the use of PC from the original reference (the code
  2407. above made everything relative to the value loaded from the new
  2408. constpool entry) }
  2409. if is_pc(ref.base) then
  2410. ref.base:=NR_NO;
  2411. if is_pc(ref.index) then
  2412. ref.index:=NR_NO;
  2413. if (ref.base<>NR_NO) then
  2414. begin
  2415. if ref.index<>NR_NO then
  2416. begin
  2417. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2418. ref.base:=tmpreg;
  2419. end
  2420. else
  2421. if ref.base<>NR_PC then
  2422. begin
  2423. ref.index:=tmpreg;
  2424. ref.shiftimm:=0;
  2425. ref.signindex:=1;
  2426. ref.shiftmode:=SM_None;
  2427. end
  2428. else
  2429. ref.base:=tmpreg;
  2430. end
  2431. else
  2432. ref.base:=tmpreg;
  2433. ref.offset:=0;
  2434. ref.symbol:=nil;
  2435. end;
  2436. procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  2437. var
  2438. paraloc1,paraloc2,paraloc3 : TCGPara;
  2439. pd : tprocdef;
  2440. begin
  2441. pd:=search_system_proc('MOVE');
  2442. paraloc1.init;
  2443. paraloc2.init;
  2444. paraloc3.init;
  2445. paramanager.getcgtempparaloc(list,pd,1,paraloc1);
  2446. paramanager.getcgtempparaloc(list,pd,2,paraloc2);
  2447. paramanager.getcgtempparaloc(list,pd,3,paraloc3);
  2448. a_load_const_cgpara(list,OS_SINT,len,paraloc3);
  2449. a_loadaddr_ref_cgpara(list,dest,paraloc2);
  2450. a_loadaddr_ref_cgpara(list,source,paraloc1);
  2451. paramanager.freecgpara(list,paraloc3);
  2452. paramanager.freecgpara(list,paraloc2);
  2453. paramanager.freecgpara(list,paraloc1);
  2454. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2455. alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2456. a_call_name(list,'FPC_MOVE',false);
  2457. dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2458. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2459. paraloc3.done;
  2460. paraloc2.done;
  2461. paraloc1.done;
  2462. end;
  2463. procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  2464. const
  2465. maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if might conflick with reserved ones,also +2 is used becouse of regs required for referencing}
  2466. maxtmpreg_thumb = 5;
  2467. var
  2468. srcref,dstref,usedtmpref,usedtmpref2:treference;
  2469. srcreg,destreg,countreg,r,tmpreg:tregister;
  2470. helpsize:aint;
  2471. copysize:byte;
  2472. cgsize:Tcgsize;
  2473. tmpregisters:array[1..maxtmpreg_arm] of tregister;
  2474. maxtmpreg,
  2475. tmpregi,tmpregi2:byte;
  2476. { will never be called with count<=4 }
  2477. procedure genloop(count : aword;size : byte);
  2478. const
  2479. size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  2480. var
  2481. l : tasmlabel;
  2482. begin
  2483. current_asmdata.getjumplabel(l);
  2484. if count<size then size:=1;
  2485. a_load_const_reg(list,OS_INT,count div size,countreg);
  2486. cg.a_label(list,l);
  2487. srcref.addressmode:=AM_POSTINDEXED;
  2488. dstref.addressmode:=AM_POSTINDEXED;
  2489. srcref.offset:=size;
  2490. dstref.offset:=size;
  2491. r:=getintregister(list,size2opsize[size]);
  2492. a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
  2493. a_reg_alloc(list,NR_DEFAULTFLAGS);
  2494. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
  2495. a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
  2496. a_jmp_flags(list,F_NE,l);
  2497. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2498. srcref.offset:=1;
  2499. dstref.offset:=1;
  2500. case count mod size of
  2501. 1:
  2502. begin
  2503. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2504. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2505. end;
  2506. 2:
  2507. if aligned then
  2508. begin
  2509. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2510. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2511. end
  2512. else
  2513. begin
  2514. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2515. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2516. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2517. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2518. end;
  2519. 3:
  2520. if aligned then
  2521. begin
  2522. srcref.offset:=2;
  2523. dstref.offset:=2;
  2524. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2525. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2526. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2527. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2528. end
  2529. else
  2530. begin
  2531. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2532. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2533. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2534. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2535. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2536. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2537. end;
  2538. end;
  2539. { keep the registers alive }
  2540. list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
  2541. list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
  2542. list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
  2543. end;
  2544. { save estimation, if a creating a separate ref is needed or
  2545. if we can keep the original reference while copying }
  2546. function SimpleRef(const ref : treference) : boolean;
  2547. begin
  2548. result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
  2549. ((ref.symbol=nil) and
  2550. (ref.addressmode=AM_OFFSET) and
  2551. (((ref.offset>=0) and (ref.offset+len<=31)) or
  2552. (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
  2553. { ldrh has a limited offset range }
  2554. (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
  2555. )
  2556. );
  2557. end;
  2558. { will never be called with count<=4 }
  2559. procedure genloop_thumb(count : aword;size : byte);
  2560. procedure refincofs(const ref : treference;const value : longint = 1);
  2561. begin
  2562. a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
  2563. end;
  2564. const
  2565. size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  2566. var
  2567. l : tasmlabel;
  2568. begin
  2569. current_asmdata.getjumplabel(l);
  2570. if count<size then size:=1;
  2571. a_load_const_reg(list,OS_INT,count div size,countreg);
  2572. cg.a_label(list,l);
  2573. r:=getintregister(list,size2opsize[size]);
  2574. a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
  2575. refincofs(srcref);
  2576. a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
  2577. refincofs(dstref);
  2578. a_reg_alloc(list,NR_DEFAULTFLAGS);
  2579. list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
  2580. a_jmp_flags(list,F_NE,l);
  2581. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2582. case count mod size of
  2583. 1:
  2584. begin
  2585. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2586. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2587. end;
  2588. 2:
  2589. if aligned then
  2590. begin
  2591. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2592. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2593. end
  2594. else
  2595. begin
  2596. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2597. refincofs(srcref);
  2598. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2599. refincofs(dstref);
  2600. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2601. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2602. end;
  2603. 3:
  2604. if aligned then
  2605. begin
  2606. a_load_ref_reg(list,OS_16,OS_16,srcref,r);
  2607. refincofs(srcref,2);
  2608. a_load_reg_ref(list,OS_16,OS_16,r,dstref);
  2609. refincofs(dstref,2);
  2610. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2611. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2612. end
  2613. else
  2614. begin
  2615. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2616. refincofs(srcref);
  2617. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2618. refincofs(dstref);
  2619. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2620. refincofs(srcref);
  2621. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2622. refincofs(dstref);
  2623. a_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2624. a_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2625. end;
  2626. end;
  2627. { keep the registers alive }
  2628. list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
  2629. list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
  2630. list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
  2631. end;
  2632. begin
  2633. if len=0 then
  2634. exit;
  2635. if GenerateThumbCode then
  2636. maxtmpreg:=maxtmpreg_thumb
  2637. else
  2638. maxtmpreg:=maxtmpreg_arm;
  2639. helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
  2640. dstref:=dest;
  2641. srcref:=source;
  2642. if cs_opt_size in current_settings.optimizerswitches then
  2643. helpsize:=8;
  2644. if aligned and (len=4) then
  2645. begin
  2646. tmpreg:=getintregister(list,OS_32);
  2647. a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
  2648. a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
  2649. end
  2650. else if aligned and (len=2) then
  2651. begin
  2652. tmpreg:=getintregister(list,OS_16);
  2653. a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
  2654. a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
  2655. end
  2656. else if (len<=helpsize) and aligned then
  2657. begin
  2658. tmpregi:=0;
  2659. { loading address in a separate register needed? }
  2660. if SimpleRef(source) then
  2661. begin
  2662. { ... then we don't need a loadaddr }
  2663. srcref:=source;
  2664. end
  2665. else
  2666. begin
  2667. srcreg:=getintregister(list,OS_ADDR);
  2668. a_loadaddr_ref_reg(list,source,srcreg);
  2669. reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
  2670. end;
  2671. while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
  2672. begin
  2673. inc(tmpregi);
  2674. tmpregisters[tmpregi]:=getintregister(list,OS_32);
  2675. a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
  2676. inc(srcref.offset,4);
  2677. dec(len,4);
  2678. end;
  2679. { loading address in a separate register needed? }
  2680. if SimpleRef(dest) then
  2681. dstref:=dest
  2682. else
  2683. begin
  2684. destreg:=getintregister(list,OS_ADDR);
  2685. a_loadaddr_ref_reg(list,dest,destreg);
  2686. reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
  2687. end;
  2688. tmpregi2:=1;
  2689. while (tmpregi2<=tmpregi) do
  2690. begin
  2691. a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
  2692. inc(dstref.offset,4);
  2693. inc(tmpregi2);
  2694. end;
  2695. copysize:=4;
  2696. cgsize:=OS_32;
  2697. while len<>0 do
  2698. begin
  2699. if len<2 then
  2700. begin
  2701. copysize:=1;
  2702. cgsize:=OS_8;
  2703. end
  2704. else if len<4 then
  2705. begin
  2706. copysize:=2;
  2707. cgsize:=OS_16;
  2708. end;
  2709. dec(len,copysize);
  2710. r:=getintregister(list,cgsize);
  2711. a_load_ref_reg(list,cgsize,cgsize,srcref,r);
  2712. a_load_reg_ref(list,cgsize,cgsize,r,dstref);
  2713. inc(srcref.offset,copysize);
  2714. inc(dstref.offset,copysize);
  2715. end;{end of while}
  2716. end
  2717. else
  2718. begin
  2719. cgsize:=OS_32;
  2720. if (len<=4) then{len<=4 and not aligned}
  2721. begin
  2722. r:=getintregister(list,cgsize);
  2723. usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
  2724. if Len=1 then
  2725. a_load_reg_ref(list,OS_8,OS_8,r,dstref)
  2726. else
  2727. begin
  2728. tmpreg:=getintregister(list,cgsize);
  2729. usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
  2730. inc(usedtmpref.offset,1);
  2731. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2732. inc(usedtmpref2.offset,1);
  2733. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2734. if len>2 then
  2735. begin
  2736. inc(usedtmpref.offset,1);
  2737. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2738. inc(usedtmpref2.offset,1);
  2739. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2740. if len>3 then
  2741. begin
  2742. inc(usedtmpref.offset,1);
  2743. a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  2744. inc(usedtmpref2.offset,1);
  2745. a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
  2746. end;
  2747. end;
  2748. end;
  2749. end{end of if len<=4}
  2750. else
  2751. begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
  2752. destreg:=getintregister(list,OS_ADDR);
  2753. a_loadaddr_ref_reg(list,dest,destreg);
  2754. reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
  2755. srcreg:=getintregister(list,OS_ADDR);
  2756. a_loadaddr_ref_reg(list,source,srcreg);
  2757. reference_reset_base(srcref,srcreg,0,dest.temppos,source.alignment,source.volatility);
  2758. countreg:=getintregister(list,OS_32);
  2759. // if cs_opt_size in current_settings.optimizerswitches then
  2760. { roozbeh : it seems loading 1 byte is faster becouse of caching/fetching(?) }
  2761. {if aligned then
  2762. genloop(len,4)
  2763. else}
  2764. if GenerateThumbCode then
  2765. genloop_thumb(len,1)
  2766. else
  2767. genloop(len,1);
  2768. end;
  2769. end;
  2770. end;
  2771. procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
  2772. begin
  2773. g_concatcopy_internal(list,source,dest,len,false);
  2774. end;
  2775. procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
  2776. begin
  2777. if (source.alignment in [1,3]) or
  2778. (dest.alignment in [1,3]) then
  2779. g_concatcopy_internal(list,source,dest,len,false)
  2780. else
  2781. g_concatcopy_internal(list,source,dest,len,true);
  2782. end;
  2783. procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
  2784. var
  2785. ovloc : tlocation;
  2786. begin
  2787. ovloc.loc:=LOC_VOID;
  2788. g_overflowCheck_loc(list,l,def,ovloc);
  2789. end;
  2790. procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
  2791. var
  2792. hl : tasmlabel;
  2793. ai:TAiCpu;
  2794. hflags : tresflags;
  2795. begin
  2796. if not(cs_check_overflow in current_settings.localswitches) then
  2797. exit;
  2798. current_asmdata.getjumplabel(hl);
  2799. case ovloc.loc of
  2800. LOC_VOID:
  2801. begin
  2802. ai:=taicpu.op_sym(A_B,hl);
  2803. ai.is_jmp:=true;
  2804. if not((def.typ=pointerdef) or
  2805. ((def.typ=orddef) and
  2806. (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
  2807. pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]))) then
  2808. ai.SetCondition(C_VC)
  2809. else
  2810. if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
  2811. ai.SetCondition(C_CS)
  2812. else
  2813. ai.SetCondition(C_CC);
  2814. list.concat(ai);
  2815. end;
  2816. LOC_FLAGS:
  2817. begin
  2818. hflags:=ovloc.resflags;
  2819. inverse_flags(hflags);
  2820. cg.a_jmp_flags(list,hflags,hl);
  2821. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2822. end;
  2823. else
  2824. internalerror(200409281);
  2825. end;
  2826. a_call_name(list,'FPC_OVERFLOW',false);
  2827. a_label(list,hl);
  2828. end;
  2829. procedure tbasecgarm.g_save_registers(list : TAsmList);
  2830. begin
  2831. { this work is done in g_proc_entry }
  2832. end;
  2833. procedure tbasecgarm.g_restore_registers(list : TAsmList);
  2834. begin
  2835. { this work is done in g_proc_exit }
  2836. end;
  2837. procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  2838. var
  2839. ai : taicpu;
  2840. hlabel : TAsmLabel;
  2841. begin
  2842. if GenerateThumbCode then
  2843. begin
  2844. { the optimizer has to fix this if jump range is sufficient short }
  2845. current_asmdata.getjumplabel(hlabel);
  2846. ai:=Taicpu.Op_sym(A_B,hlabel);
  2847. ai.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
  2848. ai.is_jmp:=true;
  2849. list.concat(ai);
  2850. a_jmp_always(list,l);
  2851. a_label(list,hlabel);
  2852. end
  2853. else
  2854. begin
  2855. ai:=Taicpu.Op_sym(A_B,l);
  2856. ai.SetCondition(OpCmp2AsmCond[cond]);
  2857. ai.is_jmp:=true;
  2858. list.concat(ai);
  2859. end;
  2860. end;
  2861. function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
  2862. const
  2863. convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
  2864. (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
  2865. (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
  2866. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2867. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2868. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
  2869. begin
  2870. result:=convertop[fromsize,tosize];
  2871. if result=A_NONE then
  2872. internalerror(200312205);
  2873. end;
  2874. function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
  2875. const
  2876. convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
  2877. (PF_F32, PF_F32F64,PF_None,PF_None,PF_None),
  2878. (PF_F64F32,PF_F64, PF_None,PF_None,PF_None),
  2879. (PF_None, PF_None, PF_None,PF_None,PF_None),
  2880. (PF_None, PF_None, PF_None,PF_None,PF_None),
  2881. (PF_None, PF_None, PF_None,PF_None,PF_None));
  2882. begin
  2883. result:=convertop[fromsize,tosize];
  2884. end;
  2885. procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
  2886. var
  2887. instr: taicpu;
  2888. begin
  2889. if (shuffle=nil) or shufflescalar(shuffle) then
  2890. instr:=setoppostfix(taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),get_scalar_mm_prefix(tosize,fromsize))
  2891. else
  2892. internalerror(2009112407);
  2893. list.concat(instr);
  2894. case instr.opcode of
  2895. A_VMOV:
  2896. { VMOV cannot generate an FPU exception, so we do not need a check here }
  2897. add_move_instruction(instr);
  2898. else
  2899. { VCVT can generate an exception }
  2900. maybe_check_for_fpu_exception(list);
  2901. end;
  2902. end;
  2903. procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  2904. var
  2905. intreg,
  2906. tmpmmreg : tregister;
  2907. reg64 : tregister64;
  2908. begin
  2909. if assigned(shuffle) and
  2910. not(shufflescalar(shuffle)) then
  2911. internalerror(2009112413);
  2912. case fromsize of
  2913. OS_32,OS_S32:
  2914. begin
  2915. fromsize:=OS_F32;
  2916. { since we are loading an integer, no conversion may be required }
  2917. if (fromsize<>tosize) then
  2918. internalerror(2009112801);
  2919. end;
  2920. OS_64,OS_S64:
  2921. begin
  2922. fromsize:=OS_F64;
  2923. { since we are loading an integer, no conversion may be required }
  2924. if (fromsize<>tosize) then
  2925. internalerror(2009112901);
  2926. end;
  2927. OS_F32,OS_F64:
  2928. ;
  2929. else
  2930. internalerror(2019050920);
  2931. end;
  2932. if (fromsize<>tosize) then
  2933. tmpmmreg:=getmmregister(list,fromsize)
  2934. else
  2935. tmpmmreg:=reg;
  2936. if (ref.alignment in [1,2]) then
  2937. begin
  2938. case fromsize of
  2939. OS_F32:
  2940. begin
  2941. intreg:=getintregister(list,OS_32);
  2942. a_load_ref_reg(list,OS_32,OS_32,ref,intreg);
  2943. a_loadmm_intreg_reg(list,OS_32,OS_F32,intreg,tmpmmreg,mms_movescalar);
  2944. end;
  2945. OS_F64:
  2946. begin
  2947. reg64.reglo:=getintregister(list,OS_32);
  2948. reg64.reghi:=getintregister(list,OS_32);
  2949. cg64.a_load64_ref_reg(list,ref,reg64);
  2950. cg64.a_loadmm_intreg64_reg(list,OS_F64,reg64,tmpmmreg);
  2951. end;
  2952. else
  2953. internalerror(2009112412);
  2954. end;
  2955. end
  2956. else
  2957. handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
  2958. if (tmpmmreg<>reg) then
  2959. a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
  2960. end;
  2961. procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  2962. var
  2963. intreg,
  2964. tmpmmreg : tregister;
  2965. reg64 : tregister64;
  2966. begin
  2967. if assigned(shuffle) and
  2968. not(shufflescalar(shuffle)) then
  2969. internalerror(2009112416);
  2970. case tosize of
  2971. OS_32,OS_S32:
  2972. begin
  2973. tosize:=OS_F32;
  2974. { since we are loading an integer, no conversion may be required }
  2975. if (fromsize<>tosize) then
  2976. internalerror(2009112802);
  2977. end;
  2978. OS_64,OS_S64:
  2979. begin
  2980. tosize:=OS_F64;
  2981. { since we are loading an integer, no conversion may be required }
  2982. if (fromsize<>tosize) then
  2983. internalerror(2009112902);
  2984. end;
  2985. OS_F32,OS_F64:
  2986. ;
  2987. else
  2988. internalerror(2019050919);
  2989. end;
  2990. if (fromsize<>tosize) then
  2991. begin
  2992. tmpmmreg:=getmmregister(list,tosize);
  2993. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpmmreg,shuffle);
  2994. end
  2995. else
  2996. tmpmmreg:=reg;
  2997. if (ref.alignment in [1,2]) then
  2998. begin
  2999. case tosize of
  3000. OS_F32:
  3001. begin
  3002. intreg:=getintregister(list,OS_32);
  3003. a_loadmm_reg_intreg(list,OS_F32,OS_32,tmpmmreg,intreg,shuffle);
  3004. a_load_reg_ref(list,OS_32,OS_32,intreg,ref);
  3005. end;
  3006. OS_F64:
  3007. begin
  3008. reg64.reglo:=getintregister(list,OS_32);
  3009. reg64.reghi:=getintregister(list,OS_32);
  3010. cg64.a_loadmm_reg_intreg64(list,OS_F64,tmpmmreg,reg64);
  3011. cg64.a_load64_reg_ref(list,reg64,ref);
  3012. end;
  3013. else
  3014. internalerror(2009112417);
  3015. end;
  3016. end
  3017. else
  3018. handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
  3019. { VSTR cannot generate an FPU exception, VCVT is handled seperately, so we do not need a check here }
  3020. end;
  3021. procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  3022. begin
  3023. { this code can only be used to transfer raw data, not to perform
  3024. conversions }
  3025. if (tosize<>OS_F32) then
  3026. internalerror(2009112419);
  3027. if not(fromsize in [OS_32,OS_S32]) then
  3028. internalerror(2009112420);
  3029. if assigned(shuffle) and
  3030. not shufflescalar(shuffle) then
  3031. internalerror(2009112516);
  3032. list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
  3033. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3034. end;
  3035. procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
  3036. begin
  3037. { this code can only be used to transfer raw data, not to perform
  3038. conversions }
  3039. if (fromsize<>OS_F32) then
  3040. internalerror(2009112430);
  3041. if not(tosize in [OS_32,OS_S32]) then
  3042. internalerror(2009112409);
  3043. if assigned(shuffle) and
  3044. not shufflescalar(shuffle) then
  3045. internalerror(2009112514);
  3046. list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
  3047. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3048. end;
  3049. procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  3050. var
  3051. tmpreg: tregister;
  3052. begin
  3053. { the vfp doesn't support xor nor any other logical operation, but
  3054. this routine is used to initialise global mm regvars. We can
  3055. easily initialise an mm reg with 0 though. }
  3056. case op of
  3057. OP_XOR:
  3058. begin
  3059. if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
  3060. begin
  3061. if (reg_cgsize(src)<>size) or
  3062. assigned(shuffle) then
  3063. internalerror(2019081301);
  3064. list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
  3065. end
  3066. else
  3067. begin
  3068. if (src<>dst) or
  3069. (reg_cgsize(src)<>size) or
  3070. assigned(shuffle) then
  3071. internalerror(2009112907);
  3072. tmpreg:=getintregister(list,OS_32);
  3073. a_load_const_reg(list,OS_32,0,tmpreg);
  3074. case size of
  3075. OS_F32:
  3076. list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
  3077. OS_F64:
  3078. list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
  3079. else
  3080. internalerror(2009112908);
  3081. end;
  3082. end;
  3083. end
  3084. else
  3085. internalerror(2009112906);
  3086. end;
  3087. end;
  3088. procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  3089. const
  3090. overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
  3091. begin
  3092. if (op in overflowops) and
  3093. (size in [OS_8,OS_S8,OS_16,OS_S16]) then
  3094. a_load_reg_reg(list,OS_32,size,dst,dst);
  3095. end;
  3096. procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);
  3097. procedure checkreg(var reg : TRegister);
  3098. var
  3099. tmpreg : TRegister;
  3100. begin
  3101. if ((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
  3102. (getsupreg(reg)=RS_R15) then
  3103. begin
  3104. tmpreg:=getintregister(list,OS_INT);
  3105. a_load_reg_reg(list,OS_INT,OS_INT,reg,tmpreg);
  3106. reg:=tmpreg;
  3107. end;
  3108. end;
  3109. begin
  3110. checkreg(op1);
  3111. checkreg(op2);
  3112. checkreg(op3);
  3113. checkreg(op4);
  3114. list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
  3115. end;
  3116. procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
  3117. begin
  3118. if pi_needs_tls in current_procinfo.flags then
  3119. begin
  3120. list.concat(tai_regalloc.alloc(NR_R0,nil));
  3121. a_call_name(list,'fpc_read_tp',false);
  3122. a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
  3123. list.concat(tai_regalloc.dealloc(NR_R0,nil));
  3124. end;
  3125. end;
  3126. procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
  3127. begin
  3128. case op of
  3129. OP_NEG:
  3130. begin
  3131. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3132. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
  3133. list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
  3134. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  3135. end;
  3136. OP_NOT:
  3137. begin
  3138. cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
  3139. cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
  3140. end;
  3141. else
  3142. a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
  3143. end;
  3144. end;
  3145. procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
  3146. begin
  3147. a_op64_const_reg_reg(list,op,size,value,reg,reg);
  3148. end;
  3149. procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
  3150. var
  3151. ovloc : tlocation;
  3152. begin
  3153. a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,ovloc);
  3154. end;
  3155. procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
  3156. var
  3157. ovloc : tlocation;
  3158. begin
  3159. a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,ovloc);
  3160. end;
  3161. procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
  3162. begin
  3163. { this code can only be used to transfer raw data, not to perform
  3164. conversions }
  3165. if (mmsize<>OS_F64) then
  3166. internalerror(2009112405);
  3167. list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
  3168. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3169. end;
  3170. procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
  3171. begin
  3172. { this code can only be used to transfer raw data, not to perform
  3173. conversions }
  3174. if (mmsize<>OS_F64) then
  3175. internalerror(2009112406);
  3176. list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
  3177. { VMOV cannot generate an FPU exception, so we do not need a check here }
  3178. end;
  { Emits regdst := regsrc <op> value for a 64 bit operand held in two 32 bit
    registers.

    list     - assembler list receiving the instructions
    op       - OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT raise
               internalerror 2012022501, anything else 2003083101
    size     - operand size; only OS_64 yields an overflow location
    value    - 64 bit constant, split into its low and high dword
    setflags - request flag-setting code (also implied by cg.cgsetflags)
    ovloc    - set to LOC_FLAGS with the carry condition signalling overflow
               when flags were requested for an OS_64 ADD/SUB, LOC_VOID
               otherwise

    Changes compared to the previous revision:
    * the non-overflow ADD/SUB path now releases NR_DEFAULTFLAGS after the
      carry has been consumed, matching a_op64_reg_reg_reg_checkoverflow;
      previously the flags stayed allocated indefinitely
    * lo(value)/hi(value) are uniformly reinterpreted through aint() (same
      bit pattern) before being passed on }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);

    { Emits the low-word instruction (always flag setting, it produces the
      carry) followed by the carry-consuming high-word instruction.  A half
      of the constant that is not a valid shifter operand is first loaded
      into a fresh scratch register.  hi_sets_flags selects whether the
      high-word instruction updates the flags as well. }
    procedure emit_carry_chain(lo_opcode,hi_opcode : tasmop;hi_sets_flags : boolean);
      var
        scratch : tregister;
        shift : byte;
        hi_instr : taicpu;
      begin
        if is_shifter_const(aint(lo(value)),shift) then
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_const(lo_opcode,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S));
          end
        else
          begin
            scratch:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(lo(value)),scratch);
            { allocate the flags only after the constant load }
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(lo_opcode,regdst.reglo,regsrc.reglo,scratch),PF_S));
          end;

        if is_shifter_const(aint(hi(value)),shift) then
          hi_instr:=taicpu.op_reg_reg_const(hi_opcode,regdst.reghi,regsrc.reghi,aint(hi(value)))
        else
          begin
            scratch:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),scratch);
            hi_instr:=taicpu.op_reg_reg_reg(hi_opcode,regdst.reghi,regsrc.reghi,scratch);
          end;
        if hi_sets_flags then
          hi_instr:=setoppostfix(hi_instr,PF_S);
        list.concat(hi_instr);
      end;

    begin
      ovloc.loc:=LOC_VOID;
      { unary operations cannot be expressed with a constant operand }
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022501);

      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { the flags stay allocated: the caller evaluates them via ovloc }
          case op of
            OP_ADD:
              emit_carry_chain(A_ADD,A_ADC,true);
            OP_SUB:
              emit_carry_chain(A_SUB,A_SBC,true);
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { ARM uses opposite carry conventions for ADD and SUB: the
                overflow condition is "carry set" after an addition and
                "carry clear" after a subtraction }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { bitwise operations act on both halves independently }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                emit_carry_chain(A_ADD,A_ADC,false);
                { carry consumed, nobody looks at the flags afterwards }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                emit_carry_chain(A_SUB,A_SBC,false);
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Emits the 64 bit register/register operation
      regdst := regsrc1 <op> regsrc2   (OP_ADD, OP_AND, OP_OR, OP_XOR)
      regdst := regsrc2 - regsrc1      (OP_SUB)
    on 32 bit register pairs.  When flags are requested (setflags or
    cg.cgsetflags) for ADD/SUB both instructions set the flags and, for
    size=OS_64, ovloc describes the carry condition that signals overflow;
    otherwise ovloc is LOC_VOID.  OP_NEG/OP_NOT raise internalerror
    2012022502. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);

    { Allocates the flags, then emits the flag-setting low-word instruction
      and the carry-consuming high-word instruction computing
      regdst := lhs <opcode> rhs. }
    procedure emit_pair(lo_opcode,hi_opcode : tasmop;const lhs,rhs : tregister64;hi_sets_flags : boolean);
      var
        hi_instr : taicpu;
      begin
        cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(setoppostfix(taicpu.op_reg_reg_reg(lo_opcode,regdst.reglo,lhs.reglo,rhs.reglo),PF_S));
        hi_instr:=taicpu.op_reg_reg_reg(hi_opcode,regdst.reghi,lhs.reghi,rhs.reghi);
        if hi_sets_flags then
          hi_instr:=setoppostfix(hi_instr,PF_S);
        list.concat(hi_instr);
      end;

    var
      flags_requested : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if (op=OP_NEG) or (op=OP_NOT) then
        internalerror(2012022502);

      flags_requested:=setflags or tbasecgarm(cg).cgsetflags;
      if flags_requested and (op in [OP_ADD,OP_SUB]) then
        begin
          { flags remain allocated, the caller reads them through ovloc }
          case op of
            OP_ADD:
              emit_pair(A_ADD,A_ADC,regsrc1,regsrc2,true);
            OP_SUB:
              emit_pair(A_SUB,A_SBC,regsrc2,regsrc1,true);
            else
              internalerror(2003083102);
          end;
          if size=OS_64 then
            begin
              { ARM reports the overflow condition as "carry set" after ADD
                but as "carry clear" after SUB }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050917);
              end;
            end;
        end
      else
        case op of
          OP_AND,OP_OR,OP_XOR:
            begin
              { both halves are processed independently }
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
            end;
          OP_ADD:
            begin
              emit_pair(A_ADD,A_ADC,regsrc1,regsrc2,false);
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          OP_SUB:
            begin
              emit_pair(A_SUB,A_SBC,regsrc2,regsrc1,false);
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          else
            internalerror(2003083104);
        end;
    end;
  { Creates the integer register allocator for Thumb code.  R0..R7 are
    handed to the allocator, except that R7 is withheld when the current
    procedure uses it as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or (current_procinfo.framepointer<>NR_R7) then
        { R7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 holds the frame pointer }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Frees the integer, FPU and multimedia register allocator slots and then
    lets the inherited implementation finish the clean-up. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;
      inherited done_register_allocators;
    end;
  { Emits the Thumb function prologue: pushes the registers that have to be
    preserved, reserves the local stack area and, if a separate frame
    pointer is used, initialises it from the stack pointer.  CFI directives
    are emitted alongside.

    list         - assembler list receiving the prologue
    localsize    - size of the local variables in bytes (rounded up to 4)
    nostackframe - when true no prologue code is generated at all

    Raises internalerror 2013040601 if stack parameters are accessed and
    localsize exceeds the previously estimated stack frame size.

    Changes compared to the previous revision:
    * removed the debugging writeln calls that wrote to the compiler's
      standard output right before the internal error
    * removed the first register counting loop, whose result was reset to 0
      before ever being read, and the local reference that was initialised
      but never used
    * a frame of exactly 508 bytes now uses a single SUB, like the matching
      ADD in g_proc_exit, instead of SUB #508 followed by SUB #0 }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea : DWord;
      stack_parameters : Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      if nostackframe then
        exit;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { registers modified by the routine that are not volatile }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      { disabled in the original code for the frame pointer case:
        //!!!! a_reg_alloc(list,NR_R12);
        //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); }

      { the link register is always saved; with a separate frame pointer R7
        is saved as well }
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        regs:=regs+[RS_R7,RS_R14]
      else
        // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(regs,RS_R14);

      { safely estimate stack size: a large frame is set up through R4 below,
        so R4 has to be saved and marked as used }
      if localsize+current_settings.alignment.localalignmax+4>508 then
        begin
          include(rg[R_INTREGISTER].used_in_proc,RS_R4);
          include(regs,RS_R4);
        end;

      registerarea:=0;
      if regs<>[] then
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
          { the CFI offsets can only be emitted after the push, so the save
            area size is accumulated here }
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              begin
                inc(registerarea,4);
                current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
              end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
        end;

      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
      if stack_parameters or (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { do we access stack parameters?
            if yes, the previously estimated stacksize must be used }
          if stack_parameters then
            begin
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                internalerror(2013040601)
              else
                localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
            end
          else
            localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

          if localsize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if localsize<=1016 then
            begin
              { two immediate subtractions cover frames up to 2*508 bytes }
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
            end
          else
            begin
              { larger frames: add the negated size through R4, which was
                pushed above by the stack size estimate }
              a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
              include(regs,RS_R4);
            end;
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { frame pointer := stack pointer }
          list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
          current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
        end;
    end;
  // Emits the Thumb function epilogue: releases the local stack area and
  // pops the saved registers, loading the return address into the PC.
  //   list         - assembler list receiving the epilogue
  //   parasize     - not referenced by this implementation
  //   nostackframe - when true only a plain return (BX LR or MOV PC,LR) is
  //                  emitted
  // Nothing is emitted for routines marked po_noreturn.
  3509. procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
  3510. var
  3511. LocalSize : longint;
  3512. r: byte;
  3513. regs : tcpuregisterset;
  3514. registerarea : DWord;
  3515. stackmisalignment: pint;
  3516. stack_parameters : Boolean;
  3517. begin
  3518. { a routine not returning needs no exit code,
  3519. we trust this directive as arm thumb is normally used if small code shall be generated }
  3520. if po_noreturn in current_procinfo.procdef.procoptions then
  3521. exit;
  3522. if not(nostackframe) then
  3523. begin
  3524. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  // Registers to restore: the non-volatile registers used by the routine,
  // plus R15 so that the POP also performs the return, plus the frame
  // pointer register when one is in use.
  3525. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  3526. include(regs,RS_R15);
  3527. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  3528. include(regs,getsupreg(current_procinfo.framepointer));
  // Size of the register save area: 4 bytes per register in regs.
  3529. registerarea:=0;
  3530. for r:=RS_R0 to RS_R15 do
  3531. if r in regs then
  3532. inc(registerarea,4);
  3533. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  // Recompute the local area size with the same formula as g_proc_entry.
  3534. LocalSize:=current_procinfo.calc_stackframe_size;
  3535. if stack_parameters then
  3536. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  3537. else
  3538. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  // NOTE(review): code is only emitted below when the stack pointer is the
  // frame pointer or the target is Darwin; with a separate frame pointer on
  // other targets this branch emits no epilogue at all -- confirm intent.
  3539. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  3540. (target_info.system in systems_darwin) then
  3541. begin
  3542. if (LocalSize<>0) or
  3543. ((stackmisalignment<>0) and
  3544. ((pi_do_call in current_procinfo.flags) or
  3545. (po_assembler in current_procinfo.procdef.procoptions))) then
  3546. begin
  // Release the local area: nothing for size 0, one ADD up to 508 bytes,
  // two ADDs up to 1016 bytes, otherwise the size is loaded into R3 first.
  // (508 is presumably the largest immediate of the Thumb "ADD SP,#imm"
  // encoding -- TODO confirm.)
  3547. if LocalSize=0 then
  3548. else if LocalSize<=508 then
  3549. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  3550. else if LocalSize<=1016 then
  3551. begin
  3552. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
  3553. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize-508));
  3554. end
  3555. else
  3556. begin
  3557. a_reg_alloc(list,NR_R3);
  3558. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R3);
  3559. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
  3560. a_reg_dealloc(list,NR_R3);
  3561. end;
  3562. end;
  // NOTE(review): RS_R15 is always included in regs above, so the regs=[]
  // branch looks unreachable; the POP in the else branch is what returns.
  3563. if regs=[] then
  3564. begin
  3565. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  3566. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  3567. else
  3568. list.concat(taicpu.op_reg(A_BX,NR_R14))
  3569. end
  3570. else
  3571. list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,regs));
  3572. end;
  3573. end
  // No stack frame: return through the link register, using BX when the
  // selected CPU supports it and MOV PC,LR otherwise.
  3574. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  3575. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  3576. else
  3577. list.concat(taicpu.op_reg(A_BX,NR_R14))
  3578. end;
  // Loads a value of size fromsize from memory reference Ref into register
  // reg for Thumb code.  References whose alignment (1 or 2) is smaller than
  // the access size are read piecewise (bytes or halfwords) and merged with
  // LSL/ORR; all other loads go through handle_load_store.
  // Raises internalerror 200308298 for sizes other than 8/16/32 bit integers.
  3579. procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  3580. var
  3581. oppostfix:toppostfix;
  3582. usedtmpref: treference;
  3583. tmpreg,tmpreg2 : tregister;
  3584. dir : integer;
  3585. begin
  // When the source is not smaller than the destination, load with the
  // destination size.
  3586. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  3587. FromSize := ToSize;
  // Select the LDR postfix (B/SB/H/SH/none) matching the load size.
  3588. case FromSize of
  3589. { signed integer registers }
  3590. OS_8:
  3591. oppostfix:=PF_B;
  3592. OS_S8:
  3593. oppostfix:=PF_SB;
  3594. OS_16:
  3595. oppostfix:=PF_H;
  3596. OS_S16:
  3597. oppostfix:=PF_SH;
  3598. OS_32,
  3599. OS_S32:
  3600. oppostfix:=PF_None;
  3601. else
  3602. InternalError(200308298);
  3603. end;
  // Under-aligned reference: assemble the value from smaller pieces.
  3604. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  3605. begin
  // dir is the address step from one piece to the next more significant
  // one: -1 on big endian targets, +1 otherwise.
  3606. if target_info.endian=endian_big then
  3607. dir:=-1
  3608. else
  3609. dir:=1;
  3610. case FromSize of
  3611. OS_16,OS_S16:
  3612. begin
  3613. { only complicated references need an extra loadaddr }
  3614. if assigned(ref.symbol) or
  3615. (ref.index<>NR_NO) or
  3616. (ref.offset<-124) or
  3617. (ref.offset>124) or
  3618. { sometimes the compiler reused registers }
  3619. (reg=ref.index) or
  3620. (reg=ref.base) then
  3621. begin
  3622. tmpreg2:=getintregister(list,OS_INT);
  3623. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3624. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  3625. end
  3626. else
  3627. usedtmpref:=ref;
  // On big endian the first byte read sits at offset +1.
  3628. if target_info.endian=endian_big then
  3629. inc(usedtmpref.offset,1);
  3630. tmpreg:=getintregister(list,OS_INT);
  // First byte goes into reg unsigned; the second byte is loaded
  // sign-extended for OS_S16, shifted left by 8 and ORed in.
  3631. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3632. inc(usedtmpref.offset,dir);
  3633. if FromSize=OS_16 then
  3634. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  3635. else
  3636. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  3637. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3638. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3639. end;
  3640. OS_32,OS_S32:
  3641. begin
  3642. tmpreg:=getintregister(list,OS_INT);
  3643. { only complicated references need an extra loadaddr }
  3644. if assigned(ref.symbol) or
  3645. (ref.index<>NR_NO) or
  3646. (ref.offset<-124) or
  3647. (ref.offset>124) or
  3648. { sometimes the compiler reused registers }
  3649. (reg=ref.index) or
  3650. (reg=ref.base) then
  3651. begin
  3652. tmpreg2:=getintregister(list,OS_INT);
  3653. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3654. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  3655. end
  3656. else
  3657. usedtmpref:=ref;
  // Halfword-aligned: two 16 bit loads merged with LSL #16 / ORR.
  3658. if ref.alignment=2 then
  3659. begin
  3660. if target_info.endian=endian_big then
  3661. inc(usedtmpref.offset,2);
  3662. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  3663. inc(usedtmpref.offset,dir*2);
  3664. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  3665. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3666. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3667. end
  3668. else
  // Byte-aligned: four 8 bit loads, shifted by 8, 16 and 24 bits and ORed
  // into reg one after the other.
  3669. begin
  3670. if target_info.endian=endian_big then
  3671. inc(usedtmpref.offset,3);
  3672. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3673. inc(usedtmpref.offset,dir);
  3674. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3675. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3676. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3677. inc(usedtmpref.offset,dir);
  3678. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3679. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3680. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3681. inc(usedtmpref.offset,dir);
  3682. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3683. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,24));
  3684. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3685. end;
  3686. end
  3687. else
  3688. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  3689. end;
  3690. end
  3691. else
  // Sufficiently aligned reference: a single load.
  3692. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  // A sign-extended byte destined for a 16 bit value gets an extra
  // OS_16 -> OS_32 register move on reg.
  // NOTE(review): fromsize may have been replaced by tosize at the top of
  // the routine, so this only triggers when the source really was OS_S8.
  3693. if (fromsize=OS_S8) and (tosize = OS_16) then
  3694. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  3695. end;
  { Loads the constant a into register reg for Thumb code.  Constants
    accepted by is_thumb_imm are loaded with a flag-setting MOV; every other
    constant is placed as a 32 bit word in the procedure's local data and
    fetched with a PC-relative LDR.  Only 8, 16 and 32 bit integer sizes are
    allowed, anything else raises internalerror 2002090908. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      pool_label : tasmlabel;
      pool_ref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { constant does not fit: emit "label: .long a" into the local data and
        reference the label relative to the PC }
      reference_reset(pool_ref,4,[]);
      current_asmdata.getjumplabel(pool_label);
      cg.a_label(current_procinfo.aktlocaldata,pool_label);
      { remember the label entry that was just appended }
      pool_ref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      pool_ref.symbol:=pool_label;
      pool_ref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,pool_ref));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef at every caller
    side location of that parameter (register or stack reference).  Offsets
    accepted by is_thumb_imm are subtracted directly; other offsets are
    fetched from the local data into R4, which is saved and restored around
    the operation.

    Raises internalerror 2003052504 if procdef has no 'self' parameter
    symbol and 2003091804 for a parameter location that is neither a
    register nor a reference. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);

    { Saves R4 on the stack and loads ioffset into it from a freshly
      emitted, 4 byte aligned word in the local data. }
    procedure save_r4_and_load_offset;
      var
        pool_ref : treference;
        pool_label : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(pool_ref,4,[]);
        current_asmdata.getjumplabel(pool_label);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,pool_label);
        pool_ref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        pool_ref.symbol:=pool_label;
        pool_ref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,pool_ref));
      end;

    { Restores the R4 value saved by save_r4_and_load_offset. }
    procedure restore_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    var
      self_sym : tsym;
      self_ref : treference;
      cur_loc : Pcgparalocation;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      self_sym:=tsym(procdef.parast.Find('self'));
      if not(assigned(self_sym) and
             (self_sym.typ=paravarsym)) then
        internalerror(2003052504);

      cur_loc:=tparavarsym(self_sym).paraloc[callerside].location;
      while assigned(cur_loc) do
        begin
          case cur_loc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,cur_loc^.size,ioffset,cur_loc^.register)
                else
                  begin
                    save_r4_and_load_offset;
                    a_op_reg_reg(list,OP_SUB,cur_loc^.size,NR_R4,cur_loc^.register);
                    restore_r4;
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(self_ref,cur_loc^.reference.index,cur_loc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,cur_loc^.size,ioffset,self_ref)
                else
                  begin
                    save_r4_and_load_offset;
                    a_op_reg_ref(list,OP_SUB,cur_loc^.size,NR_R4,self_ref);
                    restore_r4;
                  end;
              end
            else
              internalerror(2003091804);
          end;
          cur_loc:=cur_loc^.next;
        end;
    end;
  { Thumb front end of handle_load_store.  If the reference cannot be used
    directly by the requested load/store, its address is first computed into
    a fresh register and a plain [register] reference is used instead; the
    actual instruction is then emitted by the inherited implementation,
    whose result is returned.

    The address is materialised when one of the following holds:
    * word LDR/STR, base is not the stack pointer, |offset| > 124
    * byte LDR/STR (postfix B or LDRB/STRB), stack pointer used as base or
      index, or |offset| > 31
    * halfword LDR/STR (postfix H or LDRH/STRH), stack pointer used as base
      or index, |offset| > 62, or odd offset
    * word LDR relative to the stack pointer with |offset| > 1020
    * any LDR with postfix SH/SB, or any LDR with |offset| > 124 }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      effective_ref : treference;
      addr_reg : TRegister;
      no_postfix,
      word_access,
      byte_access,
      half_access,
      sp_involved,
      need_address : boolean;
    begin
      no_postfix:=oppostfix=PF_None;
      word_access:=no_postfix and ((op=A_LDR) or (op=A_STR));
      byte_access:=((oppostfix=PF_B) and ((op=A_LDR) or (op=A_STR))) or
                   (no_postfix and ((op=A_LDRB) or (op=A_STRB)));
      half_access:=((oppostfix=PF_H) and ((op=A_LDR) or (op=A_STR))) or
                   (no_postfix and ((op=A_LDRH) or (op=A_STRH)));
      sp_involved:=(ref.base=NR_STACK_POINTER_REG) or
                   (ref.index=NR_STACK_POINTER_REG);

      need_address:=
        { LDR/STR limitations }
        (word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         (sp_involved or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (sp_involved or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word load with a large offset }
        ((op=A_LDR) and no_postfix and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and remaining large offsets }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      effective_ref:=ref;
      if need_address then
        begin
          addr_reg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addr_reg);
          reference_reset_base(effective_ref,addr_reg,0,ref.temppos,ref.alignment,ref.volatility);
        end;
      Result:=inherited handle_load_store(list, op, oppostfix, reg, effective_ref);
    end;
  { Emits the two operand Thumb operation dst := dst <op> src (for the unary
    OP_NEG/OP_NOT: dst := <op> src) and afterwards lets maybeadjustresult
    fix up the result for the given size.

    OP_DIV/OP_IDIV are not supported here (internalerror 200308284).
    OP_ROL is only available for 32 bit operands (internalerror 2008072805
    otherwise) and is synthesised as a rotate right by 32-src.

    Fix compared to the previous revision: the OP_ROL sequence computed
    32-src into a temporary register but then rotated by src itself, i.e. it
    emitted a rotate right by the original amount.  The rotate now uses the
    temporary. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { simulate ROL by ror'ing 32-value: tmpreg := 32 - src }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the complemented amount held in tmpreg }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            { the generic two operand forms carry the postfix from
              op_reg_postfix_thumb, so the flags are marked as in use }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  // Emits dst := dst <op> a for Thumb code and finally calls
  // maybeadjustresult for the given size.  ADD/SUB with a constant accepted
  // by is_thumb_imm use the immediate instruction form; a number of trivial
  // cases are simplified; everything else loads the constant into a
  // temporary register and defers to a_op_reg_reg.
  // The regions guarded by DUMMY are only compiled when that symbol is
  // defined.
  3875. procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
  3876. var
  3877. tmpreg : tregister;
  3878. {$ifdef DUMMY}
  3879. l1 : longint;
  3880. {$endif DUMMY}
  3881. begin
  3882. //!!! ovloc.loc:=LOC_VOID;
  // If the negated constant is a valid Thumb immediate, turn ADD into SUB
  // (and vice versa) with the negated constant.  With range checking on,
  // -2147483648 is excluded (presumably because negating it would raise a
  // range error -- TODO confirm).
  3883. if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
  3884. case op of
  3885. OP_ADD:
  3886. begin
  3887. op:=OP_SUB;
  3888. a:=aint(dword(-a));
  3889. end;
  3890. OP_SUB:
  3891. begin
  3892. op:=OP_ADD;
  3893. a:=aint(dword(-a));
  3894. end
  3895. else
  3896. ;
  3897. end;
  // ADD/SUB with an encodable immediate: mark the flags as allocated and
  // emit the instruction with the postfix from op_reg_postfix_thumb.
  3898. if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
  3899. begin
  3900. // if cgsetflags or setflags then
  3901. a_reg_alloc(list,NR_DEFAULTFLAGS);
  3902. list.concat(setoppostfix(
  3903. taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
  // The overflow bookkeeping below is fully commented out, so this block
  // currently has no effect.
  3904. if (cgsetflags {!!! or setflags }) and (size in [OS_8,OS_16,OS_32]) then
  3905. begin
  3906. //!!! ovloc.loc:=LOC_FLAGS;
  3907. case op of
  3908. OP_ADD:
  3909. //!!! ovloc.resflags:=F_CS;
  3910. ;
  3911. OP_SUB:
  3912. //!!! ovloc.resflags:=F_CC;
  3913. ;
  3914. else
  3915. ;
  3916. end;
  3917. end;
  3918. end
  3919. else
  3920. begin
  3921. { there could be added some more sophisticated optimizations }
  // Multiplication/division by 1: only a dst-to-dst move is issued.
  3922. if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
  3923. a_load_reg_reg(list,size,size,dst,dst)
  // Multiplication by 0: load the constant 0.
  3924. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  3925. a_load_const_reg(list,size,0,dst)
  // Signed multiplication/division by -1: negate.
  3926. else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  3927. a_op_reg_reg(list,OP_NEG,size,dst,dst)
  3928. { we do this here instead in the peephole optimizer because
  3929. it saves us a register }
  3930. {$ifdef DUMMY}
  3931. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  3932. a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
  3933. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  3934. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  3935. begin
  3936. if l1>32 then{roozbeh does this ever happen?}
  3937. internalerror(2003082903);
  3938. shifterop_reset(so);
  3939. so.shiftmode:=SM_LSL;
  3940. so.shiftimm:=l1;
  3941. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
  3942. end
  3943. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  3944. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  3945. begin
  3946. if l1>32 then{does this ever happen?}
  3947. internalerror(2012051802);
  3948. shifterop_reset(so);
  3949. so.shiftmode:=SM_LSL;
  3950. so.shiftimm:=l1;
  3951. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
  3952. end
  3953. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
  3954. begin
  3955. { nothing to do on success }
  3956. end
  3957. {$endif DUMMY}
  3958. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  3959. Just using mov x, #0 might allow some easier optimizations down the line. }
  3960. else if (op = OP_AND) and (dword(a)=0) then
  3961. list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
  3962. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  // In this two operand form source and destination are the same register,
  // so the branch below is an empty statement.
  3963. else if (op = OP_AND) and (not(dword(a))=0) then
  3964. // do nothing
  3965. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  3966. broader range of shifterconstants.}
  3967. {$ifdef DUMMY}
  3968. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  3969. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
  3970. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  3971. begin
  3972. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
  3973. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  3974. end
  3975. else if (op in [OP_ADD, OP_SUB, OP_OR]) and
  3976. not(cgsetflags or setflags) and
  3977. split_into_shifter_const(a, imm1, imm2) then
  3978. begin
  3979. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
  3980. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  3981. end
  3982. {$endif DUMMY}
  // Shifts by a constant use the three operand immediate form.
  3983. else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
  3984. begin
  3985. list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
  3986. end
  3987. else
  // General case: constant into a temporary, then register/register form.
  // NOTE(review): a_op_reg_reg already calls maybeadjustresult itself, so
  // together with the call at the end of this routine the adjustment runs
  // twice on this path.
  3988. begin
  3989. tmpreg:=getintregister(list,size);
  3990. a_load_const_reg(list,size,a,tmpreg);
  3991. a_op_reg_reg(list,op,size,tmpreg,dst);
  3992. end;
  3993. end;
  3994. maybeadjustresult(list,op,size,dst);
  3995. end;
  { Emits dst := src <op> a.  An addition of a positive multiple of 4 up to
    1020 to R13 with a destination other than R13 is emitted as a single
    three operand ADD; every other case is handled by the inherited
    implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_relative_add : boolean;
    begin
      sp_relative_add:=(op=OP_ADD) and
                       (src=NR_R13) and (dst<>NR_R13) and
                       ((a mod 4)=0) and (a>0) and (a<=1020);
      if not sp_relative_add then
        begin
          inherited a_op_const_reg_reg(list,op,size,a,src,dst);
          exit;
        end;
      list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition described by f as 0 or 1 in reg using a
    conditional branch around two flag-setting MOVs:

        b<cond> is_set
        movs    reg,#0
        b       done
      is_set:
        movs    reg,#1
      done:

    The flags register is released before the final label. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      is_set_label,
      done_label : tasmlabel;
      cond_branch : taicpu;
    begin
      current_asmdata.getjumplabel(is_set_label);
      current_asmdata.getjumplabel(done_label);

      { branch taken when the condition holds }
      cond_branch:=setcondition(taicpu.op_sym(A_B,is_set_label),flags_to_cond(f));
      cond_branch.is_jmp:=true;
      list.concat(cond_branch);

      { condition false }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,done_label));

      { condition true }
      cg.a_label(list,is_set_label);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,done_label);
    end;
  { Creates the register allocators for Thumb-2 code: the integer allocator
    (without R9 on iOS), the FPA register allocator when the selected FPU
    provides FPA registers, and the multimedia register allocator. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and multimedia register allocators and then
    runs the inherited clean-up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { free is called unconditionally on all three slots, including the FPU
        slot, which init_register_allocators only fills when FPA is available }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX reg) and records in the current
    procedure's flags that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX,reg);
      list.concat(callinstr);
      { Historical note (JM): pass 1 does not set pi_do_call properly anymore
        and the flag is (hopefully) only needed after pass 2, so the former
        consistency check (internalerror 2003060703 if the flag was missing)
        is disabled and the flag is simply set here. }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg.

    Strategies, tried in this order:
      1. MOV  reg,#a        if a is a Thumb-2 immediate (is_thumb32_imm)
      2. MVN  reg,#not(a)   if the complement is a Thumb-2 immediate
      3. MOVW reg,#a        if a fits into the low 16 bits
      4. LDR  reg,[pc,...]  from a 32 bit entry appended to the procedure's
                            local data (aktlocaldata)
    Sizes other than 8/16/32 bit integers trigger internalerror 2002090909. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      case size of
        OS_8,OS_S8,
        OS_16,OS_S16,
        OS_32,OS_S32:
          ;
        else
          internalerror(2002090909);
      end;

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { fall back to a pc relative load: label + 32 bit constant go into the
        local data list, the reference remembers the label entry }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      reference_reset(poolref,4,[]);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value from the memory reference Ref into reg.
    list     : assembler list the instructions are appended to
    fromsize : size of the value in memory; replaced by tosize when it is not smaller
    tosize   : size wanted in the register
    Ref      : source reference
    reg      : destination register
    References with alignment 1 or 2 that is smaller than the access size are
    read piecewise (bytes or halfwords) and merged with ORR + LSL; all other
    loads are emitted through handle_load_store with a matching postfix.
    Unsupported sizes raise internalerror 2003082913. }
  4081. procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  4082. var
  4083. oppostfix:toppostfix;
  4084. usedtmpref: treference;
  4085. tmpreg,tmpreg2 : tregister;
  4086. so : tshifterop;
  4087. dir : integer;
  4088. begin
  { never read more bytes than the destination size asks for }
  4089. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  4090. FromSize := ToSize;
  { select the LDR postfix (byte / halfword, signed or not) for the aligned case }
  4091. case FromSize of
  4092. { signed integer registers }
  4093. OS_8:
  4094. oppostfix:=PF_B;
  4095. OS_S8:
  4096. oppostfix:=PF_SB;
  4097. OS_16:
  4098. oppostfix:=PF_H;
  4099. OS_S16:
  4100. oppostfix:=PF_SH;
  4101. OS_32,
  4102. OS_S32:
  4103. oppostfix:=PF_None;
  4104. else
  4105. InternalError(2003082913);
  4106. end;
  { under-aligned reference: build the value from several smaller loads }
  4107. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  4108. begin
  { dir is the offset step from one loaded part to the next: the first part
    loaded lands in the low bits of reg, so on big endian we walk downwards }
  4109. if target_info.endian=endian_big then
  4110. dir:=-1
  4111. else
  4112. dir:=1;
  4113. case FromSize of
  4114. OS_16,OS_S16:
  4115. begin
  4116. { only complicated references need an extra loadaddr }
  4117. if assigned(ref.symbol) or
  4118. (ref.index<>NR_NO) or
  4119. (ref.offset<-255) or
  4120. (ref.offset>4094) or
  4121. { sometimes the compiler reused registers }
  4122. (reg=ref.index) or
  4123. (reg=ref.base) then
  4124. begin
  { compute the address once into tmpreg2 and use a plain [tmpreg2] reference }
  4125. tmpreg2:=getintregister(list,OS_INT);
  4126. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4127. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4128. end
  4129. else
  4130. usedtmpref:=ref;
  { big endian: start at the last byte, which holds the low 8 bits }
  4131. if target_info.endian=endian_big then
  4132. inc(usedtmpref.offset,1);
  4133. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  4134. tmpreg:=getintregister(list,OS_INT);
  { low byte goes directly into reg }
  4135. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4136. inc(usedtmpref.offset,dir);
  { high byte: unsigned load for OS_16, signed load for OS_S16 }
  4137. if FromSize=OS_16 then
  4138. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  4139. else
  4140. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg := reg or (tmpreg shl 8) }
  4141. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4142. end;
  4143. OS_32,OS_S32:
  4144. begin
  4145. tmpreg:=getintregister(list,OS_INT);
  4146. { only complicated references need an extra loadaddr }
  4147. if assigned(ref.symbol) or
  4148. (ref.index<>NR_NO) or
  4149. (ref.offset<-255) or
  4150. (ref.offset>4092) or
  4151. { sometimes the compiler reused registers }
  4152. (reg=ref.index) or
  4153. (reg=ref.base) then
  4154. begin
  4155. tmpreg2:=getintregister(list,OS_INT);
  4156. a_loadaddr_ref_reg(list,ref,tmpreg2);
  4157. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  4158. end
  4159. else
  4160. usedtmpref:=ref;
  4161. shifterop_reset(so);so.shiftmode:=SM_LSL;
  4162. if ref.alignment=2 then
  4163. begin
  { halfword aligned: two 16 bit loads, the second one shifted left by 16 }
  4164. if target_info.endian=endian_big then
  4165. inc(usedtmpref.offset,2);
  4166. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  4167. inc(usedtmpref.offset,dir*2);
  4168. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  4169. so.shiftimm:=16;
  4170. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4171. end
  4172. else
  4173. begin
  { byte aligned: four 8 bit loads, shifted left by 0, 8, 16 and 24 }
  4174. if target_info.endian=endian_big then
  4175. inc(usedtmpref.offset,3);
  4176. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  4177. inc(usedtmpref.offset,dir);
  4178. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4179. so.shiftimm:=8;
  4180. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4181. inc(usedtmpref.offset,dir);
  4182. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4183. so.shiftimm:=16;
  4184. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4185. inc(usedtmpref.offset,dir);
  4186. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  4187. so.shiftimm:=24;
  4188. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  4189. end;
  4190. end
  4191. else
  { remaining sizes cannot be under-aligned in a way that matters: plain load }
  4192. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  4193. end;
  4194. end
  4195. else
  { sufficiently aligned reference: a single LDR with the selected postfix }
  4196. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { NOTE(review): a signed byte loaded for an unsigned 16 bit destination gets
    an extra OS_16 -> OS_32 register move; presumably this clears the sign
    extension above bit 15 - confirm against a_load_reg_reg }
  4197. if (fromsize=OS_S8) and (tosize = OS_16) then
  4198. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  4199. end;
  { Two-operand register operation "dst := op(src)" / "dst := dst op src".

    Only OP_NOT is handled here: the value is complemented with MVN and, for
    8 and 16 bit sizes, brought back to its size with the matching
    UXTB/SXTB/UXTH/SXTH.  All other operations are forwarded to the
    inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      extendop : tasmop;
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { pick the extension that restores the operand size, if one is needed }
      extendop:=A_NONE;
      case size of
        OS_8:
          extendop:=A_UXTB;
        OS_S8:
          extendop:=A_SXTB;
        OS_16:
          extendop:=A_UXTH;
        OS_S16:
          extendop:=A_SXTH;
        OS_32,
        OS_S32:
          ;
        else
          internalerror(2019050916);
      end;
      if extendop<>A_NONE then
        list.concat(taicpu.op_reg_reg(extendop,dst,dst));
    end;
  { Emits "dst := src op a" for a constant a, optionally updating the flags.
    list     : assembler list the instructions are appended to
    op, size : operation and operand size
    a        : constant operand
    src, dst : source and destination register
    setflags : request a flag-setting instruction (the field cgsetflags has the same effect)
    ovloc    : set to LOC_VOID on entry; becomes LOC_FLAGS with F_CS (ADD) or
               F_CC (SUB) when flags were requested for a directly encodable
               constant and size is OS_8, OS_16 or OS_32; in the fallback
               path it is filled in by a_op_reg_reg_reg_checkoverflow
    maybeadjustresult is called on dst at the very end. }
  4220. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4221. var
  4222. shift, width : byte;
  4223. tmpreg : tregister;
  4224. so : tshifterop;
  4225. l1 : longint;
  4226. begin
  4227. ovloc.loc:=LOC_VOID;
  { if the negated constant is encodable, turn ADD into SUB (and vice versa)
    with -a; -2147483648 is excluded from this rewrite }
  4228. if (a<>-2147483648) and is_shifter_const(-a,shift) then
  4229. case op of
  4230. OP_ADD:
  4231. begin
  4232. op:=OP_SUB;
  4233. a:=aint(dword(-a));
  4234. end;
  4235. OP_SUB:
  4236. begin
  4237. op:=OP_ADD;
  4238. a:=aint(dword(-a));
  4239. end
  4240. else
  4241. ;
  4242. end;
  { first main path: the constant is a shifter constant and op is not a multiplication }
  4243. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4244. case op of
  { these operations are not valid with a constant operand here }
  4245. OP_NEG,OP_NOT,
  4246. OP_DIV,OP_IDIV:
  4247. internalerror(200308285);
  { shifts and rotates: MOV dst,src,<shift> #a, or a plain MOV when a=0;
    amounts above 32 are rejected with an internal error }
  4248. OP_SHL:
  4249. begin
  4250. if a>32 then
  4251. internalerror(2014020703);
  4252. if a<>0 then
  4253. begin
  4254. shifterop_reset(so);
  4255. so.shiftmode:=SM_LSL;
  4256. so.shiftimm:=a;
  4257. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4258. end
  4259. else
  4260. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4261. end;
  4262. OP_ROL:
  4263. begin
  4264. if a>32 then
  4265. internalerror(2014020704);
  4266. if a<>0 then
  4267. begin
  { rotate left by a is emitted as rotate right by 32-a }
  4268. shifterop_reset(so);
  4269. so.shiftmode:=SM_ROR;
  4270. so.shiftimm:=32-a;
  4271. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4272. end
  4273. else
  4274. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4275. end;
  4276. OP_ROR:
  4277. begin
  4278. if a>32 then
  4279. internalerror(2014020705);
  4280. if a<>0 then
  4281. begin
  4282. shifterop_reset(so);
  4283. so.shiftmode:=SM_ROR;
  4284. so.shiftimm:=a;
  4285. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4286. end
  4287. else
  4288. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4289. end;
  4290. OP_SHR:
  4291. begin
  4292. if a>32 then
  4293. internalerror(200308292);
  4294. shifterop_reset(so);
  4295. if a<>0 then
  4296. begin
  4297. so.shiftmode:=SM_LSR;
  4298. so.shiftimm:=a;
  4299. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4300. end
  4301. else
  4302. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4303. end;
  4304. OP_SAR:
  4305. begin
  4306. if a>32 then
  4307. internalerror(200308295);
  4308. if a<>0 then
  4309. begin
  4310. shifterop_reset(so);
  4311. so.shiftmode:=SM_ASR;
  4312. so.shiftimm:=a;
  4313. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4314. end
  4315. else
  4316. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4317. end;
  4318. else
  { remaining operations: ADD/SUB with a constant outside 0..4095 go through
    a temporary register, everything else uses the immediate form }
  4319. if (op in [OP_SUB, OP_ADD]) and
  4320. ((a < 0) or
  4321. (a > 4095)) then
  4322. begin
  4323. tmpreg:=getintregister(list,size);
  4324. a_load_const_reg(list, size, a, tmpreg);
  4325. if cgsetflags or setflags then
  4326. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression evaluates to PF_S when flags are requested and to
    toppostfix(0) otherwise }
  4327. list.concat(setoppostfix(
  4328. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4329. end
  4330. else
  4331. begin
  4332. if cgsetflags or setflags then
  4333. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4334. list.concat(setoppostfix(
  4335. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4336. end;
  { report the overflow condition through the flags; only done for the
    unsigned sizes listed, and resflags is only assigned for ADD and SUB }
  4337. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4338. begin
  4339. ovloc.loc:=LOC_FLAGS;
  4340. case op of
  4341. OP_ADD:
  4342. ovloc.resflags:=F_CS;
  4343. OP_SUB:
  4344. ovloc.resflags:=F_CC;
  4345. else
  4346. ;
  4347. end;
  4348. end;
  4349. end
  4350. else
  { second main path: multiplications and constants that are no shifter
    constants; a chain of special cases, first match wins }
  4351. begin
  4352. { there could be added some more sophisticated optimizations }
  4353. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4354. a_load_reg_reg(list,size,size,src,dst)
  4355. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4356. a_load_const_reg(list,size,0,dst)
  4357. else if (op in [OP_IMUL]) and (a=-1) then
  4358. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4359. { we do this here instead in the peephole optimizer because
  4360. it saves us a register }
  { the strength reductions below are only used when no flags are requested }
  4361. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4362. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4363. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4364. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4365. begin
  4366. if l1>32 then{roozbeh does this ever happen?}
  4367. internalerror(2003082911);
  4368. shifterop_reset(so);
  4369. so.shiftmode:=SM_LSL;
  4370. so.shiftimm:=l1;
  4371. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4372. end
  4373. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4374. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4375. begin
  4376. if l1>32 then{does this ever happen?}
  4377. internalerror(2012051803);
  4378. shifterop_reset(so);
  4379. so.shiftmode:=SM_LSL;
  4380. so.shiftimm:=l1;
  4381. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4382. end
  4383. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4384. begin
  4385. { nothing to do on success }
  4386. end
  4387. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4388. Just using mov x, #0 might allow some easier optimizations down the line. }
  4389. else if (op = OP_AND) and (dword(a)=0) then
  4390. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4391. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4392. else if (op = OP_AND) and (not(dword(a))=0) then
  4393. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4394. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4395. broader range of shifterconstants.}
  4396. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4397. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  4398. else if (op = OP_AND) and is_thumb32_imm(a) then
  4399. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  { and with $FFFF is a zero extension of the low halfword }
  4400. else if (op = OP_AND) and (a = $FFFF) then
  4401. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  4402. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4403. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  { the cleared bits form one contiguous run: copy src and clear the run with BFC }
  4404. else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
  4405. begin
  4406. a_load_reg_reg(list,size,size,src,dst);
  4407. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4408. end
  4409. else
  { generic fallback: load the constant into a register and use the
    register/register variant, which also fills in ovloc }
  4410. begin
  4411. tmpreg:=getintregister(list,size);
  4412. a_load_const_reg(list,size,a,tmpreg);
  4413. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4414. end;
  4415. end;
  4416. maybeadjustresult(list,op,size,dst);
  4417. end;
  { Thumb-2 instruction used for each generic code generator operation,
    indexed by TOpCG.  Entries holding A_NONE have no single instruction in
    this table.  Unlike a table that would map division to A_NONE, this one
    provides A_UDIV and A_SDIV.
    NOTE(review): the meaning of each position depends on the declaration
    order of TOpCG, which is not visible here - confirm when editing. }
  4418. const
  4419. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4420. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4421. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Emits "dst := src2 op src1" on registers, optionally updating the flags.

    list      : assembler list the instructions are appended to
    op, size  : operation and operand size
    src1,src2 : source registers (src1 is the second operand / shift count)
    dst       : destination register
    setflags  : request flag-setting code (the field cgsetflags has the
                same effect)
    ovloc     : set to LOC_VOID on entry; for MUL/IMUL with flags requested
                it becomes LOC_FLAGS/F_NE after the high word of the 64 bit
                product has been compared

    OP_NEG and OP_NOT are not valid here (internalerror 200308286).
    OP_ROL/OP_ROR are only supported for 32 bit sizes.
    maybeadjustresult is called on dst at the very end.

    Fixes compared to the previous revision:
    - OP_ROL no longer overwrites src1 with the computed rotate count; the
      count now lives in the scratch register only.
    - when dst, src1 and src2 are all the same register, the multiplication
      paths now copy the operand into the scratch register before using it
      (previously the scratch register was read uninitialised). }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the count is built in tmpreg
              so that the caller's src1 stays intact }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { overflow check wanted: compute the full 64 bit product,
                  the high word ends up in overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow if the high word equals the sign
                      extension of the low word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow if the high word is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { generic three register form; the postfix expression yields
              PF_S when flags are requested and toppostfix(0) otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the condition described by f as 0 or 1 in reg using an
    if-then-else block:
        ite    <cond>
        mov<cond>     reg,#1
        mov<not cond> reg,#0 }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
      mov_true,mov_false : taicpu;
    begin
      cond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,cond));

      mov_true:=taicpu.op_reg_const(A_MOV,reg,1);
      list.concat(setcondition(mov_true,cond));

      mov_false:=taicpu.op_reg_const(A_MOV,reg,0);
      list.concat(setcondition(mov_false,inverse_cond(cond)));
    end;
  { Emits the procedure prologue.
    list         : assembler list the instructions are appended to
    localsize    : bytes wanted for local variables; rounded up to a multiple of 4
    nostackframe : when true, no prologue code is generated at all
    Steps when a stack frame is built: optional frame pointer set-up through
    R12, STMFD of the non-volatile integer registers in use (plus R14 and the
    frame pointer where required), stack pointer adjustment for the locals,
    and SFM of the FPA registers in use when the FPU type has FPA. }
  4537. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4538. var
  4539. ref : treference;
  4540. shift : byte;
  4541. firstfloatreg,lastfloatreg,
  4542. r : byte;
  4543. regs : tcpuregisterset;
  4544. stackmisalignment: pint;
  4545. begin
  4546. LocalSize:=align(LocalSize,4);
  4547. { call instruction does not put anything on the stack }
  4548. stackmisalignment:=0;
  4549. if not(nostackframe) then
  4550. begin
  4551. firstfloatreg:=RS_NO;
  4552. lastfloatreg:=RS_NO;
  4553. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4554. begin
  4555. { save floating point registers? }
  { find the first and last used, non-volatile FPA register; each one found
    adds 12 bytes to the misalignment bookkeeping }
  4556. for r:=RS_F0 to RS_F7 do
  4557. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4558. begin
  4559. if firstfloatreg=RS_NO then
  4560. firstfloatreg:=r;
  4561. lastfloatreg:=r;
  4562. inc(stackmisalignment,12);
  4563. end;
  4564. end;
  4565. a_reg_alloc(list,NR_STACK_POINTER_REG);
  { with a separate frame pointer, R12 keeps a copy of the stack pointer on entry }
  4566. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4567. begin
  4568. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4569. a_reg_alloc(list,NR_R12);
  4570. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4571. end;
  4572. { save int registers }
  { reference used by the STM below: stack pointer as index, pre-indexed }
  4573. reference_reset(ref,4,[]);
  4574. ref.index:=NR_STACK_POINTER_REG;
  4575. ref.addressmode:=AM_PREINDEXED;
  { registers to save: used in this procedure and not volatile for stdcall }
  4576. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { with a frame pointer always save it together with R14; otherwise R14 is
    added only if something else is saved or the procedure performs calls }
  4577. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4578. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4579. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4580. include(regs,RS_R14);
  4581. if regs<>[] then
  4582. begin
  { 4 bytes of stack per saved register }
  4583. for r:=RS_R0 to RS_R15 do
  4584. if (r in regs) then
  4585. inc(stackmisalignment,4);
  4586. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4587. end;
  4588. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4589. begin
  4590. { the framepointer now points to the saved R15, so the saved
  4591. framepointer is at R11-12 (for get_caller_frame) }
  4592. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4593. a_reg_dealloc(list,NR_R12);
  4594. end;
  { only the remainder relative to the maximum local alignment matters }
  4595. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  { reserve stack when there are locals, or when the stack is misaligned and
    the procedure makes calls or is an assembler routine }
  4596. if (LocalSize<>0) or
  4597. ((stackmisalignment<>0) and
  4598. ((pi_do_call in current_procinfo.flags) or
  4599. (po_assembler in current_procinfo.procdef.procoptions))) then
  4600. begin
  { pad the local area so that saved registers plus locals are aligned }
  4601. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4602. if not(is_shifter_const(localsize,shift)) then
  4603. begin
  { size not encodable as immediate: load it into R12 and subtract the register }
  4604. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4605. a_reg_alloc(list,NR_R12);
  4606. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4607. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4608. a_reg_dealloc(list,NR_R12);
  4609. end
  4610. else
  4611. begin
  { NOTE(review): R12 is deallocated here although no a_reg_alloc for it is
    visible on the path without a separate frame pointer, and with a frame
    pointer it was already deallocated above - confirm this is intended }
  4612. a_reg_dealloc(list,NR_R12);
  4613. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4614. end;
  4615. end;
  4616. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4617. begin
  4618. if firstfloatreg<>RS_NO then
  4619. begin
  { store the FPA registers firstfloatreg..lastfloatreg with a single SFM at
    floatregstart relative to the frame pointer; when that offset reaches
    1023 or more the address is computed in R12 first }
  4620. reference_reset(ref,4,[]);
  4621. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4622. begin
  4623. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4624. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4625. ref.base:=NR_R12;
  4626. end
  4627. else
  4628. begin
  4629. ref.base:=current_procinfo.framepointer;
  4630. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4631. end;
  4632. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4633. lastfloatreg-firstfloatreg+1,ref));
  4634. end;
  4635. end;
  4636. end;
  4637. end;
  { Emits the procedure epilogue.
    list         : assembler list the instructions are appended to
    parasize     : not referenced in this implementation
    nostackframe : when true, only "BX R14" is emitted
    Nothing is emitted for procedures marked po_noreturn.  Otherwise the FPA
    registers are reloaded with LFM (if the FPU type has FPA), the stack
    pointer is moved back over the local area and the saved integer registers
    are restored with LDMFD; when no register has to be restored the
    procedure returns with "BX R14". }
  4638. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4639. var
  4640. ref : treference;
  4641. firstfloatreg,lastfloatreg,
  4642. r : byte;
  4643. shift : byte;
  4644. regs : tcpuregisterset;
  4645. LocalSize : longint;
  4646. stackmisalignment: pint;
  4647. begin
  4648. { a routine not returning needs no exit code,
  4649. we trust this directive as arm thumb is normally used if small code shall be generated }
  4650. if po_noreturn in current_procinfo.procdef.procoptions then
  4651. exit;
  4652. if not(nostackframe) then
  4653. begin
  4654. stackmisalignment:=0;
  4655. if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
  4656. begin
  4657. { restore floating point register }
  4658. firstfloatreg:=RS_NO;
  4659. lastfloatreg:=RS_NO;
  4660. { save floating point registers? }
  { same scan as in the prologue: first and last used, non-volatile FPA register }
  4661. for r:=RS_F0 to RS_F7 do
  4662. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4663. begin
  4664. if firstfloatreg=RS_NO then
  4665. firstfloatreg:=r;
  4666. lastfloatreg:=r;
  4667. { floating point register space is already included in
  4668. localsize below by calc_stackframe_size
  4669. inc(stackmisalignment,12);
  4670. }
  4671. end;
  4672. if firstfloatreg<>RS_NO then
  4673. begin
  { reload firstfloatreg..lastfloatreg with a single LFM from floatregstart
    relative to the frame pointer; for offsets of 1023 or more the address
    is computed in R12 first }
  4674. reference_reset(ref,4,[]);
  4675. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4676. begin
  4677. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4678. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4679. ref.base:=NR_R12;
  4680. end
  4681. else
  4682. begin
  4683. ref.base:=current_procinfo.framepointer;
  4684. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4685. end;
  4686. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4687. lastfloatreg-firstfloatreg+1,ref));
  4688. end;
  4689. end;
  { integer registers to restore: used in this procedure and not volatile for stdcall }
  4690. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  { R15 takes the place of R14 in the restore set, so the LDM below loads the
    program counter and thereby returns }
  4691. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4692. begin
  4693. exclude(regs,RS_R14);
  4694. include(regs,RS_R15);
  4695. end;
  4696. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4697. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
  { 4 bytes of stack per restored register }
  4698. for r:=RS_R0 to RS_R15 do
  4699. if (r in regs) then
  4700. inc(stackmisalignment,4);
  4701. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4702. LocalSize:=current_procinfo.calc_stackframe_size;
  { release the local area under the same condition and with the same
    padding formula as the prologue uses to reserve it }
  4703. if (LocalSize<>0) or
  4704. ((stackmisalignment<>0) and
  4705. ((pi_do_call in current_procinfo.flags) or
  4706. (po_assembler in current_procinfo.procdef.procoptions))) then
  4707. begin
  4708. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  4709. if not(is_shifter_const(LocalSize,shift)) then
  4710. begin
  { size not encodable as immediate: load it into R12 and add the register }
  4711. a_reg_alloc(list,NR_R12);
  4712. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4713. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4714. a_reg_dealloc(list,NR_R12);
  4715. end
  4716. else
  4717. begin
  { NOTE(review): R12 is deallocated here without a visible a_reg_alloc in
    this procedure - confirm this is intended }
  4718. a_reg_dealloc(list,NR_R12);
  4719. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4720. end;
  4721. end;
  { nothing to restore: return through the link register; otherwise restore
    the registers (including R15) with LDMFD }
  4722. if regs=[] then
  4723. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4724. else
  4725. begin
  4726. reference_reset(ref,4,[]);
  4727. ref.index:=NR_STACK_POINTER_REG;
  4728. ref.addressmode:=AM_PREINDEXED;
  4729. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4730. end;
  4731. end
  4732. else
  4733. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4734. end;
  { Emits the load/store instruction op (with postfix oppostfix) for register
    reg and memory reference ref, after rewriting ref into a form the
    instruction can encode.
    list      : assembler list the instructions are appended to
    op        : load or store opcode
    oppostfix : postfix attached to the emitted instruction
    reg       : data register
    ref       : reference, passed by value and modified locally
    Returns the reference as it was finally used in the instruction.
    Helper instructions (constant loads, ADD/SUB, MOV) may be emitted first
    and temporary integer registers may be allocated. }
  4735. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4736. var
  4737. tmpreg : tregister;
  4738. tmpref : treference;
  4739. l : tasmlabel;
  4740. begin
  { tmpreg=NR_NO means that no helper register has been allocated yet }
  4741. tmpreg:=NR_NO;
  4742. { Be sure to have a base register }
  4743. if (ref.base=NR_NO) then
  4744. begin
  { a scaled index cannot simply be turned into the base }
  4745. if ref.shiftmode<>SM_None then
  4746. internalerror(2014020706);
  4747. ref.base:=ref.index;
  4748. ref.index:=NR_NO;
  4749. end;
  4750. { absolute symbols can't be handled directly, we've to store the symbol reference
  4751. in the text segment and access it pc relative
  4752. For now, we assume that references where base or index equals to PC are already
  4753. relative, all other references are assumed to be absolute and thus they need
  4754. to be handled extra.
  4755. A proper solution would be to change refoptions to a set and store the information
  4756. if the symbol is absolute or relative there.
  4757. }
  { The reference needs rewriting when it has a non pc-relative symbol, has
    no register at all, has an offset outside -255..4095 (outside -255..255
    for the SB/H/SH postfixes), or - for the floating point / VFP opcodes -
    an offset outside -1020..1020, an offset that is no multiple of 4, or
    any symbol.
    NOTE(review): the inner "ref.offset<-255" test repeats the general test
    a few lines above it. }
  4758. if (assigned(ref.symbol) and
  4759. not(is_pc(ref.base)) and
  4760. not(is_pc(ref.index))
  4761. ) or
  4762. { [#xxx] isn't a valid address operand }
  4763. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4764. //(ref.offset<-4095) or
  4765. (ref.offset<-255) or
  4766. (ref.offset>4095) or
  4767. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4768. ((ref.offset<-255) or
  4769. (ref.offset>255)
  4770. )
  4771. ) or
  4772. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4773. ((ref.offset<-1020) or
  4774. (ref.offset>1020) or
  4775. ((abs(ref.offset) mod 4)<>0) or
  4776. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4777. assigned(ref.symbol)
  4778. )
  4779. ) then
  4780. begin
  4781. reference_reset(tmpref,4,[]);
  4782. { load symbol }
  4783. tmpreg:=getintregister(list,OS_INT);
  4784. if assigned(ref.symbol) then
  4785. begin
  { put a constant describing symbol+offset (in the form selected by
    ref.refaddr) behind a fresh label in the procedure's local data and load
    it into tmpreg with a pc relative LDR }
  4786. current_asmdata.getjumplabel(l);
  4787. cg.a_label(current_procinfo.aktlocaldata,l);
  4788. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4789. if ref.refaddr=addr_gottpoff then
  4790. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  4791. else if ref.refaddr=addr_tlsgd then
  4792. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  4793. else if ref.refaddr=addr_tlsdesc then
  4794. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  4795. else if ref.refaddr=addr_tpoff then
  4796. begin
  { this form carries neither a relative symbol nor an offset }
  4797. if assigned(ref.relsymbol) or (ref.offset<>0) then
  4798. Internalerror(2019092807);
  4799. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  4800. end
  4801. else
  4802. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4803. { load consts entry }
  4804. tmpref.symbol:=l;
  4805. tmpref.base:=NR_R15;
  4806. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4807. { in case of LDF/STF, we got rid of the NR_R15 }
  4808. if is_pc(ref.base) then
  4809. ref.base:=NR_NO;
  4810. if is_pc(ref.index) then
  4811. ref.index:=NR_NO;
  4812. end
  4813. else
  { no symbol: only the offset is out of range, load it as a constant }
  4814. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
  { merge tmpreg into the reference: add it to the base when an index is
    already present, use it as unscaled index when only a base exists, or
    make it the base when the reference has no register left }
  4815. if (ref.base<>NR_NO) then
  4816. begin
  4817. if ref.index<>NR_NO then
  4818. begin
  4819. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4820. ref.base:=tmpreg;
  4821. end
  4822. else
  4823. begin
  4824. ref.index:=tmpreg;
  4825. ref.shiftimm:=0;
  4826. ref.signindex:=1;
  4827. ref.shiftmode:=SM_None;
  4828. end;
  4829. end
  4830. else
  4831. ref.base:=tmpreg;
  { offset and symbol are now contained in tmpreg }
  4832. ref.offset:=0;
  4833. ref.symbol:=nil;
  4834. end;
  { base + index + offset cannot be encoded together: fold the offset into a
    register (tmpreg itself if it exists, otherwise a new base register) }
  4835. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4836. begin
  4837. if tmpreg<>NR_NO then
  4838. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4839. else
  4840. begin
  4841. tmpreg:=getintregister(list,OS_ADDR);
  4842. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4843. ref.base:=tmpreg;
  4844. end;
  4845. ref.offset:=0;
  4846. end;
  4847. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  { copy the PC into a normal register and use that as base instead }
  4848. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4849. begin
  4850. tmpreg:=getintregister(list,OS_ADDR);
  4851. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4852. ref.base := tmpreg;
  4853. end;
  4854. { floating point operations have only limited references
  4855. we expect here, that a base is already set }
  { for these opcodes the index register is folded into the base with an
    explicit ADD/SUB so that only a base register remains }
  4856. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4857. begin
  4858. if ref.shiftmode<>SM_none then
  4859. internalerror(2003091202);
  4860. if tmpreg<>NR_NO then
  4861. begin
  { a helper register exists and can be overwritten with the final address }
  4862. if ref.base=tmpreg then
  4863. begin
  4864. if ref.signindex<0 then
  4865. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4866. else
  4867. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4868. ref.index:=NR_NO;
  4869. end
  4870. else
  4871. begin
  { otherwise the helper register must be the index }
  4872. if ref.index<>tmpreg then
  4873. internalerror(2004031602);
  4874. if ref.signindex<0 then
  4875. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4876. else
  4877. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4878. ref.base:=tmpreg;
  4879. ref.index:=NR_NO;
  4880. end;
  4881. end
  4882. else
  4883. begin
  { NOTE(review): this path always adds the index and does not look at
    ref.signindex, unlike the two paths above - confirm }
  4884. tmpreg:=getintregister(list,OS_ADDR);
  4885. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4886. ref.base:=tmpreg;
  4887. ref.index:=NR_NO;
  4888. end;
  4889. end;
  { finally emit the instruction itself with the adjusted reference }
  4890. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4891. Result := ref;
  4892. end;
  { Copies the contents of the mm register reg1 into the mm register reg2.

    Only the OS_F32 -> OS_F32 combination emits an instruction (VMOV.F32);
    for every other combination of fromsize/tosize nothing is added to list.
    The shuffle argument is not consulted. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      vmov: taicpu;
    begin
      { NOTE(review): the OS_F64 -> OS_F64 and OS_F32 -> OS_F64 cases used to
        be empty stubs holding only commented-out VMOV/VCVT attempts, so they
        generate no code at all - confirm that these combinations are never
        requested for this code generator }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      vmov:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1),PF_F32);
      list.concat(vmov);
      { also report the instruction through add_move_instruction }
      add_move_instruction(vmov);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads the mm register reg from the memory reference ref.

    The load is emitted as a VLDR without postfix through handle_load_store;
    fromsize, tosize and shuffle are not consulted and the value returned by
    handle_load_store is discarded. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VLDR,
        PF_None,
        reg,
        ref);
    end;
  { Stores the mm register reg to the memory reference ref.

    The store is emitted as a VSTR without postfix through handle_load_store;
    fromsize, tosize and shuffle are not consulted and the value returned by
    handle_load_store is discarded. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VSTR,
        PF_None,
        reg,
        ref);
      { VSTR cannot generate an FPU exception, so we do not need a check here }
    end;
  { Transfers the integer register intreg into the mm register mmreg with a
    single VMOV.

    Only tosize=OS_F32 is supported, any other target size raises
    internalerror 2012100813. fromsize is not consulted and the check for
    shuffle=nil is disabled. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      case tosize of
        OS_F32:
          list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
        else
          internalerror(2012100813);
      end;
    end;
  { Transfers the mm register mmreg into the integer register intreg with a
    single VMOV.

    Only fromsize=OS_F32 is supported, any other source size raises
    internalerror 2012100814. tosize is not consulted and the check for
    shuffle=nil is disabled. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      case fromsize of
        OS_F32:
          begin
            list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
            { VMOV cannot generate an FPU exception, so we do not need a check here }
          end;
        else
          internalerror(2012100814);
      end;
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is handled here, every other operation is forwarded unchanged
    to the inherited implementation. The negation is computed as 0-regsrc:
    the low word with a flag setting RSB against the constant 0, the high
    word with an SBC from a scratch register that holds 0. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      { the flags carry the borrow from the low word to the high word }
      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { the MOV is emitted without the S postfix, i.e. between the RSB and
        the SBC no further flag setting instruction is requested }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation for Thumb: regdst := regdst op regsrc
    (OP_NEG and OP_NOT: regdst := op regsrc).

    list    assembler list the instructions are appended to
    op      OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any other
            operation raises internalerror 2003083105
    size    not consulted
    regsrc  source register pair
    regdst  destination register pair (also the first operand of the binary
            operations)

    Every sequence that passes a carry/borrow from the low to the high word
    through the flags allocates NR_DEFAULTFLAGS before the first instruction
    of the carry chain and releases it again after the last one. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { regdst is cleared first and regsrc is subtracted from it
              afterwards.
              NOTE(review): this reads regsrc after regdst was overwritten,
              so it assumes that regdst and regsrc do not share registers -
              confirm with the callers }
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { the carry has been consumed by the ADC: release the flags
              again, they were left allocated before }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { the borrow has been consumed by the SBC: release the flags
              again, they were left allocated before }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg op value.

    list   assembler list the instructions are appended to
    op     OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any other operation
           raises internalerror 2003083106
    size   not consulted
    value  64 bit constant, processed as two 32 bit halves
    reg    register pair that is both operand and destination

    For OP_ADD/OP_SUB the low half is applied as an immediate if it lies in
    0..255 and through a scratch register otherwise; the high half always
    goes through a scratch register and is combined with ADC/SBC.
    NR_DEFAULTFLAGS is allocated in front of the low word instruction and
    released after the high word instruction. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      lovalue,
      hivalue : aint;
      loreg,
      hireg : tregister;
    begin
      { both halves are reinterpreted as signed 32 bit values; the high half
        of OP_SUB was passed without this cast before, unlike all other
        uses }
      lovalue:=aint(lo(value));
      hivalue:=aint(hi(value));
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_const_reg(list,op,OS_32,lovalue,reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,hivalue,reg.reghi);
          end;
        OP_ADD:
          begin
            { The high word constant is materialised before the carry chain
              starts, so that no constant load sits between the ADD and the
              ADC.
              NOTE(review): a_load_const_reg is not visible from here; if it
              uses flag setting instructions for some constants, a load
              placed between ADD and ADC could destroy the carry - confirm }
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hivalue,hireg);
            if (lovalue>=0) and (lovalue<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,lovalue));
              end
            else
              begin
                loreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,lovalue,loreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,loreg));
              end;
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,reg.reghi,hireg),PF_S));
            { the carry has been consumed: release the flags again, they
              were left allocated before }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            { see OP_ADD for the reason why the high word is loaded first }
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hivalue,hireg);
            if (lovalue>=0) and (lovalue<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,lovalue));
              end
            else
              begin
                loreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,lovalue,loreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,loreg));
              end;
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,reg.reghi,hireg),PF_S));
            { the borrow has been consumed: release the flags again, they
              were left allocated before }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects cg and cg64 for the
    selected instruction set and selects the matching assembler optimizer
    class. GenerateThumb2Code is tested before GenerateThumbCode; if neither
    holds, the ARM code generators are created. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          { casmoptimizer is left untouched for Thumb, the assignment of
            TCpuThumbAsmOptimizer is disabled:
            casmoptimizer:=TCpuThumbAsmOptimizer; }
          exit;
        end;

      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5072. end.