cgcpu.pas 224 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  28. { tbasecgarm is the code generator base class shared between all arm architectures;
  tcgarm and tthumbcgarm in this unit derive from it directly }
  29. tbasecgarm = class(tcg)
  30. { true, if the next arithmetic operation should modify the flags }
  31. cgsetflags : boolean;
  32. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  33. protected
  34. procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  { creates rg[R_MMREGISTER] according to the capabilities of the selected fpu type }
  35. procedure init_mmregister_allocator;
  36. public
  37. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
  38. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  39. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  40. { move instructions }
  41. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  42. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
  { the a_internal_load_* variants return a treference; tcgarm.a_op_const_ref reuses the
  reference returned by the load for the store that follows
  (NOTE(review): presumably the reference as it was actually used by the instruction -- confirm) }
  43. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  44. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  45. { fpu move instructions }
  46. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  47. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  48. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  49. procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
  50. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  51. { comparison operations }
  52. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  53. l : tasmlabel);override;
  54. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  55. procedure a_jmp_name(list : TAsmList;const s : string); override;
  56. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  57. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  58. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  59. procedure g_profilecode(list : TAsmList); override;
  60. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  61. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  62. procedure g_maybe_got_init(list : TAsmList); override;
  63. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
  64. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  65. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
  66. procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  67. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  68. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  69. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  70. procedure g_save_registers(list : TAsmList);override;
  71. procedure g_restore_registers(list : TAsmList);override;
  72. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  73. procedure fixref(list : TAsmList;var ref : treference);
  { virtual: tthumbcgarm and tthumb2cgarm override handle_load_store }
  74. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
  75. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  76. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  77. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  78. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  79. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  80. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  81. { Transform unsupported methods into internal errors }
  82. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
  83. { try to generate an optimized 32 bit multiplication by a constant, returns true if code was generated successfully }
  84. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  85. { clear out potential overflow bits from 8 or 16 bit operations, i.e.
  86. the upper 24/16 bits of a register after an operation }
  87. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  88. { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
  89. procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  90. procedure g_maybe_tls_init(list : TAsmList); override;
  91. end;
  92. { tcgarm is shared between normal arm and thumb-2: tarmcgarm and tthumb2cgarm derive from it.
  It adds the integer arithmetic/logic operations and the constant/memory loads to tbasecgarm }
  93. tcgarm = class(tbasecgarm)
  94. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  95. procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
  96. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  { the two following methods forward to their *_checkoverflow counterparts with setflags=false }
  97. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  98. size: tcgsize; a: tcgint; src, dst: tregister); override;
  99. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  100. size: tcgsize; src1, src2, dst: tregister); override;
  101. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  102. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  103. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  104. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  105. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  106. { Multiply two 32-bit registers into lo and hi 32-bit registers }
  107. procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  108. end;
  109. { normal arm cg: only adds creation and release of the register allocators to tcgarm }
  110. tarmcgarm = class(tcgarm)
  111. procedure init_register_allocators;override;
  112. procedure done_register_allocators;override;
  113. end;
  114. { 64 bit cg for all arm flavours; declares no members of its own and only serves as
  common ancestor of tcg64farm and tthumbcg64farm }
  115. tbasecg64farm = class(tcg64f32)
  116. end;
  117. { tcg64farm is shared between normal arm and thumb-2: 64 bit integer operations on
  tregister64 operands and moves between a tregister64 and a single mm register }
  118. tcg64farm = class(tbasecg64farm)
  119. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  120. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  121. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  122. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
  123. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  124. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  125. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  126. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  127. end;
  { 64 bit counterpart of tarmcgarm; uses tcg64farm unchanged, no members are added }
  128. tarmcg64farm = class(tcg64farm)
  129. end;
  { thumb cg: derives directly from tbasecgarm (not from tcgarm) and therefore brings its own
  integer operations, loads, entry/exit code and handle_load_store
  (NOTE(review): presumably for the 16 bit Thumb instruction set -- confirm) }
  130. tthumbcgarm = class(tbasecgarm)
  131. procedure init_register_allocators;override;
  132. procedure done_register_allocators;override;
  133. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  134. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  135. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
  136. procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
  137. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
  138. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  139. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
  140. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  141. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  142. function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  143. end;
  { 64 bit counterpart of tthumbcgarm: overrides only the two-operand 64 bit operations }
  144. tthumbcg64farm = class(tbasecg64farm)
  145. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  146. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  147. end;
  { thumb-2 cg: derives from tcgarm and overrides the register allocator setup, calls through
  a register, loads, several integer operations, entry/exit code, handle_load_store and the
  mm register moves }
  148. tthumb2cgarm = class(tcgarm)
  149. procedure init_register_allocators;override;
  150. procedure done_register_allocators;override;
  151. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  152. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  153. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  154. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  155. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  156. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  157. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  158. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  159. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  160. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
  161. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  162. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  163. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  164. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  165. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  166. end;
  { 64 bit counterpart of tthumb2cgarm: inherits everything from tcg64farm except a_op64_reg_reg }
  167. tthumb2cg64farm = class(tcg64farm)
  168. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  169. end;
  170. const
  { translation table from a generic comparison (topcmp) to the arm condition code (TAsmCond);
  the entries have to follow the declaration order of topcmp, which is declared outside this unit }
  171. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  172. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
  { NOTE(review): presumably the stack page size assumed for windows targets (stack probing);
  it is not used in the visible part of this unit -- confirm }
  173. winstackpagesize = 4096;
  { returns the instruction postfix (PF_S, PF_D or PF_E) matching the float type of def }
  174. function get_fpu_postfix(def : tdef) : toppostfix;
  { NOTE(review): presumably instantiates the code generator classes declared above;
  the implementation is outside the visible part of this unit -- confirm }
  175. procedure create_codegen;
  176. implementation
  177. uses
  178. globals,verbose,systems,cutils,
  179. aopt,aoptcpu,
  180. fmodule,
  181. symconst,symsym,symtable,
  182. tgobj,
  183. procinfo,cpupi,
  184. paramgr;
  185. { Range check must be disabled explicitly as conversions between signed and unsigned
  186. 32-bit values are done without explicit typecasts }
  187. {$R-}
  { Returns the instruction postfix that belongs to the float type of def:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Any other float type raises internal error 200401272, a def that is
    not a floatdef raises internal error 200401271. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators of the normal arm code generator.

    Integer registers (R14 is always part of the set):
      - iOS: R7 is left out and R9 is included,
      - other targets: R11 is only offered if a procinfo exists and its
        frame pointer is not R11.
    An FPU register allocator (F0..F7) is only created if the selected fpu
    type has FPA; the MM allocator is set up by init_mmregister_allocator. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we always save R14, so we can use it }
      if target_info.system=system_arm_ios then
        begin
          { r7 is not available on Darwin, it's used as frame pointer (always,
            for backtrace support -- also in gcc/clang -> R11 can be used).
            r9 is volatile }
          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[]);
        end
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        begin
          { R11 does not serve as frame pointer, so it can be allocated }
          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[]);
        end
      else
        begin
          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);
        end;

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Frees the integer, FPU and MM register allocators (in this order) and
    then calls the inherited implementation. }
  procedure tarmcgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      i : integer;
    begin
      for i:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[i]].free;
      inherited done_register_allocators;
    end;
  { Loads the constant a into the integer register reg.

    Only 8, 16 and 32 bit sizes are accepted (internal error 2002090907
    otherwise). The cheapest applicable sequence is emitted, tried in this
    order:
      1. mov reg,#a            if a is a shifter constant
      2. mvn reg,#not(a)       if not(a) is a shifter constant
      3. mov + orr             if a splits into two shifter constants
      4. mvn + bic             if not(a) splits into two shifter constants
      5. ldr reg,[pc,label]    the constant is placed in the local data of
                               the current procedure }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      shift : byte;
      poollabel : tasmlabel;
      poolref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      if is_shifter_const(a,shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_shifter_const(not(a),shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: put the value behind a fresh label into the local data
        of the procedure and load it pc relative }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      reference_reset(poolref,4,[]);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      { symboldata has to point at the label entry, so it is taken before
        the constant itself is appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from the memory reference Ref into the integer register reg.
  If fromsize is not smaller than tosize, the load is done with tosize.
  The value is fetched with a single ldr (via handle_load_store) unless
  - the reference is only 1 or 2 byte aligned and this is less than the access size, or
  - the cpu lacks CPUARM_HAS_ALL_MEM and a halfword access is needed;
  in these cases the value is assembled from byte (or halfword) loads combined with orr and a
  shifted operand. Sizes other than 8, 16 and 32 bit raise internal error 200308297. }
  277. procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  278. var
  279. oppostfix:toppostfix;
  280. usedtmpref: treference;
  281. tmpreg,tmpreg2 : tregister;
  282. so : tshifterop;
  283. dir : integer;
  284. begin
  285. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  286. FromSize := ToSize;
  { select the load postfix from the (possibly adjusted) source size }
  287. case FromSize of
  288. { signed integer registers }
  289. OS_8:
  290. oppostfix:=PF_B;
  291. OS_S8:
  292. oppostfix:=PF_SB;
  293. OS_16:
  294. oppostfix:=PF_H;
  295. OS_S16:
  296. oppostfix:=PF_SH;
  297. OS_32,
  298. OS_S32:
  299. oppostfix:=PF_None;
  300. else
  301. InternalError(200308297);
  302. end;
  { without CPUARM_HAS_ALL_MEM a signed byte is loaded as plain byte here;
  the sign extension is done by the a_load_reg_reg at the end of this method }
  303. if (fromsize=OS_S8) and
  304. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  305. oppostfix:=PF_B;
  306. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
  307. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  308. (oppostfix in [PF_SH,PF_H])) then
  309. begin
  { dir is the offset step from the part loaded first (shifted by 0) to the next part:
  on big endian targets the first part is at the highest address, so we walk downwards }
  310. if target_info.endian=endian_big then
  311. dir:=-1
  312. else
  313. dir:=1;
  314. case FromSize of
  315. OS_16,OS_S16:
  316. begin
  317. { only complicated references need an extra loadaddr }
  318. if assigned(ref.symbol) or
  319. (ref.index<>NR_NO) or
  320. (ref.offset<-4095) or
  321. (ref.offset>4094) or
  322. { sometimes the compiler reused registers }
  323. (reg=ref.index) or
  324. (reg=ref.base) then
  325. begin
  326. tmpreg2:=getintregister(list,OS_INT);
  327. a_loadaddr_ref_reg(list,ref,tmpreg2);
  328. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  329. end
  330. else
  331. usedtmpref:=ref;
  332. if target_info.endian=endian_big then
  333. inc(usedtmpref.offset,1);
  334. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  335. tmpreg:=getintregister(list,OS_INT);
  { first byte goes into reg unshifted, the second byte is or'ed in shifted left by 8;
  for OS_S16 the second byte is loaded as signed byte }
  336. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  337. inc(usedtmpref.offset,dir);
  338. if FromSize=OS_16 then
  339. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  340. else
  341. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  342. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  343. end;
  344. OS_32,OS_S32:
  345. begin
  346. tmpreg:=getintregister(list,OS_INT);
  347. { only complicated references need an extra loadaddr }
  348. if assigned(ref.symbol) or
  349. (ref.index<>NR_NO) or
  350. (ref.offset<-4095) or
  351. (ref.offset>4092) or
  352. { sometimes the compiler reused registers }
  353. (reg=ref.index) or
  354. (reg=ref.base) then
  355. begin
  356. tmpreg2:=getintregister(list,OS_INT);
  357. a_loadaddr_ref_reg(list,ref,tmpreg2);
  358. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  359. end
  360. else
  361. usedtmpref:=ref;
  362. shifterop_reset(so);so.shiftmode:=SM_LSL;
  363. if ref.alignment=2 then
  364. begin
  { 2 byte aligned: two halfword loads, the second one or'ed in shifted left by 16 }
  365. if target_info.endian=endian_big then
  366. inc(usedtmpref.offset,2);
  367. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  368. inc(usedtmpref.offset,dir*2);
  369. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  370. so.shiftimm:=16;
  371. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  372. end
  373. else
  374. begin
  { four byte loads: the bytes are or'ed into reg shifted left by 8, 16 and 24;
  tmpreg is reused for the fourth byte after its first value has been merged }
  375. tmpreg2:=getintregister(list,OS_INT);
  376. if target_info.endian=endian_big then
  377. inc(usedtmpref.offset,3);
  378. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  379. inc(usedtmpref.offset,dir);
  380. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  381. inc(usedtmpref.offset,dir);
  382. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  383. so.shiftimm:=8;
  384. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  385. inc(usedtmpref.offset,dir);
  386. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  387. so.shiftimm:=16;
  388. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  389. so.shiftimm:=24;
  390. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  391. end;
  392. end
  393. else
  394. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  395. end;
  396. end
  397. else
  398. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { post-processing of signed byte loads: sign extend manually if the byte had to be loaded
  unsigned (see above); NOTE(review): the OS_16 -> OS_32 move for tosize=OS_16 presumably
  clears the upper 16 bits left by the sign extending load -- confirm }
  399. if (fromsize=OS_S8) and
  400. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  401. a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
  402. else if (fromsize=OS_S8) and (tosize = OS_16) then
  403. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  404. end;
  { Subtracts ioffset from the 'self' parameter of procdef at every caller
    side location of that parameter.

    Register locations are adjusted in place. Reference locations are
    adjusted in memory, with the offset increased by sizeof(aint) for the
    stored return address. If ioffset is no shifter constant it is first
    loaded into R12. A missing 'self' parameter raises internal error
    2003052503, an unsupported location kind internal error 2003091803. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
      shift : byte;
      fitsimmediate : boolean;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(2003052503);

      { the offset is the same for all locations, so it is classified once }
      fitsimmediate:=is_shifter_const(ioffset,shift);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if fitsimmediate then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if fitsimmediate then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,selfref);
                  end;
              end;
            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as parameter described by paraloc.

    paraloc must be a simple location. A register location receives the
    constant through a_load_const_reg, a reference location through
    a_load_const_ref using the index register and offset of the parameter
    location. Other location kinds raise internal error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackref: treference;
      loc: PCGParaLocation;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      loc:=paraloc.location;
      case loc^.loc of
        LOC_REGISTER,LOC_CREGISTER:
          a_load_const_reg(list,size,a,loc^.register);
        LOC_REFERENCE:
          begin
            reference_reset(stackref,paraloc.alignment,[]);
            stackref.base:=loc^.reference.index;
            stackref.offset:=loc^.reference.offset;
            a_load_const_ref(list,size,a,stackref);
          end;
        else
          internalerror(2002081101);
      end;
    end;
  { Copies (a part of) a parameter from ref to the parameter location
    reference paralocref.

    A 32 bit location that belongs to an OS_F64 parameter is copied with a
    4 byte g_concatcopy; everything else is handled by the inherited
    implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Creates the MM register allocator matching the selected fpu type:
      - 32 registers with VFP double support: D0..D31 (R_SUBFD)
      - 32 registers without double support:  S0..S31 (R_SUBFS)
      - otherwise, with the VFP extension:    D0..D15 (R_SUBFD)
    If none of the capabilities is present, no allocator is created. }
  procedure tbasecgarm.init_mmregister_allocator;
    var
      has32regs,
      hasdouble,
      hasvfp : boolean;
    begin
      has32regs:=FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype];
      hasdouble:=FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype];
      hasvfp:=FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype];

      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if has32regs then
        begin
          if hasdouble then
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
              [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
               RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
              ],first_mm_imreg,[])
          else
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
              [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
               RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
               RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
              ],first_mm_imreg,[]);
        end
      else if hasvfp then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
          ],first_mm_imreg,[]);
    end;
  { Passes the address of the reference r as parameter described by paraloc.

    paraloc must be a simple location. For a register location the address
    is computed directly into the parameter register; for a reference
    location it is computed into a temporary register and then stored.
    Other location kinds raise internal error 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      stackref: treference;
      addrreg: tregister;
      loc: PCGParaLocation;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      loc:=paraloc.location;
      case loc^.loc of
        LOC_REGISTER,LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,loc^.register);
        LOC_REFERENCE:
          begin
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            reference_reset(stackref,paraloc.alignment,[]);
            stackref.base:=loc^.reference.index;
            stackref.offset:=loc^.reference.offset;
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,stackref);
          end;
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call (bl) to the routine with the assembler name s.

    With weak=true the symbol is referenced as weak symbol. The reference
    is marked addr_pic if the target uses a GOT for PIC and PIC code is
    being generated, addr_full otherwise. pi_do_call is added to the flags
    of the current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);

      reference_reset_symbol(target,callee,0,sizeof(pint),[]);
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        target.refaddr:=addr_pic
      else
        target.refaddr:=addr_full;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,target));

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set
        here instead of being checked with internal error 2003060703 }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg.

    CPUs with blx get a single "blx reg"; all others get the pair
    "mov r14,pc" / "mov pc,reg". pi_do_call is added to the flags of the
    current procedure. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set
        here instead of being checked with internal error 2003060703 }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Applies "reg := reg op a" by delegating to the three operand variant
    with reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,
                         op,
                         size,
                         a,
                         reg,    { source }
                         reg);   { destination }
    end;
  { Applies "ref := ref op a" on a memory operand.

    The value is loaded into a temporary register, the operation writes
    its result into a second temporary register and the result is stored
    back through the reference returned by the load. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      storeref : treference;
    begin
      { allocation order is kept: value register first, result register second }
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);

      storeref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,storeref);
    end;
  { Applies the operation op with source src and destination dst.

    OP_NEG: "rsb dst,src,#0" followed by maybeadjustresult.
    OP_NOT: 32 bit sizes use a plain mvn; 8 and 16 bit sizes use
            "mvn dst,src,lsl #n" and "mov dst,dst,lsr/asr #n" with n=24
            for 8 bit and n=16 for 16 bit (asr for the signed sizes).
    Every other operation is forwarded to a_op_reg_reg_reg with dst as
    second source and destination. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                { move the value to the top of the register while inverting it }
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shiftop.shiftimm:=24
                else
                  shiftop.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));

                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8, OS_S16] then
                  shiftop.shiftmode:=SM_ASR
                else
                  shiftop.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  627. const
  { The three tables below are indexed by TOpCG, so their 18 entries have to follow the
  declaration order of TOpCG (declared outside this unit).
  NOTE(review): A_NONE presumably marks operations which have no direct instruction in the
  respective table; the users of the tables are outside the visible part of this unit -- confirm }
  { same as op_reg_opcg2asmop below, but without the shift and rotate instructions }
  628. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  629. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  630. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
  { variant that additionally contains A_ASR, A_LSL, A_LSR and A_ROR }
  631. op_reg_opcg2asmop: array[TOpCG] of tasmop =
  632. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  633. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { instruction postfix per operation, PF_S or PF_None
  (NOTE(review): judging by the name used by the thumb code generator -- confirm) }
  634. op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
  635. (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
  636. PF_None,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { Applies "dst := src op a" without overflow checking: forwards to
    a_op_const_reg_reg_checkoverflow with setflags=false. The overflow
    location filled in by that call is discarded. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,
        false,unusedovloc);
    end;
  { Applies the three register operation op on src1, src2 and dst without
    overflow checking: forwards to a_op_reg_reg_reg_checkoverflow with
    setflags=false. The overflow location filled in by that call is
    discarded. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,
        false,unusedovloc);
    end;
  { Maps a shift/rotate operation to the shifter-operand mode used to encode
    it. OP_ROL maps to SM_ROR as well; the callers in this unit compensate by
    rotating right by 32 minus the requested amount. Any other operation
    raises an internal error. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace the 32 bit multiplication dst:=src*a by a short sequence
    of shifted MOV/ADD/RSB/SUB instructions appended to list.

    Returns true when a replacement was emitted (this includes the trivial
    factors 0, 1 and -1), false when the heuristics decide that a real
    multiplication is cheaper; in that case nothing is emitted here. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      multiplier : dword;
      power : longint;
      shifterop : tshifterop;
      bitsset : byte;
      negative : boolean;
      first, doshiftadd: boolean;
      b,
      cycles : byte;
      maxeffort : byte;
      leftmostbit,i,shiftvalue: DWord;
    begin
      result:=true;
      cycles:=0;
      negative:=a<0;
      shifterop.rs:=NR_NO;
      shifterop.shiftmode:=SM_LSL;
      { a negative factor is handled by multiplying with its magnitude and
        negating afterwards (final RSB below), which costs one instruction }
      if negative then
        inc(cycles);
      multiplier:=dword(abs(a));
      { heuristics to estimate how many instructions are reasonable to replace the mul,
        this is currently based on XScale timings }
      { in the simplest case, we need a mov to load the constant and a mul to carry out the
        actual multiplication, this requires min. 1+4 cycles

        because the first shift imm. might cause a stall and because we need more instructions
        when replacing the mul we generate max. 3 instructions to replace this mul }
      maxeffort:=3;

      { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
        a ldr, so generating one more operation to replace this is beneficial }
      if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
        inc(maxeffort);

      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(maxeffort);

      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(maxeffort);

      { "symmetric" bit pattern like $10101010 where
        res:=a*$10101010 can be simplified into
        temp:=a*$1010
        res:=temp+temp shl 16
      }
      doshiftadd:=false;
      leftmostbit:=BsrDWord(multiplier);
      shiftvalue:=0;
      if (maxeffort>1) and (leftmostbit>2) then
        begin
          for i:=2 to 31 do
            { The upper part must be greater than 1: a match with upper part 1
              means multiplier=2^i+1, which the add-up loop below already
              handles with a single ADD. Reducing it to 1 would make that loop
              emit "ADD dst,src,src lsl 0" and then underflow multiplier. }
            if ((multiplier shr i)>1) and
               ((multiplier shr i)=(multiplier and ($ffffffff shr (32-i)))) then
              begin
                doshiftadd:=true;
                shiftvalue:=i;
                dec(maxeffort);
                { only the upper copy is multiplied out; the lower copy is
                  restored by the final "ADD dst,dst,dst lsl shiftvalue" }
                multiplier:=multiplier shr shiftvalue;
                break;
              end;
        end;

      { number of set bits in the (possibly reduced) factor, not counting bit 0 }
      bitsset:=popcnt(multiplier and $fffffffe);

      { most simple cases }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { add up ?

        basically, one add is needed for each bit being set in the constant factor
        however, the least significant bit is for free, it can be hidden in the initial
        instruction
      }
      else if (bitsset+cycles<=maxeffort) and
        (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
        begin
          first:=true;
          while multiplier<>0 do
            begin
              { consume the highest remaining bit in every iteration }
              shifterop.shiftimm:=BsrDWord(multiplier);
              if odd(multiplier) then
                begin
                  { bit 0 is consumed together with the highest bit:
                    dst:=src+(src shl n) }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
              first:=false;
              dec(multiplier,1 shl shifterop.shiftimm);
            end;
          if doshiftadd then
            begin
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
        begin
          first:=true;
          while multiplier<>0 do
            begin
              if first then
                begin
                  { switch to the difference 2^power-multiplier: start from
                    src shl power and subtract one shifted src per set bit }
                  multiplier:=(1 shl power)-multiplier;
                  shifterop.shiftimm:=power;
                end
              else
                shifterop.shiftimm:=BsrDWord(multiplier);

              if odd(multiplier) then
                begin
                  { dst:=(src shl power)-src, consumes bit 0 of the difference }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
                    dec(multiplier,1 shl shifterop.shiftimm);
                  end;
              first:=false;
            end;
          if doshiftadd then
            begin
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits code for dst := src op a with a constant operand a.

    list      instruction list the code is appended to
    setflags  request that the instruction updates the condition flags; the
              object field cgsetflags has the same effect
    ovloc     out: LOC_VOID unless flags were requested for an OP_ADD/OP_SUB
              of size OS_8/OS_16/OS_32 that fits an immediate, in which case
              it is LOC_FLAGS with F_CS (add) or F_CC (sub). When the generic
              fallback at the end is taken, ovloc is whatever
              a_op_reg_reg_reg_checkoverflow reports.

    The routine picks the cheapest encoding it knows: immediate operand,
    shift, shift/add replacements for multiplications, BIC or shift pairs for
    AND masks, two-immediate splits, and finally loading the constant into a
    temporary register. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
      imm1, imm2: DWord;
    begin
      { initialise the out parameter before any early exit, as
        a_op_reg_reg_reg_checkoverflow does, so callers never read an
        undefined overflow location }
      ovloc.loc:=LOC_VOID;
      optimize_op_const(size, op, a);
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
        else
          ;
      end;
      { if -a fits an immediate, flip ADD<->SUB and negate the constant;
        only done when the caller did not ask for flags via setflags }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { a rotate left is encoded as rotate right by 32-a }
              if op = OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            begin
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              { the S postfix is selected by multiplying ord(PF_S) with the
                boolean "flags requested" }
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
            end;
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                  else
                    internalerror(2019050922);
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
                  is_continuous_mask(aword(a), lsb, width) and
                  ((lsb = 0) or ((lsb + width) = 32)) then
            begin
              shifterop_reset(so);
              if (width = 16) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width = 8) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb = 0 then
                begin
                  { mask of the low bits: shift the unwanted top bits out
                    and back in as zeros }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask of the high bits: shift the unwanted low bits out
                    and back in as zeros }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
          else
            begin
              { generic fallback: materialise the constant in a register }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits code for dst := src2 op src1 on registers.

    list      instruction list the code is appended to
    setflags  request that the instruction updates the condition flags; the
              object field cgsetflags has the same effect
    ovloc     out: LOC_VOID, except for a flag-setting multiplication on CPUs
              with UMULL/SMULL, where it is LOC_FLAGS/F_NE after comparing the
              high result word against the expected value

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported here and raise an
    internal error. Rotates are only supported for OS_32/OS_S32. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { dst := src2 shifted by the amount held in src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { 32x32->64 multiplication; overflowreg receives the high word }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { all three registers are identical: copy the operand
                          into a fresh register so that rm differs from rd
                          (the copy previously went into dst, leaving tmpreg
                          undefined) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension of
                      the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { see above: the operand copy has to end up in tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { the S postfix is selected by multiplying ord(PF_S) with the
              boolean "flags requested" }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result in the register pair
    dstlo/dsthi. Either destination may be passed as NR_NO when the caller
    does not need it. Without UMULL/SMULL support only the low word can be
    produced (plain MUL); asking for the high word then raises an internal
    error. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          { no long multiply available: 32x32=32 bit is all we can do }
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_S32:
          mulop:=A_SMULL;
        OS_32:
          mulop:=A_UMULL;
        else
          InternalError(2014060802);
      end;
      { The caller might omit dstlo or dsthi, when he is not interested in it, we still
        need valid registers everywhere. In case of dsthi = NR_NO we could fall back to
        32x32=32 bit multiplication}
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits one load/store instruction "op" with postfix "oppostfix" for
    register reg and memory reference ref, after rewriting the reference into
    a form the instruction can encode (fixref for unsupported symbols/offsets,
    extra registers for Thumb restrictions, folding of base+index+offset).

    ref is taken by value and modified locally; the function result is the
    reference that was actually used in the emitted instruction. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }

      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if tmpreg1<>NR_NO then
            begin
              { NOTE(review): tmpreg2 receives tmpreg1+offset but is not
                written back to ref.base/ref.index before the offset is
                cleared below; it looks like the offset is lost unless a
                later branch picks tmpreg1 up again - confirm intended }
              tmpreg2:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
              tmpreg1:=tmpreg2;
            end
          else
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              { honour the sign of the index exactly like the two branches
                above do; an unconditional ADD would address base+index
                for a negative index }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register reg to memory at ref using the width given by tosize.

    When the reference is only 1 or 2 byte aligned and narrower than the
    store, or when a halfword store is needed on a CPU lacking
    CPUARM_HAS_ALL_MEM, a 16/32 bit value is written piecewise with narrower
    stores; the pieces are laid out according to the target endianness.
    Everything else is a single STR via handle_load_store. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      cursor : treference;
      shifted : tregister;
      step, piece : integer;
      bigendian, piecewise : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          postfix:=PF_None;
        else
          InternalError(2003082912);
      end;

      piecewise:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((postfix=PF_H) and
         not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]));

      { the common case, and every source size that is not split up }
      if not(piecewise) or not(FromSize in [OS_16,OS_S16,OS_32,OS_S32]) then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      bigendian:=target_info.endian=endian_big;
      { direction in which the address moves from the least significant
        piece towards the most significant one }
      if bigendian then
        step:=-1
      else
        step:=1;

      shifted:=getintregister(list,OS_INT);
      cursor:=ref;
      if FromSize in [OS_16,OS_S16] then
        begin
          { two byte stores }
          if bigendian then
            inc(cursor.offset,1);
          cursor:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,cursor);
          inc(cursor.offset,step);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,cursor);
        end
      else if ref.alignment=2 then
        begin
          { two halfword stores }
          if bigendian then
            inc(cursor.offset,2);
          cursor:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,cursor);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
          inc(cursor.offset,step*2);
          a_internal_load_reg_ref(list,OS_16,OS_16,shifted,cursor);
        end
      else
        begin
          { four byte stores: the first takes reg itself, the other three
            take the value shifted right by a further 8 bits each time }
          if bigendian then
            inc(cursor.offset,3);
          cursor:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,cursor);
          for piece:=1 to 3 do
            begin
              if piece=1 then
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted)
              else
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
              inc(cursor.offset,step);
              a_internal_load_reg_ref(list,OS_8,OS_8,shifted,cursor);
            end;
        end;
    end;
  { Stores reg to ref with the width given by tosize and returns the
    reference used by the (first) emitted store. On CPUs lacking
    CPUARM_HAS_ALL_MEM a 16 bit store is emitted as two byte stores: the low
    byte at the reference, the next byte at offset+1. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      hiref : treference;
      hibyte : TRegister;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      { plain store whenever the CPU can do it directly }
      if not(tosize in [OS_S16,OS_16]) or
         (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        begin
          result:=handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { halfword store split into two byte stores }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      hibyte:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibyte);
      hiref:=result;
      inc(hiref.offset);
      handle_load_store(list,A_STR,PF_B,hibyte,hiref);
    end;
  { Loads reg from Ref; the load postfix is derived from fromsize. Returns the
    reference used by the (first) emitted load. On CPUs lacking
    CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned and then extended via
    a_load_reg_reg, and a 16 bit value is assembled from two byte loads
    combined with ORR ... LSL #8. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shiftop : tshifterop;
      hibyte : TRegister;
      hiref : treference;
      limitedmem : boolean;
    begin
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limitedmem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      if limitedmem and (tosize=OS_S8) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limitedmem and (tosize in [OS_S16,OS_16]) then
        begin
          { low byte into reg, following byte into a temp, then merge }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          hibyte:=getintregister(list,OS_INT);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_LDR,PF_B,hibyte,hiref);
          shifterop_reset(shiftop);
          shiftop.shiftmode:=SM_LSL;
          shiftop.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibyte,shiftop));
        end
      else
        result:=handle_load_store(list,A_LDR,postfix,reg,ref);
    end;
  { Copies reg1 into reg2, converting from fromsize to tosize.

    When the sizes differ, the value is zero or sign extended according to
    the narrower of the two sizes: with shifts/AND before ARMv6, with the
    UXT*/SXT* instructions from ARMv6 on. When no conversion instruction was
    emitted and the registers differ, a plain MOV is generated and reported
    to the register allocator as a move. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      shiftop : tshifterop;

    { reg2 := reg shifted by shiftimm using shiftmode; Thumb code goes
      through a_op_const_reg_reg, ARM code uses MOV with a shifter operand }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            shiftop.shiftmode:=shiftmode;
            shiftop.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,shiftop));
            exit;
          end;
        case shiftmode of
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    var
      movinstr : taicpu;
      converted : boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(shiftop);
          converted:=true;
          { the narrower size decides which extension is needed }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype>=cpu_armv6 then
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize<>OS_16 then
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1))
                  else
                    begin
                      shiftop.shiftmode:=SM_ROR;
                      shiftop.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,shiftop));
                      do_shift(SM_LSR,16,reg2);
                    end;
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                converted:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                converted:=false;
            end;
        end;

      if not converted and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.

    The parameter may be split over several locations; href is a running
    cursor into the source memory that is advanced by the size of each
    location after it has been filled. Every location is loaded from href
    (not from the start ref), so a second or later location receives the
    matching part of the value. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,paraloc.location);
                { load from the cursor like all other branches; identical to
                  ref for the first location }
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { FPU register move reg2 := reg1 using MVF; the opcode postfix is looked
    up from tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register reg from memory at ref with LDF; the precision postfix
    is chosen from fromsize. When tosize differs, a register-to-register
    move with the target size follows. Unsupported sizes raise an internal
    error. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_32, OS_F32:
          precision:=PF_S;
        OS_64, OS_F64:
          precision:=PF_D;
        OS_F80:
          precision:=PF_E;
        else
          InternalError(200309021);
      end;

      handle_load_store(list,A_LDF,precision,reg,ref);

      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref using STF. The postfix of
    the store is selected from tosize (OS_F32 -> S, OS_F64 -> D, OS_F80 -> E);
    any other size raises an internal error. fromsize is not evaluated. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      storepostfix : toppostfix;
    begin
      if tosize=OS_F32 then
        storepostfix:=PF_S
      else if tosize=OS_F64 then
        storepostfix:=PF_D
      else if tosize=OS_F80 then
        storepostfix:=PF_E
      else
        InternalError(200309022);
      handle_load_store(list,A_STF,storepostfix,reg,ref);
    end;
  { Emits an explicit check of the FPSCR and a call to FPC_THROWFPUEXCEPTION
    if any of the bits selected by the mask $9f is set.
    Nothing is generated unless fpu exception checking is enabled in the local
    switches, the selected fpu type lacks FPUARM_HAS_EXCEPTION_TRAPPING and
    either force is set or the current procedure has a check pending.
    If clear is set, the pending-check state of the procedure is reset after
    the check has been generated. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipjump : taicpu;
      noexception : TAsmLabel;
    begin
      if not(cs_check_fpu_exceptions in current_settings.localswitches) or
         (FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype]) or
         not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { read the status register and isolate the bits of interest }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { skip the call if none of them is set }
      current_asmdata.getjumplabel(noexception);
      skipjump:=taicpu.op_sym(A_B,noexception);
      skipjump.is_jmp:=true;
      skipjump.condition:=C_EQ;
      list.concat(skipjump);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      a_label(list,noexception);
      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1622. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.
    The constant is used as immediate of a CMP if it can be encoded, as
    immediate of a CMN (with -a) in ARM mode if that can be encoded, and is
    otherwise loaded into a temporary register first. The flags register is
    marked as allocated around the compare and the conditional jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      shift : byte;
      encodable : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { can a be used directly as immediate operand of CMP? }
      if GenerateThumbCode then
        encodable:=is_thumb_imm(a)
      else
        encodable:=is_shifter_const(a,shift);
      if encodable then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if not(GenerateThumbCode) and (a<>$7fffffff) and (a<>-1) and is_shifter_const(-a,shift) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Generates a bit scan of src into dst.
    reverse=true : dst:=(31-CLZ(src)) and 255
    reverse=false: dst:=CLZ(RBIT(src)), replaced by $ff if that count is 32
    srcsize and dstsize are not evaluated. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        { it is decided during the compilation of the system unit if this code is used or not
          so no additional check for rbit is needed }
        begin
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { Thumb-2 needs an IT instruction in front of the conditional move }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags register is marked as allocated for the duration of both
    instructions. size is not evaluated. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s. BL is used
    when generating Thumb code, B otherwise; the instruction is flagged as a
    jump in both cases. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jumpop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to the label l. BL is used when generating
    Thumb code, B otherwise; the instruction is flagged as a jump in both
    cases. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jumpop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Jumps to l if the flags condition f holds.
    Outside of Thumb code a single conditional branch is emitted. For Thumb
    code a conditional branch with the inverted condition skips over an
    unconditional jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materialises the flags condition f in reg using two conditional moves:
    reg:=1 under the condition of f, reg:=0 under the inverse condition.
    size is not evaluated. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      truecond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Emits the profiling hook: R14 is pushed and __gnu_mcount_nc is called.
    Only the arm-linux target is handled, every other target raises an
    internal error. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Generates the procedure prologue.
    list         - assembler list the instructions are appended to
    localsize    - size of the local stack area; rounded up to a multiple of 4
                   and, if stack space is allocated, further adjusted so that
                   register area + locals match the local alignment
    nostackframe - if true, no instructions are emitted at all

    With a stack frame the following is done in this order:
      1. determine which FPA/VFP registers have to be stored
      2. store the integer registers with STM (one STM on non-Darwin targets,
         up to two on Darwin) and set up the frame pointer
      3. allocate the local stack area by subtracting from the stack pointer
      4. store the FPA registers (SFM) or VFP registers (VSTM) at floatregstart }
  1739. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1740. var
  1741. ref : treference;
  1742. shift : byte;
  1743. firstfloatreg,lastfloatreg,
  1744. r : byte;
  1745. mmregs,
  1746. regs, saveregs : tcpuregisterset;
  1747. registerarea, offset,
  1748. r7offset,
  1749. stackmisalignment : pint;
  1750. imm1, imm2: DWord;
  1751. stack_parameters : Boolean;
  1752. begin
  1753. LocalSize:=align(LocalSize,4);
  1754. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1755. { call instruction does not put anything on the stack }
  1756. registerarea:=0;
  { High(TSuperRegister) marks "no padding register"; g_proc_exit reads this
    field back and only treats values below RS_R4 as a padding register }
  1757. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1758. lastfloatreg:=RS_NO;
  1759. if not(nostackframe) then
  1760. begin
  1761. firstfloatreg:=RS_NO;
  1762. mmregs:=[];
  { step 1: collect the floating point / vector registers which are used in
    the procedure and are not volatile }
  1763. case current_settings.fputype of
  1764. fpu_none,
  1765. fpu_soft,
  1766. fpu_libgcc:
  1767. ;
  1768. fpu_fpa,
  1769. fpu_fpa10,
  1770. fpu_fpa11:
  1771. begin
  1772. { save floating point registers? }
  1773. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1774. for r:=RS_F0 to RS_F7 do
  1775. if r in regs then
  1776. begin
  1777. if firstfloatreg=RS_NO then
  1778. firstfloatreg:=r;
  1779. lastfloatreg:=r;
  1780. inc(registerarea,12);
  1781. end;
  1782. end;
  1783. else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
  1784. begin;
  1785. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1786. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1787. as the even ones but with a different subtype as it is done on x86 with al/ah }
  1788. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1789. end
  1790. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  1791. begin;
  1792. { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
  1793. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1794. as the even ones but with a different subtype as it is done on x86 with al/ah }
  1795. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
  1796. end
  1797. else
  1798. internalerror(2019050924);
  1799. end;
  1800. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1801. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1802. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  { step 2: store the integer registers; ref addresses the stack pointer in
    pre-indexed mode and is shared by all STM instructions below }
  1803. { save int registers }
  1804. reference_reset(ref,4,[]);
  1805. ref.index:=NR_STACK_POINTER_REG;
  1806. ref.addressmode:=AM_PREINDEXED;
  1807. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1808. if not(target_info.system in systems_darwin) then
  1809. begin
  1810. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1811. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1812. begin
  { keep a copy of the entry stack pointer in R12; it is stored by the STM
    and used below to derive the frame pointer }
  1813. a_reg_alloc(list,NR_R12);
  1814. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1815. end;
  1816. { the (old) ARM APCS requires saving both the stack pointer (to
  1817. crawl the stack) and the PC (to identify the function this
  1818. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1819. and R15 -- still needs updating for EABI and Darwin, they don't
  1820. need that }
  1821. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1822. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1823. else
  1824. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1825. include(regs,RS_R14);
  1826. if regs<>[] then
  1827. begin
  1828. for r:=RS_R0 to RS_R15 do
  1829. if r in regs then
  1830. inc(registerarea,4);
  1831. { if the stack is not 8 byte aligned, try to add an extra register,
  1832. so we can avoid the extra sub/add ...,#4 later (KB) }
  1833. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1834. for r:=RS_R3 downto RS_R0 do
  1835. if not(r in regs) then
  1836. begin
  1837. regs:=regs+[r];
  1838. inc(registerarea,4);
  1839. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1840. break;
  1841. end;
  1842. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1843. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
  1844. end;
  1845. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1846. begin
  { emit one CFI offset record per stored register, starting at -4 for the
    highest register number and descending by 4 }
  1847. offset:=-4;
  1848. for r:=RS_R15 downto RS_R0 do
  1849. if r in regs then
  1850. begin
  1851. current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),offset);
  1852. dec(offset,4);
  1853. end;
  1854. { the framepointer now points to the saved R15, so the saved
  1855. framepointer is at R11-12 (for get_caller_frame) }
  1856. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1857. a_reg_dealloc(list,NR_R12);
  1858. current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
  1859. current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
  1860. end;
  1861. end
  1862. else
  1863. begin
  1864. { always save r14 if we use r7 as the framepointer, because
  1865. the parameter offsets are hardcoded in advance and always
  1866. assume that r14 sits on the stack right behind the saved r7
  1867. }
  1868. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1869. include(regs,RS_FRAME_POINTER_REG);
  1870. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1871. include(regs,RS_R14);
  1872. if regs<>[] then
  1873. begin
  1874. { on Darwin, you first have to save [r4-r7,lr], and then
  1875. [r8,r10,r11] and make r7 point to the previously saved
  1876. r7 so that you can perform a stack crawl based on it
  1877. ([r7] is previous stack frame, [r7+4] is return address)
  1878. }
  1879. include(regs,RS_FRAME_POINTER_REG);
  1880. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset counts the bytes taken by the stored registers with a number
    below the frame pointer register }
  1881. r7offset:=0;
  1882. for r:=RS_R0 to RS_R15 do
  1883. if r in saveregs then
  1884. begin
  1885. inc(registerarea,4);
  1886. if r<RS_FRAME_POINTER_REG then
  1887. inc(r7offset,4);
  1888. end;
  1889. { save the registers }
  1890. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1891. { make r7 point to the saved r7 (regardless of whether this
  1892. frame uses the framepointer, for backtrace purposes) }
  1893. if r7offset<>0 then
  1894. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1895. else
  1896. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1897. { now save the rest (if any) }
  1898. saveregs:=regs-saveregs;
  1899. if saveregs<>[] then
  1900. begin
  1901. for r:=RS_R8 to RS_R11 do
  1902. if r in saveregs then
  1903. inc(registerarea,4);
  1904. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1905. end;
  1906. end;
  1907. end;
  { step 3: allocate the local stack area. Space is also allocated without
    locals if the register area is misaligned and the procedure performs
    calls or is an assembler procedure }
  1908. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1909. if (LocalSize<>0) or
  1910. ((stackmisalignment<>0) and
  1911. ((pi_do_call in current_procinfo.flags) or
  1912. (po_assembler in current_procinfo.procdef.procoptions))) then
  1913. begin
  1914. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  1915. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1916. begin
  1917. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1918. internalerror(2014030901)
  1919. else
  1920. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1921. end;
  { subtract with one immediate, with two immediates, or via a constant
    loaded into R12, depending on what can be encoded }
  1922. if is_shifter_const(localsize,shift) then
  1923. begin
  1924. a_reg_dealloc(list,NR_R12);
  1925. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1926. end
  1927. else if split_into_shifter_const(localsize, imm1, imm2) then
  1928. begin
  1929. a_reg_dealloc(list,NR_R12);
  1930. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1931. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1932. end
  1933. else
  1934. begin
  1935. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1936. a_reg_alloc(list,NR_R12);
  1937. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1938. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1939. a_reg_dealloc(list,NR_R12);
  1940. end;
  1941. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1942. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
  1943. end;
  { step 4: store the floating point / vector registers collected in step 1 }
  1944. if (mmregs<>[]) or
  1945. (firstfloatreg<>RS_NO) then
  1946. begin
  1947. reference_reset(ref,4,[]);
  { the address of the save area is computed into R12 if the offset is
    large (>=1023) or a VFP is used; otherwise it is addressed directly as
    frame pointer + floatregstart }
  1948. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1949. (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
  1950. begin
  1951. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1952. begin
  1953. a_reg_alloc(list,NR_R12);
  1954. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1955. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1956. a_reg_dealloc(list,NR_R12);
  1957. end
  1958. else
  1959. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1960. ref.base:=NR_R12;
  1961. end
  1962. else
  1963. begin
  1964. ref.base:=current_procinfo.framepointer;
  1965. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1966. end;
  1967. case current_settings.fputype of
  1968. fpu_fpa,
  1969. fpu_fpa10,
  1970. fpu_fpa11:
  1971. begin
  1972. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1973. lastfloatreg-firstfloatreg+1,ref));
  1974. end;
  1975. else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
  1976. begin
  { VSTM takes the address register in the index field of the reference }
  1977. ref.index:=ref.base;
  1978. ref.base:=NR_NO;
  1979. if mmregs<>[] then
  1980. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1981. end
  1982. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  1983. begin
  1984. ref.index:=ref.base;
  1985. ref.base:=NR_NO;
  1986. if mmregs<>[] then
  1987. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
  1988. end
  1989. else
  1990. internalerror(2019050923);
  1991. end;
  1992. end;
  1993. end;
  1994. end;
  { Generates the procedure epilogue.
    list         - assembler list the instructions are appended to
    parasize     - not evaluated by this implementation
    nostackframe - if true, only the return instruction is emitted
                   (BX R14 if the cpu has CPUARM_HAS_BX, MOV PC,R14 otherwise)

    With a stack frame the following is done in this order:
      1. reload the FPA registers (LFM) or VFP registers (VLDM) from floatregstart
      2. compute the set of integer registers to reload; R15 replaces R14 in
         that set so that the LDM also performs the return
      3. without a frame pointer (or on Darwin): release the local stack area
         and reload the registers from the stack pointer, or return with
         BX/MOV if no register has to be reloaded
      4. with a frame pointer (non-Darwin): reload everything, including the
         stack pointer, relative to the frame pointer }
  1995. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1996. var
  1997. ref : treference;
  1998. LocalSize : longint;
  1999. firstfloatreg,lastfloatreg,
  2000. r,
  2001. shift : byte;
  2002. mmregs,
  2003. saveregs,
  2004. regs : tcpuregisterset;
  2005. registerarea,
  2006. stackmisalignment: pint;
  2007. paddingreg: TSuperRegister;
  2008. imm1, imm2: DWord;
  2009. begin
  2010. if not(nostackframe) then
  2011. begin
  2012. registerarea:=0;
  2013. firstfloatreg:=RS_NO;
  2014. lastfloatreg:=RS_NO;
  2015. mmregs:=[];
  2016. saveregs:=[];
  { step 1a: collect the floating point / vector registers which are used
    in the procedure and are not volatile }
  2017. case current_settings.fputype of
  2018. fpu_none,
  2019. fpu_soft,
  2020. fpu_libgcc:
  2021. ;
  2022. fpu_fpa,
  2023. fpu_fpa10,
  2024. fpu_fpa11:
  2025. begin
  2026. { restore floating point registers? }
  2027. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  2028. for r:=RS_F0 to RS_F7 do
  2029. if r in regs then
  2030. begin
  2031. if firstfloatreg=RS_NO then
  2032. firstfloatreg:=r;
  2033. lastfloatreg:=r;
  2034. { floating point register space is already included in
  2035. localsize below by calc_stackframe_size
  2036. inc(registerarea,12);
  2037. }
  2038. end;
  2039. end;
  2040. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  2041. begin
  2042. { restore vfp registers? }
  2043. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  2044. they have numbers>$1f which is not really correct as they should simply have the same numbers
  2045. as the even ones but with a different subtype as it is done on x86 with al/ah }
  2046. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  2047. end
  2048. else
  2049. internalerror(2019050908);
  2050. end;
  { step 1b: reload them; the address of the save area is computed into R12
    if the offset is large (>=1023) or a VFP is used, otherwise it is
    addressed directly as frame pointer + floatregstart }
  2051. if (firstfloatreg<>RS_NO) or
  2052. (mmregs<>[]) then
  2053. begin
  2054. reference_reset(ref,4,[]);
  2055. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  2056. (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
  2057. begin
  2058. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  2059. begin
  2060. a_reg_alloc(list,NR_R12);
  2061. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  2062. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  2063. a_reg_dealloc(list,NR_R12);
  2064. end
  2065. else
  2066. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  2067. ref.base:=NR_R12;
  2068. end
  2069. else
  2070. begin
  2071. ref.base:=current_procinfo.framepointer;
  2072. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  2073. end;
  2074. case current_settings.fputype of
  2075. fpu_fpa,
  2076. fpu_fpa10,
  2077. fpu_fpa11:
  2078. begin
  2079. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2080. lastfloatreg-firstfloatreg+1,ref));
  2081. end;
  2082. else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
  2083. begin
  { VLDM takes the address register in the index field of the reference }
  2084. ref.index:=ref.base;
  2085. ref.base:=NR_NO;
  2086. if mmregs<>[] then
  2087. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2088. end
  2089. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  2090. begin
  2091. ref.index:=ref.base;
  2092. ref.base:=NR_NO;
  2093. if mmregs<>[] then
  2094. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
  2095. end
  2096. else
  2097. internalerror(2019050921);
  2098. end;
  2099. end;
  { step 2: compute the integer registers to reload. If anything was stored
    on entry, R14 is replaced by R15 so that the stored return address is
    loaded straight into the program counter }
  2100. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2101. if (pi_do_call in current_procinfo.flags) or
  2102. (regs<>[]) or
  2103. ((target_info.system in systems_darwin) and
  2104. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2105. begin
  2106. exclude(regs,RS_R14);
  2107. include(regs,RS_R15);
  2108. if (target_info.system in systems_darwin) then
  2109. include(regs,RS_FRAME_POINTER_REG);
  2110. end;
  2111. if not(target_info.system in systems_darwin) then
  2112. begin
  2113. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2114. The saved PC came after that but is discarded, since we restore
  2115. the stack pointer }
  2116. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2117. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2118. end
  2119. else
  2120. begin
  2121. { restore R8-R11 already if necessary (they've been stored
  2122. before the others) }
  2123. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2124. if saveregs<>[] then
  2125. begin
  { only the reference and the size accounting are prepared here, the LDM
    for saveregs itself is emitted further below after the stack pointer
    has been adjusted }
  2126. reference_reset(ref,4,[]);
  2127. ref.index:=NR_STACK_POINTER_REG;
  2128. ref.addressmode:=AM_PREINDEXED;
  2129. for r:=RS_R8 to RS_R11 do
  2130. if r in saveregs then
  2131. inc(registerarea,4);
  2132. regs:=regs-saveregs;
  2133. end;
  2134. end;
  2135. for r:=RS_R0 to RS_R15 do
  2136. if r in regs then
  2137. inc(registerarea,4);
  2138. { reapply the stack padding reg, in case there was one, see the complementary
  2139. comment in g_proc_entry() (KB) }
  2140. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
  2141. if paddingreg < RS_R4 then
  2142. if paddingreg in regs then
  2143. internalerror(201306190)
  2144. else
  2145. begin
  2146. regs:=regs+[paddingreg];
  2147. inc(registerarea,4);
  2148. end;
  2149. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  { step 3: stack pointer based frame (or Darwin) }
  2150. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2151. (target_info.system in systems_darwin) then
  2152. begin
  2153. LocalSize:=current_procinfo.calc_stackframe_size;
  2154. if (LocalSize<>0) or
  2155. ((stackmisalignment<>0) and
  2156. ((pi_do_call in current_procinfo.flags) or
  2157. (po_assembler in current_procinfo.procdef.procoptions))) then
  2158. begin
  2159. if pi_estimatestacksize in current_procinfo.flags then
  2160. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2161. else
  2162. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { add with one immediate, with two immediates, or via a constant loaded
    into R12, depending on what can be encoded }
  2163. if is_shifter_const(LocalSize,shift) then
  2164. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2165. else if split_into_shifter_const(localsize, imm1, imm2) then
  2166. begin
  2167. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2168. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2169. end
  2170. else
  2171. begin
  2172. a_reg_alloc(list,NR_R12);
  2173. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2174. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2175. a_reg_dealloc(list,NR_R12);
  2176. end;
  2177. end;
  { Darwin: reload the registers of the second STM of the prologue first;
    ref was prepared above when saveregs was computed }
  2178. if (target_info.system in systems_darwin) and
  2179. (saveregs<>[]) then
  2180. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  2181. if regs=[] then
  2182. begin
  { nothing was stored, so return through the link register }
  2183. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2184. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2185. else
  2186. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2187. end
  2188. else
  2189. begin
  2190. reference_reset(ref,4,[]);
  2191. ref.index:=NR_STACK_POINTER_REG;
  2192. ref.addressmode:=AM_PREINDEXED;
  2193. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2194. end;
  2195. end
  2196. else
  2197. begin
  { step 4: frame pointer based frame; regs contains R13 and R15 here, so
    this single LDM also resets the stack pointer and returns }
  2198. { restore int registers and return }
  2199. reference_reset(ref,4,[]);
  2200. ref.index:=NR_FRAME_POINTER_REG;
  2201. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2202. end;
  2203. end
  2204. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2205. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2206. else
  2207. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2208. end;
  { Emits the code which loads the address of _GLOBAL_OFFSET_TABLE_ into
    current_procinfo.got. Code is only generated if PIC code is created, the
    current procedure needs the GOT and the target uses a GOT for PIC.
    The offset of the table relative to the ADD instruction (plus 8) is put
    into the local data of the procedure and loaded pc relative. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel,
      anchorlabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sreg : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before GOT initialization code, allocate registers used for procedure parameters
        to prevent usage of these registers for temp operations in later stages of code
        generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sreg:=RS_R0 to RS_R3 do
        if sreg in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { ldr r12,<constant pool entry> }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { the pool entry holds the distance between the GOT and the label in
        front of the ADD below, with an addend of -8 }
      current_asmdata.getaddrlabel(anchorlabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,anchorlabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,anchorlabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sreg:=RS_R3 downto RS_R0 do
        if sreg in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));
    end;
  { Loads the address described by ref into the register r.
    Only references with the address mode AM_OFFSET are accepted. References
    with a symbol or with an offset which cannot be encoded as immediate
    (neither directly nor negated) are simplified by fixref first. The
    remaining base/index/offset combination is then computed with at most
    two arithmetic instructions, or with a plain move or constant load. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      shift : byte;
      addr : treference;
      moveinstr : taicpu;
      indexop : topcg;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      addr:=ref;

      { Be sure to have a base register }
      if addr.base=NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(2014020702);
          if addr.signindex<0 then
            internalerror(200312023);
          addr.base:=addr.index;
          addr.index:=NR_NO;
        end;

      if assigned(addr.symbol) or
         (not(is_shifter_const(addr.offset,shift)) and
          not(is_shifter_const(-addr.offset,shift))) then
        fixref(list,addr);

      { expect a base here if there is an index }
      if (addr.base=NR_NO) and (addr.index<>NR_NO) then
        internalerror(200312022);

      if addr.index<>NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(200312021);
          { NOTE(review): for signindex<0 the intended address is presumably
            base-index; confirm that the operand order of a_op_reg_reg_reg
            yields that for OP_SUB }
          if addr.signindex<0 then
            indexop:=OP_SUB
          else
            indexop:=OP_ADD;
          a_op_reg_reg_reg(list,indexop,OS_ADDR,addr.base,addr.index,r);
          if addr.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,r,r);
        end
      else if addr.base=NR_NO then
        { neither base nor index: the address is just the offset }
        a_load_const_reg(list,OS_ADDR,addr.offset,r)
      else if addr.offset<>0 then
        a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,addr.base,r)
      else
        begin
          { plain register copy, registered so that it may be eliminated }
          moveinstr:=taicpu.op_reg_reg(A_MOV,r,addr.base);
          list.concat(moveinstr);
          add_move_instruction(moveinstr);
        end;
    end;
  2303. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2304. var
  2305. tmpreg, tmpreg2 : tregister;
  2306. tmpref : treference;
  2307. l, piclabel : tasmlabel;
  2308. indirection_done : boolean;
  2309. begin
  2310. { absolute symbols can't be handled directly, we've to store the symbol reference
  2311. in the text segment and access it pc relative
  2312. For now, we assume that references where base or index equals to PC are already
  2313. relative, all other references are assumed to be absolute and thus they need
  2314. to be handled extra.
  2315. A proper solution would be to change refoptions to a set and store the information
  2316. if the symbol is absolute or relative there.
  2317. }
  2318. { create consts entry }
  2319. reference_reset(tmpref,4,[]);
  2320. current_asmdata.getjumplabel(l);
  2321. cg.a_label(current_procinfo.aktlocaldata,l);
  2322. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2323. piclabel:=nil;
  2324. tmpreg:=NR_NO;
  2325. indirection_done:=false;
  2326. if assigned(ref.symbol) then
  2327. begin
  2328. if (target_info.system=system_arm_ios) and
  2329. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2330. begin
  2331. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2332. if ref.offset<>0 then
  2333. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2334. indirection_done:=true;
  2335. end
  2336. else if ref.refaddr=addr_gottpoff then
  2337. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2338. else if ref.refaddr=addr_tlsgd then
  2339. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2340. else if ref.refaddr=addr_tlsdesc then
  2341. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2342. else if ref.refaddr=addr_tpoff then
  2343. begin
  2344. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2345. Internalerror(2019092804);
  2346. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2347. end
  2348. else if (cs_create_pic in current_settings.moduleswitches) then
  2349. if (tf_pic_uses_got in target_info.flags) then
  2350. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2351. else
  2352. begin
  2353. { ideally, we would want to generate
  2354. ldr r1, LPICConstPool
  2355. LPICLocal:
  2356. ldr/str r2,[pc,r1]
  2357. ...
  2358. LPICConstPool:
  2359. .long _globsym-(LPICLocal+8)
  2360. However, we cannot be sure that the ldr/str will follow
  2361. right after the call to fixref, so we have to load the
  2362. complete address already in a register.
  2363. }
  2364. current_asmdata.getaddrlabel(piclabel);
  2365. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2366. end
  2367. else
  2368. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2369. end
  2370. else
  2371. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2372. { load consts entry }
  2373. if not indirection_done then
  2374. begin
  2375. tmpreg:=getintregister(list,OS_INT);
  2376. tmpref.symbol:=l;
  2377. tmpref.base:=NR_PC;
  2378. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2379. if (cs_create_pic in current_settings.moduleswitches) and
  2380. (tf_pic_uses_got in target_info.flags) and
  2381. assigned(ref.symbol) then
  2382. begin
  2383. {$ifdef EXTDEBUG}
  2384. if not (pi_needs_got in current_procinfo.flags) then
  2385. Comment(V_warning,'pi_needs_got not included');
  2386. {$endif EXTDEBUG}
  2387. Include(current_procinfo.flags,pi_needs_got);
  2388. reference_reset(tmpref,4,[]);
  2389. tmpref.base:=current_procinfo.got;
  2390. tmpref.index:=tmpreg;
  2391. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2392. if ref.offset<>0 then
  2393. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2394. end;
  2395. end;
  2396. if assigned(piclabel) then
  2397. begin
  2398. cg.a_label(list,piclabel);
  2399. tmpreg2:=getaddressregister(list);
  2400. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2401. tmpreg:=tmpreg2
  2402. end;
  2403. { This routine can be called with PC as base/index in case the offset
  2404. was too large to encode in a load/store. In that case, the entire
  2405. absolute expression has been re-encoded in a new constpool entry, and
  2406. we have to remove the use of PC from the original reference (the code
  2407. above made everything relative to the value loaded from the new
  2408. constpool entry) }
  2409. if is_pc(ref.base) then
  2410. ref.base:=NR_NO;
  2411. if is_pc(ref.index) then
  2412. ref.index:=NR_NO;
  2413. if (ref.base<>NR_NO) then
  2414. begin
  2415. if ref.index<>NR_NO then
  2416. begin
  2417. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2418. ref.base:=tmpreg;
  2419. end
  2420. else
  2421. if ref.base<>NR_PC then
  2422. begin
  2423. ref.index:=tmpreg;
  2424. ref.shiftimm:=0;
  2425. ref.signindex:=1;
  2426. ref.shiftmode:=SM_None;
  2427. end
  2428. else
  2429. ref.base:=tmpreg;
  2430. end
  2431. else
  2432. ref.base:=tmpreg;
  2433. ref.offset:=0;
  2434. ref.symbol:=nil;
  2435. end;
  { Copies len bytes from source to dest by emitting a call to the RTL
    routine FPC_MOVE. The three parameter locations are obtained from the
    parameter manager using the system procedure 'MOVE' as template. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,lenpara : TCGPara;
      moveproc : tprocdef;
    begin
      moveproc:=search_system_proc('MOVE');

      srcpara.init;
      dstpara.init;
      lenpara.init;

      { parameter 1 = source address, 2 = destination address, 3 = byte count }
      paramanager.getcgtempparaloc(list,moveproc,1,srcpara);
      paramanager.getcgtempparaloc(list,moveproc,2,dstpara);
      paramanager.getcgtempparaloc(list,moveproc,3,lenpara);

      { the parameters are loaded last-to-first and released in that same order }
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);

      { the volatile integer and fpu registers are marked as allocated
        around the call }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    Strategy, as selected below:
      - aligned and len=4 or len=2: a single 32 bit resp. 16 bit load/store
      - aligned and len<=helpsize: an unrolled sequence of loads into
        temporary registers followed by the stores, then the remaining
        1..3 bytes
      - not aligned and len<=4: byte-wise copy
      - everything else: a byte copy loop (genloop resp. genloop_thumb)

    list    : assembler list the instructions are appended to
    source  : reference to copy from
    dest    : reference to copy to
    len     : number of bytes to copy; nothing is emitted for len=0
    aligned : false if the data may only be accessed byte-wise }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved ones, also +2 is used because of the regs required for
        referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop for ARM/Thumb-2 using post-indexed addressing.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the decrement sets the flags tested by the jump below }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimate whether creating a separate ref is needed or
      whether we can keep the original reference while copying }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Copy loop for Thumb code: the base registers are advanced with
      explicit additions instead of post-indexed addressing.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { advances the base register of ref by value bytes }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        refincofs(srcref);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the decrement has to set the flags (PF_S, exactly as in genloop),
          the conditional jump below depends on them }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { load as many full words as fit into the temporary registers }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;

          { store the words loaded above }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy whatever is left, using the largest access size that fits }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference takes all its properties from source }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
              genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest; forwards to
    g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      data_is_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,data_is_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    if the alignment of either reference is 1 or 3, as aligned otherwise. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      treat_as_unaligned : boolean;
    begin
      treat_as_unaligned:=(source.alignment in [1,3]) or
                          (dest.alignment in [1,3]);
      g_concatcopy_internal(list,source,dest,len,not treat_as_unaligned);
    end;
  { Overflow check without an explicit overflow location: forwards to
    g_overflowCheck_loc with a location whose loc field is LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      void_ovloc : tlocation;
    begin
      void_ovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,void_ovloc);
    end;
  { Emits an overflow check if cs_check_overflow is active: a conditional
    jump over a call to FPC_OVERFLOW.

    ovloc.loc=LOC_VOID  : the jump condition is derived from def and, for
                          pointer/unsigned/boolean/char types, from the
                          opcode of the last instruction in List
    ovloc.loc=LOC_FLAGS : the jump is taken on the inverse of ovloc.resflags
    any other location raises internalerror 200409281 }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      no_overflow : tasmlabel;
      skipjmp : TAiCpu;
      okflags : tresflags;
      carry_based : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;

      current_asmdata.getjumplabel(no_overflow);

      case ovloc.loc of
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,no_overflow);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        LOC_VOID:
          begin
            skipjmp:=taicpu.op_sym(A_B,no_overflow);
            skipjmp.is_jmp:=true;

            { pointers and these ordinal types are checked via the carry
              flag, everything else via the overflow flag }
            carry_based:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));

            if not carry_based then
              skipjmp.SetCondition(C_VC)
            else if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
              skipjmp.SetCondition(C_CS)
            else
              skipjmp.SetCondition(C_CC);

            list.concat(skipjmp);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,no_overflow);
    end;
  { Intentionally empty: nothing is emitted here. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { the registers are saved by g_proc_entry }
    end;
  { Intentionally empty: nothing is emitted here. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { the registers are restored by g_proc_exit }
    end;
  { Emits a jump to l that is taken if cond holds.

    Outside Thumb code this is a single conditional branch. For Thumb code
    a branch with the inverted condition skips an unconditional jump
    to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Looks up the opcode stored for the size pair [fromsize,tosize]:
    A_VMOV for equal F32/F64 sizes, A_VCVT for the F32/F64 mix. Every
    other combination is A_NONE and raises internalerror 200312205. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      scalar_ops : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=scalar_ops[fromsize,tosize];
      if result<>A_NONE then
        exit;
      internalerror(200312205);
    end;
  { Looks up the opcode postfix stored for the size pair [fromsize,tosize].
    Only the F32/F64 combinations have an entry other than PF_None; no
    error is raised for the remaining ones. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      scalar_postfixes : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      get_scalar_mm_prefix:=scalar_postfixes[fromsize,tosize];
    end;
  { Moves/converts the scalar in mm register reg1 (fromsize) into mm
    register reg2 (tosize). Only scalar shuffles are supported, anything
    else raises internalerror 2009112407. Opcode and postfix are looked up
    with the sizes passed as (tosize,fromsize). }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407)
      else
        movinstr:=setoppostfix(taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),get_scalar_mm_prefix(tosize,fromsize));

      list.concat(movinstr);

      if movinstr.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so no check is needed here }
        add_move_instruction(movinstr)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar from memory (ref, fromsize) into mm register reg
    (tosize).

    Integer source sizes are accepted as raw data of the same width, in
    which case tosize has to be the matching float size. References with
    alignment 1 or 2 are loaded through integer registers, all others
    with VLDR. If fromsize and tosize differ, the value is loaded into a
    temporary mm register first and converted afterwards. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      rawreg,
      loadreg : tregister;
      rawreg64 : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      case fromsize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112901);
          end;
        else
          internalerror(2019050920);
      end;

      { load directly into reg unless a conversion has to follow }
      loadreg:=reg;
      if (fromsize<>tosize) then
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        case fromsize of
          OS_F32:
            begin
              rawreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,ref,rawreg);
              a_loadmm_intreg_reg(list,OS_32,OS_F32,rawreg,loadreg,mms_movescalar);
            end;
          OS_F64:
            begin
              rawreg64.reglo:=getintregister(list,OS_32);
              rawreg64.reghi:=getintregister(list,OS_32);
              cg64.a_load64_ref_reg(list,ref,rawreg64);
              cg64.a_loadmm_intreg64_reg(list,OS_F64,rawreg64,loadreg);
            end;
          else
            internalerror(2009112412);
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar in mm register reg (fromsize) to memory (ref,
    tosize).

    Integer destination sizes are accepted as raw data of the same width,
    in which case fromsize has to be the matching float size. If fromsize
    and tosize differ, the value is converted into a temporary mm register
    first. References with alignment 1 or 2 are stored through integer
    registers, all others with VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      rawreg,
      storereg : tregister;
      rawreg64 : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      case tosize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112802);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112902);
          end;
        else
          internalerror(2019050919);
      end;

      { convert first if the sizes differ, otherwise store reg itself }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        case tosize of
          OS_F32:
            begin
              rawreg:=getintregister(list,OS_32);
              a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,rawreg,shuffle);
              a_load_reg_ref(list,OS_32,OS_32,rawreg,ref);
            end;
          OS_F64:
            begin
              rawreg64.reglo:=getintregister(list,OS_32);
              rawreg64.reghi:=getintregister(list,OS_32);
              cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,rawreg64);
              cg64.a_load64_reg_ref(list,rawreg64,ref);
            end;
          else
            internalerror(2009112417);
        end;
      { VSTR cannot generate an FPU exception and VCVT is handled separately,
        so no check is needed here }
    end;
  { Transfers the raw 32 bit contents of integer register intreg into mm
    register mmreg with a VMOV. No conversion is performed: tosize has to
    be OS_F32, fromsize OS_32 or OS_S32, and only scalar shuffles are
    accepted; otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (tosize<>OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);

      { VMOV cannot generate an FPU exception, so no check is needed here }
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of mm register mmreg into integer
    register intreg with a VMOV. No conversion is performed: fromsize has
    to be OS_F32, tosize OS_32 or OS_S32, and only scalar shuffles are
    accepted; otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (fromsize<>OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112409);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);

      { VMOV cannot generate an FPU exception, so no check is needed here }
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Operation on mm registers. Only OP_XOR is implemented, every other
    operation raises internalerror 2009112906.

    With NEON and size OS_F64 a VEOR dst,dst,src is emitted. Otherwise
    only src=dst is accepted and dst is cleared by moving a zeroed integer
    register into it. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op<>OP_XOR then
        internalerror(2009112906)
      else if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
        begin
          if (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2019081301);
          list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
        end
      else
        begin
          if (src<>dst) or
             (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2009112907);

          zeroreg:=getintregister(list,OS_32);
          a_load_const_reg(list,OS_32,0,zeroreg);

          if size=OS_F32 then
            list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
          else if size=OS_F64 then
            { both halves of the double register are filled from zeroreg }
            list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
          else
            internalerror(2009112908);
        end;
    end;
  { Re-normalises the result in dst after an operation on a value that is
    smaller than 32 bit.

    If op is one of the operations listed in overflowops and size is an
    8 or 16 bit size, dst is reloaded from itself (OS_32 -> size) so that
    it again holds a valid value of that size. Nothing is emitted in all
    other cases.

    OP_NOT is part of the set as well: a bitwise NOT of a 32 bit register
    sets the bits above the operand size too. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
    begin
      if (op in overflowops) and
         (size in [OS_8,OS_S8,OS_16,OS_S16]) then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits MLA op1,op2,op3,op4. Before that, every operand that is R15, or
    R13 when generating Thumb or Thumb-2 code, is replaced by a freshly
    allocated integer register that the operand has been loaded into. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new register if it is R15, or R13 in
      Thumb/Thumb-2 code }
    procedure substitute_special_reg(var reg : TRegister);
      var
        copyreg : TRegister;
      begin
        if not(((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
               (getsupreg(reg)=RS_R15)) then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      substitute_special_reg(op1);
      substitute_special_reg(op2);
      substitute_special_reg(op3);
      substitute_special_reg(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { If the current procedure has pi_needs_tls set, emits a call to
    fpc_read_tp and copies R0 into current_procinfo.tlsoffset. R0 is
    marked as allocated around this sequence. Nothing is emitted
    otherwise. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;

      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation regdst := op(regsrc) resp. regdst := regdst op regsrc.

    OP_NEG is emitted as RSBS on the low words followed by RSC on the high
    words, OP_NOT as two 32 bit NOT operations. All other operations are
    forwarded to a_op64_reg_reg_reg with regdst as second source. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags are marked as allocated between RSBS and RSC }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation with a constant, performed in place: forwards to
    a_op64_const_reg_reg with reg as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,op,size,value,
                           reg, { source }
                           reg  { destination });
    end;
  { 64 bit operation regdst := regsrc op value. Forwards to the
    overflow-checking variant with setflags=false; the overflow location
    it returns is not used. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { 64 bit three-register operation. Forwards to the overflow-checking
    variant with setflags=false; the overflow location it returns is not
    used. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into
    mm register mmreg with a single VMOV mmreg,reglo,reghi. No conversion
    is performed; mmsize has to be OS_F64, otherwise internalerror
    2009112405 is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);

      { VMOV cannot generate an FPU exception, so no check is needed here }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers the raw contents of mm register mmreg into the integer
    register pair intreg with a single VMOV reglo,reghi,mmreg. No
    conversion is performed; mmsize has to be OS_F64, otherwise
    internalerror 2009112406 is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);

      { VMOV cannot generate an FPU exception, so no check is needed here }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
    end;
  3179. procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
  3180. var
  3181. tmpreg : tregister;
  3182. b : byte;
  3183. begin
  3184. ovloc.loc:=LOC_VOID;
  3185. case op of
  3186. OP_NEG,
  3187. OP_NOT :
  3188. internalerror(2012022501);
  3189. else
  3190. ;
  3191. end;
  3192. if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
  3193. begin
  3194. case op of
  3195. OP_ADD:
  3196. begin
  3197. if is_shifter_const(lo(value),b) then
  3198. begin
  3199. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3200. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3201. end
  3202. else
  3203. begin
  3204. tmpreg:=cg.getintregister(list,OS_32);
  3205. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3206. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3207. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3208. end;
  3209. if is_shifter_const(hi(value),b) then
  3210. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
  3211. else
  3212. begin
  3213. tmpreg:=cg.getintregister(list,OS_32);
  3214. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3215. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3216. end;
  3217. end;
  3218. OP_SUB:
  3219. begin
  3220. if is_shifter_const(lo(value),b) then
  3221. begin
  3222. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3223. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
  3224. end
  3225. else
  3226. begin
  3227. tmpreg:=cg.getintregister(list,OS_32);
  3228. cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
  3229. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3230. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3231. end;
  3232. if is_shifter_const(hi(value),b) then
  3233. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
  3234. else
  3235. begin
  3236. tmpreg:=cg.getintregister(list,OS_32);
  3237. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3238. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
  3239. end;
  3240. end;
  3241. else
  3242. internalerror(200502131);
  3243. end;
  3244. if size=OS_64 then
  3245. begin
  3246. { the arm has an weired opinion how flags for SUB/ADD are handled }
  3247. ovloc.loc:=LOC_FLAGS;
  3248. case op of
  3249. OP_ADD:
  3250. ovloc.resflags:=F_CS;
  3251. OP_SUB:
  3252. ovloc.resflags:=F_CC;
  3253. else
  3254. internalerror(2019050918);
  3255. end;
  3256. end;
  3257. end
  3258. else
  3259. begin
  3260. case op of
  3261. OP_AND,OP_OR,OP_XOR:
  3262. begin
  3263. cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
  3264. cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
  3265. end;
  3266. OP_ADD:
  3267. begin
  3268. if is_shifter_const(aint(lo(value)),b) then
  3269. begin
  3270. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3271. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3272. end
  3273. else
  3274. begin
  3275. tmpreg:=cg.getintregister(list,OS_32);
  3276. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3277. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3278. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3279. end;
  3280. if is_shifter_const(aint(hi(value)),b) then
  3281. list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3282. else
  3283. begin
  3284. tmpreg:=cg.getintregister(list,OS_32);
  3285. cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
  3286. list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
  3287. end;
  3288. end;
  3289. OP_SUB:
  3290. begin
  3291. if is_shifter_const(aint(lo(value)),b) then
  3292. begin
  3293. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3294. list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
  3295. end
  3296. else
  3297. begin
  3298. tmpreg:=cg.getintregister(list,OS_32);
  3299. cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
  3300. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3301. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
  3302. end;
  3303. if is_shifter_const(aint(hi(value)),b) then
  3304. list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
  3305. else
  3306. begin
  3307. tmpreg:=cg.getintregister(list,OS_32);
  3308. cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
  3309. list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
  3310. end;
  3311. end;
  3312. else
  3313. internalerror(2003083101);
  3314. end;
  3315. end;
  3316. end;
  3317. procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
  3318. begin
  3319. ovloc.loc:=LOC_VOID;
  3320. case op of
  3321. OP_NEG,
  3322. OP_NOT :
  3323. internalerror(2012022502);
  3324. else
  3325. ;
  3326. end;
  3327. if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
  3328. begin
  3329. case op of
  3330. OP_ADD:
  3331. begin
  3332. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3333. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
  3334. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
  3335. end;
  3336. OP_SUB:
  3337. begin
  3338. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3339. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
  3340. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
  3341. end;
  3342. else
  3343. internalerror(2003083102);
  3344. end;
  3345. if size=OS_64 then
  3346. begin
  3347. { the arm has an weired opinion how flags for SUB/ADD are handled }
  3348. ovloc.loc:=LOC_FLAGS;
  3349. case op of
  3350. OP_ADD:
  3351. ovloc.resflags:=F_CS;
  3352. OP_SUB:
  3353. ovloc.resflags:=F_CC;
  3354. else
  3355. internalerror(2019050917);
  3356. end;
  3357. end;
  3358. end
  3359. else
  3360. begin
  3361. case op of
  3362. OP_AND,OP_OR,OP_XOR:
  3363. begin
  3364. cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
  3365. cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
  3366. end;
  3367. OP_ADD:
  3368. begin
  3369. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3370. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
  3371. list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
  3372. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  3373. end;
  3374. OP_SUB:
  3375. begin
  3376. cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
  3377. list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
  3378. list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
  3379. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  3380. end;
  3381. else
  3382. internalerror(2003083104);
  3383. end;
  3384. end;
  3385. end;
  3386. procedure tthumbcgarm.init_register_allocators;
  3387. begin
  3388. inherited init_register_allocators;
  3389. if assigned(current_procinfo) and (current_procinfo.framepointer=NR_R7) then
  3390. rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
  3391. [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[])
  3392. else
  3393. rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
  3394. [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[]);
  3395. end;
  3396. procedure tthumbcgarm.done_register_allocators;
  3397. begin
  3398. rg[R_INTREGISTER].free;
  3399. rg[R_FPUREGISTER].free;
  3400. rg[R_MMREGISTER].free;
  3401. inherited done_register_allocators;
  3402. end;
  3403. procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  3404. var
  3405. ref : treference;
  3406. r : byte;
  3407. regs : tcpuregisterset;
  3408. stackmisalignment : pint;
  3409. registerarea: DWord;
  3410. stack_parameters: Boolean;
  3411. begin
  3412. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  3413. LocalSize:=align(LocalSize,4);
  3414. { call instruction does not put anything on the stack }
  3415. stackmisalignment:=0;
  3416. if not(nostackframe) then
  3417. begin
  3418. a_reg_alloc(list,NR_STACK_POINTER_REG);
  3419. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  3420. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  3421. { save int registers }
  3422. reference_reset(ref,4,[]);
  3423. ref.index:=NR_STACK_POINTER_REG;
  3424. ref.addressmode:=AM_PREINDEXED;
  3425. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  3426. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  3427. begin
  3428. //!!!! a_reg_alloc(list,NR_R12);
  3429. //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  3430. end;
  3431. { the (old) ARM APCS requires saving both the stack pointer (to
  3432. crawl the stack) and the PC (to identify the function this
  3433. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  3434. and R15 -- still needs updating for EABI and Darwin, they don't
  3435. need that }
  3436. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  3437. regs:=regs+[RS_R7,RS_R14]
  3438. else
  3439. // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  3440. include(regs,RS_R14);
  3441. { safely estimate stack size }
  3442. if localsize+current_settings.alignment.localalignmax+4>508 then
  3443. begin
  3444. include(rg[R_INTREGISTER].used_in_proc,RS_R4);
  3445. include(regs,RS_R4);
  3446. end;
  3447. registerarea:=0;
  3448. if regs<>[] then
  3449. begin
  3450. for r:=RS_R0 to RS_R15 do
  3451. if r in regs then
  3452. inc(registerarea,4);
  3453. list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
  3454. { we need to run the loop twice to get cfi right }
  3455. registerarea:=0;
  3456. for r:=RS_R0 to RS_R15 do
  3457. if r in regs then
  3458. begin
  3459. inc(registerarea,4);
  3460. current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
  3461. end;
  3462. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
  3463. end;
  3464. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  3465. if stack_parameters or (LocalSize<>0) or
  3466. ((stackmisalignment<>0) and
  3467. ((pi_do_call in current_procinfo.flags) or
  3468. (po_assembler in current_procinfo.procdef.procoptions))) then
  3469. begin
  3470. { do we access stack parameters?
  3471. if yes, the previously estimated stacksize must be used }
  3472. if stack_parameters then
  3473. begin
  3474. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  3475. begin
  3476. writeln(localsize);
  3477. writeln(tcpuprocinfo(current_procinfo).stackframesize);
  3478. internalerror(2013040601);
  3479. end
  3480. else
  3481. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  3482. end
  3483. else
  3484. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  3485. if localsize<508 then
  3486. begin
  3487. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  3488. end
  3489. else if localsize<=1016 then
  3490. begin
  3491. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
  3492. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
  3493. end
  3494. else
  3495. begin
  3496. a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
  3497. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
  3498. include(regs,RS_R4);
  3499. end;
  3500. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
  3501. end;
  3502. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  3503. begin
  3504. list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
  3505. current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
  3506. end;
  3507. end;
  3508. end;
  3509. procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
  3510. var
  3511. LocalSize : longint;
  3512. r: byte;
  3513. regs : tcpuregisterset;
  3514. registerarea : DWord;
  3515. stackmisalignment: pint;
  3516. stack_parameters : Boolean;
  3517. begin
  3518. if not(nostackframe) then
  3519. begin
  3520. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  3521. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  3522. include(regs,RS_R15);
  3523. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  3524. include(regs,getsupreg(current_procinfo.framepointer));
  3525. registerarea:=0;
  3526. for r:=RS_R0 to RS_R15 do
  3527. if r in regs then
  3528. inc(registerarea,4);
  3529. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  3530. LocalSize:=current_procinfo.calc_stackframe_size;
  3531. if stack_parameters then
  3532. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  3533. else
  3534. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  3535. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  3536. (target_info.system in systems_darwin) then
  3537. begin
  3538. if (LocalSize<>0) or
  3539. ((stackmisalignment<>0) and
  3540. ((pi_do_call in current_procinfo.flags) or
  3541. (po_assembler in current_procinfo.procdef.procoptions))) then
  3542. begin
  3543. if LocalSize=0 then
  3544. else if LocalSize<=508 then
  3545. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  3546. else if LocalSize<=1016 then
  3547. begin
  3548. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
  3549. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize-508));
  3550. end
  3551. else
  3552. begin
  3553. a_reg_alloc(list,NR_R3);
  3554. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R3);
  3555. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
  3556. a_reg_dealloc(list,NR_R3);
  3557. end;
  3558. end;
  3559. if regs=[] then
  3560. begin
  3561. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  3562. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  3563. else
  3564. list.concat(taicpu.op_reg(A_BX,NR_R14))
  3565. end
  3566. else
  3567. list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,regs));
  3568. end;
  3569. end
  3570. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  3571. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  3572. else
  3573. list.concat(taicpu.op_reg(A_BX,NR_R14))
  3574. end;
  3575. procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  3576. var
  3577. oppostfix:toppostfix;
  3578. usedtmpref: treference;
  3579. tmpreg,tmpreg2 : tregister;
  3580. dir : integer;
  3581. begin
  3582. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  3583. FromSize := ToSize;
  3584. case FromSize of
  3585. { signed integer registers }
  3586. OS_8:
  3587. oppostfix:=PF_B;
  3588. OS_S8:
  3589. oppostfix:=PF_SB;
  3590. OS_16:
  3591. oppostfix:=PF_H;
  3592. OS_S16:
  3593. oppostfix:=PF_SH;
  3594. OS_32,
  3595. OS_S32:
  3596. oppostfix:=PF_None;
  3597. else
  3598. InternalError(200308298);
  3599. end;
  3600. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  3601. begin
  3602. if target_info.endian=endian_big then
  3603. dir:=-1
  3604. else
  3605. dir:=1;
  3606. case FromSize of
  3607. OS_16,OS_S16:
  3608. begin
  3609. { only complicated references need an extra loadaddr }
  3610. if assigned(ref.symbol) or
  3611. (ref.index<>NR_NO) or
  3612. (ref.offset<-124) or
  3613. (ref.offset>124) or
  3614. { sometimes the compiler reused registers }
  3615. (reg=ref.index) or
  3616. (reg=ref.base) then
  3617. begin
  3618. tmpreg2:=getintregister(list,OS_INT);
  3619. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3620. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  3621. end
  3622. else
  3623. usedtmpref:=ref;
  3624. if target_info.endian=endian_big then
  3625. inc(usedtmpref.offset,1);
  3626. tmpreg:=getintregister(list,OS_INT);
  3627. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3628. inc(usedtmpref.offset,dir);
  3629. if FromSize=OS_16 then
  3630. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  3631. else
  3632. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  3633. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3634. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3635. end;
  3636. OS_32,OS_S32:
  3637. begin
  3638. tmpreg:=getintregister(list,OS_INT);
  3639. { only complicated references need an extra loadaddr }
  3640. if assigned(ref.symbol) or
  3641. (ref.index<>NR_NO) or
  3642. (ref.offset<-124) or
  3643. (ref.offset>124) or
  3644. { sometimes the compiler reused registers }
  3645. (reg=ref.index) or
  3646. (reg=ref.base) then
  3647. begin
  3648. tmpreg2:=getintregister(list,OS_INT);
  3649. a_loadaddr_ref_reg(list,ref,tmpreg2);
  3650. reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
  3651. end
  3652. else
  3653. usedtmpref:=ref;
  3654. if ref.alignment=2 then
  3655. begin
  3656. if target_info.endian=endian_big then
  3657. inc(usedtmpref.offset,2);
  3658. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  3659. inc(usedtmpref.offset,dir*2);
  3660. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  3661. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3662. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3663. end
  3664. else
  3665. begin
  3666. if target_info.endian=endian_big then
  3667. inc(usedtmpref.offset,3);
  3668. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  3669. inc(usedtmpref.offset,dir);
  3670. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3671. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
  3672. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3673. inc(usedtmpref.offset,dir);
  3674. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3675. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
  3676. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3677. inc(usedtmpref.offset,dir);
  3678. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  3679. list.concat(taicpu.op_reg_const(A_LSL,tmpreg,24));
  3680. list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
  3681. end;
  3682. end
  3683. else
  3684. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  3685. end;
  3686. end
  3687. else
  3688. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  3689. if (fromsize=OS_S8) and (tosize = OS_16) then
  3690. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  3691. end;
  3692. procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
  3693. var
  3694. l : tasmlabel;
  3695. hr : treference;
  3696. begin
  3697. if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
  3698. internalerror(2002090908);
  3699. if is_thumb_imm(a) then
  3700. list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S))
  3701. else
  3702. begin
  3703. reference_reset(hr,4,[]);
  3704. current_asmdata.getjumplabel(l);
  3705. cg.a_label(current_procinfo.aktlocaldata,l);
  3706. hr.symboldata:=current_procinfo.aktlocaldata.last;
  3707. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
  3708. hr.symbol:=l;
  3709. hr.base:=NR_PC;
  3710. list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
  3711. end;
  3712. end;
  3713. procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  3714. var
  3715. hsym : tsym;
  3716. href,
  3717. tmpref : treference;
  3718. paraloc : Pcgparalocation;
  3719. l : TAsmLabel;
  3720. begin
  3721. { calculate the parameter info for the procdef }
  3722. procdef.init_paraloc_info(callerside);
  3723. hsym:=tsym(procdef.parast.Find('self'));
  3724. if not(assigned(hsym) and
  3725. (hsym.typ=paravarsym)) then
  3726. internalerror(2003052504);
  3727. paraloc:=tparavarsym(hsym).paraloc[callerside].location;
  3728. while paraloc<>nil do
  3729. with paraloc^ do
  3730. begin
  3731. case loc of
  3732. LOC_REGISTER:
  3733. begin
  3734. if is_thumb_imm(ioffset) then
  3735. a_op_const_reg(list,OP_SUB,size,ioffset,register)
  3736. else
  3737. begin
  3738. list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
  3739. reference_reset(tmpref,4,[]);
  3740. current_asmdata.getjumplabel(l);
  3741. current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
  3742. cg.a_label(current_procinfo.aktlocaldata,l);
  3743. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  3744. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
  3745. tmpref.symbol:=l;
  3746. tmpref.base:=NR_PC;
  3747. list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,tmpref));
  3748. a_op_reg_reg(list,OP_SUB,size,NR_R4,register);
  3749. list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
  3750. end;
  3751. end;
  3752. LOC_REFERENCE:
  3753. begin
  3754. { offset in the wrapper needs to be adjusted for the stored
  3755. return address }
  3756. reference_reset_base(href,reference.index,reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
  3757. if is_thumb_imm(ioffset) then
  3758. a_op_const_ref(list,OP_SUB,size,ioffset,href)
  3759. else
  3760. begin
  3761. list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
  3762. reference_reset(tmpref,4,[]);
  3763. current_asmdata.getjumplabel(l);
  3764. current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
  3765. cg.a_label(current_procinfo.aktlocaldata,l);
  3766. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  3767. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
  3768. tmpref.symbol:=l;
  3769. tmpref.base:=NR_PC;
  3770. list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,tmpref));
  3771. a_op_reg_ref(list,OP_SUB,size,NR_R4,href);
  3772. list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
  3773. end;
  3774. end
  3775. else
  3776. internalerror(2003091804);
  3777. end;
  3778. paraloc:=next;
  3779. end;
  3780. end;
  3781. function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
  3782. var
  3783. href : treference;
  3784. tmpreg : TRegister;
  3785. begin
  3786. href:=ref;
  3787. if { LDR/STR limitations }
  3788. (
  3789. (((op=A_LDR) and (oppostfix=PF_None)) or
  3790. ((op=A_STR) and (oppostfix=PF_None))) and
  3791. (ref.base<>NR_STACK_POINTER_REG) and
  3792. (abs(ref.offset)>124)
  3793. ) or
  3794. { LDRB/STRB limitations }
  3795. (
  3796. (((op=A_LDR) and (oppostfix=PF_B)) or
  3797. ((op=A_LDRB) and (oppostfix=PF_None)) or
  3798. ((op=A_STR) and (oppostfix=PF_B)) or
  3799. ((op=A_STRB) and (oppostfix=PF_None))) and
  3800. ((ref.base=NR_STACK_POINTER_REG) or
  3801. (ref.index=NR_STACK_POINTER_REG) or
  3802. (abs(ref.offset)>31)
  3803. )
  3804. ) or
  3805. { LDRH/STRH limitations }
  3806. (
  3807. (((op=A_LDR) and (oppostfix=PF_H)) or
  3808. ((op=A_LDRH) and (oppostfix=PF_None)) or
  3809. ((op=A_STR) and (oppostfix=PF_H)) or
  3810. ((op=A_STRH) and (oppostfix=PF_None))) and
  3811. ((ref.base=NR_STACK_POINTER_REG) or
  3812. (ref.index=NR_STACK_POINTER_REG) or
  3813. (abs(ref.offset)>62) or
  3814. ((abs(ref.offset) mod 2)<>0)
  3815. )
  3816. ) then
  3817. begin
  3818. tmpreg:=getintregister(list,OS_ADDR);
  3819. a_loadaddr_ref_reg(list,ref,tmpreg);
  3820. reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
  3821. end
  3822. else if (op=A_LDR) and
  3823. (oppostfix in [PF_None]) and
  3824. (ref.base=NR_STACK_POINTER_REG) and
  3825. (abs(ref.offset)>1020) then
  3826. begin
  3827. tmpreg:=getintregister(list,OS_ADDR);
  3828. a_loadaddr_ref_reg(list,ref,tmpreg);
  3829. reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
  3830. end
  3831. else if (op=A_LDR) and
  3832. ((oppostfix in [PF_SH,PF_SB]) or
  3833. (abs(ref.offset)>124)) then
  3834. begin
  3835. tmpreg:=getintregister(list,OS_ADDR);
  3836. a_loadaddr_ref_reg(list,ref,tmpreg);
  3837. reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
  3838. end;
  3839. Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
  3840. end;
  3841. procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
  3842. var
  3843. tmpreg : tregister;
  3844. begin
  3845. case op of
  3846. OP_NEG:
  3847. list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
  3848. OP_NOT:
  3849. list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
  3850. OP_DIV,OP_IDIV:
  3851. internalerror(200308284);
  3852. OP_ROL:
  3853. begin
  3854. if not(size in [OS_32,OS_S32]) then
  3855. internalerror(2008072805);
  3856. { simulate ROL by ror'ing 32-value }
  3857. tmpreg:=getintregister(list,OS_32);
  3858. a_load_const_reg(list,OS_32,32,tmpreg);
  3859. list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
  3860. list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,src),PF_S));
  3861. end;
  3862. else
  3863. begin
  3864. a_reg_alloc(list,NR_DEFAULTFLAGS);
  3865. list.concat(setoppostfix(
  3866. taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
  3867. end;
  3868. end;
  3869. maybeadjustresult(list,op,size,dst);
  3870. end;
  3871. procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
  3872. var
  3873. tmpreg : tregister;
  3874. {$ifdef DUMMY}
  3875. l1 : longint;
  3876. {$endif DUMMY}
  3877. begin
  3878. //!!! ovloc.loc:=LOC_VOID;
  3879. if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
  3880. case op of
  3881. OP_ADD:
  3882. begin
  3883. op:=OP_SUB;
  3884. a:=aint(dword(-a));
  3885. end;
  3886. OP_SUB:
  3887. begin
  3888. op:=OP_ADD;
  3889. a:=aint(dword(-a));
  3890. end
  3891. else
  3892. ;
  3893. end;
  3894. if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
  3895. begin
  3896. // if cgsetflags or setflags then
  3897. a_reg_alloc(list,NR_DEFAULTFLAGS);
  3898. list.concat(setoppostfix(
  3899. taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
  3900. if (cgsetflags {!!! or setflags }) and (size in [OS_8,OS_16,OS_32]) then
  3901. begin
  3902. //!!! ovloc.loc:=LOC_FLAGS;
  3903. case op of
  3904. OP_ADD:
  3905. //!!! ovloc.resflags:=F_CS;
  3906. ;
  3907. OP_SUB:
  3908. //!!! ovloc.resflags:=F_CC;
  3909. ;
  3910. else
  3911. ;
  3912. end;
  3913. end;
  3914. end
  3915. else
  3916. begin
  3917. { there could be added some more sophisticated optimizations }
  3918. if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
  3919. a_load_reg_reg(list,size,size,dst,dst)
  3920. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  3921. a_load_const_reg(list,size,0,dst)
  3922. else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  3923. a_op_reg_reg(list,OP_NEG,size,dst,dst)
  3924. { we do this here instead in the peephole optimizer because
  3925. it saves us a register }
  3926. {$ifdef DUMMY}
  3927. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  3928. a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
  3929. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  3930. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  3931. begin
  3932. if l1>32 then{roozbeh does this ever happen?}
  3933. internalerror(2003082903);
  3934. shifterop_reset(so);
  3935. so.shiftmode:=SM_LSL;
  3936. so.shiftimm:=l1;
  3937. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
  3938. end
  3939. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  3940. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  3941. begin
  3942. if l1>32 then{does this ever happen?}
  3943. internalerror(2012051802);
  3944. shifterop_reset(so);
  3945. so.shiftmode:=SM_LSL;
  3946. so.shiftimm:=l1;
  3947. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
  3948. end
  3949. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
  3950. begin
  3951. { nothing to do on success }
  3952. end
  3953. {$endif DUMMY}
  3954. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  3955. Just using mov x, #0 might allow some easier optimizations down the line. }
  3956. else if (op = OP_AND) and (dword(a)=0) then
  3957. list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
  3958. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  3959. else if (op = OP_AND) and (not(dword(a))=0) then
  3960. // do nothing
  3961. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  3962. broader range of shifterconstants.}
  3963. {$ifdef DUMMY}
  3964. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  3965. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
  3966. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  3967. begin
  3968. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
  3969. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  3970. end
  3971. else if (op in [OP_ADD, OP_SUB, OP_OR]) and
  3972. not(cgsetflags or setflags) and
  3973. split_into_shifter_const(a, imm1, imm2) then
  3974. begin
  3975. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
  3976. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  3977. end
  3978. {$endif DUMMY}
  3979. else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
  3980. begin
  3981. list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
  3982. end
  3983. else
  3984. begin
  3985. tmpreg:=getintregister(list,size);
  3986. a_load_const_reg(list,size,a,tmpreg);
  3987. a_op_reg_reg(list,op,size,tmpreg,dst);
  3988. end;
  3989. end;
  3990. maybeadjustresult(list,op,size,dst);
  3991. end;
  3992. procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
  3993. begin
  3994. if (op=OP_ADD) and (src=NR_R13) and (dst<>NR_R13) and ((a mod 4)=0) and (a>0) and (a<=1020) then
  3995. list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a))
  3996. else
  3997. inherited a_op_const_reg_reg(list,op,size,a,src,dst);
  3998. end;
  3999. procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
  4000. var
  4001. l1,l2 : tasmlabel;
  4002. ai : taicpu;
  4003. begin
  4004. current_asmdata.getjumplabel(l1);
  4005. current_asmdata.getjumplabel(l2);
  4006. ai:=setcondition(taicpu.op_sym(A_B,l1),flags_to_cond(f));
  4007. ai.is_jmp:=true;
  4008. list.concat(ai);
  4009. list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
  4010. list.concat(taicpu.op_sym(A_B,l2));
  4011. cg.a_label(list,l1);
  4012. list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));
  4013. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  4014. cg.a_label(list,l2);
  4015. end;
  { Creates the register allocators used while generating Thumb-2 code.

    Integer registers: R0-R8, R10, R12 and R14 are always handed to the
    allocator; R9 is added for every target except iOS.  An FPA register
    allocator (F0-F7) is only created when the selected FPU type has the
    FPUARM_HAS_FPA capability.  The multimedia register allocator is set up
    by init_mmregister_allocator. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { R14 is currently always saved, so it can be used as a normal register }
      if (target_info.system=system_arm_ios) then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and multimedia register allocators of the
    Thumb-2 code generator and then lets the inherited implementation run. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { integer registers }
      rg[R_INTREGISTER].free;
      { FPA registers }
      rg[R_FPUREGISTER].free;
      { multimedia registers }
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX reg) and records in the current
    procedure info that the procedure performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinstr);
      {
        The compiler does not properly set pi_do_call anymore in pass 1, and
        for now it is only needed after pass 2 (I hope) (JM).  The former
        consistency check is therefore disabled:
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
        and the flag is simply set here instead.
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the integer constant a into reg (Thumb-2).

    Strategies are tried in this order:
      1. MOV  reg,#a        when a passes is_thumb32_imm
      2. MVN  reg,#not(a)   when not(a) passes is_thumb32_imm
      3. MOVW reg,#a        when a fits into the low 16 bits
      4. LDR  reg,[pc,...]  from a 32 bit entry appended to the procedure's
                            local data (aktlocaldata)
    Sizes other than the 8/16/32 bit integer sizes raise internalerror
    2002090909. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value in the local data of the
        procedure and load it relative to the program counter }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry that was just appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg
    (Thumb-2).

    If fromsize is at least as large as tosize, only tosize bytes are loaded.
    References whose alignment (1 or 2) is smaller than the size to load are
    read piecewise (bytes or half words) and combined with ORR and a left
    shift, taking the target endianness into account.  All other loads are
    passed to handle_load_store with the matching LDR postfix.  A sign
    extended byte that has to become an unsigned 16 bit value is adjusted
    afterwards by a_load_reg_reg. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      shiftop : tshifterop;
      step,i : integer;
      needaddr : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix belonging to the source size }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082913);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        { sufficiently aligned: a single load instruction does the job }
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { direction in which the address moves from the least significant
            part to the most significant part }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr;
                  sometimes the compiler reused registers, so reg being part
                  of the reference counts as complicated as well }
                needaddr:=assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-255) or
                  (ref.offset>4094) or
                  (reg=ref.index) or
                  (reg=ref.base);
                if needaddr then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  partref:=ref;

                { start at the least significant byte }
                if target_info.endian=endian_big then
                  inc(partref.offset,1);

                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                shiftop.shiftimm:=8;

                partreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                { the upper byte carries the sign of a signed half word }
                if FromSize<>OS_16 then
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);

                { only complicated references need an extra loadaddr;
                  sometimes the compiler reused registers, so reg being part
                  of the reference counts as complicated as well }
                needaddr:=assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-255) or
                  (ref.offset>4092) or
                  (reg=ref.index) or
                  (reg=ref.base);
                if needaddr then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  partref:=ref;

                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;

                if ref.alignment=2 then
                  begin
                    { two half words: low one first, high one shifted by 16 }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end
                else
                  begin
                    { four single bytes: the lowest one goes directly into
                      reg, the remaining three are merged at bit 8, 16, 24 }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for i:=1 to 3 do
                      begin
                        inc(partref.offset,step);
                        a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                        shiftop.shiftimm:=8*i;
                        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                      end;
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Emits "dst := <op> src" / "dst := dst <op> src" for Thumb-2.

    Only OP_NOT is handled here: an MVN followed, for 8 and 16 bit sizes, by
    the matching zero or sign extension of the result.  All other operations
    are passed to the inherited implementation.  An unexpected size raises
    internalerror 2019050916. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op <> OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { bring the inverted value back into the range of its size }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
        OS_32,
        OS_S32:
          { a full register needs no adjustment }
          ;
        else
          internalerror(2019050916);
      end;
    end;
  { Emits "dst := src <op> a" for Thumb-2 and reports in ovloc where an
    overflow indication can be found.

    list      receives the generated instructions
    op, size  operation and operand size
    a         constant operand
    src, dst  source and destination register
    setflags  request a flag-setting instruction (cgsetflags has the same
              effect)
    ovloc     set to LOC_VOID on entry; set to LOC_FLAGS with F_CS (add) or
              F_CC (sub) when flags were requested for an unsigned 8/16/32
              bit add or subtract handled by the immediate path

    An add/sub is first turned into the opposite operation when the negated
    constant is a shifter constant.  Constant shifts and rotates become a MOV
    with a shifter operand, several multiplications and AND masks are replaced
    by cheaper instructions, and everything else loads the constant into a
    temporary register and uses a_op_reg_reg_reg_checkoverflow. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      constreg : tregister;
      shiftop : tshifterop;
      shiftcount : longint;
      wantflags,ismul : boolean;

    { emits "mov dst,src,<mode> #amount", or a plain register move when the
      constant a is zero }
    procedure emit_shifted_move(mode : tshiftmode; amount : tcgint);
      var
        so : tshifterop;
      begin
        if a<>0 then
          begin
            shifterop_reset(so);
            so.shiftmode:=mode;
            so.shiftimm:=amount;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
          end
        else
          list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
      end;

    begin
      ovloc.loc:=LOC_VOID;
      wantflags:=cgsetflags or setflags;

      { x+(-c) -> x-c and x-(-c) -> x+c if that yields a shifter constant }
      if (a<>-2147483648) and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              emit_shifted_move(SM_LSL,a);
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              { rotate left by a = rotate right by 32-a }
              emit_shifted_move(SM_ROR,32-a);
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              emit_shifted_move(SM_ROR,a);
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              emit_shifted_move(SM_LSR,a);
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              emit_shifted_move(SM_ASR,a);
            end;
          else
            begin
              if (op in [OP_SUB, OP_ADD]) and
                 ((a < 0) or
                  (a > 4095)) then
                begin
                  { constant outside 0..4095: go through a register }
                  constreg:=getintregister(list,size);
                  a_load_const_reg(list, size, a, constreg);
                  if wantflags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,constreg),
                    toppostfix(ord(wantflags)*ord(PF_S))));
                end
              else
                begin
                  if wantflags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),
                    toppostfix(ord(wantflags)*ord(PF_S))));
                end;

              { unsigned overflow is visible in the carry flag }
              if wantflags and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      ;
                  end;
                end;
            end;
        end
      else
        begin
          ismul:=op in [OP_MUL,OP_IMUL];

          { there could be added some more sophisticated optimizations }
          if ismul and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if ismul and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op=OP_IMUL) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { done here instead of in the peephole optimizer because it saves
            a register }
          else if ismul and ispowerof2(a,shiftcount) and not(wantflags) then
            a_op_const_reg_reg(list,OP_SHL,size,shiftcount,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if ismul and ispowerof2(a-1,shiftcount) and not(wantflags) then
            begin
              if shiftcount>32 then{roozbeh does this ever happen?}
                internalerror(2003082911);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=shiftcount;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if ismul and ispowerof2(a+1,shiftcount) and not(wantflags) then
            begin
              if shiftcount>32 then{does this ever happen?}
                internalerror(2012051803);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=shiftcount;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if ismul and not(wantflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { x := y and 0; just clears a register, this sometimes gets
            generated on 64bit ops.  Just using mov x, #0 might allow some
            easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF just copies the register, so use mov for
            better optimizations }
          else if (op = OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { BIC clears the specified bits, while AND keeps them, using BIC
            allows to use a broader range of shifterconstants.  A variant
            based on is_shifter_const(not(dword(a))) is disabled in favour of
            the is_thumb32_imm checks below. }
          else if (op = OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          else if (op = OP_AND) and (a = $FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
            begin
              { the bits to clear form one block: copy and clear bit field }
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
            end
          else
            begin
              { general case: constant in a temporary register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;

      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 instruction used by a_op_reg_reg_reg_checkoverflow for each
      generic operation, indexed by TOpCG.
      NOTE(review): the entries have to follow the declaration order of TOpCG,
      which is not visible here; presumably A_NONE marks operations without a
      direct register-register instruction - confirm. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,
       A_UDIV,A_SDIV,A_MUL,A_MUL,
       A_NONE,A_MVN,A_ORR,
       A_ASR,A_LSL,A_LSR,
       A_SUB,A_EOR,A_NONE,A_ROR);
  { Emits "dst := src2 <op> src1" for Thumb-2 and reports in ovloc where an
    overflow indication can be found.

    list        receives the generated instructions
    op, size    operation and operand size
    src1, src2  source registers (src1 is the right hand operand, e.g. the
                shift or rotate count)
    dst         destination register
    setflags    request overflow information (cgsetflags has the same effect)
    ovloc       set to LOC_VOID on entry; for multiplications with flags
                requested it is set to LOC_FLAGS/F_NE after comparing the
                high word of the 64 bit product

    OP_NEG and OP_NOT are not valid here (internalerror 200308286); rotates
    are only supported for 32 bit sizes.

    Changes compared to the previous version:
      - rotate left no longer overwrites src1 with "32-src1"; the adjusted
        count is computed in a temporary register, so the caller's source
        register keeps its value
      - when dst, src1 and src2 are all the same register, the multiply
        paths now really copy the operand into the temporary register that
        is passed to the multiply instruction (before, the copy went to dst
        and the temporary was used uninitialised) }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the adjusted count lives in
              tmpreg so that src1 is left untouched }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { full 64 bit product: low word in dst, high word in
                  overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word has to equal the sign extension
                      of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word has to be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { plain three register instruction taken from the Thumb-2 table;
              the S postfix is added when flags were requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),
              toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the condition described by f as 0 or 1 in reg (Thumb-2).

    Emits an ITE block: "mov reg,#1" executed under the condition and
    "mov reg,#0" executed under the inverse condition. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE, cond));
      { "then" part }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      { "else" part }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Generates the procedure prologue for Thumb-2.

    list          receives the generated instructions
    localsize     size of the local stack area; rounded up to a multiple of 4
    nostackframe  when true nothing is generated

    Steps, in emission order:
      1. with a separate frame pointer: copy the stack pointer to R12
      2. push the used non-volatile integer registers (plus frame pointer and
         R14 where required) with STMFD
      3. with a separate frame pointer: frame pointer := R12-4
      4. lower the stack pointer by the (alignment corrected) local size
      5. with an FPA capable FPU: store the used non-volatile FPA registers
         with SFM at floatregstart }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      saveref : treference;
      shift : byte;
      firstfpureg,lastfpureg,
      regidx : byte;
      savedregs : tcpuregisterset;
      misalign : pint;
      hasfpa,ownframepointer : boolean;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      misalign:=0;

      if nostackframe then
        exit;

      hasfpa:=FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype];
      ownframepointer:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      firstfpureg:=RS_NO;
      lastfpureg:=RS_NO;
      if hasfpa then
        begin
          { determine the range of floating point registers to save; every
            one of them takes 12 bytes }
          for regidx:=RS_F0 to RS_F7 do
            if regidx in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfpureg=RS_NO then
                  firstfpureg:=regidx;
                lastfpureg:=regidx;
                inc(misalign,12);
              end;
        end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if ownframepointer then
        begin
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          { keep the incoming stack pointer for setting up the frame pointer }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(saveref,4,[]);
      saveref.index:=NR_STACK_POINTER_REG;
      saveref.addressmode:=AM_PREINDEXED;
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if ownframepointer then
        savedregs:=savedregs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (savedregs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(savedregs,RS_R14);

      if savedregs<>[] then
        begin
          { every pushed register moves the stack pointer by 4 bytes }
          for regidx:=RS_R0 to RS_R15 do
            if (regidx in savedregs) then
              inc(misalign,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,saveref,R_INTREGISTER,R_SUBWHOLE,savedregs),PF_FD));
        end;

      if ownframepointer then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      misalign:=misalign mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { make the stack pointer aligned again after the local area }
          localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(localsize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size does not fit an immediate: go through R12 }
              if not(ownframepointer) then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if hasfpa and (firstfpureg<>RS_NO) then
        begin
          { store the floating point registers at floatregstart }
          reference_reset(saveref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the instruction: compute the address
                in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              saveref.base:=NR_R12;
            end
          else
            begin
              saveref.base:=current_procinfo.framepointer;
              saveref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfpureg,R_SUBWHOLE),
            lastfpureg-firstfpureg+1,saveref));
        end;
    end;
  { Generates the procedure epilogue for Thumb-2.

    list          receives the generated instructions
    parasize      not used by this implementation
    nostackframe  when true only "bx r14" is generated

    Steps, in emission order:
      1. with an FPA capable FPU: reload the used non-volatile FPA registers
         with LFM from floatregstart
      2. raise the stack pointer by the (alignment corrected) frame size
      3. return: "bx r14" when no integer register was saved, otherwise an
         LDMFD that restores the saved registers and loads R15 }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      restoreref : treference;
      firstfpureg,lastfpureg,
      regidx : byte;
      shift : byte;
      restoreregs : tcpuregisterset;
      framesize : longint;
      misalign : pint;
    begin
      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      misalign:=0;
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        begin
          { determine the range of floating point registers to restore }
          firstfpureg:=RS_NO;
          lastfpureg:=RS_NO;
          for regidx:=RS_F0 to RS_F7 do
            if regidx in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfpureg=RS_NO then
                  firstfpureg:=regidx;
                lastfpureg:=regidx;
                { floating point register space is already included in the
                  frame size returned by calc_stackframe_size below, so the
                  misalignment is not increased by 12 here }
              end;

          if firstfpureg<>RS_NO then
            begin
              reference_reset(restoreref,4,[]);
              if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  { offset too large for the instruction: compute the
                    address in R12 }
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  restoreref.base:=NR_R12;
                end
              else
                begin
                  restoreref.base:=current_procinfo.framepointer;
                  restoreref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfpureg,R_SUBWHOLE),
                lastfpureg-firstfpureg+1,restoreref));
            end;
        end;

      { integer registers to restore; the return address is loaded into R15
        instead of R14 }
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (restoreregs<>[]) then
        begin
          exclude(restoreregs,RS_R14);
          include(restoreregs,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        restoreregs:=restoreregs+[RS_FRAME_POINTER_REG,RS_R15];

      { every restored register accounts for 4 bytes of stack }
      for regidx:=RS_R0 to RS_R15 do
        if (regidx in restoreregs) then
          inc(misalign,4);
      misalign:=misalign mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if (framesize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(framesize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,framesize));
            end
          else
            begin
              { size does not fit an immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if restoreregs<>[] then
        begin
          reference_reset(restoreref,4,[]);
          restoreref.index:=NR_STACK_POINTER_REG;
          restoreref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,restoreref,R_INTREGISTER,R_SUBWHOLE,restoreregs),PF_FD));
        end
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14));
    end;
  { Emits the load/store instruction op (with postfix oppostfix) between reg
    and the memory location ref, after rewriting ref into a form the
    instruction can take (Thumb-2).

    list       receives the generated instructions
    op         load/store opcode
    oppostfix  postfix added to the final instruction
    reg        register to load into / store from
    ref        memory reference; a local copy that is modified here

    Returns the reference actually used by the emitted instruction.

    Rewrites performed when needed:
      - an index without base becomes the base
      - symbols and offsets that cannot be part of the instruction are
        loaded into a temporary register first (symbols through an entry in
        the procedure's local data, addressed pc relative)
      - base+index+offset is reduced to base+index
      - a shifted index relative to R15 gets a copy of R15 as base
      - floating point/VFP accesses get base and index folded into a single
        base register }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      helpreg : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      isfpuop,needfixup : boolean;
    begin
      helpreg:=NR_NO;
      isfpuop:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol
        reference in the text segment and access it pc relative.

        For now, we assume that references where base or index equals to PC
        are already relative, all other references are assumed to be absolute
        and thus they need to be handled extra.

        A proper solution would be to change refoptions to a set and store
        the information if the symbol is absolute or relative there.

        Further reasons for a fixup: "[#xxx]" isn't a valid address operand,
        the offset is outside -255..4095 (a lower bound of -4095 is not
        used), outside -255..255 for the SB/H/SH postfixes, or - for
        floating point/VFP accesses - outside -1020..1020, not a multiple of
        4, or combined with a symbol (the usual pc relative symbol handling
        assumes possible offsets of +/- 4095). }
      needfixup:=
        (assigned(ref.symbol) and
         not(is_pc(ref.base)) and
         not(is_pc(ref.index))) or
        ((ref.base=NR_NO) and (ref.index=NR_NO)) or
        (ref.offset<-255) or
        (ref.offset>4095) or
        ((oppostfix in [PF_SB,PF_H,PF_SH]) and
         ((ref.offset<-255) or
          (ref.offset>255))) or
        (isfpuop and
         ((ref.offset<-1020) or
          (ref.offset>1020) or
          ((abs(ref.offset) mod 4)<>0) or
          assigned(ref.symbol)));

      if needfixup then
        begin
          reference_reset(poolref,4,[]);
          helpreg:=getintregister(list,OS_INT);
          if not(assigned(ref.symbol)) then
            { plain offset: load it as a constant }
            a_load_const_reg(list,OS_ADDR,ref.offset,helpreg)
          else
            begin
              { load symbol: create the local data entry describing it ... }
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;
              case ref.refaddr of
                addr_gottpoff:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsgd:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsdesc:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset));
                addr_tpoff:
                  begin
                    if assigned(ref.relsymbol) or (ref.offset<>0) then
                      Internalerror(2019092807);
                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
                  end;
                else
                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              end;

              { ... and load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,helpreg,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end;

          { merge the temporary register into the reference }
          if (ref.base=NR_NO) then
            ref.base:=helpreg
          else if ref.index<>NR_NO then
            begin
              list.concat(taicpu.op_reg_reg_reg(A_ADD,helpreg,ref.base,helpreg));
              ref.base:=helpreg;
            end
          else
            begin
              ref.index:=helpreg;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base, index and offset together are not possible: add the offset to
        a register }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if helpreg=NR_NO then
            begin
              helpreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,helpreg);
              ref.base:=helpreg;
            end
          else
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,helpreg,helpreg);
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it
        does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          helpreg:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, helpreg, NR_R15));
          ref.base := helpreg;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if isfpuop and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(2003091202);
          if helpreg=NR_NO then
            begin
              helpreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,helpreg,ref.base,ref.index));
              ref.base:=helpreg;
              ref.index:=NR_NO;
            end
          else if ref.base=helpreg then
            begin
              { the temporary is the base: add/subtract the index to it }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,helpreg,helpreg,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,helpreg,helpreg,ref.index));
              ref.index:=NR_NO;
            end
          else
            begin
              { the temporary has to be the index then }
              if ref.index<>helpreg then
                internalerror(2004031602);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,helpreg,ref.base,helpreg))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,helpreg,ref.base,helpreg));
              ref.base:=helpreg;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Copies multimedia register reg1 to reg2 (Thumb-2).

    Only the single precision case (OS_F32 to OS_F32) generates code: a
    VMOV.F32 that is also registered as a move instruction.
    NOTE(review): OS_F64 to OS_F64 and OS_F32 to OS_F64 are recognised by the
    original code but their instructions are commented out there, so these -
    like every other size combination - emit nothing; confirm this is
    intended. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      moveinstr: taicpu;
    begin
      if (fromsize=OS_F32) and (tosize=OS_F32) then
        begin
          moveinstr:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
          moveinstr:=setoppostfix(moveinstr, PF_F32);
          list.Concat(moveinstr);
          add_move_instruction(moveinstr);
          { VMOV cannot generate an FPU exception, so we do not need a check here }
        end;
      { all remaining size combinations: nothing is emitted (the double
        precision VMOV pair and the VCVT for single to double conversion are
        disabled) }
    end;
  { Loads register reg from the memory reference ref.

    The work is delegated to handle_load_store with the VLDR opcode and no
    postfix. fromsize, tosize and shuffle are not looked at here. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VLDR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Stores register reg to the memory reference ref.

    The work is delegated to handle_load_store with the VSTR opcode and no
    postfix. fromsize, tosize and shuffle are not looked at here. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      { VSTR cannot generate an FPU exception, so we do not need a check here }
      handle_load_store(list,
                        A_VSTR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Transfers the integer register intreg into the mm register mmreg with a
    single VMOV.

    Only tosize=OS_F32 is supported; any other target size raises
    internalerror 2012100813. fromsize is not checked, and shuffle is
    deliberately ignored (the shuffle=nil test is disabled). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the mm register mmreg into the integer register intreg with a
    single VMOV.

    Only fromsize=OS_F32 is supported; any other source size raises
    internalerror 2012100814. tosize is not checked, and shuffle is
    deliberately ignored (the shuffle=nil test is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          { VMOV cannot generate an FPU exception, so we do not need a check here }
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    OP_NEG is handled here; every other operation is forwarded unchanged to
    the inherited implementation.

    Sequence emitted for OP_NEG (flags are marked allocated around it):
      RSB with S postfix : regdst.reglo := 0 - regsrc.reglo
      MOV                : scratch := 0
      SBC                : regdst.reghi := scratch - regsrc.reghi (with borrow) }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: reverse subtract from zero, updating the flags }
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { high word: zero held in a scratch register minus source, using the carry }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation for Thumb (two-operand instruction
    forms): regdst := regdst op regsrc, or regdst := op regsrc for the unary
    operations.

    Supported operations:
      OP_NEG              regdst is zeroed first, then regsrc is subtracted
                          from it with borrow.
                          NOTE(review): because regdst is cleared before
                          regsrc is read, this presumably requires regdst and
                          regsrc to be different registers - confirm with
                          callers.
      OP_NOT              forwarded per half to cg.a_op_reg_reg
      OP_AND/OP_OR/OP_XOR forwarded per half to cg.a_op_reg_reg
      OP_ADD              ADD on the low halves, ADC on the high halves
      OP_SUB              SUB on the low halves, SBC on the high halves
    Any other operation raises internalerror 2003083105.

    The carry-using sequences mark NR_DEFAULTFLAGS as allocated while the
    carry is live and release it again afterwards. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { the carry has been consumed: release the flags again, as the
              OP_NEG branch does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { the borrow has been consumed: release the flags again, as the
              OP_NEG branch does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg op value.

    Supported operations:
      OP_AND/OP_OR/OP_XOR forwarded per half to cg.a_op_const_reg
      OP_ADD              ADD on the low half, ADC on the high half
      OP_SUB              SUB on the low half, SBC on the high half
    Any other operation raises internalerror 2003083106.

    For OP_ADD and OP_SUB the low half of value is used as an immediate when
    it lies in 0..255; otherwise it is first loaded into a scratch register.
    The high half is always loaded into a scratch register. NR_DEFAULTFLAGS
    is marked allocated from the low-half instruction until the carry has
    been consumed by the high-half instruction, then released. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,reg.reghi,tmpreg),PF_S));
            { the carry has been consumed: release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            { cast like every other lo/hi constant in this procedure, so the
              high half is passed as a signed value }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg),PF_S));
            { the borrow has been consumed: release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects cg and cg64 according to
    the selected instruction set and picks the matching assembler optimizer
    class:
      GenerateThumb2Code : tthumb2cgarm / tthumb2cg64farm / TCpuThumb2AsmOptimizer
      GenerateThumbCode  : tthumbcgarm  / tthumbcg64farm  (casmoptimizer is
                           left untouched; the assignment is disabled)
      otherwise          : tarmcgarm    / tarmcg64farm    / TCpuAsmOptimizer
    GenerateThumb2Code is tested first. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { plain ARM instruction set }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5064. end.