cgcpu.pas 224 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm: code generator base class shared by all ARM architectures }
  tbasecgarm = class(tcg)
    { when true, the next arithmetic operation should also modify the flags }
    cgsetflags : boolean;
    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  protected
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
    procedure init_mmregister_allocator;
  public
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;

    { calls }
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { integer moves }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;

    { fpu moves }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;

    { comparisons and jumps }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { procedure prologue/epilogue and related helpers }
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;
    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

    { block copies }
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);

    { overflow checking }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;

    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;

    { mm register moves and operations }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;

    { unsupported methods are turned into internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { tries to generate an optimized 32 bit multiplication by a constant;
      returns true if the code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;

    { clears potential overflow bits left in the upper 24/16 bits of a
      register after an 8 or 16 bit operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla on thumb requires that none of the registers equals r13/r15;
      this method ensures that }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
    procedure g_maybe_tls_init(list : TAsmList); override;
  end;
  { tcgarm: code generator parts common to normal ARM and Thumb-2 }
  tcgarm = class(tbasecgarm)
    { integer operations }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    { loads }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    { multiplies two 32-bit registers into a lo/hi pair of 32-bit registers }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { tarmcgarm: code generator for normal (non-thumb) ARM }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { tbasecg64farm: 64 bit code generator base for all ARM flavours;
    adds nothing to tcg64f32 in this declaration }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm: 64 bit code generator shared by normal ARM and Thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { 64 bit integer operations }
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;

    { transfers between a 64 bit integer register pair and an mm register }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { tarmcg64farm: declared without additions on top of tcg64farm }
  tarmcg64farm = class(tcg64farm)
  end;
  { tthumbcgarm: code generator derived directly from tbasecgarm
    (named for thumb code) }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    { procedure prologue/epilogue }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    { integer operations }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { loads }
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { tthumbcg64farm: 64 bit code generator companion of tthumbcgarm }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { tthumb2cgarm: Thumb-2 code generator, built on the parts shared with
    normal ARM (tcgarm) }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { loads }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    { integer operations }
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { procedure prologue/epilogue }
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;

    { mm register moves }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { tthumb2cg64farm: 64 bit code generator companion of tthumb2cgarm;
    only the register/register operation is overridden }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { assembler condition for every topcmp value; the entries are listed in
      the declaration order of topcmp (declared outside this unit) }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (
      C_NONE,
      C_EQ,C_GT,C_LT,C_GE,C_LE,
      C_NE,
      C_LS,C_CC,C_CS,C_HI
    );

    { NOTE(review): presumably the stack page size in bytes used on Windows
      targets - confirm against the code using it }
    winstackpagesize = 4096;

  { maps a float definition to the matching instruction postfix }
  function get_fpu_postfix(def : tdef) : toppostfix;

  procedure create_codegen;
  176. implementation
  177. uses
  178. globals,verbose,systems,cutils,
  179. aopt,aoptcpu,
  180. fmodule,
  181. symconst,symsym,symtable,
  182. tgobj,
  183. procinfo,cpupi,
  184. paramgr;
  185. { Range check must be disabled explicitly as conversions between signed and unsigned
  186. 32-bit values are done without explicit typecasts }
  187. {$R-}
  { Returns the instruction postfix matching the float type of def:
    PF_S for s32real, PF_D for s64real and PF_E for s80real.
    An internal error is raised when def is not a float definition
    (200401271) or when its float type is none of the above (200401272). }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real: result:=PF_S;
          s64real: result:=PF_D;
          s80real: result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators for normal ARM: the integer allocator
    (its register list depends on the target system and on the frame pointer
    in use), the FPA allocator when the selected fpu has FPA capabilities,
    and finally the mm register allocator. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { R14 is currently always saved, so it can be handed out as well }
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin: it always serves as frame pointer
          (for backtrace support, as in gcc/clang), which in turn leaves R11
          usable. r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer here, so it is part of the list }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { same list as above, but without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Frees the integer, fpu and mm register allocators and then lets the
    inherited implementation finish the clean-up. }
  procedure tarmcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg. The strategies are tried in this order:
      1. a single MOV if a is a shifter constant,
      2. a single MVN if not(a) is a shifter constant,
      3. MOV+ORR if a can be split into two shifter constants,
      4. MVN+BIC if not(a) can be split into two shifter constants,
      5. a pc-relative LDR from a 32 bit constant placed in the local data
         of the current procedure.
    Only 8, 16 and 32 bit sizes are accepted; any other size raises
    internal error 2002090907. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      shift : byte;
      poollabel : tasmlabel;
      poolref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      if is_shifter_const(a,shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_shifter_const(not(a),shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { constant built with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { constant built with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: store the constant behind a fresh label in the local
        data of the procedure and load it relative to the pc }
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);

      reference_reset(poolref,4,[]);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      { must be taken before the constant is appended: it has to refer to
        the label entry that was just added }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;

      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    If the source is at least as large as the destination, the load is done
    with the destination size. References whose alignment is 1 or 2 and
    smaller than the access size, as well as halfword accesses on cpus
    without CPUARM_HAS_ALL_MEM, are assembled from smaller loads that are
    combined with shifted ORR instructions; everything else is handed to
    handle_load_store with the postfix matching the size. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,partreg2 : tregister;
      shiftop : tshifterop;
      step : integer;
      limited_mem : boolean;

    { Makes partref usable for the partial loads. Complicated references
      (symbol, index register, offset outside -4095..maxoffset) and
      references using reg itself are first turned into an address in a new
      register; simple ones are copied unchanged. }
    procedure setup_partref(maxoffset : longint);
      var
        addrreg : tregister;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-4095) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    begin
      limited_mem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if TCGSize2Size[fromsize]>=TCGSize2Size[tosize] then
        fromsize:=tosize;

      { load postfix for signed as well as unsigned sizes }
      case fromsize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308297);
      end;

      { without CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned here and
        sign extended at the end of this procedure }
      if (fromsize=OS_S8) and limited_mem then
        postfix:=PF_B;

      if not(((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
             (limited_mem and (postfix in [PF_SH,PF_H]))) then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { walk through the parts starting with the least significant one }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case fromsize of
            OS_16,OS_S16:
              begin
                setup_partref(4094);
                if target_info.endian=endian_big then
                  inc(partref.offset,1);

                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                shiftop.shiftimm:=8;

                partreg:=getintregister(list,OS_INT);
                { low byte goes to reg, high byte is ORed in shifted by 8 }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                if fromsize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
              end;

            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                setup_partref(4092);

                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;

                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end
                else
                  begin
                    { four byte loads }
                    partreg2:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg2);
                    shiftop.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                    inc(partref.offset,step);
                    { partreg is free again and takes the last byte }
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg2,shiftop));
                    shiftop.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end;
              end;

            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;

      { fix up the result of signed byte loads }
      if fromsize=OS_S8 then
        begin
          if limited_mem then
            a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
          else if tosize=OS_16 then
            a_load_reg_reg(list,OS_16,OS_32,reg,reg);
        end;
    end;
  { Subtracts ioffset from the 'self' parameter of procdef at every
    caller-side location of that parameter. Register locations are adjusted
    directly, stack locations through a reference. When ioffset is not a
    shifter constant it is loaded into R12 first.
    Raises internal error 2003052503 if no 'self' parameter symbol is found
    and 2003091803 for locations that are neither register nor reference. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      unusedshift : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      selfsym:=tsym(procdef.parast.Find('self'));
      if not assigned(selfsym) or (selfsym.typ<>paravarsym) then
        internalerror(2003052503);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,unusedshift) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;

            LOC_REFERENCE:
              begin
                { the offset inside the wrapper has to account for the
                  stored return address }
                reference_reset_base(stackref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,unusedshift) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end;

            else
              internalerror(2003091803);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as a parameter. paraloc must be a simple location;
    the constant is either loaded into the parameter register or stored to
    the parameter's memory slot. Any other location kind raises internal
    error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      slotref: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);

      case paraloc.location^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);

        LOC_REFERENCE:
          begin
            { address the slot via the register and offset recorded in the
              parameter location }
            reference_reset(slotref,paraloc.alignment,[]);
            slotref.offset:=paraloc.location^.reference.offset;
            slotref.base:=paraloc.location^.reference.index;
            a_load_const_ref(list,size,a,slotref);
          end;

        else
          internalerror(2002081101);
      end;
    end;
  { Copies (part of) a parameter from ref to the parameter location
    reference paralocref. Only the case of an OS_F64 parameter with a 32 bit
    location is handled here, by copying 4 bytes; everything else is left
    to the inherited implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and
          references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Creates the mm register allocator according to the capabilities of the
    selected fpu type:
      - FPUARM_HAS_32REGS and FPUARM_HAS_VFP_DOUBLE: D0..D31 (R_SUBFD)
      - FPUARM_HAS_32REGS only: S0..S31 (R_SUBFS)
      - FPUARM_HAS_VFP_EXTENSION only: D0..D15 (R_SUBFD)
    No allocator is created if none of these capabilities is present. }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so for now only half of the
        single precision registers can be used (as sub registers of the
        double precision ones). }
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
                [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                 RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
                 RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                 RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                ],first_mm_imreg,[])
          else
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
                [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
                 RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,
                 RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
                 RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
                ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Passes the address of r as a parameter. paraloc must be a simple
    location: for a register location the address is computed straight into
    that register, for a memory location it is computed into a temporary
    register and then stored. Other location kinds raise internal error
    2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      slotref: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);

      case paraloc.location^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);

        LOC_REFERENCE:
          begin
            { compute the address first ... }
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            { ... then write it to the parameter slot }
            reference_reset(slotref,paraloc.alignment,[]);
            slotref.offset:=paraloc.location^.reference.offset;
            slotref.base:=paraloc.location^.reference.index;
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,slotref);
          end;

        else
          internalerror(2002080701);
      end;
    end;
  { Emits a BL to the function symbol s; with weak set, a weak reference to
    the symbol is used. The reference is marked addr_pic when the target
    uses a GOT for PIC and PIC code is being created, addr_full otherwise.
    pi_do_call is added to the flags of the current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : TAsmSymbol;
      callref : treference;
    begin
      if weak then
        target:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        target:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(callref,target,0,sizeof(pint),[]);

      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_pic
      else
        callref.refaddr:=addr_full;

      { BL is always used, as newer binutils apparently do not translate blx;
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,callref));

      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
            if not(pi_do_call in current_procinfo.flags) then
              internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg: a BLX if the cpu type has
    CPUARM_HAS_BLX, otherwise "mov r14,pc" followed by "mov pc,reg".
    pi_do_call is added to the flags of the current procedure. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: the mov/mov sequence should only be used
        for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
            if not(pi_do_call in current_procinfo.flags) then
              internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Applies Op with the constant a to reg in place, by forwarding to the
    three operand variant with reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,Op,size,a,reg,reg);
    end;
  { Applies Op with the constant a to the memory operand ref: the value is
    loaded into a temporary register, the operation writes its result to a
    second temporary, and that result is stored back through the reference
    returned by the load. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,resultreg : tregister;
      loadedref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);

      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,Op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { Two operand operation dst:=dst Op src.
    OP_NEG is emitted as "rsb dst,src,#0" followed by maybeadjustresult.
    OP_NOT is an MVN; for 8 and 16 bit sizes the operand is shifted left
    into the top bits and the result is shifted back right (arithmetically
    for signed sizes) afterwards.
    All other operations are forwarded to a_op_reg_reg_reg. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case Op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;

        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shiftop.shiftimm:=24
                else
                  shiftop.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));

                { using a shift here allows this to be folded into another
                  instruction }
                if size in [OS_S8, OS_S16] then
                  shiftop.shiftmode:=SM_ASR
                else
                  shiftop.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
              end;
          end;

        else
          a_op_reg_reg_reg(list,Op,size,src,dst,dst);
      end;
    end;
  const
    { Lookup tables indexed by TOpCG; the entries follow the declaration
      order of TOpCG (declared outside this unit). A_NONE marks operations
      without a direct opcode in the respective table.
      NOTE(review): which code paths consult which table is not visible in
      this part of the unit - confirm before changing entries. }

    { opcode per operation; the shift/rotate slots are A_NONE here }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
      A_NONE,A_NONE,A_NONE,
      A_SUB,A_EOR,A_NONE,A_NONE
    );

    { opcode per operation; same as above but with ASR/LSL/LSR and ROR
      filled in }
    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
      A_ASR,A_LSL,A_LSR,
      A_SUB,A_EOR,A_NONE,A_ROR
    );

    { instruction postfix per operation for thumb }
    op_reg_postfix_thumb: array[TOpCG] of TOpPostfix = (
      PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_S,
      PF_None,PF_S,PF_S,
      PF_None,PF_S,PF_None,PF_S
    );
  { dst:=src op a. Forwards to the overflow checking variant with setflags
    disabled; the overflow location it reports is discarded. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unused_ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,unused_ovloc);
    end;
  { Three register operation on src1, src2 and dst. Forwards to the overflow
    checking variant with setflags disabled; the overflow location it
    reports is discarded. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
    var
      unused_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,unused_ovloc);
    end;
  { Maps a shift/rotate operation to the shifter operand mode used to
    implement it. OP_ROL has no mode of its own and is mapped to SM_ROR; the
    callers in this unit adjust the rotate amount accordingly.
    Any other operation raises internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace the 32 bit multiplication dst := src * a by a short
    sequence of MOV/ADD/SUB/RSB instructions with shifted operands.

    Returns true when a replacement sequence has been appended to list, and
    false when nothing was emitted and the caller has to generate a real
    multiplication. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      factor : dword;
      pow2exp : longint;
      so : tshifterop;
      addcount : byte;
      isnegative : boolean;
      firstinstr, foldhalves: boolean;
      unusedshift,
      extracost : byte;
      budget : byte;
      topbit,n,foldshift: DWord;

    { Common tail of both expansion strategies: undo the "symmetric pattern"
      reduction and apply the sign of the original constant. }
    procedure emit_tail;
      begin
        if foldhalves then
          begin
            so.shiftimm:=foldshift;
            list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
          end;
        if isnegative then
          list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
      end;

    begin
      result:=true;
      extracost:=0;
      isnegative:=a<0;
      so.rs:=NR_NO;
      so.shiftmode:=SM_LSL;
      { a negative constant costs one additional RSB at the end }
      if isnegative then
        inc(extracost);
      factor:=dword(abs(a));

      { Heuristics to estimate how many instructions are reasonable to replace
        the mul; this is currently based on XScale timings.

        In the simplest case, we need a mov to load the constant and a mul to
        carry out the actual multiplication, which requires min. 1+4 cycles.
        Because the first shift imm. might cause a stall and because we need
        more instructions when replacing the mul, we generate max. 3
        instructions to replace this mul. }
      budget:=3;

      { if the constant is not a shifter op, we need either some
        mov/mvn/bic/or sequence or a ldr, so generating one more operation to
        replace this is beneficial }
      if not(is_shifter_const(dword(a),unusedshift)) and
         not(is_shifter_const(not(dword(a)),unusedshift)) then
        inc(budget);

      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(budget);

      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(budget);

      { "symmetric" bit pattern like $10101010 where
          res:=a*$10101010
        can be simplified into
          temp:=a*$1010
          res:=temp+temp shl 16 }
      foldhalves:=false;
      topbit:=BsrDWord(factor);
      foldshift:=0;
      if (budget>1) and (topbit>2) then
        begin
          for n:=2 to 31 do
            if (factor shr n)=(factor and ($ffffffff shr (32-n))) then
              begin
                foldhalves:=true;
                foldshift:=n;
                dec(budget);
                factor:=factor shr foldshift;
                break;
              end;
        end;

      { number of set bits above bit 0 }
      addcount:=popcnt(factor and $fffffffe);

      { most simple cases }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { add up?
        Basically, one add is needed for each bit being set in the constant
        factor; however, the least significant bit is for free, it can be
        hidden in the initial instruction. }
      else if (addcount+extracost<=budget) and
        (addcount<=popcnt(dword(nextpowerof2(factor,pow2exp)-factor) and $fffffffe)) then
        begin
          firstinstr:=true;
          while factor<>0 do
            begin
              so.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
                  dec(factor);
                end
              else
                if firstinstr then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,so));
              firstinstr:=false;
              dec(factor,1 shl so.shiftimm);
            end;
          emit_tail;
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(factor,pow2exp)-factor) and $fffffffe)+extracost+1<=budget then
        begin
          firstinstr:=true;
          while factor<>0 do
            begin
              if firstinstr then
                begin
                  { from here on work on the difference to the power of two }
                  factor:=(1 shl pow2exp)-factor;
                  so.shiftimm:=pow2exp;
                end
              else
                so.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
                  dec(factor);
                end
              else
                if firstinstr then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,so));
                    dec(factor,1 shl so.shiftimm);
                  end;
              firstinstr:=false;
            end;
          emit_tail;
        end
      else
        result:=false;
    end;
  { Emits dst := src op a for a constant a.

    When setflags (or the cgsetflags field) is set, the flag-setting form of
    the instruction is used where one is emitted directly, and for ADD/SUB on
    OS_8/OS_16/OS_32 ovloc is set to the flags condition describing the
    overflow. Otherwise ovloc.loc is LOC_VOID.

    The constant is first simplified by optimize_op_const; afterwards a
    number of special encodings are tried before falling back to loading the
    constant into a temporary register. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, masklsb, maskwidth : byte;
      constreg : tregister;
      shiftop : tshifterop;
      log2val : longint;
      part1, part2: DWord;
      wantflags : boolean;
    begin
      optimize_op_const(size, op, a);
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
        else
          ;
      end;
      ovloc.loc:=LOC_VOID;
      wantflags:=cgsetflags or setflags;

      { add/sub of a constant whose negation is encodable: flip the operation }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=opshift2shiftmode(op);
              { rotate left by a is emitted as rotate right by 32-a }
              if op = OP_ROL then
                shiftop.shiftimm:=32-a
              else
                shiftop.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
            end;
          else
            begin
              { the constant fits the instruction directly }
              if wantflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(wantflags)*ord(PF_S))));
              if wantflags and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      internalerror(2019050922);
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,log2val) and not(wantflags) then
            a_op_const_reg_reg(list,OP_SHL,size,log2val,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,log2val) and not(wantflags) then
            begin
              if log2val>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,log2val) and not(wantflags) then
            begin
              if log2val>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(wantflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
                  is_continuous_mask(aword(a), masklsb, maskwidth) and
                  ((masklsb = 0) or ((masklsb + maskwidth) = 32)) then
            begin
              shifterop_reset(shiftop);
              if (masklsb = 0) and
                 (maskwidth = 16) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (masklsb = 0) and
                 (maskwidth = 8) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if masklsb = 0 then
                begin
                  { mask at the bottom: shift the unwanted top bits out and back }
                  shiftop.shiftmode:=SM_LSL;
                  shiftop.shiftimm:=32-maskwidth;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                  shiftop.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
                end
              else
                begin
                  { mask at the top: shift the unwanted bottom bits out and back }
                  shiftop.shiftmode:=SM_LSR;
                  shiftop.shiftimm:=masklsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                  shiftop.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,part2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
                  not(wantflags) and
                  split_into_shifter_const(a, part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,part2));
            end
          else
            begin
              { general case: materialise the constant in a register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src2 op src1.

    When setflags (or the cgsetflags field) is set, the flag-setting form of
    the instruction is used for the plain arithmetic/logical operations, and
    for multiplications on CPUs with UMULL/SMULL a long multiply followed by
    a compare is emitted, with ovloc describing the overflow condition (F_NE).
    In every other case ovloc.loc stays LOC_VOID.

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not handled here and raise
    internalerror 200308283. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { shift src2 by the amount held in src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { overflow check requested: do a 32x32->64 multiply and
                  inspect the upper half afterwards }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: the
                          operand has to be copied into the temporary, which
                          then serves as rm. (The copy used to target dst,
                          which left the temporary uninitialised.) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper half must equal the sign extension of
                      the lower half }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper half must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { all three registers identical: copy the operand into
                          the temporary so that rm differs from rd. (The copy
                          used to target dst, which left the temporary
                          uninitialised.) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result in the register pair
    dstlo/dsthi.

    With UMULL support a long multiply is emitted (UMULL for OS_32, SMULL for
    OS_S32, any other size raises internalerror 2014060802); missing
    destination registers are replaced by fresh temporaries.
    Without UMULL support only the low half can be produced: a plain MUL is
    emitted when dsthi is NR_NO, otherwise internalerror 2015083022 is raised. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
        end
      else
        begin
          list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
          case size of
            OS_32:
              mulop:=A_UMULL;
            OS_S32:
              mulop:=A_SMULL;
            else
              InternalError(2014060802);
          end;
          { The caller might omit dstlo or dsthi when he is not interested in
            it; we still need valid registers everywhere. In case of
            dsthi = NR_NO we could fall back to a 32x32=32 bit multiplication. }
          if (dstlo = NR_NO) then
            dstlo:=getintregister(list,size);
          if (dsthi = NR_NO) then
            dsthi:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
        end;
    end;
  { Emits the load/store instruction op (with postfix oppostfix) for register
    reg and memory operand ref.

    ref is passed by value and rewritten locally until the instruction can
    encode it: references that cannot be used directly are handed to fixref,
    Thumb register restrictions are resolved by copying into temporaries, a
    base+index+offset combination is folded, and for floating point / VFP
    instructions the index is folded into the base.

    Returns the reference that was finally used in the instruction. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1 : tregister;
    begin
      { tmpreg1 tracks the scratch register (if any) currently used inside ref }
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { Always add the offset to the base and let the reference use the
            sum. Previously, when a scratch register was already in use, the
            sum went into a second scratch register that was never stored
            back into ref, so the offset was silently dropped once
            ref.offset was cleared below. }
          tmpreg1:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
          ref.base:=tmpreg1;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { the scratch register is the base: accumulate into it }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { the scratch register has to be the index then }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { no scratch register yet: compute the address in a new one,
                honouring a negative index like the two branches above do }
              tmpreg1:=getintregister(list,OS_ADDR);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register reg to memory at ref using the width given by tosize.

    If the reference is only 1 or 2 byte aligned and narrower than the store,
    or if a halfword store is needed on a CPU without CPUARM_HAS_ALL_MEM, 16
    and 32 bit values are written piecewise (bytes or halfwords), taking the
    target endianness into account. Everything else is a single STR. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      storepostfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step : integer;
      bigendian : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          storepostfix:=PF_B;
        OS_16,
        OS_S16:
          storepostfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          storepostfix:=PF_None;
        else
          InternalError(2003082912);
      end;

      { the common case: one store instruction is enough }
      if not(((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
             ((storepostfix=PF_H) and
              not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]))) then
        begin
          handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;

      { piecewise store: the least significant part is written first, so on
        big endian targets we start at the highest address and walk down }
      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;

      case FromSize of
        OS_16,OS_S16:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if bigendian then
              inc(partref.offset,1);
            { low byte }
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            { high byte }
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
            a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          end;
        OS_32,OS_S32:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                if bigendian then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
              end
            else
              begin
                { four byte stores }
                if bigendian then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
              end;
          end
        else
          handle_load_store(list,A_STR,storepostfix,reg,ref);
      end;
    end;
  { Stores reg to ref with the width given by tosize and returns the
    reference that handle_load_store actually used for the (first) store.

    On CPUs without CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte
    stores: the low byte at the reference and the value shifted right by 8
    at the following address. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepostfix:toppostfix;
      nextref: treference;
      highbyte: TRegister;
    begin
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          storepostfix:=PF_B;
        OS_16,
        OS_S16:
          storepostfix:=PF_H;
        OS_32,
        OS_S32:
          storepostfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      if not(tosize in [OS_S16,OS_16]) or
         (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        result:=handle_load_store(list,A_STR,storepostfix,reg,ref)
      else
        begin
          { no halfword store available: write the two bytes separately }
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          highbyte:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,highbyte);
          nextref:=result;
          inc(nextref.offset);
          handle_load_store(list,A_STR,PF_B,highbyte,nextref);
        end;
    end;
  { Loads reg from ref and returns the reference that handle_load_store
    actually used for the (first) load. The load postfix is derived from
    fromsize.

    On CPUs without CPUARM_HAS_ALL_MEM:
      - tosize=OS_S8 is loaded as an unsigned byte and then extended via
        a_load_reg_reg(OS_S8 -> OS_32);
      - 16 bit values are assembled from two byte loads, the byte at the
        following address being ORed in shifted left by 8. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpostfix:toppostfix;
      shiftop: tshifterop;
      highbyte: TRegister;
      nextref: treference;
    begin
      case FromSize of
        { signed integer registers }
        OS_8:
          loadpostfix:=PF_B;
        OS_S8:
          loadpostfix:=PF_SB;
        OS_16:
          loadpostfix:=PF_H;
        OS_S16:
          loadpostfix:=PF_SH;
        OS_32,
        OS_S32:
          loadpostfix:=PF_None;
        else
          InternalError(200308291);
      end;

      if (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) or
         not(tosize in [OS_S8,OS_S16,OS_16]) then
        result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref)
      else if tosize=OS_S8 then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else
        begin
          { 16 bit: low byte first, then the byte at the next address }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          highbyte:=getintregister(list,OS_INT);
          nextref:=result;
          inc(nextref.offset);
          handle_load_store(list,A_LDR,PF_B,highbyte,nextref);
          shifterop_reset(shiftop);
          shiftop.shiftmode:=SM_LSL;
          shiftop.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,highbyte,shiftop));
        end;
    end;
  { Moves reg1 to reg2, converting from fromsize to tosize.

    When the sizes differ, the value is zero or sign extended according to
    the smaller of the two sizes: with shift pairs / AND on CPUs before
    ARMv6, with UXTB/SXTB/UXTH/SXTH (and SXTB16 for OS_S8 -> OS_16) from
    ARMv6 on. When no conversion instruction was emitted and the registers
    differ, a plain MOV is generated and reported to the register allocator
    through add_move_instruction.

    NOTE(review): the sanity check compares tcgsize2size[] against 32; whether
    32 bytes is the intended limit cannot be told from this function alone. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      shiftop : tshifterop;

    { Emits reg2 := reg shifted by shiftimm using shiftmode. Thumb code goes
      through a_op_const_reg_reg, everything else uses a MOV with shifter
      operand. }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            shiftop.shiftmode:=shiftmode;
            shiftop.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,shiftop));
          end
        else
          begin
            case shiftmode of
              SM_ASR:
                a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
              SM_LSR:
                a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
              SM_LSL:
                a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
              else
                internalerror(2013090301);
            end;
          end;
      end;

    var
      movinstr: taicpu;
      handled: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      handled:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(shiftop);
          handled:=true;
          { the narrower of both sizes decides about the extension }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype>=cpu_armv6 then
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize=OS_16 then
                    begin
                      shiftop.shiftmode:=SM_ROR;
                      shiftop.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,shiftop));
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                handled:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                handled:=false;
            end
        end;

      if not handled and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.

    All locations of paraloc are processed in order; href tracks the part of
    the source value that belongs to the current location and is advanced by
    the size of each location after it has been handled.
      - FPU register locations are loaded with a_loadfpu_ref_reg,
      - integer register locations with a_load_ref_reg (64 bit ones are
        delegated to cg64.a_load64_ref_cgpara),
      - memory locations are filled with g_concatcopy.
    Any other location kind raises internalerror 200408241. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { allocate the location being filled and read from the current
                  source position; for the first location this is identical to
                  paraloc.location / ref, which the code used unconditionally
                  before }
                paramanager.allocparaloc(list,hloc);
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    { see above: allocate the current, not the first location }
                    paramanager.allocparaloc(list,hloc);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Copies FPU register reg1 to reg2 with an MVF instruction whose precision
    postfix is taken from cgsize2fpuoppostfix[tosize]. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      movinstr : taicpu;
    begin
      movinstr:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(movinstr,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register reg from memory at ref with an LDF instruction whose
    precision postfix is selected by fromsize (other sizes raise
    internalerror 200309021). If tosize differs from fromsize, the value is
    afterwards converted in place through a_loadfpu_reg_reg. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision:toppostfix;
    begin
      case fromsize of
        OS_32,OS_F32:
          precision:=PF_S;
        OS_64,OS_F64:
          precision:=PF_D;
        OS_F80:
          precision:=PF_E;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref with an STF instruction.
    The precision postfix is chosen from tosize (OS_F32 -> S, OS_F64 -> D,
    OS_F80 -> E); any other size raises internalerror 200309022. fromsize is
    not used. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      storepf : toppostfix;
    begin
      case tosize of
        OS_F80:
          storepf:=PF_E;
        OS_F64:
          storepf:=PF_D;
        OS_F32:
          storepf:=PF_S;
        else
          InternalError(200309022);
      end;
      handle_load_store(list,A_STF,storepf,reg,ref);
    end;
  { Emits a check of the FPSCR and a call to FPC_THROWFPUEXCEPTION if any of
    the tested status bits is set.

    force - emit the check even if the current procedure did not request one
    clear - reset current_procinfo.FPUExceptionCheckNeeded afterwards

    Nothing is generated when needs_check_for_fpu_exceptions is false, or when
    neither force nor the per-procedure flag asks for a check. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipjump : taicpu;
      nofault : TAsmLabel;
    begin
      if not(needs_check_for_fpu_exceptions) or
         not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { read the status register and mask it with $9f, setting the flags }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { skip the helper call if the masked value is zero }
      current_asmdata.getjumplabel(nofault);
      skipjump:=taicpu.op_sym(A_B,nofault);
      skipjump.is_jmp:=true;
      skipjump.condition:=C_EQ;
      list.concat(skipjump);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      a_label(list,nofault);
      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1621. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.

    The constant is encoded as an immediate of CMP when possible (shifter
    constant in ARM mode, thumb immediate in Thumb mode). Otherwise, in
    non-Thumb code, CMN with the negated constant is tried, and as a last
    resort the constant is loaded into a temporary register first. The flags
    register is allocated around the compare and the conditional jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      fitsimmediate : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      if GenerateThumbCode then
        fitsimmediate:=is_thumb_imm(a)
      else
        fitsimmediate:=is_shifter_const(a,rot);
      if fitsimmediate then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { no immediate form available: materialise the constant }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.

    reverse=true : CLZ, then dst:=31-dst, then dst is masked with 255.
    reverse=false: RBIT followed by CLZ; if the count equals 32 the result
                   is replaced by $ff (inside an IT block for Thumb-2).

    srcsize and dstsize are not inspected here. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          { the flags are live between the compare and the conditional move }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits CMP reg2,reg1 followed by a conditional jump to l for cmp_op. The
    flags register is marked allocated for the duration of the sequence.
    size is not inspected here. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s. In Thumb
    code BL is used as the opcode, otherwise B; the instruction is flagged
    as a jump in both cases. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      branchop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        branchop:=A_BL
      else
        branchop:=A_B;
      jump:=taicpu.op_sym(branchop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to label l. In Thumb code BL is used as the
    opcode, otherwise B; the instruction is flagged as a jump in both cases. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      branchop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        branchop:=A_BL
      else
        branchop:=A_B;
      jump:=taicpu.op_sym(branchop,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to l that is taken when the flags f are set.

    ARM/Thumb-2: a single conditional B.
    Thumb:       a conditional B with the inverted condition over an
                 unconditional jump emitted by a_jmp_always, i.e.
                   b<not f> skip; <jump to l>; skip: }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
        end
      else
        begin
          negated:=f;
          inverse_flags(negated);
          { the optimizer has to fix this if jump range is sufficient short }
          current_asmdata.getjumplabel(skiplabel);
          branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
          branch.is_jmp:=true;
          list.concat(branch);
          a_jmp_always(list,l);
          a_label(list,skiplabel);
        end;
    end;
  { Materialises the flags f in reg using two conditional moves: reg:=1
    under the condition matching f and reg:=0 under the inverse condition.
    size is not inspected here. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      setcond : tasmcond;
    begin
      setcond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),setcond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(setcond)));
    end;
  { Emits the profiling hook: on arm-linux R14 is pushed and
    __gnu_mcount_nc is called. Every other target raises internalerror
    2014091201. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Generates the procedure prologue.

    list         - assembler list receiving the prologue
    localsize    - size of the local area, rounded up to a multiple of 4 here
    nostackframe - if true only the bookkeeping at the top is done and no
                   code is emitted

    Steps, in emission order:
      1. determine which FPA/VFP registers have to be preserved
      2. push the integer registers (one STMFD, or two on Darwin) and emit
         the matching CFI
      3. lower the stack pointer by the (re-aligned) local size
      4. store the FPA/VFP registers at floatregstart }
  procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      saveref : treference;
      rot : byte;
      firstfloatreg,lastfloatreg,
      regidx : byte;
      mmregs,
      regs, saveregs : tcpuregisterset;
      savedbytes, cfaoffs,
      r7delta,
      misalign : pint;
      part1, part2: DWord;
      stack_parameters : Boolean;
    begin
      LocalSize:=align(LocalSize,4);
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { call instruction does not put anything on the stack }
      savedbytes:=0;
      tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
      lastfloatreg:=RS_NO;

      if nostackframe then
        exit;

      firstfloatreg:=RS_NO;
      mmregs:=[];
      case current_settings.fputype of
        fpu_none,
        fpu_soft,
        fpu_libgcc:
          ;
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            { save floating point registers? }
            regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
            for regidx:=RS_F0 to RS_F7 do
              if regidx in regs then
                begin
                  if firstfloatreg=RS_NO then
                    firstfloatreg:=regidx;
                  lastfloatreg:=regidx;
                  inc(savedbytes,12);
                end;
          end;
        else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
          { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
            they have numbers>$1f which is not really correct as they should simply have the same numbers
            as the even ones by with a different subtype as it is done on x86 with al/ah }
          mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31]
        else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
          { same hack as above, restricted to [0..15] for FPUs without 32 registers }
          mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15]
        else
          internalerror(2019050924);
      end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { save int registers }
      reference_reset(saveref,4,[]);
      saveref.index:=NR_STACK_POINTER_REG;
      saveref.addressmode:=AM_PREINDEXED;
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

      if not(target_info.system in systems_darwin) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              a_reg_alloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
              regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15];
            end
          else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);

          if regs<>[] then
            begin
              for regidx:=RS_R0 to RS_R15 do
                if regidx in regs then
                  inc(savedbytes,4);
              { if the stack is not 8 byte aligned, try to add an extra register,
                so we can avoid the extra sub/add ...,#4 later (KB) }
              if (savedbytes mod current_settings.alignment.localalignmax)<>0 then
                for regidx:=RS_R3 downto RS_R0 do
                  if not(regidx in regs) then
                    begin
                      include(regs,regidx);
                      inc(savedbytes,4);
                      tcpuprocinfo(current_procinfo).stackpaddingreg:=regidx;
                      break;
                    end;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,saveref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,savedbytes);
            end;

          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { describe the slot of every pushed register, highest register first }
              cfaoffs:=-4;
              for regidx:=RS_R15 downto RS_R0 do
                if regidx in regs then
                  begin
                    current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,regidx,R_SUBWHOLE),cfaoffs);
                    dec(cfaoffs,4);
                  end;
              { the framepointer now points to the saved R15, so the saved
                framepointer is at R11-12 (for get_caller_frame) }
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
              a_reg_dealloc(list,NR_R12);
              current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
            end;
        end
      else
        begin
          { always save r14 if we use r7 as the framepointer, because
            the parameter offsets are hardcoded in advance and always
            assume that r14 sits on the stack right behind the saved r7 }
          if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
            include(regs,RS_FRAME_POINTER_REG);
          if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);
          if regs<>[] then
            begin
              { on Darwin, you first have to save [r4-r7,lr], and then
                [r8,r10,r11] and make r7 point to the previously saved
                r7 so that you can perform a stack crawl based on it
                ([r7] is previous stack frame, [r7+4] is return address) }
              include(regs,RS_FRAME_POINTER_REG);
              saveregs:=regs-[RS_R8,RS_R10,RS_R11];
              r7delta:=0;
              for regidx:=RS_R0 to RS_R15 do
                if regidx in saveregs then
                  begin
                    inc(savedbytes,4);
                    if regidx<RS_FRAME_POINTER_REG then
                      inc(r7delta,4);
                  end;
              { save the registers }
              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,saveref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
              { make r7 point to the saved r7 (regardless of whether this
                frame uses the framepointer, for backtrace purposes) }
              if r7delta<>0 then
                list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7delta))
              else
                list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
              { now save the rest (if any) }
              saveregs:=regs-saveregs;
              if saveregs<>[] then
                begin
                  for regidx:=RS_R8 to RS_R11 do
                    if regidx in saveregs then
                      inc(savedbytes,4);
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,saveref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                end;
            end;
        end;

      { reserve the local area, compensating for a misaligned register area }
      misalign:=savedbytes mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign;
          if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
            begin
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                internalerror(2014030901)
              else
                localsize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes;
            end;
          if is_shifter_const(localsize,rot) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else if split_into_shifter_const(localsize, part1, part2) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,part1));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,part2));
            end
          else
            begin
              { the size does not fit into immediates: go through R12 }
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
          if current_procinfo.framepointer=NR_STACK_POINTER_REG then
            current_asmdata.asmcfi.cfa_def_cfa_offset(list,savedbytes+localsize);
        end;

      { store the floating point / vfp registers }
      if (mmregs<>[]) or
         (firstfloatreg<>RS_NO) then
        begin
          reference_reset(saveref,4,[]);
          if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
             (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
            begin
              { compute the address of the save area in R12 }
              if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,rot) then
                begin
                  a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end
              else
                list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
              saveref.base:=NR_R12;
            end
          else
            begin
              saveref.base:=current_procinfo.framepointer;
              saveref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;

          case current_settings.fputype of
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                lastfloatreg-firstfloatreg+1,saveref));
            else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
              begin
                { VSTM takes the address register in the index field }
                saveref.index:=saveref.base;
                saveref.base:=NR_NO;
                if mmregs<>[] then
                  list.concat(taicpu.op_ref_regset(A_VSTM,saveref,R_MMREGISTER,R_SUBFD,mmregs));
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                saveref.index:=saveref.base;
                saveref.base:=NR_NO;
                if mmregs<>[] then
                  list.concat(taicpu.op_ref_regset(A_VSTM,saveref,R_MMREGISTER,R_SUBFS,mmregs));
              end
            else
              internalerror(2019050923);
          end;
        end;
    end;
  { Generates the procedure epilogue.

    list         - assembler list receiving the epilogue
    parasize     - not used by this implementation
    nostackframe - if true only the return instruction is emitted
                   (BX R14 if the CPU has BX, MOV PC,R14 otherwise)

    Steps, in emission order:
      1. reload the preserved FPA/VFP registers from floatregstart
      2. rebuild the set of integer registers pushed by the prologue,
         including the stack padding register
      3. if the stack pointer is the frame pointer (or on Darwin): release
         the local area, then pop the registers (loading PC returns)
      4. otherwise restore everything relative to the frame pointer with a
         single LDMEA

    The set of VFP registers restored here has to be identical to the set
    stored by g_proc_entry, so the same capability based masks are used. }
  procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      LocalSize : longint;
      firstfloatreg,lastfloatreg,
      r,
      shift : byte;
      mmregs,
      saveregs,
      regs : tcpuregisterset;
      registerarea,
      stackmisalignment: pint;
      paddingreg: TSuperRegister;
      imm1, imm2: DWord;
    begin
      if not(nostackframe) then
        begin
          registerarea:=0;
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          mmregs:=[];
          saveregs:=[];
          case current_settings.fputype of
            fpu_none,
            fpu_soft,
            fpu_libgcc:
              ;
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { restore floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { floating point register space is already included in
                        localsize below by calc_stackframe_size
                        inc(registerarea,12);
                      }
                    end;
              end;
            else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
              begin
                { restore vfp registers? }
                { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                  they have numbers>$1f which is not really correct as they should simply have the same numbers
                  as the even ones by with a different subtype as it is done on x86 with al/ah }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
              end
            else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
              begin
                { restore vfp registers? }
                { same hack, restricted to [0..15]: this is the mask
                  g_proc_entry applies for FPUs without 32 registers, and
                  the restored set must not exceed the saved one }
                mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
              end
            else
              internalerror(2019050908);
          end;

          if (firstfloatreg<>RS_NO) or
             (mmregs<>[]) then
            begin
              reference_reset(ref,4,[]);
              if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                begin
                  { compute the address of the save area in R12 }
                  { NOTE(review): the immediate emitted in the else branch is
                    -floatregstart while the encodability test is done on
                    floatregstart; g_proc_entry does the same - confirm
                    whether the test should use the negated value }
                  if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                  begin
                    { VLDM takes the address register in the index field }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                  end
                else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                  begin
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    if mmregs<>[] then
                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                  end
                else
                  internalerror(2019050921);
              end;
            end;

          { rebuild the set of integer registers pushed by the prologue;
            the saved R14 is loaded straight into R15 to return }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or
             (regs<>[]) or
             ((target_info.system in systems_darwin) and
              (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
            begin
              exclude(regs,RS_R14);
              include(regs,RS_R15);
              if (target_info.system in systems_darwin) then
                include(regs,RS_FRAME_POINTER_REG);
            end;

          if not(target_info.system in systems_darwin) then
            begin
              { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
                The saved PC came after that but is discarded, since we restore
                the stack pointer }
              if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
            end
          else
            begin
              { restore R8-R11 already if necessary (they've been stored
                before the others) }
              saveregs:=regs*[RS_R8,RS_R10,RS_R11];
              if saveregs<>[] then
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  for r:=RS_R8 to RS_R11 do
                    if r in saveregs then
                      inc(registerarea,4);
                  regs:=regs-saveregs;
                end;
            end;

          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);

          { reapply the stack padding reg, in case there was one, see the complimentary
            comment in g_proc_entry() (KB) }
          paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
          if paddingreg < RS_R4 then
            if paddingreg in regs then
              internalerror(201306190)
            else
              begin
                regs:=regs+[paddingreg];
                inc(registerarea,4);
              end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin) then
            begin
              { release the local area by adding its size back to SP }
              LocalSize:=current_procinfo.calc_stackframe_size;
              if (LocalSize<>0) or
                 ((stackmisalignment<>0) and
                  ((pi_do_call in current_procinfo.flags) or
                   (po_assembler in current_procinfo.procdef.procoptions))) then
                begin
                  if pi_estimatestacksize in current_procinfo.flags then
                    LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                  else
                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                  if is_shifter_const(LocalSize,shift) then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                  else if split_into_shifter_const(localsize, imm1, imm2) then
                    begin
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                    end
                  else
                    begin
                      { the size does not fit into immediates: go through R12 }
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end;
                end;

              { Darwin: pop the second group first, ref was prepared above }
              if (target_info.system in systems_darwin) and
                 (saveregs<>[]) then
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));

              if regs=[] then
                begin
                  { nothing was pushed: plain return through R14 }
                  if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                  else
                    list.concat(taicpu.op_reg(A_BX,NR_R14))
                end
              else
                begin
                  reference_reset(ref,4,[]);
                  ref.index:=NR_STACK_POINTER_REG;
                  ref.addressmode:=AM_PREINDEXED;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;
            end
          else
            begin
              { restore int registers and return }
              reference_reset(ref,4,[]);
              ref.index:=NR_FRAME_POINTER_REG;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
            end;
        end
      else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14))
    end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.

    Code is only generated when PIC is enabled, the current procedure is
    flagged with pi_needs_got and the target uses a GOT for PIC. The
    PC-relative distance to _GLOBAL_OFFSET_TABLE_ is placed in the local
    data of the procedure, loaded into R12, and PC is added to it. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel, anchorlabel : TAsmLabel;
      usedregs : tcpuregisterset;
      regidx : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before GOT initialization code, allocate registers used for procedure parameters
        to prevent usage of these registers for temp operations in later stages of code
        generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for regidx:=RS_R0 to RS_R3 do
        if regidx in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,regidx,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { constant pool entry: label in the local data, loaded PC-relative }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { the pool entry holds the GOT symbol relative to the anchor label,
        with an extra offset of -8 }
      current_asmdata.getaddrlabel(anchorlabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,anchorlabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,anchorlabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for regidx:=RS_R3 downto RS_R0 do
        if regidx in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,regidx,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.

    list - assembler list receiving the instructions
    ref  - reference whose address is taken; must use AM_OFFSET addressing
    r    - destination register

    Internal errors:
      200309071  - ref uses an addressing mode other than AM_OFFSET
      2014020702 - index without base combined with a shift
      200312023  - index without base combined with a negative index sign
      200312022  - index present but no base after normalisation
      200312021  - base+index combined with a shift

    References with a symbol, or with an offset that is not encodable as
    shifter constant (neither itself nor negated), are simplified through
    fixref first. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      tmpref:=ref;
      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;

      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);

      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);

      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          if tmpref.signindex<0 then
            { a_op_reg_reg_reg computes dst:=src2 op src1, so the index has
              to be passed as src1 to obtain r:=base-index }
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            { plain constant address }
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                { base only: a move the register allocator may remove }
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                add_move_instruction(instr);
              end;
        end;
    end;
  2302. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2303. var
  2304. tmpreg, tmpreg2 : tregister;
  2305. tmpref : treference;
  2306. l, piclabel : tasmlabel;
  2307. indirection_done : boolean;
  2308. begin
  2309. { absolute symbols can't be handled directly, we've to store the symbol reference
  2310. in the text segment and access it pc relative
  2311. For now, we assume that references where base or index equals to PC are already
  2312. relative, all other references are assumed to be absolute and thus they need
  2313. to be handled extra.
  2314. A proper solution would be to change refoptions to a set and store the information
  2315. if the symbol is absolute or relative there.
  2316. }
  2317. { create consts entry }
  2318. reference_reset(tmpref,4,[]);
  2319. current_asmdata.getjumplabel(l);
  2320. cg.a_label(current_procinfo.aktlocaldata,l);
  2321. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2322. piclabel:=nil;
  2323. tmpreg:=NR_NO;
  2324. indirection_done:=false;
  2325. if assigned(ref.symbol) then
  2326. begin
  2327. if (target_info.system=system_arm_ios) and
  2328. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2329. begin
  2330. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2331. if ref.offset<>0 then
  2332. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2333. indirection_done:=true;
  2334. end
  2335. else if ref.refaddr=addr_gottpoff then
  2336. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2337. else if ref.refaddr=addr_tlsgd then
  2338. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2339. else if ref.refaddr=addr_tlsdesc then
  2340. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2341. else if ref.refaddr=addr_tpoff then
  2342. begin
  2343. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2344. Internalerror(2019092804);
  2345. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2346. end
  2347. else if (cs_create_pic in current_settings.moduleswitches) then
  2348. if (tf_pic_uses_got in target_info.flags) then
  2349. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2350. else
  2351. begin
  2352. { ideally, we would want to generate
  2353. ldr r1, LPICConstPool
  2354. LPICLocal:
  2355. ldr/str r2,[pc,r1]
  2356. ...
  2357. LPICConstPool:
  2358. .long _globsym-(LPICLocal+8)
  2359. However, we cannot be sure that the ldr/str will follow
  2360. right after the call to fixref, so we have to load the
  2361. complete address already in a register.
  2362. }
  2363. current_asmdata.getaddrlabel(piclabel);
  2364. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2365. end
  2366. else
  2367. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2368. end
  2369. else
  2370. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2371. { load consts entry }
  2372. if not indirection_done then
  2373. begin
  2374. tmpreg:=getintregister(list,OS_INT);
  2375. tmpref.symbol:=l;
  2376. tmpref.base:=NR_PC;
  2377. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2378. if (cs_create_pic in current_settings.moduleswitches) and
  2379. (tf_pic_uses_got in target_info.flags) and
  2380. assigned(ref.symbol) then
  2381. begin
  2382. {$ifdef EXTDEBUG}
  2383. if not (pi_needs_got in current_procinfo.flags) then
  2384. Comment(V_warning,'pi_needs_got not included');
  2385. {$endif EXTDEBUG}
  2386. Include(current_procinfo.flags,pi_needs_got);
  2387. reference_reset(tmpref,4,[]);
  2388. tmpref.base:=current_procinfo.got;
  2389. tmpref.index:=tmpreg;
  2390. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2391. if ref.offset<>0 then
  2392. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2393. end;
  2394. end;
  2395. if assigned(piclabel) then
  2396. begin
  2397. cg.a_label(list,piclabel);
  2398. tmpreg2:=getaddressregister(list);
  2399. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2400. tmpreg:=tmpreg2
  2401. end;
  2402. { This routine can be called with PC as base/index in case the offset
  2403. was too large to encode in a load/store. In that case, the entire
  2404. absolute expression has been re-encoded in a new constpool entry, and
  2405. we have to remove the use of PC from the original reference (the code
  2406. above made everything relative to the value loaded from the new
  2407. constpool entry) }
  2408. if is_pc(ref.base) then
  2409. ref.base:=NR_NO;
  2410. if is_pc(ref.index) then
  2411. ref.index:=NR_NO;
  2412. if (ref.base<>NR_NO) then
  2413. begin
  2414. if ref.index<>NR_NO then
  2415. begin
  2416. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2417. ref.base:=tmpreg;
  2418. end
  2419. else
  2420. if ref.base<>NR_PC then
  2421. begin
  2422. ref.index:=tmpreg;
  2423. ref.shiftimm:=0;
  2424. ref.signindex:=1;
  2425. ref.shiftmode:=SM_None;
  2426. end
  2427. else
  2428. ref.base:=tmpreg;
  2429. end
  2430. else
  2431. ref.base:=tmpreg;
  2432. ref.offset:=0;
  2433. ref.symbol:=nil;
  2434. end;
  { Copies len bytes from source to dest by emitting a call to the RTL
    routine FPC_MOVE; the parameter locations are taken from the system
    unit's MOVE procedure. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,countpara : TCGPara;
      moveproc : tprocdef;
    begin
      moveproc:=search_system_proc('MOVE');
      srcpara.init;
      dstpara.init;
      countpara.init;
      { parameter 1 receives the source address, 2 the destination address
        and 3 the byte count }
      paramanager.getcgtempparaloc(list,moveproc,1,srcpara);
      paramanager.getcgtempparaloc(list,moveproc,2,dstpara);
      paramanager.getcgtempparaloc(list,moveproc,3,countpara);
      { the arguments are loaded from the last one to the first one }
      a_load_const_cgpara(list,OS_SINT,len,countpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,countpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);
      { all volatile integer and fpu registers are marked as in use
        around the call }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      countpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list    : assembler list the instructions are appended to
    source  : reference to the first byte to read
    dest    : reference to the first byte to write
    len     : number of bytes to copy; nothing is emitted for len=0
    aligned : selects the word/halfword based strategies; when false only
              byte accesses are generated

    Strategies, in the order they are tried:
      - aligned and len=4 or len=2: one load and one store
      - aligned and len<=helpsize: unrolled copy through temporary registers
      - len<=4 (not aligned): byte by byte copy
      - everything else: byte-wise copy loop (genloop/genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved ones, also +2 is used because of the regs required for
        referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;
    type
      ttmpregisters = array[1..maxtmpreg_arm] of tregister;
    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      { largest length (in bytes) that is still copied with unrolled code }
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:ttmpregisters;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

      { Emits a copy loop using post-indexed addressing that moves "size"
        bytes per iteration, followed by straight-line code for the
        remaining "count mod size" bytes.
        Uses srcref, dstref, countreg, srcreg and destreg of the enclosing
        procedure.
        Will never be called with count<=4. }
      procedure genloop(count : aword;size : byte);
        const
          size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
        var
          l : tasmlabel;
        begin
          current_asmdata.getjumplabel(l);
          if count<size then size:=1;
          a_load_const_reg(list,OS_INT,count div size,countreg);
          cg.a_label(list,l);
          srcref.addressmode:=AM_POSTINDEXED;
          dstref.addressmode:=AM_POSTINDEXED;
          srcref.offset:=size;
          dstref.offset:=size;
          r:=getintregister(list,size2opsize[size]);
          a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          { the flag setting decrement is placed between the load and the store }
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
          a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
          a_jmp_flags(list,F_NE,l);
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
          srcref.offset:=1;
          dstref.offset:=1;
          { copy the bytes which did not fill a complete loop iteration }
          case count mod size of
            1:
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
            2:
              if aligned then
                begin
                  a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                  a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                end
              else
                begin
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                end;
            3:
              if aligned then
                begin
                  srcref.offset:=2;
                  dstref.offset:=2;
                  a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                  a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                end
              else
                begin
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                end;
          end;
          { keep the registers alive }
          list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
          list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
          list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
        end;

      { safe estimation, if creating a separate ref is needed or
        if we can keep the original reference while copying;
        reads the current value of len of the enclosing procedure }
      function SimpleRef(const ref : treference) : boolean;
        begin
          result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
            ((ref.symbol=nil) and
             (ref.addressmode=AM_OFFSET) and
             (((ref.offset>=0) and (ref.offset+len<=31)) or
              (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
              { ldrh has a limited offset range }
              (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
             )
            );
        end;

      { Thumb variant of genloop: instead of post-indexed addressing the
        base registers are advanced with explicit ADD instructions.
        Will never be called with count<=4. }
      procedure genloop_thumb(count : aword;size : byte);

        { adds value to the base register of ref }
        procedure refincofs(const ref : treference;const value : longint = 1);
          begin
            a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
          end;

        const
          size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
        var
          l : tasmlabel;
        begin
          current_asmdata.getjumplabel(l);
          if count<size then size:=1;
          a_load_const_reg(list,OS_INT,count div size,countreg);
          cg.a_label(list,l);
          r:=getintregister(list,size2opsize[size]);
          a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
          refincofs(srcref);
          a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
          refincofs(dstref);
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          { the decrement has to be marked as flag setting (PF_S), exactly as
            in genloop: the conditional jump below consumes the flags which
            were allocated for this instruction }
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
          a_jmp_flags(list,F_NE,l);
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
          { copy the bytes which did not fill a complete loop iteration }
          case count mod size of
            1:
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
            2:
              if aligned then
                begin
                  a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                  a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                end
              else
                begin
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  refincofs(srcref);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  refincofs(dstref);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                end;
            3:
              if aligned then
                begin
                  a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                  refincofs(srcref,2);
                  a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                  refincofs(dstref,2);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                end
              else
                begin
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  refincofs(srcref);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  refincofs(dstref);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  refincofs(srcref);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  refincofs(dstref);
                  a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                  a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                end;
          end;
          { keep the registers alive }
          list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
          list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
          list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
        end;

    begin
      tmpregisters:=Default(ttmpregisters);
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      { when optimizing for size only very short copies are unrolled }
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;
          { first load as many words as possible into temporary registers ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;
          { loading address in a separate register needed? }
          { NOTE(review): len has already been reduced by the loop above, so
            SimpleRef judges dest only by the length of the remaining tail }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;
          { ... then store them in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;
          { copy what is left using the largest access size that still fits }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the references returned by the internal load/store are
                reused for the following bytes }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference takes all its properties, including
                temppos, from source }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
              countreg:=getintregister(list,OS_32);
              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
              genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as aligned
    unless the alignment field of one of the two references is 1 or 3. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      both_aligned : boolean;
    begin
      both_aligned:=not(source.alignment in [1,3]) and
        not(dest.alignment in [1,3]);
      g_concatcopy_internal(list,source,dest,len,both_aligned);
    end;
  { Emits an overflow check for the operation which produced l; forwards
    to g_overflowCheck_loc with an overflow location of LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      no_ovloc : tlocation;
    begin
      { only the loc field is initialised before the record is passed on }
      no_ovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,no_ovloc);
    end;
  { Emits "branch over a call to FPC_OVERFLOW if no overflow happened".
    Nothing is generated when overflow checking is switched off.

    ovloc.loc=LOC_VOID : the branch condition is derived from def and from
                         the opcode of the last entry of List
    ovloc.loc=LOC_FLAGS: ovloc.resflags describes the overflow condition,
                         the branch uses its inverse
    any other location raises internal error 200409281 }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      oklabel : tasmlabel;
      skipbranch : TAiCpu;
      noovflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(oklabel);
      if ovloc.loc=LOC_VOID then
        begin
          skipbranch:=taicpu.op_sym(A_B,oklabel);
          skipbranch.is_jmp:=true;
          { pointers and the listed unsigned/char/boolean ordinals are
            checked through the carry flag, everything else through the
            overflow flag }
          carrybased:=(def.typ=pointerdef) or
            ((def.typ=orddef) and
             (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                       pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
          if not carrybased then
            skipbranch.SetCondition(C_VC)
          else if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
            { after a subtraction the no-overflow case is "carry set" }
            skipbranch.SetCondition(C_CS)
          else
            skipbranch.SetCondition(C_CC);
          list.concat(skipbranch);
        end
      else if ovloc.loc=LOC_FLAGS then
        begin
          noovflags:=ovloc.resflags;
          inverse_flags(noovflags);
          cg.a_jmp_flags(list,noovflags,oklabel);
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        internalerror(200409281);
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,oklabel);
    end;
  { Intentionally emits nothing: saving the registers is handled by
    g_proc_entry. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
    end;
  { Intentionally emits nothing: restoring the registers is handled by
    g_proc_exit. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
    end;
  { Emits a jump to l which is taken when the comparison condition cond
    holds. Outside of Thumb code this is one conditional branch; for Thumb
    code an inverted conditional branch skips an unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;
      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Returns the opcode used to move/convert a scalar between two float
    sizes: VMOV when both are OS_F32 or both are OS_F64, VCVT between
    OS_F32 and OS_F64. Every other combination raises internal error
    200312205. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      { first index: fromsize, second index: tosize }
      opcode_table : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    var
      opcode : tasmop;
    begin
      opcode:=opcode_table[fromsize,tosize];
      if opcode=A_NONE then
        internalerror(200312205);
      result:=opcode;
    end;
  { Returns the instruction postfix belonging to a scalar move/conversion
    between two float sizes; PF_None for every combination that involves a
    size other than OS_F32 and OS_F64. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      { first index: fromsize, second index: tosize }
      postfix_table : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      result:=postfix_table[fromsize,tosize];
    end;
  { Moves or converts a scalar float from mm register reg1 into mm register
    reg2. Only a nil or scalar shuffle is accepted, anything else raises
    internal error 2009112407. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      opcode : tasmop;
      postfix : TOpPostfix;
      instr : taicpu;
    begin
      if assigned(shuffle) and not(shufflescalar(shuffle)) then
        internalerror(2009112407);
      { both lookups are done with the destination size as first argument }
      opcode:=get_scalar_mm_op(tosize,fromsize);
      postfix:=get_scalar_mm_prefix(tosize,fromsize);
      instr:=setoppostfix(taicpu.op_reg_reg(opcode,reg2,reg1),postfix);
      list.concat(instr);
      if instr.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so no check is needed here }
        add_move_instruction(instr)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar float from memory (ref) into mm register reg, converting
    from fromsize to tosize afterwards if the two differ. Integer source
    sizes are accepted as raw data of the float size with the same width.
    References whose alignment is 1 or 2 are loaded through integer
    registers. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      rawreg,
      loadreg : tregister;
      rawreg64 : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);
      { map integer sizes to the float size of the same width }
      if fromsize in [OS_32,OS_S32] then
        begin
          fromsize:=OS_F32;
          { since we are loading an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112801);
        end
      else if fromsize in [OS_64,OS_S64] then
        begin
          fromsize:=OS_F64;
          { since we are loading an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112901);
        end
      else if not(fromsize in [OS_F32,OS_F64]) then
        internalerror(2019050920);
      { a conversion needs an intermediate register of the source size }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);
      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else if fromsize=OS_F32 then
        begin
          rawreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,ref,rawreg);
          a_loadmm_intreg_reg(list,OS_32,OS_F32,rawreg,loadreg,mms_movescalar);
        end
      else if fromsize=OS_F64 then
        begin
          rawreg64.reglo:=getintregister(list,OS_32);
          rawreg64.reghi:=getintregister(list,OS_32);
          cg64.a_load64_ref_reg(list,ref,rawreg64);
          cg64.a_loadmm_intreg64_reg(list,OS_F64,rawreg64,loadreg);
        end
      else
        internalerror(2009112412);
      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar float in mm register reg to memory (ref), converting
    from fromsize to tosize first if the two differ. Integer destination
    sizes are accepted as raw data of the float size with the same width.
    References whose alignment is 1 or 2 are written through integer
    registers. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      rawreg,
      storereg : tregister;
      rawreg64 : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);
      { map integer sizes to the float size of the same width }
      if tosize in [OS_32,OS_S32] then
        begin
          tosize:=OS_F32;
          { since we are storing an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112802);
        end
      else if tosize in [OS_64,OS_S64] then
        begin
          tosize:=OS_F64;
          { since we are storing an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112902);
        end
      else if not(tosize in [OS_F32,OS_F64]) then
        internalerror(2019050919);
      { convert into a temporary register when the sizes differ }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;
      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else if tosize=OS_F32 then
        begin
          rawreg:=getintregister(list,OS_32);
          a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,rawreg,shuffle);
          a_load_reg_ref(list,OS_32,OS_32,rawreg,ref);
        end
      else if tosize=OS_F64 then
        begin
          rawreg64.reglo:=getintregister(list,OS_32);
          rawreg64.reghi:=getintregister(list,OS_32);
          cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,rawreg64);
          cg64.a_load64_reg_ref(list,rawreg64,ref);
        end
      else
        internalerror(2009112417);
      { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
    end;
  { Transfers the raw 32 bit contents of integer register intreg into mm
    register mmreg with a VMOV. No conversion is performed: tosize has to
    be OS_F32 and fromsize OS_32 or OS_S32. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      transfer : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(tosize=OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);
      { VMOV cannot generate an FPU exception, so no check is emitted }
      transfer:=taicpu.op_reg_reg(A_VMOV,mmreg,intreg);
      list.concat(transfer);
    end;
  { Transfers the raw 32 bit contents of mm register mmreg into integer
    register intreg with a VMOV. No conversion is performed: fromsize has
    to be OS_F32 and tosize OS_32 or OS_S32. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      transfer : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(fromsize=OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112409);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);
      { VMOV cannot generate an FPU exception, so no check is emitted }
      transfer:=taicpu.op_reg_reg(A_VMOV,intreg,mmreg);
      list.concat(transfer);
    end;
  { Performs op on two mm registers. Only OP_XOR is implemented:
      - OS_F64 on an fpu type with NEON: VEOR dst,dst,src
      - otherwise src and dst have to be the same register, which is then
        cleared by moving a zeroed integer register into it
    Every other operation raises internal error 2009112906. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op<>OP_XOR then
        internalerror(2009112906);
      if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size=OS_F64) then
        begin
          if (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2019081301);
          list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
          exit;
        end;
      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);
      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      case size of
        OS_F32:
          list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg));
        OS_F64:
          { the same zero register supplies both halves }
          list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg));
        else
          internalerror(2009112908);
      end;
    end;
  { Re-normalises the result in dst after an operation which can leave
    bits set above the width of an 8 or 16 bit operand: dst is reloaded
    into itself as a conversion from OS_32 to size.

    list : assembler list the instruction is appended to
    op   : operation that produced the value in dst
    size : operand size of that operation; only OS_8, OS_S8, OS_16 and
           OS_S16 trigger the adjustment
    dst  : register holding the result }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      { OP_NOT belongs here as well: inverting a full 32 bit register sets
        every bit above an 8/16 bit operand. Adjusting a value which is
        already normalised does not change it. }
      overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
    begin
      if (op in overflowops) and
         (size in [OS_8,OS_S8,OS_16,OS_S16]) then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits MLA op1,op2,op3,op4. Any operand that is R15, or R13 when
    Thumb or Thumb-2 code is generated, is first copied into a freshly
    allocated integer register which then replaces it in the MLA. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces operand by a temporary copy if it is one of the registers
      described above }
    procedure legalize(var operand : TRegister);
      var
        copyreg : TRegister;
        must_copy : boolean;
      begin
        must_copy:=getsupreg(operand)=RS_R15;
        if not(must_copy) and (GenerateThumbCode or GenerateThumb2Code) then
          must_copy:=getsupreg(operand)=RS_R13;
        if not must_copy then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,operand,copyreg);
        operand:=copyreg;
      end;

    begin
      legalize(op1);
      legalize(op2);
      legalize(op3);
      legalize(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { If the current procedure is flagged with pi_needs_tls, emits a call to
    fpc_read_tp and copies R0 into current_procinfo.tlsoffset afterwards.
    R0 is marked as allocated around the sequence. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      { presumably fpc_read_tp returns the thread pointer in R0 - confirm
        against the RTL }
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit register/register operation: regdst := op(regsrc) for OP_NEG and
    OP_NOT; every other operation is forwarded to a_op64_reg_reg_reg with
    regdst as second source and as destination. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { 0-value over both halves: RSB with flag update on the low word,
            RSC on the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          { both halves are inverted independently }
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation with a constant where reg is both the source and the
    destination; forwards to a_op64_const_reg_reg. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,
        op,size,value,
        reg,   { source }
        reg);  { destination }
    end;
  { 64 bit operation regdst := regsrc op value; forwards to the
    overflow-checking variant with setflags=false and discards the overflow
    location it returns. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      discarded_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,discarded_ovloc);
    end;
  { 64 bit three-register operation; forwards to the overflow-checking
    variant with setflags=false and discards the overflow location it
    returns. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      discarded_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,discarded_ovloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into mm
    register mmreg with a single VMOV. mmsize has to be OS_F64, no
    conversion is performed. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    var
      transfer : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      { VMOV cannot generate an FPU exception, so no check is emitted }
      transfer:=taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi);
      list.concat(transfer);
    end;
  { Transfers the raw contents of mm register mmreg into the integer
    register pair intreg with a single VMOV. mmsize has to be OS_F64, no
    conversion is performed. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    var
      transfer : taicpu;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      { VMOV cannot generate an FPU exception, so no check is emitted }
      transfer:=taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg);
      list.concat(transfer);
    end;
  { Appends code for the 64 bit operation "regdst := regsrc <op> value" to
    list, working on the low and the high 32 bit half separately.

    op       OP_AND/OP_OR/OP_XOR/OP_ADD/OP_SUB; OP_NEG and OP_NOT raise
             internal error 2012022501, any other operation raises internal
             error 2003083101.
    value    64 bit constant; each half is used as an immediate when
             is_shifter_const accepts it, otherwise it is loaded into a fresh
             temporary integer register first.
    setflags when true (or when tbasecgarm(cg).cgsetflags is true) and op is
             OP_ADD/OP_SUB, both instructions get the S postfix; for
             size=OS_64 ovloc is then set to LOC_FLAGS with F_CS (add) or
             F_CC (sub).  The flags stay allocated in that case because
             ovloc refers to them.
    ovloc    set to LOC_VOID unless the case above applies.

    When no overflow information is requested, the flags are only needed to
    pass the carry from the low to the high instruction, so they are released
    again right after the ADC/SBC (same as a_op64_reg_reg_reg_checkoverflow
    does). }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
        else
          ;
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { overflow-checking variant: low and high instruction both set the flags }
          case op of
            OP_ADD:
              begin
                { low half: ADDS }
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                { high half: ADCS }
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                { low half: SUBS }
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                { high half: SBCS }
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the ARM has a weird opinion about how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { the halves are independent, delegate to the 32 bit code generator }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                { low half: ADDS, the carry feeds the ADC below }
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                { high half: plain ADC, the result flags are not needed }
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed: end the lifetime of the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                { low half: SUBS, the carry feeds the SBC below }
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                { high half: plain SBC, the result flags are not needed }
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed: end the lifetime of the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Appends code for a 64 bit register/register operation to list, working on
    the low and the high 32 bit halves.

    OP_ADD yields regdst := regsrc1 + regsrc2, OP_SUB yields
    regdst := regsrc2 - regsrc1; OP_AND/OP_OR/OP_XOR are forwarded half by
    half to the 32 bit code generator.  OP_NEG/OP_NOT raise internal error
    2012022502, other unsupported operations raise 2003083104.

    If setflags or tbasecgarm(cg).cgsetflags is set and op is OP_ADD/OP_SUB,
    both instructions carry the S postfix and, for size=OS_64, ovloc becomes
    LOC_FLAGS with F_CS (add) or F_CC (sub); the flags stay allocated.
    Otherwise ovloc is LOC_VOID and the flags are released after the
    ADC/SBC. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      flags_wanted : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);
      flags_wanted:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);
      if not flags_wanted then
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                { ADDS/ADC: the flags only carry the low half's carry }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                { SUBS/SBC: the flags only carry the low half's borrow }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083104);
          end;
        end
      else
        begin
          case op of
            OP_ADD:
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
              end;
            OP_SUB:
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
              end;
            else
              internalerror(2003083102);
          end;
          if size=OS_64 then
            begin
              { the ARM has a weird opinion about how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050917);
              end;
            end;
        end;
    end;
  { Creates the integer register allocator for Thumb code after running the
    inherited initialisation.  R0..R7 are handed to the allocator, except
    that R7 is left out when the current procedure uses it as frame
    pointer. }
  procedure tthumbcgarm.init_register_allocators;
    var
      r7_is_framepointer : boolean;
    begin
      inherited init_register_allocators;
      r7_is_framepointer:=assigned(current_procinfo) and
        (current_procinfo.framepointer=NR_R7);
      if not r7_is_framepointer then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Frees the integer, FPU and MM register allocators (in that order) and
    then runs the inherited clean-up. }
  procedure tthumbcgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      idx : integer;
    begin
      for idx:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[idx]].free;
      inherited done_register_allocators;
    end;
  { Emits the Thumb procedure prologue into list.

    localsize     size of the local area; rounded up to a multiple of 4 and,
                  unless stack parameters are accessed, further adjusted so
                  that register save area + locals respect
                  current_settings.alignment.localalignmax.
    nostackframe  when true nothing is emitted at all.

    Steps performed when a stack frame is wanted:
      1. PUSH the used non-volatile integer registers plus R14 (and R7 when
         a frame pointer other than SP is used); R4 is added when the frame
         may be too large for immediate SP adjustments.  Nothing is saved for
         po_noreturn procedures.
      2. Emit the matching CFI information.
      3. Lower SP by the local size: one SUB up to 508 bytes, two SUBs up to
         1016 bytes, otherwise via R4.
      4. Copy SP into the frame pointer if one is used.

    Raises internal error 2013040601 when stack parameters are accessed and
    localsize exceeds the previously estimated stack frame size. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              //!!!! a_reg_alloc(list,NR_R12);
              //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
            end;
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);
          { safely estimate stack size: if the frame may exceed what immediate
            SP adjustments can cover, R4 is needed as scratch register below,
            so make sure it gets saved }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;
          registerarea:=0;
          { do not save integer registers if the procedure does not return }
          if po_noreturn in current_procinfo.procdef.procoptions then
            regs:=[];
          if regs<>[] then
            begin
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
              { describe every pushed register for CFI and sum up the size of
                the register save area on the way }
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  begin
                    inc(registerarea,4);
                    current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
                  end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
            end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      { diagnostic output preceding the internal error }
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              { 508 is the largest immediate of a single SP adjustment (the
                epilogue uses the same limit), so 508 itself still fits into
                one instruction }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4,
                    which was added to the saved registers above }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { frame pointer := stack pointer }
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
              current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
            end;
        end;
    end;
  { Emits the Thumb procedure epilogue into list.

    Nothing is emitted for po_noreturn procedures.  Without a stack frame
    only a return through R14 is generated.  Otherwise, provided the frame
    pointer is SP or the target is a Darwin system, the local area is
    released (one ADD up to 508 bytes, two ADDs up to 1016 bytes, via R3
    beyond that) and the saved registers are popped with R15 in place of the
    return address. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      LocalSize : longint;
      supreg: byte;
      regs : tcpuregisterset;
      registerarea : DWord;
      stackmisalignment: pint;
      stack_parameters : Boolean;

    { returns through the link register: BX R14 if the CPU supports BX,
      MOV PC,R14 otherwise }
    procedure emit_plain_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          emit_plain_return;
          exit;
        end;

      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: the saved ones, with the PC receiving the
        return address }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(regs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(regs,getsupreg(current_procinfo.framepointer));

      { size of the register save area }
      registerarea:=0;
      for supreg:=RS_R0 to RS_R15 do
        if supreg in regs then
          inc(registerarea,4);

      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
      LocalSize:=current_procinfo.calc_stackframe_size;
      if stack_parameters then
        localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
      else
        localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

      if not((current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin)) then
        exit;

      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          if LocalSize<>0 then
            begin
              if LocalSize<=508 then
                list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
              else if LocalSize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize-508));
                end
              else
                begin
                  { too large for immediates: go through R3 }
                  a_reg_alloc(list,NR_R3);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R3);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
                  a_reg_dealloc(list,NR_R3);
                end;
            end;
        end;

      if regs=[] then
        emit_plain_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,regs));
    end;
  { Loads a value of size fromsize from Ref into reg (Thumb).

    If fromsize is at least as large as tosize, the load is done with
    tosize.  Only 8, 16 and 32 bit integer sizes are supported; anything
    else raises internal error 200308298.

    When the reference's alignment is 1 or 2 and smaller than the size
    loaded, 16 and 32 bit values are assembled from narrower loads combined
    with LSL/ORR, honouring the target endianness; every other case is
    forwarded to handle_load_store. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg,tmpreg2 : tregister;
      dir : integer;

    { Initialises usedtmpref: complicated references (symbol, index
      register, offset outside -124..124, or a base/index equal to the
      destination register) are first turned into an address held in a new
      register; simple ones are used directly. }
    procedure prepare_piecewise_ref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            tmpreg2:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,tmpreg2);
            reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          usedtmpref:=ref;
      end;

    { Shifts the piece in tmpreg left by shift bits and ORs it into reg. }
    procedure merge_piece(shift : longint);
      begin
        list.concat(taicpu.op_reg_const(A_LSL,tmpreg,shift));
        list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;
      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { direction in which the pieces are walked, least significant first }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                prepare_piecewise_ref;
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                tmpreg:=getintregister(list,OS_INT);
                { low byte }
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { high byte, sign extended for signed loads }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                merge_piece(8);
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr }
                prepare_piecewise_ref;
                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    merge_piece(16);
                  end
                else
                  begin
                    { four byte loads }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    merge_piece(8);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    merge_piece(16);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    merge_piece(24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;
      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg (Thumb).

    Constants accepted by is_thumb_imm are loaded with a flag-setting MOV.
    All others are appended as a 32 bit word to the current procedure's
    local data and fetched with a PC-relative LDR.  Sizes other than 8, 16
    or 32 bit raise internal error 2002090908. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      pool_label : tasmlabel;
      pool_ref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      if not is_thumb_imm(a) then
        begin
          reference_reset(pool_ref,4,[]);
          { emit "label: .word a" into the local data of the procedure }
          current_asmdata.getjumplabel(pool_label);
          cg.a_label(current_procinfo.aktlocaldata,pool_label);
          pool_ref.symboldata:=current_procinfo.aktlocaldata.last;
          current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
          { ldr reg,[pc, label] }
          pool_ref.symbol:=pool_label;
          pool_ref.base:=NR_PC;
          list.concat(taicpu.op_reg_ref(A_LDR,reg,pool_ref));
        end
      else
        list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef at every caller
    side location of that parameter (register or memory).

    Offsets accepted by is_thumb_imm are subtracted directly.  Otherwise R4
    is saved with PUSH, loaded with ioffset from a word placed in the local
    data of the current procedure, used for the subtraction and restored
    with POP.

    Raises internal error 2003052504 if procdef has no 'self' parameter
    symbol and 2003091804 for a location that is neither LOC_REGISTER nor
    LOC_REFERENCE. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      hsym : tsym;
      href : treference;
      paraloc : Pcgparalocation;

    { Places ioffset as an aligned 32 bit word in the local data and emits
      the PC-relative LDR that fetches it into R4. }
    procedure load_ioffset_into_r4;
      var
        pool_ref : treference;
        pool_label : TAsmLabel;
      begin
        reference_reset(pool_ref,4,[]);
        current_asmdata.getjumplabel(pool_label);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,pool_label);
        pool_ref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        pool_ref.symbol:=pool_label;
        pool_ref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,pool_ref));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      hsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(hsym) and
        (hsym.typ=paravarsym)) then
        internalerror(2003052504);
      paraloc:=tparavarsym(hsym).paraloc[callerside].location;
      while paraloc<>nil do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if not is_thumb_imm(ioffset) then
                  begin
                    list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    load_ioffset_into_r4;
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R4,paraloc^.register);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end
                else
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register);
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(href,paraloc^.reference.index,paraloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if not is_thumb_imm(ioffset) then
                  begin
                    list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    load_ioffset_into_r4;
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R4,href);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end
                else
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,href);
              end
            else
              internalerror(2003091804);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  { Thumb front end for the inherited handle_load_store.

    If the reference violates one of the restrictions checked below, its
    address is first computed into a new register (a_loadaddr_ref_reg) and
    the access is done through that register with offset 0; otherwise ref is
    passed on unchanged.  The result is whatever the inherited method
    returns. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      href : treference;
      addrreg : TRegister;
      word_access,
      byte_access,
      half_access,
      needs_address : boolean;
    begin
      href:=ref;

      { classify the access by opcode/postfix combination }
      word_access:=((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      byte_access:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_B)) or
                   (((op=A_LDRB) or (op=A_STRB)) and (oppostfix=PF_None));
      half_access:=(((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_H)) or
                   (((op=A_LDRH) or (op=A_STRH)) and (oppostfix=PF_None));

      needs_address:=
        { LDR/STR limitations }
        (word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         ((ref.base=NR_STACK_POINTER_REG) or
          (ref.index=NR_STACK_POINTER_REG) or
          (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         ((ref.base=NR_STACK_POINTER_REG) or
          (ref.index=NR_STACK_POINTER_REG) or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word loads with a large offset }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any remaining LDR with a large offset.
          NOTE(review): this clause also matches stack pointer relative word
          loads with 124<abs(offset)<=1020 - confirm that this is intended }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if needs_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(href,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  { Emits the two operand Thumb operation "dst := dst <op> src" (for
    OP_NEG/OP_NOT: "dst := <op> src") into list and finally calls
    maybeadjustresult for the given size.

    OP_DIV/OP_IDIV are not supported (internal error 200308284).  OP_ROL is
    only available for 32 bit sizes (internal error 2008072805 otherwise)
    and is simulated by rotating right by 32-src.  All remaining operations
    are looked up in op_reg_opcg2asmop/op_reg_postfix_thumb. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            { tmpreg := 32 - src }
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the complemented count held in tmpreg; rotating by
              src itself would be a rotate right by the original amount }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := dst <op> a" for Thumb into list and finally calls
    maybeadjustresult.

    Strategy, in this order:
      - an ADD/SUB whose negated constant passes is_thumb_imm is turned into
        the opposite operation with the negated constant;
      - ADD/SUB with a constant accepted by is_thumb_imm is emitted directly;
      - trivial multiplications/divisions (by 1, 0, -1) and trivial ANDs
        (with 0 or with all bits set) are special-cased;
      - shifts are emitted with the constant as immediate;
      - everything else loads the constant into a temporary register and
        defers to a_op_reg_reg.
    The code guarded by DUMMY is disabled. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { prefer the opposite operation if the negated constant is encodable }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
          { overflow location reporting is not implemented here:
            //!!! if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
            //!!!   ovloc.loc:=LOC_FLAGS; ovloc.resflags:=F_CS (add) / F_CC (sub) }
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(2003082903);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(2012051802);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
          { x := x AND $FFFFFFFF leaves the register unchanged, nothing is emitted }
          else if (op = OP_AND) and (not(dword(a))=0) then
            { do nothing }
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { generic fallback: constant into a register, then reg/reg operation }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg(list,op,size,constreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src <op> a" for Thumb.

    An addition of a positive multiple of 4 (at most 1020) to R13 with a
    destination other than R13 is emitted as a single three operand ADD;
    every other combination is handled by the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_plus_imm : boolean;
    begin
      sp_plus_imm:=(op=OP_ADD) and (src=NR_R13) and (dst<>NR_R13) and
        ((a mod 4)=0) and (a>0) and (a<=1020);
      if not sp_plus_imm then
        begin
          inherited a_op_const_reg_reg(list,op,size,a,src,dst);
          exit;
        end;
      list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition described by f as 0 or 1 in reg using a
    conditional branch:

        b<cond> is_set
        movs    reg,#0
        b       done
      is_set:
        movs    reg,#1
      done:

    The flags are marked as released before the final label. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      is_set_label,
      done_label : tasmlabel;
      cond_branch : taicpu;
    begin
      current_asmdata.getjumplabel(is_set_label);
      current_asmdata.getjumplabel(done_label);

      { conditional jump taken when the condition holds }
      cond_branch:=setcondition(taicpu.op_sym(A_B,is_set_label),flags_to_cond(f));
      cond_branch.is_jmp:=true;
      list.concat(cond_branch);

      { condition false: reg := 0 }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,done_label));

      { condition true: reg := 1 }
      cg.a_label(list,is_set_label);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,done_label);
    end;
  { Creates the register allocators for Thumb-2 code after running the
    inherited initialisation: the integer allocator (without R9 on
    system_arm_ios), an FPU allocator for F0..F7 if the selected FPU type has
    FPA, and the MM allocator via init_mmregister_allocator. }
  procedure tthumb2cgarm.init_register_allocators;
    var
      has_fpa : boolean;
    begin
      inherited init_register_allocators;
      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);
      has_fpa:=FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype];
      if has_fpa then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and multimedia register allocators and then
    lets the inherited implementation finish the clean-up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { same order as in the original code: int, fpu, mm }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX reg) and records in the current
    procedure info that the procedure performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      list.concat(taicpu.op_reg(A_BLX, reg));

      { The compiler does not properly set pi_do_call anymore in pass 1, and
        for now it is only needed after pass 2 (I hope) (JM).  Hence the flag
        is set here instead of being checked with
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg using the cheapest Thumb-2 form that applies.

    Tried in this order:
      1. MOV  reg,#a        if a is a Thumb-2 immediate
      2. MVN  reg,#not(a)   if the complement is a Thumb-2 immediate
      3. MOVW reg,#a        if only the low 16 bits of a are used
      4. LDR  reg,[pc,...]  from a 32 bit entry appended to the local data of
                            the current procedure
    Sizes other than the 8/16/32 bit integer sizes raise an internal error. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090909);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value in the local data and load
        it relative to the program counter }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { the label just emitted is the last entry of the local data list }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value from memory reference Ref into reg (Thumb-2).

    If the source size is not smaller than the destination size, the load is
    done with the destination size.  References whose alignment (1 or 2) is
    smaller than the size to load are read piecewise (bytes or halfwords) and
    combined with ORR and a left shift; the piece order depends on the target
    endianness.  All other loads go through handle_load_store.  A load from
    OS_S8 to OS_16 is followed by a register-to-register conversion from
    OS_16 to OS_32. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      shiftop : tshifterop;
      step,partno : integer;

    { Prepares partref for piecewise access.  Only complicated references
      (symbol, index register, offset outside -255..maxoffset, or a
      reference using the destination register) need an extra loadaddr. }
    procedure init_partref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-255) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { reg := reg or (partreg shl bits) }
    procedure merge_part(bits : byte);
      begin
        shiftop.shiftimm:=bits;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix matching the size and signedness }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082913);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { direction in which the address advances from the least
            significant piece to the most significant one }
          step:=1;
          if target_info.endian=endian_big then
            step:=-1;

          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref(4094);
                { on big endian the low byte is the second one }
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                partreg:=getintregister(list,OS_INT);
                { low byte }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                { high byte, sign extended for a signed load }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg);
                merge_part(8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref(4092);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                    merge_part(16);
                  end
                else
                  begin
                    { four byte loads, least significant byte first }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for partno:=1 to 3 do
                      begin
                        inc(partref.offset,step);
                        a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                        merge_part(8*partno);
                      end;
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Two-operand register operation for Thumb-2.

    Only OP_NOT is handled here: dst receives the bitwise complement of src
    (MVN) and, for 8 and 16 bit sizes, the result is zero or sign extended
    to match the size.  All other operations use the inherited code. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { bring the upper bits back in line with the operand size }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
        OS_32,
        OS_S32:
          { full register width, nothing to adjust }
          ;
        else
          internalerror(2019050916);
      end;
    end;
  { Emits dst := src <op> a for Thumb-2, optionally setting the flags.

    ovloc is reset to LOC_VOID.  When flags are requested (cgsetflags or
    setflags), the operation is one of the directly encoded arithmetic/logic
    ones and size is OS_8, OS_16 or OS_32, ovloc is switched to LOC_FLAGS
    (F_CS for OP_ADD, F_CC for OP_SUB).

    Steps:
      - OP_ADD/OP_SUB with a constant whose negation is a shifter constant
        are turned into the opposite operation with the negated constant;
      - shifter constants (except for multiplications) are encoded directly,
        shifts and rotates as MOV with a shifter operand;
      - otherwise a number of multiplication and AND special cases are tried
        before falling back to loading the constant into a register.
    The result is finally passed to maybeadjustresult. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      constreg : tregister;
      shiftop : tshifterop;
      log2val : longint;
      viaconstreg,handled : boolean;
      instr : taicpu;

    { true if the generated instruction has to update the flags }
    function wants_flags : boolean;
      begin
        result:=cgsetflags or setflags;
      end;

    { MOV dst,src, with the given immediate shift applied if shifted is set }
    procedure emit_mov(shifted : boolean; mode : tshiftmode; amount : tcgint);
      begin
        if shifted then
          begin
            shifterop_reset(shiftop);
            shiftop.shiftmode:=mode;
            shiftop.shiftimm:=amount;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
          end
        else
          list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
      end;

    begin
      ovloc.loc:=LOC_VOID;

      { x+a -> x-(-a) and x-a -> x+(-a) if -a can be encoded }
      if (a<>-2147483648) and is_shifter_const(-a,shift) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              emit_mov(a<>0,SM_LSL,a);
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              { rotate left by a = rotate right by 32-a }
              emit_mov(a<>0,SM_ROR,32-a);
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              emit_mov(a<>0,SM_ROR,a);
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              emit_mov(a<>0,SM_LSR,a);
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              emit_mov(a<>0,SM_ASR,a);
            end;
          else
            begin
              { additions/subtractions outside 0..4095 take the constant
                from a register, everything else uses the immediate form }
              viaconstreg:=(op in [OP_SUB, OP_ADD]) and
                           ((a < 0) or (a > 4095));
              if viaconstreg then
                begin
                  constreg:=getintregister(list,size);
                  a_load_const_reg(list, size, a, constreg);
                end;
              if wants_flags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              if viaconstreg then
                instr:=taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,constreg)
              else
                instr:=taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a);
              list.concat(setoppostfix(instr,toppostfix(ord(wants_flags)*ord(PF_S))));

              if wants_flags and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  if op=OP_ADD then
                    ovloc.resflags:=F_CS
                  else if op=OP_SUB then
                    ovloc.resflags:=F_CC;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          handled:=true;
          if op in [OP_MUL,OP_IMUL] then
            begin
              if a=1 then
                a_load_reg_reg(list,size,size,src,dst)
              else if a=0 then
                a_load_const_reg(list,size,0,dst)
              else if (op=OP_IMUL) and (a=-1) then
                a_op_reg_reg(list,OP_NEG,size,src,dst)
              { we do this here instead in the peephole optimizer because
                it saves us a register }
              else if ispowerof2(a,log2val) and not(wants_flags) then
                a_op_const_reg_reg(list,OP_SHL,size,log2val,src,dst)
              { for example : b=a*5 -> b=a*4+a with add instruction and shl }
              else if ispowerof2(a-1,log2val) and not(wants_flags) then
                begin
                  if log2val>32 then{roozbeh does this ever happen?}
                    internalerror(2003082911);
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_LSL;
                  shiftop.shiftimm:=log2val;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
                end
              { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
              else if ispowerof2(a+1,log2val) and not(wants_flags) then
                begin
                  if log2val>32 then{does this ever happen?}
                    internalerror(2012051803);
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_LSL;
                  shiftop.shiftimm:=log2val;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
                end
              else
                { on success the helper has emitted everything needed }
                handled:=not(wants_flags) and try_optimized_mul32_const_reg_reg(list,a,src,dst);
            end
          else if op=OP_AND then
            begin
              { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
                Just using mov x, #0 might allow some easier optimizations down the line. }
              if dword(a)=0 then
                list.concat(taicpu.op_reg_const(A_MOV,dst,0))
              { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
              else if not(dword(a))=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              { a BIC variant based on is_shifter_const(not(dword(a)),shift)
                was disabled in the original code at this position }
              else if is_thumb32_imm(a) then
                list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
              else if a = $FFFF then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              { BIC clears the specified bits, while AND keeps them }
              else if is_thumb32_imm(not(dword(a))) then
                list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
              { a contiguous run of bits to clear can be done with BFC }
              else if is_continuous_mask(aword(not(a)), shift, width) then
                begin
                  a_load_reg_reg(list,size,size,src,dst);
                  list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
                end
              else
                handled:=false;
            end
          else
            handled:=false;

          { general case: constant in a temporary register }
          if not handled then
            begin
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 instruction used for each generic operation, indexed by TOpCG.
      A_NONE marks operations that have no entry in this table.
      NOTE(review): the entries follow the declaration order of TOpCG, which
      is not visible here - confirm against its declaration before editing. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
      (A_NONE,
       A_MOV,
       A_ADD,
       A_AND,
       A_UDIV,
       A_SDIV,
       A_MUL,
       A_MUL,
       A_NONE,
       A_MVN,
       A_ORR,
       A_ASR,
       A_LSL,
       A_LSR,
       A_SUB,
       A_EOR,
       A_NONE,
       A_ROR);
  { Emits dst := src2 <op> src1 for Thumb-2, optionally setting the flags.

    list     - instruction list the code is appended to
    op       - operation; OP_NEG and OP_NOT are not allowed here
    size     - operand size; rotates require OS_32/OS_S32
    src1     - second operand (shift/rotate count for shifts and rotates)
    src2     - first operand
    dst      - destination register
    setflags - request flag updates in addition to the cgsetflags field
    ovloc    - reset to LOC_VOID; for multiplications with flags requested it
               becomes LOC_FLAGS/F_NE, set by comparing the high word of a
               long multiply against the expected value

    The result is finally passed to maybeadjustresult.

    Fixes compared to the previous version:
      - OP_ROL no longer overwrites src1 with "32-src1"; the rotate count is
        built in a temporary register, so the source operand stays intact.
      - multiplication with dst=src1=src2 copied src2 into dst (a no-op) and
        then multiplied by a temporary register that was never written; the
        operand is now copied into that temporary register. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the count is computed in
              tmpreg so that src1 is not modified }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: dst gets the low word, overflowreg the
                  high word which is checked afterwards }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension
                      of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { plain three register form, with the S postfix if flags are
              requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Thumb-2: materialises the flag condition f as 0 or 1 in reg with an
    if-then-else block:
        ite      <cond>
        mov<cond>     reg,#1
        mov<not cond> reg,#0 }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      mov_true,mov_false : taicpu;
    begin
      list.concat(taicpu.op_cond(A_ITE, flags_to_cond(f)));

      mov_true:=taicpu.op_reg_const(A_MOV,reg,1);
      list.concat(setcondition(mov_true,flags_to_cond(f)));

      mov_false:=taicpu.op_reg_const(A_MOV,reg,0);
      list.concat(setcondition(mov_false,inverse_cond(flags_to_cond(f))));
    end;
  { Generates the procedure prologue for Thumb-2.

    Nothing is emitted when nostackframe is set.  Otherwise the code
      - determines which FPA registers have to be saved (FPA targets only),
      - sets up R12 as a copy of the stack pointer if a frame pointer
        different from the stack pointer is used,
      - pushes the used non-volatile integer registers (plus frame pointer
        and/or R14 where required) with STMFD,
      - initialises the frame pointer from R12,
      - reserves the local stack area, padded so that the stack keeps the
        maximum local alignment,
      - stores the FPA registers with SFM. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment: pint;
      uses_fp,has_fpa : boolean;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if nostackframe then
        exit;

      uses_fp:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;
      has_fpa:=FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype];

      { save floating point registers? }
      firstfloatreg:=RS_NO;
      lastfloatreg:=RS_NO;
      if has_fpa then
        for r:=RS_F0 to RS_F7 do
          if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
            begin
              if firstfloatreg=RS_NO then
                firstfloatreg:=r;
              lastfloatreg:=r;
              inc(stackmisalignment,12);
            end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if uses_fp then
        begin
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          { remember the incoming stack pointer for the frame pointer setup }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(ref,4,[]);
      ref.index:=NR_STACK_POINTER_REG;
      ref.addressmode:=AM_PREINDEXED;
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if uses_fp then
        regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(regs,RS_R14);
      if regs<>[] then
        begin
          { every pushed register moves the stack pointer by four bytes }
          for r:=RS_R0 to RS_R15 do
            if (r in regs) then
              inc(stackmisalignment,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
        end;

      if uses_fp then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      { reserve the locals and restore the stack alignment if necessary }
      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(localsize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size not encodable as immediate: go through R12 }
              if not uses_fp then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      { store the FPA registers determined above }
      if has_fpa and (firstfloatreg<>RS_NO) then
        begin
          reference_reset(ref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the instruction: compute the address in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              ref.base:=NR_R12;
            end
          else
            begin
              ref.base:=current_procinfo.framepointer;
              ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
            lastfloatreg-firstfloatreg+1,ref));
        end;
    end;
  { Generates the procedure epilogue for Thumb-2.

    Procedures marked noreturn get no exit code at all.  Without a stack
    frame only "BX R14" is emitted.  Otherwise the FPA registers are reloaded
    (FPA targets only), the local stack area is released and the saved
    integer registers are popped with LDMFD; R15 takes the place of R14 in
    the register set so that the pop also returns.  If no register has to
    be restored, the return is done with "BX R14". }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      firstfloatreg,lastfloatreg,
      r : byte;
      shift : byte;
      regs : tcpuregisterset;
      LocalSize : longint;
      stackmisalignment: pint;
    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      stackmisalignment:=0;
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        begin
          { find the range of floating point registers to restore }
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          for r:=RS_F0 to RS_F7 do
            if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfloatreg=RS_NO then
                  firstfloatreg:=r;
                lastfloatreg:=r;
                { floating point register space is already included in
                  localsize below by calc_stackframe_size, so
                  stackmisalignment is not increased by 12 here }
              end;

          if firstfloatreg<>RS_NO then
            begin
              reference_reset(ref,4,[]);
              if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  { offset too large for the instruction: compute the address in R12 }
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                lastfloatreg-firstfloatreg+1,ref));
            end;
        end;

      { integer registers to pop; the return address goes straight into R15 }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
        begin
          exclude(regs,RS_R14);
          include(regs,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];

      { four bytes of stack per saved register }
      for r:=RS_R0 to RS_R15 do
        if (r in regs) then
          inc(stackmisalignment,4);
      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;

      { release the local stack area }
      LocalSize:=current_procinfo.calc_stackframe_size;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(LocalSize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size not encodable as immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      { return }
      if regs<>[] then
        begin
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
        end
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14));
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" for Thumb-2
    after rewriting ref into a form the instruction can encode.  The
    reference as finally used is returned.

    Rewriting steps, in order:
      - a reference without base register uses its index as base;
      - references with a (non pc-relative) symbol, without any register,
        or with an offset outside the supported range get the symbol address
        or offset loaded into a temporary register, which then becomes base
        or index;
      - base+index+offset is reduced to base+index;
      - a PC base combined with a shifted index is replaced by a copy of PC;
      - floating point/vector accesses get base and index folded into a
        single base register. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      fpu_access,needs_rewrite : boolean;
    begin
      tmpreg:=NR_NO;
      { coprocessor loads/stores only support a restricted set of references }
      fpu_access:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      needs_rewrite:=
        (assigned(ref.symbol) and
         not(is_pc(ref.base)) and
         not(is_pc(ref.index))) or
        { [#xxx] isn't a valid address operand }
        ((ref.base=NR_NO) and (ref.index=NR_NO)) or
        (ref.offset<-255) or
        (ref.offset>4095) or
        ((oppostfix in [PF_SB,PF_H,PF_SH]) and
         ((ref.offset<-255) or (ref.offset>255))) or
        (fpu_access and
         ((ref.offset<-1020) or
          (ref.offset>1020) or
          ((abs(ref.offset) mod 4)<>0) or
          { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
          assigned(ref.symbol)));

      if needs_rewrite then
        begin
          reference_reset(poolref,4,[]);
          tmpreg:=getintregister(list,OS_INT);
          if not assigned(ref.symbol) then
            { no symbol: only the offset has to go into the register }
            a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg)
          else
            begin
              { load symbol: emit a local data entry holding symbol+offset }
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;

              case ref.refaddr of
                addr_gottpoff:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsgd:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsdesc:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset));
                addr_tpoff:
                  begin
                    if assigned(ref.relsymbol) or (ref.offset<>0) then
                      Internalerror(2019092807);
                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
                  end;
                else
                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              end;

              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end;

          { make the temporary register part of the reference }
          if ref.base=NR_NO then
            ref.base:=tmpreg
          else if ref.index<>NR_NO then
            begin
              { base and index are both taken: fold the base into tmpreg }
              list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
              ref.base:=tmpreg;
            end
          else
            begin
              ref.index:=tmpreg;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base+index+offset cannot be encoded: add the offset to the base }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if tmpreg=NR_NO then
            begin
              tmpreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
              ref.base:=tmpreg;
            end
          else
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg);
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          tmpreg:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
          ref.base := tmpreg;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if fpu_access and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(2003091202);
          if tmpreg=NR_NO then
            begin
              { no temporary yet: base+index goes into a new register }
              tmpreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
              ref.base:=tmpreg;
              ref.index:=NR_NO;
            end
          else if ref.base=tmpreg then
            begin
              { temporary is the base: apply the index to it }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
              ref.index:=NR_NO;
            end
          else
            begin
              { temporary has to be the index: combine it with the base }
              if ref.index<>tmpreg then
                internalerror(2004031602);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
              ref.base:=tmpreg;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    { Register-to-register move between MM registers (reg1 = source,
      reg2 = destination).

      Only the OS_F32 -> OS_F32 case appends an instruction to list.  Every
      other size pairing leaves list untouched; this includes
      OS_F64 -> OS_F64 and OS_F32 -> OS_F64, for which only disabled drafts
      exist (kept below for reference).  shuffle is not consulted. }
    var
      vmov: taicpu;
    begin
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        begin
          { Nothing is emitted.  Disabled drafts:
              OS_F64 -> OS_F64:
                list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
                list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
              OS_F32 -> OS_F64:
                list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32)) }
          exit;
        end;

      { single precision move: VMOV with the F32 postfix, reg2 <- reg1 }
      vmov:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
      vmov:=setoppostfix(vmov,PF_F32);
      list.Concat(vmov);
      { hand the instruction to add_move_instruction as well }
      add_move_instruction(vmov);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    { Loads MM register reg from the memory reference ref.

      The work is delegated to handle_load_store with opcode VLDR and no
      postfix.  fromsize, tosize and shuffle are not consulted here. }
    begin
      handle_load_store(
        list,
        A_VLDR,
        PF_None,
        reg,
        ref
      );
    end;
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    { Stores MM register reg to the memory reference ref.

      The work is delegated to handle_load_store with opcode VSTR and no
      postfix.  fromsize, tosize and shuffle are not consulted here. }
    begin
      handle_load_store(
        list,
        A_VSTR,
        PF_None,
        reg,
        ref
      );
      { VSTR cannot generate an FPU exception, so we do not need a check here }
    end;
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    { Copies integer register intreg into MM register mmreg with a VMOV.

      Only a target size of OS_F32 is supported; any other tosize raises
      internalerror(2012100813).  fromsize is not checked, and the
      (shuffle=nil) precondition is currently disabled. }
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    { Copies MM register mmreg into integer register intreg with a VMOV.

      Only a source size of OS_F32 is supported; any other fromsize raises
      internalerror(2012100814).  tosize is not checked, and the
      (shuffle=nil) precondition is currently disabled. }
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
          { VMOV cannot generate an FPU exception, so we do not need a check here }
        end;
    end;
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    { 64 bit register/register operation for Thumb-2.

      Only OP_NEG is handled here; every other operation is forwarded
      unchanged to the inherited implementation.

      OP_NEG computes regdst := 0 - regsrc in two steps:
        RSB (S postfix)  regdst.reglo := 0 - regsrc.reglo
        SBC              regdst.reghi := zero - regsrc.reghi
      where "zero" is a freshly allocated integer register loaded with 0.
      NR_DEFAULTFLAGS is allocated around the whole sequence. }
    var
      zeroreg : tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list,op,size,regsrc,regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word, flag-setting reverse subtract from the constant 0 }
      list.concat(
        setoppostfix(
          taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),
          PF_S));
      { high word: temporary holding 0 minus the source high word, via SBC }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    { 64 bit register/register operation for Thumb, built from pairs of
      32 bit instructions on the reglo/reghi halves.

      Parameters:
        list   - assembler list the instructions are appended to
        op     - operation; OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD and
                 OP_SUB are supported
        size   - not consulted by this implementation
        regsrc - source register pair
        regdst - destination register pair (also the left operand for the
                 two-operand forms)

      Any other op raises internalerror(2003083105).

      For OP_NEG, OP_ADD and OP_SUB the high-word instruction (SBC/ADC)
      follows the low-word SUB/ADD, so NR_DEFAULTFLAGS is allocated before
      the pair and released again after it.  OP_ADD and OP_SUB previously
      allocated the flags without ever releasing them; they now release
      them the same way OP_NEG does. }
    begin
      case op of
        OP_NEG:
          begin
            { regdst := 0 - regsrc.
              NOTE(review): regdst is zeroed before regsrc is read, so this
              presumably requires regsrc and regdst to be different
              registers - confirm against callers. }
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            { the two halves are independent }
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations: the two halves are independent }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, mirroring OP_NEG }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { release the flags again, mirroring OP_NEG }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    { 64 bit constant/register operation for Thumb: reg := reg op value,
      built from 32 bit instructions on reg.reglo / reg.reghi.

      Parameters:
        list  - assembler list the instructions are appended to
        op    - operation; OP_AND, OP_OR, OP_XOR, OP_ADD and OP_SUB are
                supported
        size  - not consulted by this implementation
        value - 64 bit constant operand, split with lo()/hi()
        reg   - register pair that is both operand and destination

      Any other op raises internalerror(2003083106).

      For OP_ADD/OP_SUB a low word constant in 0..255 is used as an
      immediate operand; otherwise it is first loaded into a temporary
      register.  The high word constant is always loaded into a temporary
      and combined with ADC/SBC.  NR_DEFAULTFLAGS is allocated just before
      the low-word ADD/SUB and released after the high-word ADC/SBC
      (previously it was allocated but never released).

      NOTE(review): the high word constant is loaded between the low-word
      ADD/SUB and the ADC/SBC that follows it; this presumably relies on
      a_load_const_reg not disturbing the carry flag - confirm. }
    var
      tmpreg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations: the two halves are independent }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,reg.reghi,tmpreg),PF_S));
            { both branches above allocated the flags; release them here }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            tmpreg:=cg.getintregister(list,OS_32);
            { explicit aint() cast, as in OP_ADD and the bitwise operations }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg),PF_S));
            { both branches above allocated the flags; release them here }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  procedure create_codegen;
    { Instantiates the global code generator objects for the selected
      instruction set.

      Thumb-2 is tested first, then Thumb, and plain ARM is the fallback.
      Each variant assigns cg and cg64; Thumb-2 and ARM also assign
      casmoptimizer, while the Thumb variant leaves it untouched (its
      assignment is disabled). }
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { neither Thumb variant selected: plain ARM }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5077. end.