cgcpu.pas 224 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm is shared between all arm architectures }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;

    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
   protected
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
    { creates rg[R_MMREGISTER]; which registers are made available depends
      on the capabilities of the selected FPU type }
    procedure init_mmregister_allocator;
   public
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;

    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { move instructions }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    { variants of the load/store routines which hand back a treference;
      tcgarm.a_op_const_ref reuses the returned reference for its store }
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;

    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;

    { comparison operations }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
      l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;

    { jumps and flag handling }
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { procedure entry/exit code }
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;

    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

    { block copies }
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);

    { overflow checking }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;

    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;

    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    { virtual: tthumbcgarm and tthumb2cgarm provide their own versions }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;

    { multimedia (mm) register moves and operations }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;

    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { try to generate an optimized 32 bit multiplication, returns true if
      the code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;

    { clear out potential overflow bits from 8 or 16 bit operations, i.e.
      the upper 24/16 bits of a register after an operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);

    procedure g_maybe_tls_init(list : TAsmList); override;
  end;
  { tcgarm is shared between normal arm and thumb-2 }
  tcgarm = class(tbasecgarm)
    { integer operations; the variants without overflow checking are thin
      wrappers around the *_checkoverflow methods (see the implementation) }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    { loading constants and memory into integer registers }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    { subtracts ioffset from the "self" parameter of procdef }
    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    {Multiply two 32-bit registers into lo and hi 32-bit registers}
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { normal arm cg: only adds the set-up and tear-down of the register
    allocators to tcgarm }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { 64 bit cg for all arm flavours; currently an empty common ancestor of
    tcg64farm and tthumbcg64farm }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm is shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { 64 bit integer operations on register pairs (tregister64) }
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;

    { moves between a 64 bit integer register pair and an mm register }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { 64 bit cg for normal arm; adds nothing to tcg64farm }
  tarmcg64farm = class(tcg64farm)
  end;
  { thumb cg: derives directly from tbasecgarm (not from tcgarm) and
    overrides the routines listed below }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;

    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { 64 bit cg for thumb: overrides the two-operand 64 bit operations }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { thumb-2 cg: derives from tcgarm and overrides the routines listed below }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;

    { mm register moves }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { 64 bit cg for thumb-2: only a_op64_reg_reg differs from tcg64farm }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { maps a generic comparison (topcmp) onto the ARM condition code used
      for the conditional instruction.
      NOTE(review): the entries are positional, so they must stay in the
      declaration order of topcmp (declared outside this unit) — confirm
      before changing either side. }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
      C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);

    { NOTE(review): presumably the stack page size in bytes assumed for
      Windows targets; no use is visible in this part of the file — confirm }
    winstackpagesize = 4096;
  { returns the opcode postfix (PF_S, PF_D or PF_E) matching the float type
    of def; raises an internal error for any other definition }
  function get_fpu_postfix(def : tdef) : toppostfix;
  { NOTE(review): implemented outside the part of the file reviewed here;
    presumably creates the code generator instances — confirm }
  procedure create_codegen;
  176. implementation
  177. uses
  178. globals,verbose,systems,cutils,
  179. aopt,aoptcpu,
  180. fmodule,
  181. symconst,symsym,symtable,
  182. tgobj,
  183. procinfo,cpupi,
  184. paramgr;
  185. { Range check must be disabled explicitly as conversions between signed and unsigned
  186. 32-bit values are done without explicit typecasts }
  187. {$R-}
  { Returns the opcode postfix that selects the precision matching the
    floating point definition def:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Any other float type, or a definition that is not a floatdef at all,
    triggers an internal error. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        { not a floating point type: the caller made a mistake }
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            { float type without a postfix }
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators for the plain ARM code generator:
    the integer allocator (the usable register list depends on the target
    and on the frame pointer in use), the FPA allocator when the selected
    FPU has FPA registers, and finally the mm register allocator.
    The order of the registers inside each list is kept exactly as is. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we always save R14, so we can use it }
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 does not serve as frame pointer here, so it is handed out too }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { R11 is the frame pointer (or there is no procinfo): leave it out }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { the FPA registers only exist on FPUs with the FPA capability }
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, fpu and mm register allocators and then lets the
    inherited implementation finish the clean-up. }
  procedure tarmcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].free;
      { the fpu and mm allocators are only created for some fpu types in
        init_register_allocators/init_mmregister_allocator;
        NOTE(review): this relies on the unused slots being nil — confirm }
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;
      inherited done_register_allocators;
    end;
  { Loads the constant a into the integer register reg.
    Only 8, 16 and 32 bit sizes are accepted, anything else is an internal
    error.  The strategies below are tried in this order and the first one
    that applies is used:
      1. MOV with a itself        (a passes is_shifter_const)
      2. MVN with not(a)          (not(a) passes is_shifter_const)
      3. MOV + ORR                (a can be split into two such constants)
      4. MVN + BIC                (not(a) can be split into two such constants)
      5. LDR of a 32 bit literal placed in the local data of the procedure }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      poollabel : tasmlabel;
      poolref : treference;
      firstpart, secondpart: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090907);

      { single instruction: the value itself }
      if is_shifter_const(a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { single instruction: the complement of the value }
      if is_shifter_const(not(a),rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,firstpart,secondpart) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,firstpart));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,secondpart));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),firstpart,secondpart) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,firstpart));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,secondpart));
          exit;
        end;

      { last resort: put the value behind a fresh label in the local data
        of the current procedure and load it pc-relative }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the list entry of the label which was just added }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.base:=NR_PC;
      poolref.symbol:=poollabel;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value from the memory reference Ref into the integer register reg.

    If fromsize is at least as large as tosize, the access is done with
    tosize.  Normally a single load is emitted through handle_load_store.
    The load is assembled from narrower loads which are merged with
    ORR and shifted operands when
      - the reference has an alignment of 1 or 2 which is smaller than the
        size of the access, or
      - a halfword is loaded and the cpu lacks CPUARM_HAS_ALL_MEM.
    Sizes other than 8, 16 and 32 bit cause an internal error. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg,tmpreg2 : tregister;
      so : tshifterop;
      dir : integer;
    begin
      { never read more than the destination needs }
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        { select the load postfix for the size and signedness of the access }
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308297);
      end;
      { without CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned; the
        sign extension is done at the end of this procedure }
      if (fromsize=OS_S8) and
         (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
        oppostfix:=PF_B;
      if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
         ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
          (oppostfix in [PF_SH,PF_H])) then
        begin
          { the least significant part is always loaded first (directly into
            reg); on big endian targets it lies at the highest address, so
            the partial loads start there and walk downwards }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                { NOTE(review): the bounds presumably reflect the immediate
                  offset range of the byte loads, leaving room for the
                  access at offset+1 — confirm }
                if assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-4095) or
                  (ref.offset>4094) or
                  { sometimes the compiler reused registers }
                  (reg=ref.index) or
                  (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;

                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
                tmpreg:=getintregister(list,OS_INT);
                { low byte into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { high byte into tmpreg; it is loaded as signed byte for a
                  signed halfword }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                { reg:=reg or (tmpreg shl 8) }
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);

                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                  (ref.index<>NR_NO) or
                  (ref.offset<-4095) or
                  (ref.offset>4092) or
                  { sometimes the compiler reused registers }
                  (reg=ref.index) or
                  (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;

                shifterop_reset(so);so.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfword loads: reg:=low or (high shl 16) }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end
                else
                  begin
                    { four byte loads; tmpreg receives byte 1 and, after it
                      has been merged, is reused for byte 3, tmpreg2 holds
                      byte 2 }
                    tmpreg2:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
                    so.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
                    so.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end;
              end
            else
              { byte accesses need no splitting }
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,oppostfix,reg,ref);

      { the byte was loaded unsigned above, extend the sign now }
      if (fromsize=OS_S8) and
         (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
        a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
      { signed byte into a 16 bit destination: the register is passed through
        an OS_16 -> OS_32 move afterwards.
        NOTE(review): presumably this clears the upper 16 bits left by the
        sign extending load — confirm against a_load_reg_reg }
      else if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Subtracts ioffset from the "self" parameter of procdef, wherever the
    caller side parameter locations put it.
    For every location of the parameter:
      - register:  the subtraction is done directly on the register
      - reference: the subtraction is done on the memory location
    If ioffset does not pass is_shifter_const, it is loaded into R12 first.
    An internal error is raised if procdef has no "self" parameter or if a
    location is neither a register nor a reference. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      rot : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or (selfsym.typ<>paravarsym) then
        internalerror(2003052503);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,rot) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,rot) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end
            else
              internalerror(2003091803);
          end;
          { continue with the next part of the parameter }
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as parameter described by paraloc.
    paraloc must be a simple location: the constant is either loaded into
    the parameter register or stored to the parameter's memory location.
    Any other kind of location is an internal error. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackref: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);

      if paraloc.location^.loc in [LOC_REGISTER,LOC_CREGISTER] then
        a_load_const_reg(list,size,a,paraloc.location^.register)
      else if paraloc.location^.loc=LOC_REFERENCE then
        begin
          { build a reference from the index register and the offset of
            the parameter location }
          reference_reset(stackref,paraloc.alignment,[]);
          stackref.offset:=paraloc.location^.reference.offset;
          stackref.base:=paraloc.location^.reference.index;
          a_load_const_ref(list,size,a,stackref);
        end
      else
        internalerror(2002081101);
    end;
  { Copies (a part of) a parameter from ref to the parameter location
    paralocref.  Everything is left to the inherited method, except for a
    64 bit float parameter whose current location part is 32 bit wide:
    that part is transferred as a plain 4 byte copy. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Creates the allocator for the mm registers according to the
    capabilities of the selected FPU type:
      32 registers + VFP double  -> D0..D31  (sub register R_SUBFD)
      32 registers only          -> S0..S31  (sub register R_SUBFS)
      VFP extension only         -> D0..D15  (sub register R_SUBFD)
    Without any of these capabilities no allocator is created. }
  procedure tbasecgarm.init_mmregister_allocator;
    begin
      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
                [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                 RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                 RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                ],first_mm_imreg,[])
          else
            rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
                [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
                 RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
                 RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
                ],first_mm_imreg,[]);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Passes the address of the reference r as parameter described by paraloc.
    paraloc must be a simple location.  For a register location the address
    is computed directly into that register; for a memory location it is
    computed into a temporary register which is then stored.  Any other
    kind of location is an internal error. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      destref: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REGISTER,LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
        LOC_REFERENCE:
          begin
            { compute the address first ... }
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            { ... and then write it to the slot of the parameter }
            reference_reset(destref,paraloc.alignment,[]);
            destref.offset:=paraloc.location^.reference.offset;
            destref.base:=paraloc.location^.reference.index;
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,destref);
          end;
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call (BL) to the routine with the assembler name s.
    A weak symbol reference is created when weak is set.  The reference is
    marked addr_pic when the target uses a GOT for PIC and PIC code is being
    generated, and addr_full otherwise.  The current procedure is flagged
    as containing a call. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(target,callee,0,sizeof(pint),[]);

      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        target.refaddr:=addr_pic
      else
        target.refaddr:=addr_full;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,target));

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set
        here instead of being checked with an internalerror(2003060703) }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg: BLX if the cpu has it,
    otherwise the pair "mov r14,pc" / "mov pc,reg".  The current procedure
    is flagged as containing a call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          { no BLX: set up the link register by hand and jump }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM), so it is set
        here instead of being checked with an internalerror(2003060703) }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Performs "reg := reg op a" by forwarding to the three operand variant
    with reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  { Applies "op" with the constant a to the memory location ref:
    the value is loaded into a temporary register, the operation puts its
    result into a second temporary register, and the result is stored back
    through the reference returned by the load. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,
      resultreg : tregister;
      loadedref : treference;
    begin
      { both registers are requested before any code is emitted }
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);

      { load - modify - store }
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { Two operand register operation "dst := op(src)" resp. "dst := dst op src".
      OP_NEG: rsb dst,src,#0, then the result is adjusted for the size
      OP_NOT: mvn; for 8 and 16 bit sizes the source is shifted to the top
              of the register first and the result shifted back down
      others: forwarded to the three operand variant with dst as second
              source and as destination }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shift : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              { full register: a plain mvn does the job }
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                { invert the value while moving it to the top bits ... }
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shift.shiftimm:=24
                else
                  shift.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
                { ... and bring it back by the same amount, arithmetically
                  for signed sizes and logically for unsigned ones }
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8, OS_S16] then
                  shift.shiftmode:=SM_ASR
                else
                  shift.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  const
    { Lookup tables indexed by the generic operation (TOpCG).
      NOTE(review): the entries are positional; they presumably follow the
      declaration order of TOpCG, which is declared outside this unit —
      confirm before touching an entry. }

    { ARM opcode per operation; A_NONE marks operations without an entry in
      this table.  Compared with op_reg_opcg2asmop below, this table has no
      entries for the shift and rotate opcodes. }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);

    { same as above, but additionally provides A_ASR, A_LSL, A_LSR and A_ROR }
    op_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);

    { opcode postfix per operation: PF_S or PF_None.
      NOTE(review): going by the name this is meant for the thumb code
      generator; no use is visible in this part of the file — confirm }
    op_reg_postfix_thumb: array[TOpCG] of TOpPostfix =
      (PF_None,PF_None,PF_None,PF_S,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_S,
       PF_None,PF_S,PF_S,PF_None,PF_S,PF_None,PF_S);
  { Performs "dst := src op a" by forwarding to the overflow checking
    variant with setflags=false. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      { only needed to satisfy the var parameter of the callee; it is not
        initialized here and its value is not used afterwards }
      ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,ovloc);
    end;
  { Three register operation; forwards to the overflow checking variant
    with setflags=false. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      { only needed to satisfy the var parameter of the callee; it is not
        initialized here and its value is not used afterwards }
      ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,ovloc);
    end;
  { Translates a generic shift/rotate operation into the matching shifter operand mode.
    OP_ROL also yields SM_ROR: the callers in this file compensate by rotating
    right by 32 minus the requested amount.
    Any other operation triggers internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHR:
          Result:=SM_LSR;
        OP_SHL:
          Result:=SM_LSL;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace a 32 bit multiplication "dst := src * a" by a short sequence of
    shift/add/sub instructions.

    list     : instruction list the replacement sequence is appended to
    a        : constant factor
    src, dst : source and destination register

    Returns true if a replacement sequence has been emitted, false if nothing was
    emitted and the caller has to generate a real multiplication. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      multiplier : dword;
      power : longint;
      shifterop : tshifterop;
      bitsset : byte;
      negative : boolean;
      first, doshiftadd: boolean;
      b,
      cycles : byte;
      maxeffort : byte;
      leftmostbit,i,shiftvalue: DWord;
    begin
      result:=true;
      cycles:=0;
      negative:=a<0;
      shifterop.rs:=NR_NO;
      shifterop.shiftmode:=SM_LSL;
      { a negative factor costs one extra instruction (the final rsb) }
      if negative then
        inc(cycles);
      multiplier:=dword(abs(a));
      { heuristics to estimate how much instructions are reasonable to replace the mul,
        this is currently based on XScale timings }
      { in the simplest case, we need a mov to load the constant and a mul to carry out the
        actual multiplication, this requires min. 1+4 cycles
        because the first shift imm. might cause a stall and because we need more instructions
        when replacing the mul we generate max. 3 instructions to replace this mul }
      maxeffort:=3;
      { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
        a ldr, so generating one more operation to replace this is beneficial }
      if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
        inc(maxeffort);
      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(maxeffort);
      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(maxeffort);
      { "symmetric" bit pattern like $10101010 where
        res:=a*$10101010 can be simplified into
        temp:=a*$1010
        res:=temp+temp shl 16
      }
      doshiftadd:=false;
      leftmostbit:=BsrDWord(multiplier);
      shiftvalue:=0;
      if (maxeffort>1) and (leftmostbit>2) then
        begin
          for i:=2 to 31 do
            if (multiplier shr i)=(multiplier and ($ffffffff shr (32-i))) then
              begin
                doshiftadd:=true;
                shiftvalue:=i;
                dec(maxeffort);
                { continue with the reduced factor, the final "add shl shiftvalue" restores the rest }
                multiplier:=multiplier shr shiftvalue;
                break;
              end;
        end;
      { number of set bits without the least significant one }
      bitsset:=popcnt(multiplier and $fffffffe);
      { most simple cases }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { add up ?
        basically, one add is needed for each bit being set in the constant factor
        however, the least significant bit is for free, it can be hidden in the initial
        instruction
      }
      else if (bitsset+cycles<=maxeffort) and
        (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
        begin
          first:=true;
          while multiplier<>0 do
            begin
              if multiplier=1 then
                begin
                  { The symmetric pattern reduction above can leave a factor of 1
                    (abs(a)=2^n+1, e.g. a=-32769). This can only be the case on the
                    first iteration, so a plain copy is all that is needed.
                    Without this check the code below would emit "add dst,src,src lsl #0"
                    (= 2*src) and the second dec would wrap the unsigned multiplier
                    around to $FFFFFFFF, producing a bogus instruction sequence. }
                  a_load_reg_reg(list,OS_32,OS_32,src,dst);
                  break;
                end;
              shifterop.shiftimm:=BsrDWord(multiplier);
              if odd(multiplier) then
                begin
                  { first instruction: handles bit 0 and the topmost bit at once }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
              first:=false;
              { the topmost bit has been handled }
              dec(multiplier,1 shl shifterop.shiftimm);
            end;
          if doshiftadd then
            begin
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
        begin
          first:=true;
          while multiplier<>0 do
            begin
              if first then
                begin
                  { from now on, multiplier holds what has to be subtracted from src shl power }
                  multiplier:=(1 shl power)-multiplier;
                  shifterop.shiftimm:=power;
                end
              else
                shifterop.shiftimm:=BsrDWord(multiplier);
              if odd(multiplier) then
                begin
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
                  dec(multiplier);
                end
              else
                if first then
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                else
                  begin
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
                    dec(multiplier,1 shl shifterop.shiftimm);
                  end;
              first:=false;
            end;
          if doshiftadd then
            begin
              shifterop.shiftimm:=shiftvalue;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
            end;
          if negative then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits "dst := src op a" for a constant operand a.

    list     : instruction list the code is appended to
    op, size : operation and operand size
    a        : constant operand
    src, dst : source and destination register
    setflags : request a flag setting instruction; the member cgsetflags has the
               same effect
    ovloc    : set to LOC_VOID, or to LOC_FLAGS with the condition to test when a
               flag setting add/sub has been generated for an unsigned size

    The constant is first simplified by optimize_op_const. If it fits a shifter
    constant, a single instruction is used; otherwise several cheaper replacement
    sequences are tried before falling back to loading the constant into a
    temporary register. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
      imm1, imm2: DWord;
    begin
      { may change both op and a, see the OP_NONE/OP_MOVE handling below }
      optimize_op_const(size, op, a);
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
        else
          ;
      end;
      ovloc.loc:=LOC_VOID;
      { if the negated constant is a shifter constant, swap add and sub and use the
        negated constant instead; not done when setflags is requested }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;
      { constant can be encoded directly in the instruction }
      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              { shifts are emitted as a mov with a shifter operand }
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { rol is done as ror by 32-a }
              if op = OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            {if (op in [OP_SUB, OP_ADD]) and
            ((a < 0) or
            (a > 4095)) then
            begin
            tmpreg:=getintregister(list,size);
            list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
            list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
            ));
            end
            else}
            begin
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              { the postfix expression evaluates to PF_S if flags are requested,
                otherwise to the postfix with ordinal 0 }
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
            end;
            { report where the overflow information of the flag setting instruction can be found }
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                  else
                    internalerror(2019050922);
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
            is_continuous_mask(aword(a), lsb, width) and
            ((lsb = 0) or ((lsb + width) = 32)) then
            begin
              shifterop_reset(so);
              { 16 and 8 bit masks starting at bit 0 map to uxth/uxtb on armv6 and later }
              if (width = 16) and
                (lsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width = 8) and
                (lsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb = 0 then
                begin
                  { mask covers the low bits: shift the unwanted upper bits out and back in as zeros }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask covers the high bits: shift the unwanted lower bits out and back in as zeros }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          { clear the bits in two steps if the inverted mask can be split into two shifter constants }
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          { same idea for add/sub/or/xor: apply the constant in two parts }
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
          else
            begin
              { last resort: load the constant into a register and use the register variant }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src2 op src1" for register operands.

    list            : instruction list the code is appended to
    op, size        : operation and operand size
    src1, src2, dst : source and destination registers
    setflags        : request flag setting/overflow detection; the member cgsetflags
                      has the same effect
    ovloc           : set to LOC_VOID, or for a multiplication with overflow
                      detection to LOC_FLAGS with the condition F_NE

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported here (internalerror). }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072804);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
              (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { long multiplication: the upper 32 bit end up in overflowreg and are
                  checked below }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: copy the operand
                          into a temporary register and use that one as rm. The copy has
                          to go into tmpreg; copying src2 into dst would be a no-op and
                          leave tmpreg uninitialised. }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper word must equal the sign extension of the lower word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { all three registers are the same: multiply with a copy held in
                          tmpreg (see the long multiplication case above) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { generic case: opcode from the lookup table, PF_S postfix if flags are requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and stores the result in the register pair dstlo/dsthi.
    size selects the unsigned (OS_32) or signed (OS_S32) long multiplication.
    Without long multiplication support only the lower half can be computed, so
    dsthi has to be NR_NO in that case (internalerror otherwise). }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              { only the lower 32 bit are requested: a plain mul is sufficient }
              if dstlo=NR_NO then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL,dstlo,src1,src2));
            end;
        end
      else
        begin
          list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
          if size=OS_32 then
            mulop:=A_UMULL
          else if size=OS_S32 then
            mulop:=A_SMULL
          else
            InternalError(2014060802);
          { The caller might omit dstlo or dsthi, when he is not interested in it, we still
            need valid registers everywhere. In case of dsthi = NR_NO we could fall back to
            32x32=32 bit multiplication}
          if dstlo=NR_NO then
            dstlo:=getintregister(list,size);
          if dsthi=NR_NO then
            dsthi:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg_reg(mulop,dstlo,dsthi,src1,src2));
        end;
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" after rewriting ref
    into a form the instruction can encode.

    list      : instruction list the code is appended to
    op        : load or store opcode
    oppostfix : postfix selecting the access size/signedness
    reg       : register to load into/store from
    ref       : memory reference; passed by value and adjusted locally

    Returns the reference as it was finally used in the emitted instruction. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      tmpreg1:=NR_NO;
      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020707);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;
      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative
        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.
        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         { offset outside of the generally supported range }
         (ref.offset<-4095) or
         (ref.offset>4095) or
         { smaller offset range for signed byte and halfword accesses }
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         { floating point accesses: smaller range, offset must be a multiple of 4 }
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         { additional offset restrictions when generating thumb code }
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;
      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              { use a register holding 0 as index }
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;
          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;
      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
        (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if tmpreg1<>NR_NO then
            begin
              { NOTE(review): ref.base/ref.index are not changed to tmpreg2 here, only
                tmpreg1 is - confirm that the added offset cannot get lost this way }
              tmpreg2:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
              tmpreg1:=tmpreg2;
            end
          else
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          ref.offset:=0;
        end;
      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          { fold the index into the base register, honouring the sign of the index }
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              { a negative index has to be subtracted here as well, like in the two
                cases above; adding it unconditionally would address base+index
                instead of base-index }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores the register reg to the memory location ref.

    list             : instruction list the code is appended to
    fromsize, tosize : size of the value in the register and of the memory location;
                       if fromsize is not smaller than tosize, tosize is used for both
    reg              : register holding the value
    ref              : destination reference

    If the reference is only 1 or 2 byte aligned and this is less than the store
    size, or if a halfword store is needed on a cpu without CPUARM_HAS_ALL_MEM, the
    value is written in smaller pieces; otherwise a single str is emitted. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg : tregister;
      dir : integer;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      { the destination size selects the store postfix }
      case ToSize of
        { signed integer registers }
        OS_8,
        OS_S8:
          oppostfix:=PF_B;
        OS_16,
        OS_S16:
          oppostfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          oppostfix:=PF_None;
        else
          InternalError(2003082912);
      end;
      if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
         (oppostfix =PF_H)) then
        begin
          { the least significant part is stored first; dir is the direction in which
            the address moves towards the more significant parts }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { two byte stores }
                tmpreg:=getintregister(list,OS_INT);
                usedtmpref:=ref;
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                { continue with the reference as it was actually used for the store }
                usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
                inc(usedtmpref.offset,dir);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
                a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                usedtmpref:=ref;
                if ref.alignment=2 then
                  begin
                    { two halfword stores }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    usedtmpref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,usedtmpref);
                    a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,tmpreg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_reg_ref(list,OS_16,OS_16,tmpreg,usedtmpref);
                  end
                else
                  begin
                    { four byte stores, tmpreg is shifted right by 8 between the stores }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
                    a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                    a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                    a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                  end;
              end
            else
              { remaining sizes are stored with a single instruction }
              handle_load_store(list,A_STR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_STR,oppostfix,reg,ref);
    end;
  { Stores reg to ref using the store size given by tosize (fromsize is not evaluated).
    On cpus without CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte stores
    at ref and ref+1.
    Returns the reference used by the (first) store instruction. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepostfix : toppostfix;
      highbyteref : treference;
      highbytereg : TRegister;
    begin
      case ToSize of
        OS_32,
        OS_S32:
          storepostfix:=PF_None;
        OS_16,
        OS_S16:
          storepostfix:=PF_H;
        OS_8,
        OS_S8:
          storepostfix:=PF_B;
        else
          InternalError(2003082910);
      end;
      if (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) or
        not(tosize in [OS_16,OS_S16]) then
        begin
          { a single store instruction does the job }
          result:=handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;
      { no halfword store available: write the low byte first ... }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      { ... then bits 8 and up of the value to the following byte }
      highbytereg:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,highbytereg);
      highbyteref:=result;
      inc(highbyteref.offset);
      handle_load_store(list,A_STR,PF_B,highbytereg,highbyteref);
    end;
  { Loads reg from ref. The load postfix is derived from fromsize; on cpus without
    CPUARM_HAS_ALL_MEM the special handling is selected by tosize:
    OS_S8 is loaded as unsigned byte and extended afterwards, 16 bit values are
    assembled from two byte loads at ref and ref+1.
    Returns the reference used by the (first) load instruction. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpostfix : toppostfix;
      shiftop : tshifterop;
      highbytereg : TRegister;
      highbyteref : treference;
      limitedmem : boolean;
    begin
      case FromSize of
        OS_32,
        OS_S32:
          loadpostfix:=PF_None;
        OS_S16:
          loadpostfix:=PF_SH;
        OS_16:
          loadpostfix:=PF_H;
        OS_S8:
          loadpostfix:=PF_SB;
        OS_8:
          loadpostfix:=PF_B;
        else
          InternalError(200308291);
      end;
      limitedmem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      if not limitedmem then
        result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref)
      else
        case tosize of
          OS_S8:
            begin
              { load the byte and let a_load_reg_reg do the OS_S8 -> OS_32 conversion }
              result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
              a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
            end;
          OS_16,
          OS_S16:
            begin
              { low byte from ref, high byte from ref+1, combined with orr ...,lsl #8 }
              result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
              highbytereg:=getintregister(list,OS_INT);
              highbyteref:=result;
              inc(highbyteref.offset);
              handle_load_store(list,A_LDR,PF_B,highbytereg,highbyteref);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=8;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,highbytereg,shiftop));
            end;
          else
            result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref);
        end;
    end;
  { Copies reg1 to reg2, converting the value from fromsize to tosize.

    list             : instruction list the code is appended to
    fromsize, tosize : size of the source and of the destination value
    reg1, reg2       : source and destination register

    If the sizes differ, a zero or sign extension is emitted; the instructions used
    depend on the cpu type and on whether thumb code is generated. If no conversion
    has been emitted and the registers differ, a plain mov is generated and
    registered with the register allocator. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { Emits "reg2 := reg <shiftmode> shiftimm". Thumb code goes through
      a_op_const_reg_reg, otherwise a mov with a shifter operand is used.
      Uses so and reg2 of the enclosing procedure. }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if GenerateThumbCode then
          begin
            case shiftmode of
              SM_ASR:
                a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
              SM_LSR:
                a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
              SM_LSL:
                a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
              else
                internalerror(2013090301);
            end;
          end
        else
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
          end;
      end;

    var
      instr: taicpu;
      conv_done: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);
      conv_done:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          conv_done:=true;
          { when the destination is not larger than the source, only the destination size matters }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            { before armv6: extend using and/shift sequences }
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      { sign extend only up to bit 15, the upper halfword ends up zero }
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                { no conversion needed, a plain move is done below }
                conv_done:=false;
            end
          else
            { armv6 and later: use the dedicated extend instructions }
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize=OS_16 then
                    begin
                      { sxtb16 with the source rotated by 16, followed by a logical
                        shift right by 16 }
                      so.shiftmode:=SM_ROR;
                      so.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                { no conversion needed, a plain move is done below }
                conv_done:=false;
            end
        end;
      if not conv_done and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(instr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(instr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.
    Walks over all locations of the parameter and loads/copies the matching part of
    the value into each of them; the source offset is advanced by the size of each
    location handled. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      srcref,dstref : treference;
      curloc : pcgparalocation;
    begin
      srcref:=ref;
      curloc:=paraloc.location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REFERENCE :
              begin
                reference_reset_base(dstref,curloc^.reference.index,curloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,srcref,dstref,tcgsize2size[curloc^.size]);
              end;
            LOC_REGISTER :
              if curloc^.size in [OS_32,OS_F32] then
                begin
                  paramanager.allocparaloc(list,paraloc.location);
                  a_load_ref_reg(list,OS_32,OS_32,srcref,curloc^.register);
                end
              else if curloc^.size in [OS_64,OS_F64] then
                { 64 bit values are handled by the 64 bit code generator }
                cg64.a_load64_ref_cgpara(list,srcref,paraloc)
              else
                a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
            LOC_FPUREGISTER,
            LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,paraloc.location);
                a_loadfpu_ref_reg(list,size,size,ref,curloc^.register);
              end;
            else
              internalerror(200408241);
          end;
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Copies the fpu register reg1 to reg2 with an mvf instruction; the instruction
    postfix is taken from cgsize2fpuoppostfix for tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      moveinstr : taicpu;
    begin
      moveinstr:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(moveinstr,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads the fpu register reg from ref with an ldf instruction. The postfix is
    selected by fromsize; if tosize differs, a register to register conversion
    follows. Unsupported source sizes trigger internalerror 200309021. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      loadpostfix : toppostfix;
    begin
      if fromsize in [OS_32,OS_F32] then
        loadpostfix:=PF_S
      else if fromsize in [OS_64,OS_F64] then
        loadpostfix:=PF_D
      else if fromsize=OS_F80 then
        loadpostfix:=PF_E
      else
        InternalError(200309021);
      handle_load_store(list,A_LDF,loadpostfix,reg,ref);
      { convert in place if the requested size differs from the loaded one }
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref using STF. The postfix
    (S/D/E) is chosen from tosize; any other size raises
    InternalError(200309022). fromsize is not used. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      pf : toppostfix;
    begin
      if tosize=OS_F32 then
        pf:=PF_S
      else if tosize=OS_F64 then
        pf:=PF_D
      else if tosize=OS_F80 then
        pf:=PF_E
      else
        InternalError(200309022);
      handle_load_store(list,A_STF,pf,reg,ref);
    end;
  { Emits a check of the FPSCR and a call to FPC_THROWFPUEXCEPTION.
    Nothing is emitted unless needs_check_for_fpu_exceptions holds and
    either force is set or the current procedure has FPUExceptionCheckNeeded
    set. With clear=true that flag is reset after the check was emitted. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      statusreg : TRegister;
      skipthrow : taicpu;
      donelabel : TAsmLabel;
    begin
      if not needs_check_for_fpu_exceptions then
        exit;
      if not(force or current_procinfo.FPUExceptionCheckNeeded) then
        exit;

      { read FPSCR and keep only the bits selected by the $9f mask; the S
        postfix makes the AND update the condition flags }
      statusreg:=getintregister(list,OS_INT);
      list.concat(taicpu.op_reg_reg(A_FMRX,statusreg,NR_FPSCR));
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,statusreg,statusreg,$9f),PF_S));

      { jump over the call when the masked value is zero }
      current_asmdata.getjumplabel(donelabel);
      skipthrow:=taicpu.op_sym(A_B,donelabel);
      skipthrow.is_jmp:=true;
      skipthrow.condition:=C_EQ;
      list.concat(skipthrow);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      a_label(list,donelabel);
      if clear then
        current_procinfo.FPUExceptionCheckNeeded:=false;
    end;
  1621. { comparison operations }
  { Compares reg with the constant a and jumps to l when cmp_op holds.
    The comparison is encoded as CMP with an immediate when the constant
    fits, as CMN with the negated constant when that fits (ARM mode only),
    and otherwise through a temporary register loaded with the constant.
    The flags register is allocated around the compare and the jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      shiftval : byte;
      fits_cmp : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      if GenerateThumbCode then
        fits_cmp:=is_thumb_imm(a)
      else
        fits_cmp:=is_shifter_const(a,shiftval);
      if fits_cmp then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual,
        so these two constants are never turned into a CMN }
      else if not(GenerateThumbCode) and (a<>$7fffffff) and (a<>-1) and is_shifter_const(-a,shiftval) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.
    reverse=true : CLZ, then dst:=31-dst, then dst:=dst and 255.
    reverse=false: RBIT followed by CLZ; a result of 32 is replaced by $ff
                   with a conditional MOV (preceded by an IT block when
                   generating Thumb-2 code).
    srcsize and dstsize are not used. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags register is allocated for the duration of both instructions.
    size is not used. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s. BL is used
    when generating Thumb code, B otherwise; the instruction is marked as a
    jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jmpop : tasmop;
      jmpinstr : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmpinstr:=taicpu.op_sym(jmpop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jmpinstr.is_jmp:=true;
      list.concat(jmpinstr);
    end;
  { Emits an unconditional jump to label l. BL is used when generating
    Thumb code, B otherwise; the instruction is marked as a jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jmpop : tasmop;
      jmpinstr : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmpinstr:=taicpu.op_sym(jmpop,l);
      jmpinstr.is_jmp:=true;
      list.concat(jmpinstr);
    end;
  { Emits a jump to l that is taken when the flags f are set.
    ARM/Thumb-2: a single conditional B.
    Thumb      : a conditional B with the inverted condition that skips
                 over an unconditional jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materialises the flags f as 0/1 in reg using two conditional MOVs:
    reg:=1 under the condition for f, reg:=0 under the inverse condition.
    size is not used. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Emits the profiling hook: on arm-linux R14 is pushed and
    __gnu_mcount_nc is called. Every other target raises
    internalerror(2014091201). }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Emits the procedure prologue into list.

    list         - assembler list the prologue is appended to
    localsize    - requested size of the local area; rounded up to a multiple
                   of 4 first and re-aligned against the register save area
                   before the stack pointer is adjusted
    nostackframe - when true nothing is appended to list; only the local
                   bookkeeping (aligned localsize, registerarea,
                   stackpaddingreg, lastfloatreg) is initialised

    With a stack frame the routine, in this order:
      1. determines which FPA or VFP registers need saving,
      2. pushes the integer registers (one STM on non-Darwin targets, two
         on Darwin) and emits the matching CFI,
      3. subtracts the local area from the stack pointer,
      4. stores the FPA/VFP registers at floatregstart.
    As a side effect tcpuprocinfo(current_procinfo).stackpaddingreg is set
    to the register that was pushed purely for alignment, or left at
    High(TSuperRegister) when no such register was added. }
  1738. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1739. var
  1740. ref : treference;
  1741. shift : byte;
  1742. firstfloatreg,lastfloatreg,
  1743. r : byte;
  1744. mmregs,
  1745. regs, saveregs : tcpuregisterset;
  1746. registerarea, offset,
  1747. r7offset,
  1748. stackmisalignment : pint;
  1749. imm1, imm2: DWord;
  1750. stack_parameters : Boolean;
  1751. begin
  1752. LocalSize:=align(LocalSize,4);
  1753. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1754. { call instruction does not put anything on the stack }
  1755. registerarea:=0;
        { High(TSuperRegister) serves as the "no padding register" marker }
  1756. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1757. lastfloatreg:=RS_NO;
  1758. if not(nostackframe) then
  1759. begin
  1760. firstfloatreg:=RS_NO;
  1761. mmregs:=[];
        { step 1: only collect the FPA range (firstfloatreg..lastfloatreg) or
          the VFP set (mmregs) here; the stores are emitted at the very end }
  1762. case current_settings.fputype of
  1763. fpu_none,
  1764. fpu_soft,
  1765. fpu_libgcc:
  1766. ;
  1767. fpu_fpa,
  1768. fpu_fpa10,
  1769. fpu_fpa11:
  1770. begin
  1771. { save floating point registers? }
  1772. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1773. for r:=RS_F0 to RS_F7 do
  1774. if r in regs then
  1775. begin
  1776. if firstfloatreg=RS_NO then
  1777. firstfloatreg:=r;
  1778. lastfloatreg:=r;
  1779. inc(registerarea,12);
  1780. end;
  1781. end;
  1782. else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
  1783. begin;
  1784. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1785. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1786. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1787. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1788. end
  1789. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  1790. begin;
  1791. { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
  1792. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1793. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1794. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
  1795. end
  1796. else
  1797. internalerror(2019050924);
  1798. end;
  1799. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1800. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1801. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1802. { save int registers }
  1803. reference_reset(ref,4,[]);
  1804. ref.index:=NR_STACK_POINTER_REG;
  1805. ref.addressmode:=AM_PREINDEXED;
  1806. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
        { step 2: non-Darwin targets push everything with a single STMFD;
          the Darwin layout is produced by the else branch further down }
  1807. if not(target_info.system in systems_darwin) then
  1808. begin
        { NOTE(review): the stack pointer was already allocated a few lines
          above; this second a_reg_alloc looks redundant - confirm }
  1809. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1810. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1811. begin
  1812. a_reg_alloc(list,NR_R12);
  1813. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1814. end;
  1815. { the (old) ARM APCS requires saving both the stack pointer (to
  1816. crawl the stack) and the PC (to identify the function this
  1817. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1818. and R15 -- still needs updating for EABI and Darwin, they don't
  1819. need that }
  1820. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1821. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1822. else
  1823. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1824. include(regs,RS_R14);
  1825. if regs<>[] then
  1826. begin
  1827. for r:=RS_R0 to RS_R15 do
  1828. if r in regs then
  1829. inc(registerarea,4);
  1830. { if the stack is not 8 byte aligned, try to add an extra register,
  1831. so we can avoid the extra sub/add ...,#4 later (KB) }
  1832. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1833. for r:=RS_R3 downto RS_R0 do
  1834. if not(r in regs) then
  1835. begin
  1836. regs:=regs+[r];
  1837. inc(registerarea,4);
  1838. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1839. break;
  1840. end;
  1841. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1842. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
  1843. end;
  1844. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1845. begin
        { describe the slot of every pushed register to the CFI, starting
          at offset -4 for the highest register number }
  1846. offset:=-4;
  1847. for r:=RS_R15 downto RS_R0 do
  1848. if r in regs then
  1849. begin
  1850. current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),offset);
  1851. dec(offset,4);
  1852. end;
  1853. { the framepointer now points to the saved R15, so the saved
  1854. framepointer is at R11-12 (for get_caller_frame) }
  1855. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1856. a_reg_dealloc(list,NR_R12);
  1857. current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
  1858. current_asmdata.asmcfi.cfa_def_cfa_offset(list,4);
  1859. end;
  1860. end
  1861. else
  1862. begin
  1863. { always save r14 if we use r7 as the framepointer, because
  1864. the parameter offsets are hardcoded in advance and always
  1865. assume that r14 sits on the stack right behind the saved r7
  1866. }
  1867. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1868. include(regs,RS_FRAME_POINTER_REG);
  1869. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1870. include(regs,RS_R14);
  1871. if regs<>[] then
  1872. begin
  1873. { on Darwin, you first have to save [r4-r7,lr], and then
  1874. [r8,r10,r11] and make r7 point to the previously saved
  1875. r7 so that you can perform a stack crawl based on it
  1876. ([r7] is previous stack frame, [r7+4] is return address
  1877. }
  1878. include(regs,RS_FRAME_POINTER_REG);
  1879. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
        { r7offset counts the bytes taken by pushed registers that are
          numbered below the frame pointer register }
  1880. r7offset:=0;
  1881. for r:=RS_R0 to RS_R15 do
  1882. if r in saveregs then
  1883. begin
  1884. inc(registerarea,4);
  1885. if r<RS_FRAME_POINTER_REG then
  1886. inc(r7offset,4);
  1887. end;
  1888. { save the registers }
  1889. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1890. { make r7 point to the saved r7 (regardless of whether this
  1891. frame uses the framepointer, for backtrace purposes) }
  1892. if r7offset<>0 then
  1893. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1894. else
  1895. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1896. { now save the rest (if any) }
  1897. saveregs:=regs-saveregs;
  1898. if saveregs<>[] then
  1899. begin
  1900. for r:=RS_R8 to RS_R11 do
  1901. if r in saveregs then
  1902. inc(registerarea,4);
  1903. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1904. end;
  1905. end;
  1906. end;
        { step 3: reserve the local area; localsize is padded so that
          registerarea+localsize is a multiple of localalignmax }
  1907. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1908. if (LocalSize<>0) or
  1909. ((stackmisalignment<>0) and
  1910. ((pi_do_call in current_procinfo.flags) or
  1911. (po_assembler in current_procinfo.procdef.procoptions))) then
  1912. begin
  1913. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  1914. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1915. begin
  1916. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1917. internalerror(2014030901)
  1918. else
  1919. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1920. end;
        { pick the cheapest encoding: one SUB with an immediate, two SUBs
          with split immediates, or a SUB of the constant loaded into R12 }
  1921. if is_shifter_const(localsize,shift) then
  1922. begin
  1923. a_reg_dealloc(list,NR_R12);
  1924. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1925. end
  1926. else if split_into_shifter_const(localsize, imm1, imm2) then
  1927. begin
  1928. a_reg_dealloc(list,NR_R12);
  1929. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1930. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1931. end
  1932. else
  1933. begin
  1934. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1935. a_reg_alloc(list,NR_R12);
  1936. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1937. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1938. a_reg_dealloc(list,NR_R12);
  1939. end;
  1940. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1941. current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
  1942. end;
        { step 4: store the FPA/VFP registers collected in step 1 }
  1943. if (mmregs<>[]) or
  1944. (firstfloatreg<>RS_NO) then
  1945. begin
  1946. reference_reset(ref,4,[]);
  1947. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1948. (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
  1949. begin
        { NOTE(review): the test below is made on floatregstart while the
          immediate that gets emitted is -floatregstart; these are not
          always encodable under the same conditions - confirm intent }
  1950. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1951. begin
  1952. a_reg_alloc(list,NR_R12);
  1953. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1954. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1955. a_reg_dealloc(list,NR_R12);
  1956. end
  1957. else
  1958. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1959. ref.base:=NR_R12;
  1960. end
  1961. else
  1962. begin
  1963. ref.base:=current_procinfo.framepointer;
  1964. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1965. end;
  1966. case current_settings.fputype of
  1967. fpu_fpa,
  1968. fpu_fpa10,
  1969. fpu_fpa11:
  1970. begin
  1971. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1972. lastfloatreg-firstfloatreg+1,ref));
  1973. end;
  1974. else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
  1975. begin
        { VSTM takes the address register in ref.index, so move it over }
  1976. ref.index:=ref.base;
  1977. ref.base:=NR_NO;
  1978. if mmregs<>[] then
  1979. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1980. end
  1981. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  1982. begin
  1983. ref.index:=ref.base;
  1984. ref.base:=NR_NO;
  1985. if mmregs<>[] then
  1986. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
  1987. end
  1988. else
  1989. internalerror(2019050923);
  1990. end;
  1991. end;
  1992. end;
  1993. end;
  { Emits the procedure epilogue into list.

    list         - assembler list the epilogue is appended to
    parasize     - not referenced in this routine
    nostackframe - when true only the return instruction is emitted
                   (BX R14 if the CPU has BX, MOV PC,R14 otherwise)

    With a stack frame the routine, in this order:
      1. reloads the saved FPA/VFP registers from floatregstart,
      2. builds the set of integer registers to pop (R14 is replaced by R15
         so that the load multiple also performs the return),
      3. re-adds the padding register recorded in stackpaddingreg,
      4. either releases the local area and pops with LDMFD from the stack
         pointer (stack pointer frames and Darwin) or pops everything with
         a single LDMEA based on the frame pointer. }
  1994. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1995. var
  1996. ref : treference;
  1997. LocalSize : longint;
  1998. firstfloatreg,lastfloatreg,
  1999. r,
  2000. shift : byte;
  2001. mmregs,
  2002. saveregs,
  2003. regs : tcpuregisterset;
  2004. registerarea,
  2005. stackmisalignment: pint;
  2006. paddingreg: TSuperRegister;
  2007. imm1, imm2: DWord;
  2008. begin
  2009. if not(nostackframe) then
  2010. begin
  2011. registerarea:=0;
  2012. firstfloatreg:=RS_NO;
  2013. lastfloatreg:=RS_NO;
  2014. mmregs:=[];
  2015. saveregs:=[];
        { step 1a: find out which FPA/VFP registers have to be reloaded }
  2016. case current_settings.fputype of
  2017. fpu_none,
  2018. fpu_soft,
  2019. fpu_libgcc:
  2020. ;
  2021. fpu_fpa,
  2022. fpu_fpa10,
  2023. fpu_fpa11:
  2024. begin
  2025. { restore floating point registers? }
  2026. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  2027. for r:=RS_F0 to RS_F7 do
  2028. if r in regs then
  2029. begin
  2030. if firstfloatreg=RS_NO then
  2031. firstfloatreg:=r;
  2032. lastfloatreg:=r;
  2033. { floating point register space is already included in
  2034. localsize below by calc_stackframe_size
  2035. inc(registerarea,12);
  2036. }
  2037. end;
  2038. end;
        { NOTE(review): g_proc_entry masks with [0..15] when the FPU lacks
          FPUARM_HAS_32REGS, whereas every VFP type is masked with [0..31]
          here - confirm that the saved and restored sets always agree }
  2039. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  2040. begin
  2041. { restore vfp registers? }
  2042. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  2043. they have numbers>$1f which is not really correct as they should simply have the same numbers
  2044. as the even ones by with a different subtype as it is done on x86 with al/ah }
  2045. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  2046. end
  2047. else
  2048. internalerror(2019050908);
  2049. end;
        { step 1b: emit the reload, addressing the save area either through
          R12 or directly relative to the frame pointer }
  2050. if (firstfloatreg<>RS_NO) or
  2051. (mmregs<>[]) then
  2052. begin
  2053. reference_reset(ref,4,[]);
  2054. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  2055. (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
  2056. begin
        { NOTE(review): the test below is made on floatregstart while the
          immediate that gets emitted is -floatregstart - confirm intent }
  2057. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  2058. begin
  2059. a_reg_alloc(list,NR_R12);
  2060. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  2061. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  2062. a_reg_dealloc(list,NR_R12);
  2063. end
  2064. else
  2065. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  2066. ref.base:=NR_R12;
  2067. end
  2068. else
  2069. begin
  2070. ref.base:=current_procinfo.framepointer;
  2071. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  2072. end;
  2073. case current_settings.fputype of
  2074. fpu_fpa,
  2075. fpu_fpa10,
  2076. fpu_fpa11:
  2077. begin
  2078. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2079. lastfloatreg-firstfloatreg+1,ref));
  2080. end;
  2081. else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
  2082. begin
  2083. ref.index:=ref.base;
  2084. ref.base:=NR_NO;
  2085. if mmregs<>[] then
  2086. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2087. end
  2088. else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
  2089. begin
  2090. ref.index:=ref.base;
  2091. ref.base:=NR_NO;
  2092. if mmregs<>[] then
  2093. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
  2094. end
  2095. else
  2096. internalerror(2019050921);
  2097. end;
  2098. end;
        { step 2: integer registers to pop; loading R15 instead of R14 makes
          the load multiple return to the caller }
  2099. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2100. if (pi_do_call in current_procinfo.flags) or
  2101. (regs<>[]) or
  2102. ((target_info.system in systems_darwin) and
  2103. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2104. begin
  2105. exclude(regs,RS_R14);
  2106. include(regs,RS_R15);
  2107. if (target_info.system in systems_darwin) then
  2108. include(regs,RS_FRAME_POINTER_REG);
  2109. end;
  2110. if not(target_info.system in systems_darwin) then
  2111. begin
  2112. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2113. The saved PC came after that but is discarded, since we restore
  2114. the stack pointer }
  2115. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2116. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2117. end
  2118. else
  2119. begin
  2120. { restore R8-R11 already if necessary (they've been stored
  2121. before the others) }
  2122. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2123. if saveregs<>[] then
  2124. begin
        { ref is only prepared here; the LDM that uses it is emitted
          further down, after the local area has been released }
  2125. reference_reset(ref,4,[]);
  2126. ref.index:=NR_STACK_POINTER_REG;
  2127. ref.addressmode:=AM_PREINDEXED;
  2128. for r:=RS_R8 to RS_R11 do
  2129. if r in saveregs then
  2130. inc(registerarea,4);
  2131. regs:=regs-saveregs;
  2132. end;
  2133. end;
  2134. for r:=RS_R0 to RS_R15 do
  2135. if r in regs then
  2136. inc(registerarea,4);
  2137. { reapply the stack padding reg, in case there was one, see the complimentary
  2138. comment in g_proc_entry() (KB) }
  2139. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
        { only R0..R3 are ever chosen as padding register by g_proc_entry;
          any other value means that no padding register was pushed }
  2140. if paddingreg < RS_R4 then
  2141. if paddingreg in regs then
  2142. internalerror(201306190)
  2143. else
  2144. begin
  2145. regs:=regs+[paddingreg];
  2146. inc(registerarea,4);
  2147. end;
  2148. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
        { step 4, variant A: frames addressed through the stack pointer and
          all Darwin frames release the locals and pop with LDMFD }
  2149. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2150. (target_info.system in systems_darwin) then
  2151. begin
  2152. LocalSize:=current_procinfo.calc_stackframe_size;
  2153. if (LocalSize<>0) or
  2154. ((stackmisalignment<>0) and
  2155. ((pi_do_call in current_procinfo.flags) or
  2156. (po_assembler in current_procinfo.procdef.procoptions))) then
  2157. begin
  2158. if pi_estimatestacksize in current_procinfo.flags then
  2159. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2160. else
  2161. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  2162. if is_shifter_const(LocalSize,shift) then
  2163. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2164. else if split_into_shifter_const(localsize, imm1, imm2) then
  2165. begin
  2166. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2167. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2168. end
  2169. else
  2170. begin
  2171. a_reg_alloc(list,NR_R12);
  2172. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2173. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2174. a_reg_dealloc(list,NR_R12);
  2175. end;
  2176. end;
  2177. if (target_info.system in systems_darwin) and
  2178. (saveregs<>[]) then
  2179. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
        { nothing left to pop: return through R14 directly }
  2180. if regs=[] then
  2181. begin
  2182. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2183. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2184. else
  2185. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2186. end
  2187. else
  2188. begin
  2189. reference_reset(ref,4,[]);
  2190. ref.index:=NR_STACK_POINTER_REG;
  2191. ref.addressmode:=AM_PREINDEXED;
  2192. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2193. end;
  2194. end
  2195. else
  2196. begin
        { step 4, variant B: frame pointer based frame; the register set
          contains R13 and R15, so this one LDMEA also resets the stack
          pointer and returns }
  2197. { restore int registers and return }
  2198. reference_reset(ref,4,[]);
  2199. ref.index:=NR_FRAME_POINTER_REG;
  2200. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2201. end;
  2202. end
  2203. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2204. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2205. else
  2206. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2207. end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got. Nothing is emitted unless PIC code is being
    generated, the procedure needs the GOT and the target uses a GOT for
    PIC. The displacement to _GLOBAL_OFFSET_TABLE_ is placed in the local
    data of the procedure, loaded PC relative into R12 and added to PC. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel,
      anchorlabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sreg: byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before the GOT initialization code, allocate the registers used for
        procedure parameters to prevent usage of these registers for temp
        operations in later stages of code generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sreg:=RS_R0 to RS_R3 do
        if sreg in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { constant pool slot, addressed PC relative through its own label }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { the slot holds the GOT symbol relative to the anchor label, with an
        extra -8 in the offset }
      current_asmdata.getaddrlabel(anchorlabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,anchorlabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,anchorlabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sreg:=RS_R3 downto RS_R0 do
        if sreg in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.
    Only references with addressmode AM_OFFSET are accepted. References
    with a symbol, or with an offset that is not encodable (neither as is
    nor negated), are first simplified by fixref. The address is then built
    from base, index and offset with ADD/SUB, a constant load or a plain
    register move, depending on which parts are present. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      shiftval : byte;
      effref : treference;
      moveinstr : taicpu;
      indexop : topcg;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      effref:=ref;
      { Be sure to have a base register }
      if effref.base=NR_NO then
        begin
          if effref.shiftmode<>SM_None then
            internalerror(2014020702);
          if effref.signindex<0 then
            internalerror(200312023);
          effref.base:=effref.index;
          effref.index:=NR_NO;
        end;

      if assigned(effref.symbol) or
         (not(is_shifter_const(effref.offset,shiftval)) and
          not(is_shifter_const(-effref.offset,shiftval))) then
        fixref(list,effref);

      { expect a base here if there is an index }
      if (effref.base=NR_NO) and (effref.index<>NR_NO) then
        internalerror(200312022);

      if effref.index=NR_NO then
        begin
          if effref.base=NR_NO then
            a_load_const_reg(list,OS_ADDR,effref.offset,r)
          else if effref.offset=0 then
            begin
              { plain copy of the base; registered as a move instruction }
              moveinstr:=taicpu.op_reg_reg(A_MOV,r,effref.base);
              list.concat(moveinstr);
              add_move_instruction(moveinstr);
            end
          else
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,effref.offset,effref.base,r);
        end
      else
        begin
          if effref.shiftmode<>SM_None then
            internalerror(200312021);
          { a negative signindex selects a subtraction, otherwise an addition }
          if effref.signindex<0 then
            indexop:=OP_SUB
          else
            indexop:=OP_ADD;
          a_op_reg_reg_reg(list,indexop,OS_ADDR,effref.base,effref.index,r);
          if effref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,effref.offset,r,r);
        end;
    end;
  2302. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2303. var
  2304. tmpreg, tmpreg2 : tregister;
  2305. tmpref : treference;
  2306. l, piclabel : tasmlabel;
  2307. indirection_done : boolean;
  2308. begin
  2309. { absolute symbols can't be handled directly, we've to store the symbol reference
  2310. in the text segment and access it pc relative
  2311. For now, we assume that references where base or index equals to PC are already
  2312. relative, all other references are assumed to be absolute and thus they need
  2313. to be handled extra.
  2314. A proper solution would be to change refoptions to a set and store the information
  2315. if the symbol is absolute or relative there.
  2316. }
  2317. { create consts entry }
  2318. reference_reset(tmpref,4,[]);
  2319. current_asmdata.getjumplabel(l);
  2320. cg.a_label(current_procinfo.aktlocaldata,l);
  2321. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2322. piclabel:=nil;
  2323. tmpreg:=NR_NO;
  2324. indirection_done:=false;
  2325. if assigned(ref.symbol) then
  2326. begin
  2327. if (target_info.system=system_arm_ios) and
  2328. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2329. begin
  2330. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2331. if ref.offset<>0 then
  2332. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2333. indirection_done:=true;
  2334. end
  2335. else if ref.refaddr=addr_gottpoff then
  2336. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2337. else if ref.refaddr=addr_tlsgd then
  2338. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
  2339. else if ref.refaddr=addr_tlsdesc then
  2340. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
  2341. else if ref.refaddr=addr_tpoff then
  2342. begin
  2343. if assigned(ref.relsymbol) or (ref.offset<>0) then
  2344. Internalerror(2019092804);
  2345. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
  2346. end
  2347. else if (cs_create_pic in current_settings.moduleswitches) then
  2348. if (tf_pic_uses_got in target_info.flags) then
  2349. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2350. else
  2351. begin
  2352. { ideally, we would want to generate
  2353. ldr r1, LPICConstPool
  2354. LPICLocal:
  2355. ldr/str r2,[pc,r1]
  2356. ...
  2357. LPICConstPool:
  2358. .long _globsym-(LPICLocal+8)
  2359. However, we cannot be sure that the ldr/str will follow
  2360. right after the call to fixref, so we have to load the
  2361. complete address already in a register.
  2362. }
  2363. current_asmdata.getaddrlabel(piclabel);
  2364. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2365. end
  2366. else
  2367. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2368. end
  2369. else
  2370. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2371. { load consts entry }
  2372. if not indirection_done then
  2373. begin
  2374. tmpreg:=getintregister(list,OS_INT);
  2375. tmpref.symbol:=l;
  2376. tmpref.base:=NR_PC;
  2377. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2378. if (cs_create_pic in current_settings.moduleswitches) and
  2379. (tf_pic_uses_got in target_info.flags) and
  2380. assigned(ref.symbol) then
  2381. begin
  2382. {$ifdef EXTDEBUG}
  2383. if not (pi_needs_got in current_procinfo.flags) then
  2384. Comment(V_warning,'pi_needs_got not included');
  2385. {$endif EXTDEBUG}
  2386. Include(current_procinfo.flags,pi_needs_got);
  2387. reference_reset(tmpref,4,[]);
  2388. tmpref.base:=current_procinfo.got;
  2389. tmpref.index:=tmpreg;
  2390. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2391. if ref.offset<>0 then
  2392. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2393. end;
  2394. end;
  2395. if assigned(piclabel) then
  2396. begin
  2397. cg.a_label(list,piclabel);
  2398. tmpreg2:=getaddressregister(list);
  2399. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2400. tmpreg:=tmpreg2
  2401. end;
  2402. { This routine can be called with PC as base/index in case the offset
  2403. was too large to encode in a load/store. In that case, the entire
  2404. absolute expression has been re-encoded in a new constpool entry, and
  2405. we have to remove the use of PC from the original reference (the code
  2406. above made everything relative to the value loaded from the new
  2407. constpool entry) }
  2408. if is_pc(ref.base) then
  2409. ref.base:=NR_NO;
  2410. if is_pc(ref.index) then
  2411. ref.index:=NR_NO;
  2412. if (ref.base<>NR_NO) then
  2413. begin
  2414. if ref.index<>NR_NO then
  2415. begin
  2416. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2417. ref.base:=tmpreg;
  2418. end
  2419. else
  2420. if ref.base<>NR_PC then
  2421. begin
  2422. ref.index:=tmpreg;
  2423. ref.shiftimm:=0;
  2424. ref.signindex:=1;
  2425. ref.shiftmode:=SM_None;
  2426. end
  2427. else
  2428. ref.base:=tmpreg;
  2429. end
  2430. else
  2431. ref.base:=tmpreg;
  2432. ref.offset:=0;
  2433. ref.symbol:=nil;
  2434. end;
  { Copies len bytes from source to dest by emitting a call to FPC_MOVE
    rather than inline load/store code. The parameter locations are taken
    from the system procedure 'MOVE'. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      moveproc : tprocdef;
      srcpara,dstpara,countpara : TCGPara;
    begin
      moveproc:=search_system_proc('MOVE');

      srcpara.init;
      dstpara.init;
      countpara.init;
      paramanager.getcgtempparaloc(list,moveproc,1,srcpara);
      paramanager.getcgtempparaloc(list,moveproc,2,dstpara);
      paramanager.getcgtempparaloc(list,moveproc,3,countpara);

      { the parameters are loaded last-to-first: count, destination, source }
      a_load_const_cgpara(list,OS_SINT,len,countpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,countpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);

      { the volatile integer and fpu registers are marked as allocated
        around the call }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      countpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits code that copies len bytes from source to dest.

    list    : assembler list the generated instructions are appended to
    source  : reference to the first byte to read
    dest    : reference to the first byte to write
    len     : number of bytes to copy; nothing is emitted for len=0
    aligned : when true, 16 and 32 bit accesses are used where possible;
              when false, small copies are done byte by byte

    Strategy, in this order:
      - aligned copies of exactly 4 or 2 bytes: one load/store pair
      - aligned copies of at most helpsize bytes: unrolled 32 bit
        loads/stores through temporary registers, followed by a 16/8 bit tail
      - unaligned copies of at most 4 bytes: unrolled byte loads/stores
      - everything else: a byte copy loop (genloop or genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved registers; the +2 in helpsize accounts for the registers
        required for referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;

    type
      ttmpregisters = array[1..maxtmpreg_arm] of tregister;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:ttmpregisters;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Emits a copy loop for ARM code using post-indexed addressing on
      srcref/dstref, followed by an unrolled tail for count mod size bytes.
      Uses countreg, srcref, dstref and r of the enclosing procedure.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { decrement the counter and set the flags tested by the jump below }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether creating a separate ref is needed or
      if we can keep the original reference while copying;
      reads len of the enclosing procedure }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Thumb variant of genloop: instead of post-indexed addressing the base
      registers of srcref/dstref are advanced with explicit ADD instructions.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        refincofs(srcref);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      tmpregisters:=Default(ttmpregisters);
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest size that is still copied with unrolled code }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { first load as many 32 bit words as there are temp registers ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;

          { ... then store them all }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy whatever is left, using the widest access that still fits }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference inherits its properties from source
                (the temppos was previously taken from dest by mistake) }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

  //            if cs_opt_size in current_settings.optimizerswitches  then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    when the alignment of either reference is 1 or 3, and as aligned
    otherwise. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      is_aligned : boolean;
    begin
      is_aligned:=not((source.alignment in [1,3]) or
                      (dest.alignment in [1,3]));
      g_concatcopy_internal(list,source,dest,len,is_aligned);
    end;
  { Overflow check without an explicit overflow location: passes a
    location whose loc is LOC_VOID on to g_overflowCheck_loc. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      void_ovloc : tlocation;
    begin
      void_ovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,void_ovloc);
    end;
  { Emits an overflow check if cs_check_overflow is active: a conditional
    branch that skips a call to FPC_OVERFLOW when no overflow occurred.

    With ovloc.loc=LOC_VOID the branch condition is derived from def and
    from the opcode of the last instruction in the list; with LOC_FLAGS the
    inverse of ovloc.resflags is used. Any other location is an internal
    error. }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      branch : TAiCpu;
      okflags : tresflags;
      carry_based : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        LOC_VOID:
          begin
            branch:=taicpu.op_sym(A_B,skiplabel);
            branch.is_jmp:=true;
            { pointers and the ordinal types listed below are checked via
              the carry flag, everything else via the overflow flag }
            carry_based:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                 pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if carry_based then
              begin
                { subtract-type instructions use the opposite carry condition }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  branch.SetCondition(C_CS)
                else
                  branch.SetCondition(C_CC);
              end
            else
              branch.SetCondition(C_VC);
            list.concat(branch);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { saving the registers is taken care of by g_proc_entry }
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { restoring the registers is taken care of by g_proc_exit }
    end;
  { Emits a jump to l that is taken when cond holds.

    For ARM code this is a single conditional branch. For Thumb code a
    conditional branch with the inverted condition skips over an
    unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Returns the opcode for a scalar mm register move between two float
    sizes: A_VMOV when both sizes are equal (OS_F32 or OS_F64), A_VCVT
    between OS_F32 and OS_F64. Every other combination is an internal
    error. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      op_table : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=op_table[fromsize,tosize];
      { A_NONE marks an unsupported size combination }
      if result<>A_NONE then
        exit;
      internalerror(200312205);
    end;
  { Returns the instruction postfix stored in the table below for the pair
    [fromsize,tosize]; PF_None for every combination other than
    OS_F32/OS_F64. No validity check is done here. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      postfix_table : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    var
      pf : TOpPostfix;
    begin
      pf:=postfix_table[fromsize,tosize];
      result:=pf;
    end;
  { Moves or converts a scalar value from mm register reg1 to mm register
    reg2. Only scalar shuffles (or none) are accepted. A VMOV is registered
    as move instruction; for any other opcode an fpu exception check may be
    emitted. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      instr: taicpu;
      op: tasmop;
      pf: TOpPostfix;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407);

      { note: both lookups are indexed with [tosize,fromsize] }
      op:=get_scalar_mm_op(tosize,fromsize);
      pf:=get_scalar_mm_prefix(tosize,fromsize);
      instr:=setoppostfix(taicpu.op_reg_reg(op,reg2,reg1),pf);
      list.concat(instr);

      if instr.opcode=A_VMOV then
        { VMOV cannot generate an FPU exception, so we do not need a check here }
        add_move_instruction(instr)
      else
        { VCVT can generate an exception }
        maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar value from memory (ref) into mm register reg.

    Integer source sizes (32/64 bit) are treated as OS_F32/OS_F64 and must
    then match tosize exactly. References with alignment 1 or 2 are loaded
    through integer registers and transferred to the mm register; all other
    references use VLDR. If fromsize and tosize differ, the value is loaded
    into a temporary mm register and converted afterwards. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      rawreg,
      loadreg : tregister;
      rawpair : tregister64;
    begin
      { only scalar loads are supported }
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      case fromsize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { since we are loading an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112901);
          end;
        else
          internalerror(2019050920);
      end;

      { load directly into the target unless a conversion has to follow }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else if fromsize=OS_F32 then
        begin
          rawreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,ref,rawreg);
          a_loadmm_intreg_reg(list,OS_32,OS_F32,rawreg,loadreg,mms_movescalar);
        end
      else if fromsize=OS_F64 then
        begin
          rawpair.reglo:=getintregister(list,OS_32);
          rawpair.reghi:=getintregister(list,OS_32);
          cg64.a_load64_ref_reg(list,ref,rawpair);
          cg64.a_loadmm_intreg64_reg(list,OS_F64,rawpair,loadreg);
        end
      else
        internalerror(2009112412);

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores a scalar value from mm register reg to memory (ref).

    Integer target sizes (32/64 bit) are treated as OS_F32/OS_F64 and must
    then match fromsize exactly. If fromsize and tosize differ, the value is
    first converted into a temporary mm register. References with alignment
    1 or 2 are stored through integer registers; all other references use
    VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      rawreg,
      storereg : tregister;
      rawpair : tregister64;
    begin
      { only scalar stores are supported }
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      case tosize of
        OS_F32,OS_F64:
          ;
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { since we are storing an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112802);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { since we are storing an integer, no conversion may be required }
            if (tosize<>fromsize) then
              internalerror(2009112902);
          end;
        else
          internalerror(2019050919);
      end;

      { convert first if the sizes differ, otherwise store reg itself }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else if tosize=OS_F32 then
        begin
          rawreg:=getintregister(list,OS_32);
          a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,rawreg,shuffle);
          a_load_reg_ref(list,OS_32,OS_32,rawreg,ref);
        end
      else if tosize=OS_F64 then
        begin
          rawpair.reglo:=getintregister(list,OS_32);
          rawpair.reghi:=getintregister(list,OS_32);
          cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,rawpair);
          cg64.a_load64_reg_ref(list,rawpair,ref);
        end
      else
        internalerror(2009112417);
      { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
    end;
  { Transfers the raw 32 bit contents of integer register intreg to mm
    register mmreg with a VMOV. No conversion is performed: tosize must be
    OS_F32, fromsize must be OS_32 or OS_S32, and only scalar shuffles (or
    none) are accepted. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { raw data transfer only, so the sizes are fixed }
      if not(tosize=OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of mm register mmreg to integer
    register intreg with a VMOV. No conversion is performed: fromsize must
    be OS_F32, tosize must be OS_32 or OS_S32, and only scalar shuffles (or
    none) are accepted. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { raw data transfer only, so the sizes are fixed }
      if not(fromsize=OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112409);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Performs op on mm registers; only OP_XOR is supported.

    The vfp supports neither xor nor any other logical operation, but this
    routine is used to initialise global mm regvars, and an mm register is
    easily initialised with 0. With NEON and size OS_F64 a VEOR is emitted;
    otherwise src must equal dst and the register is loaded with zero from
    an integer register. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op<>OP_XOR then
        internalerror(2009112906)
      else if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size=OS_F64) then
        begin
          if (reg_cgsize(src)<>size) or
             (shuffle<>nil) then
            internalerror(2019081301);
          list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
        end
      else
        begin
          { without VEOR only "xor reg,reg", i.e. clearing, can be done }
          if (src<>dst) or
             (reg_cgsize(src)<>size) or
             (shuffle<>nil) then
            internalerror(2009112907);
          zeroreg:=getintregister(list,OS_32);
          a_load_const_reg(list,OS_32,0,zeroreg);
          if size=OS_F32 then
            list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
          else if size=OS_F64 then
            list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
          else
            internalerror(2009112908);
        end;
    end;
  { After one of the operations listed below on an 8 or 16 bit size,
    re-loads dst from OS_32 to size in place. Does nothing for any other
    operation or size. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      adjusted_ops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      narrow_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in adjusted_ops) then
        exit;
      if not(size in narrow_sizes) then
        exit;
      a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits an MLA with the operands op1..op4. Any operand that is R15, or
    R13 when Thumb or Thumb-2 code is generated, is first copied into a
    newly allocated integer register which is then used instead. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a fresh register if it is one of the
      registers described above }
    procedure checkreg(var reg : TRegister);
      var
        replacement : TRegister;
        must_copy : boolean;
      begin
        must_copy:=(GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13);
        if not must_copy then
          must_copy:=(getsupreg(reg)=RS_R15);
        if not must_copy then
          exit;
        replacement:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,replacement);
        reg:=replacement;
      end;

    begin
      checkreg(op1);
      checkreg(op2);
      checkreg(op3);
      checkreg(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { If the current procedure has pi_needs_tls set, emits a call to
    fpc_read_tp and copies R0 into current_procinfo.tlsoffset. R0 is marked
    as allocated for the duration of this sequence. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      if not(pi_needs_tls in current_procinfo.flags) then
        exit;
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation regdst := op(regsrc, regdst).

    OP_NEG is emitted as RSB (flag setting) on the low half followed by
    RSC on the high half, OP_NOT as two independent 32 bit NOTs. All other
    operations are forwarded to a_op64_reg_reg_reg with regdst as second
    source and destination. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high half }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation with a constant where reg is both source and
    destination; forwards to a_op64_const_reg_reg. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      { in-place variant: source and destination are the same register pair }
      a_op64_const_reg_reg(list,op,size,value,reg,reg);
    end;
  { 64 bit operation regdst := regsrc op value; forwards to the
    overflow-checking variant with setflags=false and discards the
    overflow location it returns. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { 64 bit three-operand operation; forwards to the overflow-checking
    variant with setflags=false and discards the overflow location it
    returns. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Transfers the raw contents of the integer register pair intreg
    (reglo, reghi) to mm register mmreg with a single VMOV. No conversion
    is performed; mmsize must be OS_F64. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { raw data transfer only, so only the 64 bit float size is valid }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers the raw contents of mm register mmreg to the integer
    register pair intreg (reglo, reghi) with a single VMOV. No conversion
    is performed; mmsize must be OS_F64. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { raw data transfer only, so only the 64 bit float size is valid }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
    end;
  { Emits regdst := regsrc <op> value for a 64 bit quantity kept in a register
    pair, working on the low and the high 32 bit halves separately.

    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR. OP_NEG/OP_NOT raise
               internalerror 2012022501, any other operation raises
               internalerror 2003083101.
    setflags : if set (or if tbasecgarm(cg).cgsetflags is set) and op is
               OP_ADD/OP_SUB, both halves are emitted with the S postfix.
    ovloc    : starts out as LOC_VOID. Only in the flag setting path and only
               for size=OS_64 it becomes LOC_FLAGS with F_CS (add) or
               F_CC (sub).

    Constants which are not valid shifter operands are first loaded into a
    temporary integer register. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
        else
          ;
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag producing variant: the flags stay allocated because the
            caller evaluates them through ovloc }
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { unsigned overflow is reported through the carry flag: carry set
                after an addition, carry clear after a subtraction }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { bitwise operations have no dependency between the halves }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                { the low half sets the carry which the ADC of the high half consumes }
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed and nobody else reads the flags
                  here, so release them like the reg/reg/reg variant does }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                { the low half sets the carry which the SBC of the high half consumes }
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { see OP_ADD: the flags are no longer needed after the SBC }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Emits a 64 bit register/register operation on register pairs:
      OP_ADD             : regdst := regsrc1 + regsrc2
      OP_SUB             : regdst := regsrc2 - regsrc1
      OP_AND/OP_OR/OP_XOR: handled per 32 bit half by cg.a_op_reg_reg_reg
    OP_NEG/OP_NOT raise internalerror 2012022502, everything else which is
    not listed raises internalerror 2003083104.

    If setflags or tbasecgarm(cg).cgsetflags is set and op is an addition or
    subtraction, both halves carry the S postfix and ovloc is returned as
    LOC_FLAGS: F_CS/F_CC (add/sub) for size=OS_64, F_VS otherwise. In all
    other cases ovloc is LOC_VOID and the flags are released again after the
    carry has been consumed. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      flags_requested : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      flags_requested:=setflags or tbasecgarm(cg).cgsetflags;
      if flags_requested and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag producing variant, the flags stay allocated for the caller }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          if op=OP_ADD then
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
            end
          else
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
            end;

          ovloc.loc:=LOC_FLAGS;
          if size=OS_64 then
            begin
              { unsigned: carry set after ADD resp. carry clear after SUB
                signals the overflow }
              if op=OP_ADD then
                ovloc.resflags:=F_CS
              else
                ovloc.resflags:=F_CC;
            end
          else
            ovloc.resflags:=F_VS;
        end
      else
        case op of
          OP_AND,OP_OR,OP_XOR:
            begin
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
            end;
          OP_ADD:
            begin
              { only the low half sets the flags, the ADC consumes the carry }
              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          OP_SUB:
            begin
              { only the low half sets the flags, the SBC consumes the carry }
              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          else
            internalerror(2003083104);
        end;
    end;
  { Creates the integer register allocator for Thumb code after the inherited
    initialisation. R0..R6 are always handed to the allocator; R7 is added
    unless the current procedure uses it as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if (not assigned(current_procinfo)) or (current_procinfo.framepointer<>NR_R7) then
        { R7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 serves as frame pointer and must not be allocated }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Frees the integer, FPU and multimedia register allocators (in this order)
    and then runs the inherited cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      i : integer;
    begin
      for i:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[i]].free;
      inherited done_register_allocators;
    end;
  { Generates the Thumb procedure prologue.

    list         : assembler list receiving the prologue
    localsize    : size of the local variables; rounded up to a multiple of 4
                   and then adapted to the stack alignment
    nostackframe : if set, nothing is emitted at all

    Steps: push the used non-volatile integer registers plus LR (and R7 if a
    separate frame pointer is used), emit the matching CFI records, lower the
    stack pointer by the local size and finally set up the frame pointer.
    A single Thumb "SUB sp,sp,#imm" is used for up to 508 bytes, two of them
    for up to 1016 bytes; bigger frames load -localsize into R4 and add it to
    the stack pointer.

    Raises internalerror 2013040601 if stack parameters are accessed and
    localsize exceeds the previously estimated stack frame size. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              //!!!! a_reg_alloc(list,NR_R12);
              //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
            end;
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);
          { safely estimate stack size: frames which might not fit the
            immediate forms below need R4 as scratch register, so make sure
            it gets saved }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;
          registerarea:=0;
          { do not save integer registers if the procedure does not return }
          if po_noreturn in current_procinfo.procdef.procoptions then
            regs:=[];
          if regs<>[] then
            begin
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
              { we need to run the loop twice to get cfi right }
              registerarea:=0;
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  begin
                    inc(registerarea,4);
                    current_asmdata.asmcfi.cfa_offset(list,newreg(R_INTREGISTER,r,R_SUBWHOLE),-registerarea);
                  end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea);
            end;
          { the pushed registers may have left the stack misaligned }
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              { 508 is still a valid immediate of "SUB sp,sp,#imm", so it must
                take the single instruction path (g_proc_exit uses the same
                limit); otherwise a useless "SUB sp,sp,#0" would be emitted }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too big for immediates: add the negated size through R4 }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  include(regs,RS_R4);
                end;
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,registerarea+localsize);
            end;
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { frame pointer := stack pointer }
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
              current_asmdata.asmcfi.cfa_def_cfa_register(list,current_procinfo.framepointer);
            end;
        end;
    end;
  { Generates the Thumb procedure epilogue.

    Nothing is emitted for procedures marked noreturn. Without a stack frame
    only a return through LR is generated. Otherwise the size of the local
    area is recomputed like in the prologue, the stack pointer is raised
    again and the saved registers are popped together with the PC.
    Note that the stack adjustment and the POP are only generated if the stack
    pointer is the frame pointer or the target is a Darwin system. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);

    { return through the link register, using BX where the CPU supports it }
    procedure emit_lr_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    var
      framesize : longint;
      regidx : byte;
      restored : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      uses_stack_paras : Boolean;
    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          emit_lr_return;
          exit;
        end;

      uses_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: the used non-volatile ones, the PC (which takes
        the saved LR) and the frame pointer if there is a separate one }
      restored:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(restored,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(restored,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for regidx:=RS_R0 to RS_R15 do
        if regidx in restored then
          inc(savedbytes,4);
      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if uses_stack_paras then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; an empty area needs no instruction }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too big for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if restored<>[] then
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,restored))
      else
        emit_lr_return;
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.

    If fromsize is at least as big as tosize, the load is done with tosize.
    Only 8, 16 and 32 bit integer sizes are supported, everything else raises
    internalerror 200308298.

    References whose alignment is 1 or 2 and smaller than the size of the
    access are not loaded with a single instruction: the value is assembled
    from bytes (or half words) using shifts and ORs, honouring the endianness
    of the target. All other loads are delegated to handle_load_store. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix:toppostfix;
      usedtmpref: treference;
      tmpreg,tmpreg2 : tregister;
      dir : integer;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      { select the LDR postfix matching the size and signedness of the load }
      case FromSize of
        { integer sizes, unsigned and signed }
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;
      { under-aligned reference: compose the value from smaller pieces }
      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { dir is the step to get from the least significant piece to the
            next more significant one }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                   (ref.index<>NR_NO) or
                   (ref.offset<-124) or
                   (ref.offset>124) or
                   { sometimes the compiler reused registers }
                   (reg=ref.index) or
                   (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;
                { on big endian targets the least significant byte comes last }
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);
                tmpreg:=getintregister(list,OS_INT);
                { low byte into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { high byte into tmpreg, loaded as signed byte for a signed result }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                { reg := reg or (tmpreg shl 8) }
                list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
                list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                   (ref.index<>NR_NO) or
                   (ref.offset<-124) or
                   (ref.offset>124) or
                   { sometimes the compiler reused registers }
                   (reg=ref.index) or
                   (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;
                if ref.alignment=2 then
                  begin
                    { two half word loads: low half into reg, high half into
                      tmpreg which is shifted into place and merged }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
                  end
                else
                  begin
                    { four byte loads, starting with the least significant
                      byte; each further byte is shifted by 8, 16 and 24 bits
                      and merged into reg }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,24));
                    list.concat(setoppostfix(taicpu.op_reg_reg(A_ORR,reg,tmpreg),PF_S));
                  end;
              end
            else
              { remaining sizes can be loaded directly }
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,oppostfix,reg,ref);
      { a signed byte which is loaded into a 16 bit location gets its upper
        bits adjusted by a 16 -> 32 bit register move }
      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg.

    Constants accepted by is_thumb_imm are loaded with a flag setting MOV.
    All other constants are appended as 32 bit value to the local data of the
    current procedure and loaded PC relative with LDR.
    Sizes other than the 8/16/32 bit integer sizes raise
    internalerror 2002090908. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090908);

      { cheap case: the constant fits into the immediate field of MOV }
      if is_thumb_imm(a) then
        begin
          list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,a),PF_S));
          exit;
        end;

      { otherwise put the constant behind a fresh label in the local data ... }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      { ... and fetch it relative to the PC }
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the "self" parameter of procdef, wherever the
    caller side parameter location places it (register or memory).

    Offsets accepted by is_thumb_imm are subtracted directly. Other offsets
    are loaded from the local data into R4, which is saved and restored
    around the operation with PUSH/POP.

    Raises internalerror 2003052504 if procdef has no "self" parameter symbol
    and internalerror 2003091804 for parameter locations which are neither
    LOC_REGISTER nor LOC_REFERENCE. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);

    { saves R4 on the stack and loads ioffset into it from a new, 4 byte
      aligned entry in the local data of the current procedure }
    procedure save_r4_and_load_offset;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    { gets the saved value of R4 back from the stack }
    procedure restore_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or (selfsym.typ<>paravarsym) then
        internalerror(2003052504);

      { walk over all locations the self parameter is made of }
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              if is_thumb_imm(ioffset) then
                a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
              else
                begin
                  save_r4_and_load_offset;
                  a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R4,curloc^.register);
                  restore_r4;
                end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    save_r4_and_load_offset;
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R4,selfref);
                    restore_r4;
                  end;
              end
            else
              internalerror(2003091804);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Thumb front end of the inherited handle_load_store.

    References which the requested load/store cannot address directly are
    replaced by a simple [register] reference: the address of ref is computed
    into a new integer register first. The (possibly simplified) reference is
    then passed on to the inherited implementation, whose result is returned.

    The address is computed separately when
      - a word LDR/STR not based on the stack pointer has |offset| > 124
      - a byte access involves the stack pointer or has |offset| > 31
      - a half word access involves the stack pointer, has |offset| > 62 or
        an odd offset
      - a word LDR based on the stack pointer has |offset| > 1020
      - an LDR is sign extending (SH/SB) or has |offset| > 124 }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      simpleref : treference;
      addrreg : TRegister;
      word_access,
      byte_access,
      half_access,
      sp_involved,
      need_address : boolean;
    begin
      { classify the access by opcode and postfix }
      word_access:=
        ((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      byte_access:=
        (((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_B)) or
        (((op=A_LDRB) or (op=A_STRB)) and (oppostfix=PF_None));
      half_access:=
        (((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_H)) or
        (((op=A_LDRH) or (op=A_STRH)) and (oppostfix=PF_None));
      sp_involved:=
        (ref.base=NR_STACK_POINTER_REG) or (ref.index=NR_STACK_POINTER_REG);

      need_address:=
        { LDR/STR limitations }
        (word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         (sp_involved or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (sp_involved or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word load out of range }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { sign extending loads and remaining loads with a big offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      simpleref:=ref;
      if need_address then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(simpleref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;
      Result:=inherited handle_load_store(list, op, oppostfix, reg, simpleref);
    end;
  { Emits the two operand Thumb operation  dst := dst <op> src
    (resp. dst := <op> src for OP_NEG and OP_NOT).

    OP_DIV/OP_IDIV are not supported and raise internalerror 200308284.
    OP_ROL is only available for 32 bit sizes (internalerror 2008072805
    otherwise) and is simulated by a rotate right by 32-src.
    All remaining operations are taken from the op_reg_opcg2asmop and
    op_reg_postfix_thumb tables, with the flags marked as allocated.
    Finally maybeadjustresult is called for op, size and dst. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,dst,src),PF_S));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072805);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            { tmpreg := 32-src }
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the complemented count in tmpreg; rotating by src
              itself would rotate into the wrong direction }
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ROR,dst,tmpreg),PF_S));
          end;
        else
          begin
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix_thumb[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits  dst := dst <op> a  for a constant a.

    - An ADD/SUB whose negated constant is a valid Thumb immediate is turned
      into the opposite operation with the negated constant.
    - ADD/SUB with a valid Thumb immediate is emitted as a single instruction
      taken from the op_reg_opcg2asmop/op_reg_postfix_thumb tables.
    - Some trivial multiplications, divisions and ANDs are simplified.
    - Shifts are emitted with the constant as shift count.
    - Everything else loads the constant into a temporary register and
      delegates to a_op_reg_reg.
    Finally maybeadjustresult is called for op, size and dst.

    The regions enclosed in $ifdef DUMMY are compiled only if DUMMY is
    defined; they refer to identifiers (setflags, so, shift, imm1, imm2)
    which are not declared in this procedure. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      tmpreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { prefer the opposite operation if only the negated constant is encodable;
        with range checking enabled, -2147483648 is excluded before it is negated }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
          else
            ;
        end;
      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix_thumb[op]));
          { placeholder: the overflow location handling is disabled, so the
            case statement below has no effect }
          if (cgsetflags {!!! or setflags }) and (size in [OS_8,OS_16,OS_32]) then
            begin
              //!!! ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  //!!! ovloc.resflags:=F_CS;
                  ;
                OP_SUB:
                  //!!! ovloc.resflags:=F_CC;
                  ;
                else
                  ;
              end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(2003082903);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(2012051802);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,dst,0),PF_S))
          { x := x AND $FFFFFFFF leaves the register unchanged, so no instruction is emitted }
          else if (op = OP_AND) and (not(dword(a))=0) then
            // do nothing
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              { shift dst in place by the constant count }
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { general case: materialize the constant and use the register form }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg(list,op,size,tmpreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits  dst := src <op> a.

    Adding a positive multiple of 4 up to 1020 to the stack pointer (R13)
    with a destination other than R13 is emitted as a single ADD with three
    operands. Everything else is left to the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_relative_add : boolean;
    begin
      sp_relative_add:=
        (op=OP_ADD) and
        (src=NR_R13) and
        (dst<>NR_R13) and
        ((a mod 4)=0) and
        (a>0) and
        (a<=1020);
      if not sp_relative_add then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materializes the condition described by the flags f in reg: reg becomes 1
    if the condition holds and 0 otherwise.
    This is done with a conditional branch around two flag setting MOVs; the
    flags are released once the result has been set. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      lab_true,lab_done : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(lab_true);
      current_asmdata.getjumplabel(lab_done);

      { condition fulfilled -> continue at lab_true }
      condjump:=taicpu.op_sym(A_B,lab_true);
      condjump:=setcondition(condjump,flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition not fulfilled: result is 0, skip the other assignment }
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,0),PF_S));
      list.concat(taicpu.op_sym(A_B,lab_done));

      { condition fulfilled: result is 1 }
      cg.a_label(list,lab_true);
      list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,reg,1),PF_S));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lab_done);
    end;
  { Creates the register allocators for Thumb-2 code after the inherited
    initialisation:
      - integer registers R0..R10, R12 and R14, without R9 on iOS
      - the FPA registers F0..F7 if the selected FPU provides FPA
      - the multimedia registers through init_mmregister_allocator }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      init_mmregister_allocator;
    end;
  { Releases the integer, FPU and multimedia register allocators (in that
    order) and then lets the inherited implementation finish the cleanup. }
  procedure tthumb2cgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      i : integer;
    begin
      for i:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[i]].free;
      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX reg) and records in the current
    procedure info that the procedure performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX,reg);
      list.concat(callinstr);
      { Pass 1 does not set pi_do_call properly anymore and for now the flag
        is only needed after pass 2 (I hope) (JM).  Therefore the former
        sanity check
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
        stays disabled and the flag is simply set here. }
      current_procinfo.flags:=current_procinfo.flags+[pi_do_call];
    end;
  { Loads the constant a into reg.  The encodings are tried in this order:
      1. MOV  reg,#a        if a is a Thumb-2 immediate
      2. MVN  reg,#not(a)   if not(a) is a Thumb-2 immediate
      3. MOVW reg,#a        if a fits into the low 16 bits
      4. LDR  reg,[pc,...]  from a 32 bit entry appended to the local data
                            of the current procedure
    Only 8, 16 and 32 bit integer sizes are accepted; anything else raises
    internalerror 2002090909. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      case size of
        OS_8,OS_S8,
        OS_16,OS_S16,
        OS_32,OS_S32:
          ;
        else
          internalerror(2002090909);
      end;

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: place the value in the procedure's local
        data and load it pc relative }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    If the source is not smaller than the destination, the load is done with
    the destination size.  References whose alignment (1 or 2) is smaller
    than the size to load are assembled from byte or halfword loads that are
    merged with "ORR reg,reg,part,LSL #n"; all other references are passed to
    handle_load_store as a single LDR with the matching size postfix. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      sizepostfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      shiftop : tshifterop;
      step,i : integer;

    { Initialises partref for the piecewise load: Ref itself when it is a
      plain base+offset reference whose offset lies in -255..maxoffset and
      which does not involve reg, otherwise a reference through a fresh
      register holding the address of Ref. }
    procedure init_partref(maxoffset : longint);
      begin
        if assigned(Ref.symbol) or
           (Ref.index<>NR_NO) or
           (Ref.offset<-255) or
           (Ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=Ref.index) or
           (reg=Ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,Ref,addrreg);
            reference_reset_base(partref,addrreg,0,Ref.temppos,Ref.alignment,Ref.volatility);
          end
        else
          partref:=Ref;
      end;

    { Moves partref by delta bytes, loads the next part (of size partsize)
      into partreg and merges it into reg shifted left by bits. }
    procedure merge_part(partsize : tcgsize; delta : integer; bits : byte);
      begin
        inc(partref.offset,delta);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        shiftop.shiftimm:=bits;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { postfix of the load instruction for the (possibly adjusted) size }
      case FromSize of
        OS_8:
          sizepostfix:=PF_B;
        OS_S8:
          sizepostfix:=PF_SB;
        OS_16:
          sizepostfix:=PF_H;
        OS_S16:
          sizepostfix:=PF_SH;
        OS_32,
        OS_S32:
          sizepostfix:=PF_None;
        else
          InternalError(2003082913);
      end;

      if not((Ref.alignment in [1,2]) and (Ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,sizepostfix,reg,Ref)
      else
        begin
          { the least significant part is loaded first; on big endian
            targets it sits at the highest address, so walk downwards }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                init_partref(4094);
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                partreg:=getintregister(list,OS_INT);
                { low byte, always zero extended }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { high byte, sign extended for a signed load }
                if FromSize=OS_16 then
                  merge_part(OS_8,step,8)
                else
                  merge_part(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr }
                init_partref(4092);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if Ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_part(OS_16,step*2,16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for i:=1 to 3 do
                      merge_part(OS_8,step,8*i);
                  end;
              end
            else
              handle_load_store(list,A_LDR,sizepostfix,reg,Ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Two operand register operation "dst := dst op src".
    Only OP_NOT is handled here: the bitwise complement is computed with MVN
    and, for sizes below 32 bit, the result is zero or sign extended again
    to the operation size.  All other operations use the inherited
    implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      case size of
        OS_32,
        OS_S32:
          { full register width, nothing to fix up }
          ;
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
        else
          internalerror(2019050916);
      end;
    end;
  { Emits "dst := src op a" for Thumb-2 and reports in ovloc where an
    overflow indication can be found (LOC_VOID if none is produced).

    - ADD/SUB of a constant whose negation is a shifter constant is turned
      into the opposite operation with the negated constant.
    - Shifter constants (except for multiplications) are used as immediate
      operands; shifts and rotates become a MOV with a shifter operand.
    - Multiplications by 0, 1, -1, 2^n, 2^n+1 and 2^n-1 as well as several
      AND masks get dedicated short sequences.
    - Everything else loads the constant into a temporary register and
      continues with a_op_reg_reg_reg_checkoverflow.

    Flags are requested either through the setflags parameter or the
    cgsetflags field. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      constreg : tregister;
      so : tshifterop;
      log2 : longint;
      wantflags,
      ismul,
      bigaddsub,
      handled : boolean;
      flagpostfix : toppostfix;
      instr : taicpu;

    { dst := src shifted/rotated by amount using the given shifter mode;
      a plain register move is emitted when the requested count a is zero }
    procedure emit_shifted_move(mode : tshiftmode; amount : tcgint);
      begin
        if a=0 then
          list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
        else
          begin
            shifterop_reset(so);
            so.shiftmode:=mode;
            so.shiftimm:=amount;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
          end;
      end;

    begin
      ovloc.loc:=LOC_VOID;
      wantflags:=cgsetflags or setflags;
      flagpostfix:=toppostfix(ord(wantflags)*ord(PF_S));

      { prefer the opposite operation if the negated constant is encodable }
      if (a<>-2147483648) and is_shifter_const(-a,shift) and (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      ismul:=op in [OP_IMUL,OP_MUL];

      if is_shifter_const(a,shift) and not(ismul) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              emit_shifted_move(SM_LSL,a);
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              { rotate left by a = rotate right by 32-a }
              emit_shifted_move(SM_ROR,32-a);
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              emit_shifted_move(SM_ROR,a);
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              emit_shifted_move(SM_LSR,a);
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              emit_shifted_move(SM_ASR,a);
            end;
          else
            begin
              { ADD/SUB constants outside 0..4095 go through a register }
              bigaddsub:=(op in [OP_SUB, OP_ADD]) and
                         ((a < 0) or (a > 4095));
              if bigaddsub then
                begin
                  constreg:=getintregister(list,size);
                  a_load_const_reg(list, size, a, constreg);
                end;
              if wantflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              if bigaddsub then
                instr:=taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,constreg)
              else
                instr:=taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a);
              list.concat(setoppostfix(instr,flagpostfix));

              { for unsigned operations the carry flag tells about overflow }
              if wantflags and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      ;
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          handled:=true;
          if ismul then
            begin
              if a=1 then
                a_load_reg_reg(list,size,size,src,dst)
              else if a=0 then
                a_load_const_reg(list,size,0,dst)
              else if (op=OP_IMUL) and (a=-1) then
                a_op_reg_reg(list,OP_NEG,size,src,dst)
              else if wantflags then
                { the shortcuts below do not provide overflow information }
                handled:=false
              { done here instead of in the peephole optimizer because it
                saves a register }
              else if ispowerof2(a,log2) then
                a_op_const_reg_reg(list,OP_SHL,size,log2,src,dst)
              { for example : b=a*5 -> b=a*4+a with add instruction and shl }
              else if ispowerof2(a-1,log2) then
                begin
                  if log2>32 then{roozbeh does this ever happen?}
                    internalerror(2003082911);
                  shifterop_reset(so);
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=log2;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
                end
              { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
              else if ispowerof2(a+1,log2) then
                begin
                  if log2>32 then{does this ever happen?}
                    internalerror(2012051803);
                  shifterop_reset(so);
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=log2;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
                end
              else if not try_optimized_mul32_const_reg_reg(list,a,src,dst) then
                handled:=false;
            end
          else if op=OP_AND then
            begin
              { x := y and 0; just clears a register, this sometimes gets
                generated on 64bit ops.  Just using mov x, #0 might allow
                some easier optimizations down the line. }
              if dword(a)=0 then
                list.concat(taicpu.op_reg_const(A_MOV,dst,0))
              { x := y AND $FFFFFFFF just copies the register, so use mov
                for better optimizations }
              else if not(dword(a))=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else if is_thumb32_imm(a) then
                list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
              else if a = $FFFF then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              { BIC clears the specified bits, while AND keeps them; using
                BIC allows a broader range of constants }
              else if is_thumb32_imm(not(dword(a))) then
                list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
              { a contiguous run of bits to clear can be done with BFC }
              else if is_continuous_mask(aword(not(a)), shift, width) then
                begin
                  a_load_reg_reg(list,size,size,src,dst);
                  list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
                end
              else
                handled:=false;
            end
          else
            handled:=false;

          if not handled then
            begin
              { generic case: constant in a register, three register form }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 opcode used for each generic operation; the table is indexed by
      TOpCG and holds A_NONE for operations without a direct register/register
      instruction. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop = (
      A_NONE,
      A_MOV,
      A_ADD,
      A_AND,
      A_UDIV,
      A_SDIV,
      A_MUL,
      A_MUL,
      A_NONE,
      A_MVN,
      A_ORR,
      A_ASR,
      A_LSL,
      A_LSR,
      A_SUB,
      A_EOR,
      A_NONE,
      A_ROR
    );
  { Emits "dst := src2 op src1" for Thumb-2 and reports in ovloc where an
    overflow indication can be found (LOC_VOID if none is produced).

    list     : instruction list the code is appended to
    op       : generic operation; OP_NEG and OP_NOT are not allowed here
               (internalerror 200308286)
    size     : operation size; rotates are only supported for 32 bit
    src1     : second operand (shift/rotate count for shifts and rotates)
    src2     : first operand
    dst      : destination register
    setflags : request flag/overflow information in addition to the
               cgsetflags field
    ovloc    : receives LOC_FLAGS plus the flag to test when overflow
               information was generated

    Source registers are never modified by the emitted code. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072806);
            { simulate ROL by ror'ing 32-value; the adjusted count is kept in
              a temporary register so that the caller's count register src1
              is left untouched }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { full 64 bit product: low word in dst, high word in
                  overflowreg; the high word is checked afterwards }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow if the high word equals the sign
                      extension of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow if the high word is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
  {$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
  {$endif}
            { plain three register instruction, with the S postfix when
              flags were requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the condition described by f in reg with an IT block:

        ite      <cond>
        mov<cond>     reg,#1
        mov<not cond> reg,#0 }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,truecond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Generates the procedure prologue for Thumb-2.  Nothing is emitted when
    nostackframe is set.  Otherwise, in this order:
      - optionally copies SP to R12 when a separate frame pointer is used,
      - pushes the used non-volatile integer registers (plus the frame
        pointer and/or R14 where required),
      - sets up the frame pointer,
      - reserves the local area, padded so that the stack stays aligned to
        alignment.localalignmax,
      - stores the used non-volatile FPA registers when the FPU has FPA. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      regs,
      fpuregs : tcpuregisterset;
      stackmisalignment: pint;
      has_fpa,
      separate_fp : boolean;
    begin
      if nostackframe then
        exit;

      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;

      has_fpa:=FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype];
      separate_fp:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      firstfloatreg:=RS_NO;
      lastfloatreg:=RS_NO;
      if has_fpa then
        begin
          { which floating point registers have to be saved? }
          fpuregs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
          for r:=RS_F0 to RS_F7 do
            if r in fpuregs then
              begin
                if firstfloatreg=RS_NO then
                  firstfloatreg:=r;
                lastfloatreg:=r;
                inc(stackmisalignment,12);
              end;
        end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if separate_fp then
        begin
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          { remember the stack pointer at entry }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(ref,4,[]);
      ref.index:=NR_STACK_POINTER_REG;
      ref.addressmode:=AM_PREINDEXED;
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if separate_fp then
        regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(regs,RS_R14);
      if regs<>[] then
        begin
          { every pushed register moves the stack pointer by 4 bytes }
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(stackmisalignment,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
        end;

      if separate_fp then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(localsize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { the size does not fit an immediate: go through R12 }
              if not separate_fp then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if has_fpa and (firstfloatreg<>RS_NO) then
        begin
          { store the floating point registers at floatregstart }
          reference_reset(ref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              ref.base:=NR_R12;
            end
          else
            begin
              ref.base:=current_procinfo.framepointer;
              ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
            lastfloatreg-firstfloatreg+1,ref));
        end;
    end;
  { Generates the procedure epilogue for Thumb-2.
    Procedures marked noreturn get no exit code at all; without a stack
    frame a plain "BX R14" is emitted.  Otherwise the saved FPA registers
    are reloaded (when the FPU has FPA), the local area is released and the
    saved integer registers are popped, returning by loading R15 when
    anything was saved and by "BX R14" when not. }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      firstfloatreg,lastfloatreg,
      r : byte;
      shift : byte;
      regs : tcpuregisterset;
      LocalSize : longint;
      stackmisalignment: pint;
    begin
      { a routine not returning needs no exit code,
        we trust this directive as arm thumb is normally used if small code shall be generated }
      if po_noreturn in current_procinfo.procdef.procoptions then
        exit;

      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      stackmisalignment:=0;
      if FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype] then
        begin
          { find the floating point registers that were saved on entry }
          firstfloatreg:=RS_NO;
          lastfloatreg:=RS_NO;
          for r:=RS_F0 to RS_F7 do
            if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfloatreg=RS_NO then
                  firstfloatreg:=r;
                lastfloatreg:=r;
                { floating point register space is already included in
                  localsize below by calc_stackframe_size, so
                  stackmisalignment is not increased by 12 here }
              end;

          { restore floating point registers }
          if firstfloatreg<>RS_NO then
            begin
              reference_reset(ref,4,[]);
              if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
                end;
              list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                lastfloatreg-firstfloatreg+1,ref));
            end;
        end;

      { integer registers to pop; the return address goes straight to R15 }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
        begin
          exclude(regs,RS_R14);
          include(regs,RS_R15);
        end;
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];

      for r:=RS_R0 to RS_R15 do
        if r in regs then
          inc(stackmisalignment,4);
      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;

      { release the local area, using the same padding as the prologue }
      LocalSize:=current_procinfo.calc_stackframe_size;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(LocalSize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { the size does not fit an immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if regs<>[] then
        begin
          reference_reset(ref,4,[]);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
        end
      else
        list.concat(taicpu.op_reg(A_BX,NR_R14));
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" for Thumb-2
    after rewriting ref into a form the instruction can address, and returns
    the reference that was finally used.

    References with an absolute symbol, without any register, or with an
    offset outside the supported range get the symbol address/offset loaded
    into a temporary register first (symbol addresses via an entry in the
    local data of the current procedure).  For floating point/VFP accesses
    an index register is folded into the base register. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      hreg : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      foldop : tasmop;
      is_fpu_access,
      must_rewrite : boolean;
    begin
      hreg:=NR_NO;
      is_fpu_access:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if ref.base=NR_NO then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      must_rewrite:=assigned(ref.symbol) and
                    not(is_pc(ref.base)) and
                    not(is_pc(ref.index));
      { [#xxx] isn't a valid address operand }
      if not must_rewrite then
        must_rewrite:=(ref.base=NR_NO) and (ref.index=NR_NO);
      { general offset range (the lower bound used to be -4095) }
      if not must_rewrite then
        must_rewrite:=(ref.offset<-255) or (ref.offset>4095);
      { these postfixes are limited to -255..255; the lower bound has
        already been checked above }
      if not must_rewrite then
        must_rewrite:=(oppostfix in [PF_SB,PF_H,PF_SH]) and (ref.offset>255);
      if not must_rewrite then
        must_rewrite:=is_fpu_access and
                      ((ref.offset<-1020) or
                       (ref.offset>1020) or
                       ((abs(ref.offset) mod 4)<>0) or
                       { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
                       assigned(ref.symbol));

      if must_rewrite then
        begin
          reference_reset(poolref,4,[]);
          hreg:=getintregister(list,OS_INT);
          if not assigned(ref.symbol) then
            { no symbol: only the offset has to go into the register }
            a_load_const_reg(list,OS_ADDR,ref.offset,hreg)
          else
            begin
              { load symbol }
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;
              case ref.refaddr of
                addr_gottpoff:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsgd:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset));
                addr_tlsdesc:
                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset));
                addr_tpoff:
                  begin
                    if assigned(ref.relsymbol) or (ref.offset<>0) then
                      Internalerror(2019092807);
                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
                  end;
                else
                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              end;

              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,hreg,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end;

          { combine the temporary register with what is left of ref }
          if ref.base=NR_NO then
            ref.base:=hreg
          else if ref.index<>NR_NO then
            begin
              list.concat(taicpu.op_reg_reg_reg(A_ADD,hreg,ref.base,hreg));
              ref.base:=hreg;
            end
          else
            begin
              ref.index:=hreg;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base, index and offset cannot be combined: add the offset first }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if hreg=NR_NO then
            begin
              hreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,hreg);
              ref.base:=hreg;
            end
          else
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,hreg,hreg);
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          hreg:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, hreg, NR_R15));
          ref.base := hreg;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if is_fpu_access and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(2003091202);
          if hreg=NR_NO then
            begin
              hreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,hreg,ref.base,ref.index));
            end
          else
            begin
              { the temporary register is part of the reference already:
                fold the other register into it, honouring the index sign }
              if ref.signindex<0 then
                foldop:=A_SUB
              else
                foldop:=A_ADD;
              if ref.base=hreg then
                list.concat(taicpu.op_reg_reg_reg(foldop,hreg,hreg,ref.index))
              else
                begin
                  if ref.index<>hreg then
                    internalerror(2004031602);
                  list.concat(taicpu.op_reg_reg_reg(foldop,hreg,ref.base,hreg));
                end;
            end;
          ref.base:=hreg;
          ref.index:=NR_NO;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Copies the mm register reg1 into the mm register reg2.

    Only the single precision case (fromsize=OS_F32 and tosize=OS_F32)
    generates code: one VMOV.F32 reg2,reg1 which is also registered through
    add_move_instruction. Every other size combination emits nothing; this
    includes OS_F64 -> OS_F64 and OS_F32 -> OS_F64, whose implementations
    were only present as disabled code. shuffle is not inspected. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      { anything but a pure single precision move is silently ignored }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1),PF_F32);
      list.concat(movinstr);
      add_move_instruction(movinstr);
      { VMOV cannot generate an FPU exception, so we do not need a check here }
    end;
  { Loads the mm register reg from the memory location ref.

    The work is delegated to handle_load_store with the VLDR opcode and no
    postfix. fromsize, tosize and shuffle are not inspected. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list,A_VLDR,PF_None,reg,ref);
    end;
  { Stores the mm register reg to the memory location ref.

    The work is delegated to handle_load_store with the VSTR opcode and no
    postfix. fromsize, tosize and shuffle are not inspected. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      { VSTR cannot generate an FPU exception, so we do not need a check here }
      handle_load_store(list,A_VSTR,PF_None,reg,ref);
    end;
  { Moves the integer register intreg into the mm register mmreg with a
    single VMOV.

    Only tosize=OS_F32 is supported, any other target size triggers
    internalerror 2012100813. fromsize is not inspected and the check on
    shuffle is disabled. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves the mm register mmreg into the integer register intreg with a
    single VMOV.

    Only fromsize=OS_F32 is supported, any other source size triggers
    internalerror 2012100814. tosize is not inspected and the check on
    shuffle is disabled. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          { VMOV cannot generate an FPU exception, so we do not need a check here }
          list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is handled here; every other operation is forwarded
    unchanged to the inherited implementation.

    The negation is emitted as
      RSBS regdst.reglo,regsrc.reglo,#0
      MOV  tmp,#0
      SBC  regdst.reghi,tmp,regsrc.reghi
    with NR_DEFAULTFLAGS allocated around the whole sequence. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
      lowneg: taicpu;
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          { low word: 0-reglo, flag setting form so the high word can use the resulting carry }
          lowneg:=taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0);
          list.concat(setoppostfix(lowneg,PF_S));
          { the temporary is requested only after the RSB has been queued, as before }
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          { high word: 0-reghi with carry }
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
    end;
  { 64 bit register/register operation for Thumb.

    The operation is split into one instruction on the low words and one on
    the high words of regsrc/regdst, using the two operand instruction forms
    (destination register first, source register second).

    Supported operations:
      OP_NEG            regdst is cleared, then regsrc is subtracted from it
      OP_NOT            delegated per word to cg.a_op_reg_reg
      OP_AND/OR/XOR     delegated per word to cg.a_op_reg_reg
      OP_ADD            ADD on the low words, ADC on the high words
      OP_SUB            SUB on the low words, SBC on the high words
    Any other operation raises internalerror 2003083105. size is not
    inspected.

    The arithmetic cases allocate NR_DEFAULTFLAGS before the low word
    instruction and release it again after the high word instruction, so the
    flags are only marked as in use for the carry chain itself. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { regdst is zeroed before regsrc is read, so this sequence only
              works when regsrc and regdst are different registers }
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reglo,0),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_const(A_MOV,regdst.reghi,0),PF_S));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi),PF_S));
            { the carry has been consumed, release the flags again (as OP_NEG does) }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi),PF_S));
            { the borrow has been consumed, release the flags again (as OP_NEG does) }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083105);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg op value.

    Supported operations:
      OP_AND/OR/XOR   delegated per 32 bit half to cg.a_op_const_reg
      OP_ADD          ADD on the low word followed by ADC on the high word
      OP_SUB          SUB on the low word followed by SBC on the high word
    Any other operation raises internalerror 2003083106. size is not
    inspected.

    For OP_ADD/OP_SUB the low half of value is used as an immediate operand
    when it lies in 0..255, otherwise it is loaded into a temporary register
    first. The high half is always loaded into a temporary register.

    Both constants are materialised BEFORE the flags are allocated and before
    the low word instruction is emitted: nothing may be placed between the
    instruction producing the carry and the one consuming it, because the
    instructions emitted by cg.a_load_const_reg are not guaranteed to leave
    the flags alone. NR_DEFAULTFLAGS is released again once the high word
    instruction has been emitted. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      loreg,
      hireg : tregister;
      loval,
      hival : aint;
      lowop,
      highop : tasmop;
      useimm : boolean;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD,OP_SUB:
          begin
            { both halves are converted the same way for ADD and SUB }
            loval:=aint(lo(value));
            hival:=aint(hi(value));

            if op=OP_ADD then
              begin
                lowop:=A_ADD;
                highop:=A_ADC;
              end
            else
              begin
                lowop:=A_SUB;
                highop:=A_SBC;
              end;

            { small non-negative low halves can be encoded as immediate operand }
            useimm:=(loval>=0) and (loval<=255);

            { load all constants first, while the flags are not yet in use }
            loreg:=NR_NO;
            if not useimm then
              begin
                loreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,loval,loreg);
              end;
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,hival,hireg);

            { carry chain: low word operation directly followed by the high word operation }
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            if useimm then
              list.concat(taicpu.op_reg_const(lowop,reg.reglo,loval))
            else
              list.concat(taicpu.op_reg_reg(lowop,reg.reglo,loreg));
            list.concat(setoppostfix(taicpu.op_reg_reg(highop,reg.reghi,hireg),PF_S));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083106);
      end;
    end;
  { Instantiates the global code generator objects cg and cg64 and selects
    the assembler optimizer class for the instruction set being generated.

    GenerateThumb2Code is tested first, then GenerateThumbCode; if neither
    holds the plain ARM code generators are created. In the Thumb case
    casmoptimizer is not assigned because that assignment is disabled. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { neither Thumb variant requested: regular ARM code }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  5079. end.