cgcpu.pas 214 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm: code generator base class common to every ARM flavour;
    tcgarm and tthumbcgarm below both derive from it }
  tbasecgarm = class(tcg)
    { when true, the next arithmetic operation should also update the flags }
    cgsetflags: boolean;

    procedure a_load_const_cgpara(list: TAsmList; size: tcgsize; a: tcgint; const paraloc: TCGPara); override;
  protected
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint;
      const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  public
    procedure a_loadaddr_ref_cgpara(list: TAsmList; const r: treference; const paraloc: TCGPara); override;

    { --- calls --- }
    procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
    procedure a_call_reg(list: TAsmList; reg: tregister); override;

    { --- integer moves --- }
    procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    function a_internal_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference): treference;
    function a_internal_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister): treference;

    { --- fpu moves --- }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean); override;
    procedure a_loadfpu_ref_cgpara(list: TAsmList; size: tcgsize; const ref: treference; const paraloc: TCGPara); override;

    { --- comparisons and jumps --- }
    procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister;
      l: tasmlabel); override;
    procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
    procedure a_jmp_name(list: TAsmList; const s: string); override;
    procedure a_jmp_always(list: TAsmList; l: tasmlabel); override;
    procedure a_jmp_flags(list: TAsmList; const f: TResFlags; l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure frame handling --- }
    procedure g_profilecode(list: TAsmList); override;
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;
    procedure g_maybe_got_init(list: TAsmList); override;

    procedure a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister); override;

    { --- block copies --- }
    procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint); override;
    procedure g_concatcopy_unaligned(list: TAsmList; const source, dest: treference; len: tcgint); override;
    procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
    procedure g_concatcopy_internal(list: TAsmList; const source, dest: treference; len: tcgint; aligned: boolean);

    { --- overflow checking and register saving --- }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List: TAsmList; const Loc: TLocation; def: TDef; ovloc: tlocation); override;
    procedure g_save_registers(list: TAsmList); override;
    procedure g_restore_registers(list: TAsmList); override;

    { --- helpers introduced by this class (not overrides) --- }
    procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);
    procedure fixref(list: TAsmList; var ref: treference);
    { virtual: tthumbcgarm and tthumb2cgarm provide their own versions }
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; virtual;

    { --- multimedia (mm) register moves and operations --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;

    { unsupported methods are transformed into internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { tries to emit an optimized 32 bit multiplication by the constant a;
      returns true when such code was generated }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister): boolean;

    { clears potential overflow bits left by 8 or 16 bit operations, i.e.
      the upper 24/16 bits of the register after the operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla on thumb requires that none of the registers equals r13/r15;
      this method ensures that }
    procedure safe_mla(list: TAsmList; op1, op2, op3, op4: TRegister);
  end;
  { tcgarm: layer shared by the normal ARM and the Thumb-2 code generators
    (tarmcgarm and tthumb2cgarm derive from it) }
  tcgarm = class(tbasecgarm)
    { --- integer operations --- }
    procedure a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list: TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint;
      src, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize;
      src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation); override;

    { --- loads --- }
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;

    procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint); override;

    { multiplies two 32-bit registers into a lo and a hi 32-bit register }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1, src2, dstlo, dsthi: tregister); override;
  end;
  { tarmcgarm: code generator for normal (non-Thumb) ARM; on top of tcgarm it
    only sets up and tears down its own register allocators }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;
  end;
  { tbasecg64farm: 64 bit code generator base for all ARM flavours;
    adds no members of its own to tcg64f32 }
  tbasecg64farm = class(tcg64f32);
  { tcg64farm: 64 bit code generator shared by normal ARM and Thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { --- 64 bit integer operations --- }
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
    procedure a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64); override;
    procedure a_op64_const_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64;
      regsrc, regdst: tregister64); override;
    procedure a_op64_reg_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize;
      regsrc1, regsrc2, regdst: tregister64); override;
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCG; size: tcgsize; value: int64;
      regsrc, regdst: tregister64; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCG; size: tcgsize;
      regsrc1, regsrc2, regdst: tregister64; setflags: boolean; var ovloc: tlocation); override;

    { --- transfers between a 64 bit integer register pair and an mm register --- }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister); override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64); override;
  end;
  { tarmcg64farm: 64 bit code generator class paired with tarmcgarm;
    it declares nothing beyond what tcg64farm provides }
  tarmcg64farm = class(tcg64farm);
  { tthumbcgarm: Thumb code generator; derives directly from tbasecgarm
    (not from tcgarm) and supplies its own operations, loads, frame code and
    load/store handling }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    { --- procedure frame handling --- }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- loads --- }
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;

    procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint); override;
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { tthumbcg64farm: 64 bit code generator class for Thumb; derives directly
    from tbasecg64farm and overrides only the two-operand 64 bit operations }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
    procedure a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64); override;
  end;
  { tthumb2cgarm: Thumb-2 code generator; builds on tcgarm and overrides the
    parts listed below }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    procedure a_call_reg(list: TAsmList; reg: tregister); override;

    { --- loads --- }
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint;
      src, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize;
      src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure frame handling --- }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;

    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;

    { --- multimedia (mm) register moves --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
  end;
  { tthumb2cg64farm: 64 bit code generator class for Thumb-2; reuses tcg64farm
    and overrides only a_op64_reg_reg }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
  end;
  const
    { translation table from a generic comparison (topcmp) to an ARM condition
      code; the entries are positional, so their order must follow the
      declaration order of topcmp (declared outside this file) }
    OpCmp2AsmCond : array[topcmp] of TAsmCond = (
      C_NONE,
      C_EQ,
      C_GT,
      C_LT,
      C_GE,
      C_LE,
      C_NE,
      C_LS,
      C_CC,
      C_CS,
      C_HI
    );

    { NOTE(review): presumably the stack page size used for stack probing on
      Windows targets - no use is visible in this part of the file, confirm }
    winstackpagesize = 4096;

  { returns the instruction postfix matching the float type of def }
  function get_fpu_postfix(def: tdef): toppostfix;

  { exported unit entry point; the implementation is not visible in this part
    of the file }
  procedure create_codegen;
  174. implementation
  175. uses
  176. globals,verbose,systems,cutils,
  177. aopt,aoptcpu,
  178. fmodule,
  179. symconst,symsym,symtable,
  180. tgobj,
  181. procinfo,cpupi,
  182. paramgr;
  183. { Range check must be disabled explicitly as conversions between signed and unsigned
  184. 32-bit values are done without explicit typecasts }
  185. {$R-}
  { Maps a floating point type definition to the instruction postfix of the
    matching precision: s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.

    def : type definition to inspect; must be a floatdef.

    Raises internal error 200401271 when def is not a floatdef and internal
    error 200401272 for any other float type. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real: result:=PF_S;
          s64real: result:=PF_D;
          s80real: result:=PF_E;
        else
          internalerror(200401272);
        end;
    end;
  { Creates the integer, FPU and MM register allocators of the normal ARM
    code generator and stores them in rg[].

    The integer register set depends on the target system and on the frame
    pointer of the current procedure; the MM register set depends on the
    selected FPU type. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { --- integer registers ---
        currently, we always save R14, so we can use it }
      if target_info.system=system_arm_ios then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
           RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and
              (current_procinfo.framepointer<>NR_R11) then
        { R11 does not serve as frame pointer here, so it may be allocated }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { no procinfo, or R11 is the frame pointer: leave R11 out }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { --- fpu registers --- }
      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { --- mm registers ---
        The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if not(current_settings.fputype in [fpu_vfpv3,fpu_vfpv4]) then
        { D0..D15 only }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[])
      else
        { vfpv3/vfpv4: D16..D31 are available as well; D8..D15 are listed last }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
           RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
          ],first_mm_imreg,[]);
    end;
  { Releases the three register allocators created by
    init_register_allocators, then lets the inherited implementation run. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { integer, fpu and mm allocators, in the order they were created }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg using the cheapest form that applies,
    tried in this order:
      1. a single MOV when a is a shifter constant,
      2. a single MVN when not(a) is a shifter constant,
      3. MOV+ORR when a splits into two shifter constants,
      4. MVN+BIC when not(a) splits into two shifter constants,
      5. otherwise a PC relative LDR from a 32 bit literal appended to the
         local data of the current procedure.

    list : instruction list receiving the generated code
    size : must be one of the 8, 16 or 32 bit integer sizes, otherwise
           internal error 2002090902 is raised
    a    : constant to load
    reg  : destination register }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      literal_label : tasmlabel;
      literal_ref : treference;
      part1, part2 : DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      { 1. plain mov }
      if is_shifter_const(a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { 2. mvn of the inverted constant }
      if is_shifter_const(not(a),rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { 3. loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { 4. loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { 5. literal fallback: emit a label followed by the 32 bit value into
           the procedure's local data and load it relative to PC }
      reference_reset(literal_ref,4,[]);
      current_asmdata.getjumplabel(literal_label);
      cg.a_label(current_procinfo.aktlocaldata,literal_label);
      { symboldata refers to the label entry that was just appended }
      literal_ref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      literal_ref.symbol:=literal_label;
      literal_ref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,literal_ref));
    end;
  { Loads a value of size fromsize from memory (Ref) into reg.

    When the source is at least as large as the destination, the load is done
    with the destination size. A plain LDR with the matching postfix is
    emitted through handle_load_store unless
      - the reference is only 1 or 2 byte aligned and that is less than the
        size being loaded, or
      - the cpu lacks CPUARM_HAS_ALL_MEM and a halfword access is needed,
    in which case the value is assembled from smaller loads combined with
    ORR and shifted operands. The byte order of the pieces follows
    target_info.endian.

    Raises internal error 200308297 for sizes other than 8/16/32 bit
    integers. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      hireg,
      auxreg : tregister;
      shiftop : tshifterop;
      step : integer;
      limited_mem : boolean;

    { Chooses the reference used for the partial loads: Ref itself when it is
      simple enough, otherwise the address of Ref is first loaded into a
      fresh register (auxreg) and partref becomes [auxreg].
      maxoffset is the largest offset at which the partial loads, which add
      to the offset afterwards, can still use Ref directly. }
    procedure select_partref(maxoffset : longint);
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-4095) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            auxreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,auxreg);
            reference_reset_base(partref,auxreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    begin
      limited_mem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if TCGSize2Size[fromsize]>=TCGSize2Size[tosize] then
        fromsize:=tosize;

      { pick the load postfix for the (possibly narrowed) source size }
      case fromsize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
      else
        InternalError(200308297);
      end;

      { without CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned here and
        sign extended at the end of this routine }
      if limited_mem and (fromsize=OS_S8) then
        postfix:=PF_B;

      if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
         (limited_mem and (postfix in [PF_SH,PF_H])) then
        begin
          { direction in which the offset moves from one piece to the next }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case fromsize of
            OS_16,OS_S16:
              begin
                select_partref(4094);
                { big endian: start at the last byte, step is negative }
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                shiftop.shiftimm:=8;
                hireg:=getintregister(list,OS_INT);
                { first byte goes straight into the destination }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                { second byte: loaded signed for OS_S16, unsigned otherwise }
                if fromsize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,hireg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,hireg);
                { reg := reg or (hireg shl 8) }
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hireg,shiftop));
              end;
            OS_32,OS_S32:
              begin
                hireg:=getintregister(list,OS_INT);
                select_partref(4092);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,hireg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hireg,shiftop));
                  end
                else
                  begin
                    { four byte loads; a fresh register is taken for the
                      third byte, partref keeps whatever base it was given }
                    auxreg:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,hireg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,auxreg);
                    shiftop.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hireg,shiftop));
                    inc(partref.offset,step);
                    { hireg is reused for the fourth byte }
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,hireg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,auxreg,shiftop));
                    shiftop.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hireg,shiftop));
                  end;
              end
          else
            { byte sized loads need no splitting }
            handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      { fix up signed byte loads }
      if fromsize=OS_S8 then
        begin
          if limited_mem then
            a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
          else if tosize=OS_16 then
            a_load_reg_reg(list,OS_16,OS_32,reg,reg);
        end;
    end;
  { Subtracts ioffset from the 'self' parameter of procdef, wherever the
    caller side parameter locations place it.

    For a register location the subtraction is applied to the register; for
    a stack location it is applied to the memory slot. When ioffset is not a
    shifter constant it is first loaded into R12.

    Raises internal error 200305251 when procdef has no 'self' parameter
    symbol and internal error 200309189 for any other location kind. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      slot : treference;
      cur : Pcgparalocation;
      rot : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and (selfsym.typ=paravarsym)) then
        internalerror(200305251);

      { walk every location piece of the self parameter }
      cur:=tparavarsym(selfsym).paraloc[callerside].location;
      while cur<>nil do
        begin
          case cur^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,rot) then
                  a_op_const_reg(list,OP_SUB,cur^.size,ioffset,cur^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,cur^.size,NR_R12,cur^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(slot,cur^.reference.index,
                  cur^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,rot) then
                  a_op_const_ref(list,OP_SUB,cur^.size,ioffset,slot)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,cur^.size,NR_R12,slot);
                  end;
              end
          else
            internalerror(200309189);
          end;
          cur:=cur^.next;
        end;
    end;
  { Passes the constant a as a parameter at the (simple) location described
    by paraloc: into the register for register locations, or into the memory
    slot for reference locations.

    Raises internal error 2002081101 for any other location kind. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      slot : treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { build the reference from the index register and offset
              recorded in the parameter location }
            reference_reset(slot,paraloc.alignment,[]);
            slot.base:=paraloc.location^.reference.index;
            slot.offset:=paraloc.location^.reference.offset;
            a_load_const_ref(list,size,a,slot);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);
      else
        internalerror(2002081101);
      end;
    end;
  { Copies (part of) a parameter from ref to the parameter location
    reference paralocref.

    A 64 bit float parameter whose current location piece is 32 bits wide is
    copied as a 4 byte block; every other case is left to the inherited
    implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    begin
      if (cgpara.size<>OS_F64) or
         (location^.size<>OS_32) then
        inherited
      else
        { doubles in softemu mode have a strange order of registers and
          references }
        g_concatcopy(list,ref,paralocref,4);
    end;
  { Passes the address of r as a parameter at the (simple) location described
    by paraloc. For register locations the address is computed directly into
    the parameter register; for reference locations it is computed into a
    temporary register and then stored to the parameter slot.

    Raises internal error 2002080701 for any other location kind. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      slot : treference;
      addrreg : tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { build the reference from the index register and offset
              recorded in the parameter location }
            reference_reset(slot,paraloc.alignment,[]);
            slot.base:=paraloc.location^.reference.index;
            slot.offset:=paraloc.location^.reference.offset;
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,slot);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
      else
        internalerror(2002080701);
      end;
    end;
  { Emits a BL to the routine named s and marks the current procedure as one
    that performs calls (pi_do_call).

    list : instruction list receiving the call
    s    : assembler name of the callee
    weak : when true the symbol is referenced as a weak symbol }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(target,callee,0,sizeof(pint),[]);

      { use the PIC address form only when the target uses a GOT and the
        module is compiled as PIC }
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        target.refaddr:=addr_pic
      else
        target.refaddr:=addr_full;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,target));

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); for that reason
        the former check that raised internal error 2003060703 when the flag
        was missing is disabled and the flag is simply set here }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits an indirect call through reg and marks the current procedure as one
    that performs calls (pi_do_call).

    With CPUARM_HAS_BLX a single BLX is used; otherwise the return address is
    set up by copying PC into R14 before reg is moved into PC. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX,reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); for that reason
        the former check that raised internal error 2003060703 when the flag
        was missing is disabled and the flag is simply set here }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Applies "reg := reg Op a" by delegating to the three operand form with
    reg serving as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      { source and destination are the same register }
      a_op_const_reg_reg(list,Op,size,a,reg,reg);
    end;
  { Applies "mem := mem Op a" to the memory operand ref as a load / operate /
    store sequence using two temporary integer registers.

    The store goes through the reference returned by the load, so that it
    addresses the same location the load actually used. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valreg,
      resreg : tregister;
      usedref : treference;
    begin
      valreg:=getintregister(list,size);
      resreg:=getintregister(list,size);
      { load the current value and remember the reference that was used }
      usedref:=a_internal_load_ref_reg(list,size,size,ref,valreg);
      { compute the new value into the second temporary }
      a_op_const_reg_reg(list,Op,size,a,valreg,resreg);
      { write the result back }
      a_load_reg_ref(list,size,size,resreg,usedref);
    end;
  { Two operand integer operation "dst := dst Op src".

    OP_NEG and OP_NOT are unary and handled here (dst := Op src); every other
    operation is forwarded to a_op_reg_reg_reg with dst serving as second
    source and as destination. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case Op of
        OP_NEG:
          begin
            { dst := 0 - src }
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          if size in [OS_8, OS_16, OS_S8, OS_S16] then
            begin
              { sub-word sizes: invert the value moved to the top of the
                register, then shift it back down so that the upper bits get
                a defined content }
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              if size in [OS_8, OS_S8] then
                shiftop.shiftimm:=24
              else
                shiftop.shiftimm:=16;
              list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
              { Using a shift here allows this to be folded into another
                instruction; arithmetic shift for signed sizes, logical shift
                for unsigned ones }
              if size in [OS_S8, OS_S16] then
                shiftop.shiftmode:=SM_ASR
              else
                shiftop.shiftmode:=SM_LSR;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
            end
          else
            list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      else
        a_op_reg_reg_reg(list,Op,size,src,dst,dst);
      end;
    end;
  const
    { The three tables below are indexed by TOpCG, so their entries are
      positional and must follow the declaration order of TOpCG (declared
      outside this file). Each has 18 entries. A_NONE marks an operation
      without a direct opcode in the respective table. }

    { opcode for the register/register form of an operation; unlike the table
      that follows it, this one has no opcode for the shift and rotate
      positions }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE, A_MOV,  A_ADD,  A_AND,  A_NONE, A_NONE,
      A_MUL,  A_MUL,  A_NONE, A_NONE, A_ORR,  A_NONE,
      A_NONE, A_NONE, A_SUB,  A_EOR,  A_NONE, A_NONE
    );

    { same mapping, but with A_ASR, A_LSL, A_LSR and A_ROR filled in }
    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE, A_MOV,  A_ADD,  A_AND,  A_NONE, A_NONE,
      A_MUL,  A_MUL,  A_NONE, A_NONE, A_ORR,  A_ASR,
      A_LSL,  A_LSR,  A_SUB,  A_EOR,  A_NONE, A_ROR
    );

    { instruction postfix per operation; currently PF_None for all of them }
    op_reg_postfix: array[TOpCG] of TOpPostfix = (
      PF_None, PF_None, PF_None, PF_None, PF_None, PF_None,
      PF_None, PF_None, PF_None, PF_None, PF_None, PF_None,
      PF_None, PF_None, PF_None, PF_None, PF_None, PF_None
    );
  { Three operand form "dst := src op a" without overflow checking: forwards
    to a_op_const_reg_reg_checkoverflow with setflags = false. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      { only needed to satisfy the var parameter of the callee; it is not
        initialised before the call and not read afterwards }
      dummy_ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,dummy_ovloc);
    end;
  { Three operand register form without overflow checking: forwards to
    a_op_reg_reg_reg_checkoverflow with setflags = false. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      { only needed to satisfy the var parameter of the callee; it is not
        initialised before the call and not read afterwards }
      dummy_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,dummy_ovloc);
    end;
  { Translates a generic shift/rotate operation into the ARM shifter mode.

    OP_ROL maps to SM_ROR just like OP_ROR does.
    NOTE(review): presumably callers convert the rotate amount themselves for
    rotate-left - confirm at the call sites.

    Raises internal error 2012070501 for any other operation. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        OP_ROR,
        OP_ROL:
          Result:=SM_ROR;
      else
        internalerror(2012070501);
      end;
    end;
  { Tries to compute dst := src * a with a short sequence of shifted
    MOV/ADD/SUB/RSB instructions instead of a real multiplication.

    list     - instruction list the sequence is appended to
    a        - constant factor
    src, dst - source and destination register

    Returns true if code computing the product was emitted (this includes the
    trivial factors 0, 1 and -1) and false if nothing was emitted because the
    estimated cost exceeds maxeffort; in that case the caller has to multiply
    by other means.

    NOTE(review): the multi-instruction sequences read src again after dst has
    been written, so they look like they rely on src<>dst - confirm against
    the callers. }
  646. function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  647. var
  648. multiplier : dword;
  649. power : longint;
  650. shifterop : tshifterop;
  651. bitsset : byte;
  652. negative : boolean;
  653. first : boolean;
  654. b,
  655. cycles : byte;
  656. maxeffort : byte;
  657. begin
  658. result:=true;
  659. cycles:=0;
  660. negative:=a<0;
  661. shifterop.rs:=NR_NO;
  662. shifterop.shiftmode:=SM_LSL;
  { a negative factor costs one extra instruction: the final RSB dst,dst,#0 }
  663. if negative then
  664. inc(cycles);
  665. multiplier:=dword(abs(a));
  { number of set bits, not counting bit 0 (see the "add up" comment below) }
  666. bitsset:=popcnt(multiplier and $fffffffe);
  667. { heuristics to estimate how many instructions are reasonable to replace the mul,
  668. this is currently based on XScale timings }
  669. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  670. actual multiplication, this requires min. 1+4 cycles
  671. because the first shift imm. might cause a stall and because we need more instructions
  672. when replacing the mul we generate max. 3 instructions to replace this mul }
  673. maxeffort:=3;
  674. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  675. a ldr, so generating one more operation to replace this is beneficial }
  676. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  677. inc(maxeffort);
  678. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  679. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  680. dec(maxeffort);
  681. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  682. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  683. dec(maxeffort);
  684. { most simple cases }
  685. if a=1 then
  686. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  687. else if a=0 then
  688. a_load_const_reg(list,OS_32,0,dst)
  689. else if a=-1 then
  690. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  691. { add up ?
  692. basically, one add is needed for each bit being set in the constant factor
  693. however, the least significant bit is for free, it can be hidden in the initial
  694. instruction
  695. }
  { the second operand of the "and" is only evaluated if the first one holds;
    nextpowerof2 is expected to fill in "power" as well (NOTE(review): helper
    is not visible here), but "power" is not used by this branch }
  696. else if (bitsset+cycles<=maxeffort) and
  697. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  698. begin
  699. first:=true;
  { consume the multiplier from its highest set bit downwards }
  700. while multiplier<>0 do
  701. begin
  702. shifterop.shiftimm:=BsrDWord(multiplier);
  703. if odd(multiplier) then
  704. begin
  { bit 0 is folded into this instruction: dst := src + (src shl shiftimm) }
  705. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  706. dec(multiplier);
  707. end
  708. else
  709. if first then
  710. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  711. else
  712. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  713. first:=false;
  { clear the bit which has just been handled }
  714. dec(multiplier,1 shl shifterop.shiftimm);
  715. end;
  { dst := 0 - dst }
  716. if negative then
  717. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  718. end
  719. { subtract from the next greater power of two? }
  { the nextpowerof2 call in this condition is the one which provides the
    value of "power" used inside the branch }
  720. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  721. begin
  722. first:=true;
  723. while multiplier<>0 do
  724. begin
  725. if first then
  726. begin
  { from here on "multiplier" holds what has to be subtracted from
    src shl power }
  727. multiplier:=(1 shl power)-multiplier;
  728. shifterop.shiftimm:=power;
  729. end
  730. else
  731. shifterop.shiftimm:=BsrDWord(multiplier);
  732. if odd(multiplier) then
  733. begin
  { dst := (src shl shiftimm) - src, this covers bit 0 of the remainder }
  734. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  735. dec(multiplier);
  736. end
  737. else
  738. if first then
  739. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  740. else
  741. begin
  742. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  743. dec(multiplier,1 shl shifterop.shiftimm);
  744. end;
  745. first:=false;
  746. end;
  { dst := 0 - dst }
  747. if negative then
  748. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  749. end
  750. else
  { too expensive, nothing has been emitted }
  751. result:=false;
  752. end;
  { Emits code for dst := src <op> a and optionally reports where an overflow
    of the operation can be tested.

    list     - instruction list the code is appended to
    op       - operation; it may be rewritten by optimize_op_const first
    size     - operand size, passed on to maybeadjustresult at the end
    a        - constant operand
    src, dst - source and destination register
    setflags - request a flag setting instruction (cgsetflags has the same effect)
    ovloc    - out: LOC_FLAGS with the flag to test when an immediate ADD/SUB was
               emitted with flags for an 8/16/32 bit size, the result of
               a_op_reg_reg_reg_checkoverflow when the constant had to be loaded
               into a register, LOC_VOID otherwise

    ovloc.loc is initialised before anything else so that it is also defined
    on the OP_NONE/OP_MOVE early exits below. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
      imm1, imm2: DWord;
    begin
      ovloc.loc:=LOC_VOID;
      optimize_op_const(size, op, a);
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
      end;

      { swap ADD and SUB if the negated constant can be encoded as shifter
        constant; not done if the caller asked for flags because the flag
        reported for ADD and SUB differs (F_CS/F_CC below) }
      if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { rol is emulated by ror'ing 32-a }
              if op = OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            { a MOVT/MOV based variant for ADD/SUB with a<0 or a>4095 used to
              be here and was commented out }
            begin
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              { append the S postfix only if flags are requested }
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
            end;
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
            into the following instruction}
          else if (op = OP_AND) and
                  is_continuous_mask(aword(a), lsb, width) and
                  ((lsb = 0) or ((lsb + width) = 32)) then
            begin
              shifterop_reset(so);
              if (width = 16) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width = 8) and
                 (lsb = 0) and
                 (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb = 0 then
                begin
                  { mask of the low bits: shift the unwanted high bits out and back }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask of the high bits: shift the unwanted low bits out and back }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
          else
            begin
              { general case: load the constant and use the register variant }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits code for dst := src2 <op> src1 and optionally reports where an
    overflow of the operation can be tested.

    list       - instruction list the code is appended to
    op         - operation; OP_NEG, OP_NOT, OP_DIV and OP_IDIV are rejected
                 with internalerror 200308283
    size       - operand size; rotates are only accepted for OS_32/OS_S32
    src1, src2 - source registers, dst - destination register
    setflags   - request overflow information (cgsetflags has the same effect)
    ovloc      - out: LOC_FLAGS/F_NE after a flag checked multiplication,
                 LOC_VOID otherwise

    If all three registers of a multiplication are the same, one factor is
    copied into a temporary register first because rd and rm must differ. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { 32x32->64 multiplication, the upper half is used to detect
                  the overflow }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: the copy has to go into tmpreg, copying
                          into dst would leave tmpreg uninitialised }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow if the upper half equals the sign
                      extension of the lower half }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow if the upper half is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: see above, the copy has to go into tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { append the S postfix only if flags are requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the product in dstlo (low half) and
    dsthi (high half).

    With UMULL support a UMULL (OS_32) or SMULL (OS_S32) is emitted; any other
    size raises internalerror 2014060802.  Without UMULL support only the low
    half can be computed (plain MUL), so a request for dsthi raises
    internalerror 2015083022.  Result registers passed as NR_NO are replaced
    by freshly allocated registers. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              { 32x32=32 bit multiplication is sufficient }
              if dstlo=NR_NO then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      if size=OS_32 then
        mulop:=A_UMULL
      else if size=OS_S32 then
        mulop:=A_SMULL
      else
        InternalError(2014060802);

      { the caller may leave out a half it is not interested in, the
        instruction needs valid registers for both nevertheless }
      if dstlo=NR_NO then
        dstlo:=getintregister(list,size);
      if dsthi=NR_NO then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load/store instruction "op" (with postfix "oppostfix") for
    register "reg" and reference "ref", rewriting the reference first until it
    can be encoded.

    list      - instruction list the code is appended to
    op        - load/store opcode
    oppostfix - size/sign postfix of the instruction
    reg       - register to load or store
    ref       - memory reference; passed by value and modified locally

    Returns the reference as it was finally used in the instruction, so the
    caller can derive neighbouring addresses from it.

    Steps: make sure a base register exists, call fixref if the reference is
    symbolic or its offset is out of range for the instruction, apply the
    Thumb register restrictions, fold base+index+offset, and fold the index
    for FPU/VFP instructions. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      { tmpreg1 tracks the temporary which was put last into ref.base or ref.index }
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020701);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { The offset is always added to the base: this is correct regardless
            of the sign or shift of the index.  The sum must replace ref.base,
            otherwise the offset is lost when ref.offset is cleared below. }
          tmpreg2:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg2);
          { keep tmpreg1 pointing at a register of the reference: if it was
            unset or the old base, it follows the new base; if it is the
            index, it stays as it is }
          if (tmpreg1=NR_NO) or (ref.base=tmpreg1) then
            tmpreg1:=tmpreg2;
          ref.base:=tmpreg2;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { base is already a temporary: fold the index in place }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { index is the temporary: it receives base -/+ index and
                    becomes the new base }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { no temporary yet: honour the sign of the index like the two
                branches above do }
              tmpreg1:=getintregister(list,OS_ADDR);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register "reg" to memory reference "ref" with size "tosize".

    A plain STR with the matching postfix is used if possible.  If the
    reference is aligned to 1 or 2 bytes only and this is less than the store
    size, or if a halfword store is needed on a CPU without
    CPUARM_HAS_ALL_MEM, 16 and 32 bit values are split into several smaller
    stores; the pieces are placed according to target_info.endian.
    Sizes other than 8/16/32 bit integers and OS_F32 raise
    InternalError 200308299. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step : integer;
      bigendian,
      needs_split : boolean;
    begin
      if TCGSize2Size[FromSize] >= TCGSize2Size[ToSize] then
        FromSize := ToSize;
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;

      needs_split:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
         (postfix=PF_H));

      { the simple case: one store instruction does the job }
      if not(needs_split) or not(FromSize in [OS_16,OS_S16,OS_32,OS_S32]) then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { the least significant piece is stored first; on big endian targets
        it lives at the highest address, so we walk downwards there }
      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;

      shifted:=getintregister(list,OS_INT);
      partref:=ref;
      if FromSize in [OS_16,OS_S16] then
        begin
          { two byte stores }
          if bigendian then
            inc(partref.offset,1);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          inc(partref.offset,step);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
        end
      else if ref.alignment=2 then
        begin
          { 32 bit value, two halfword stores }
          if bigendian then
            inc(partref.offset,2);
          partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
          inc(partref.offset,step*2);
          a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
        end
      else
        begin
          { 32 bit value, four byte stores }
          if bigendian then
            inc(partref.offset,3);
          partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
          inc(partref.offset,step);
          a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
        end;
    end;
  { Stores "reg" to "ref" with size "tosize" and returns the reference which
    was used by (the first) store instruction.

    On CPUs without CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte
    stores: reg to the returned reference and reg shr 8 to the byte following
    it.  Sizes other than 8/16/32 bit integers raise InternalError 2003082910. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      hiref : treference;
      hibyte : TRegister;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      if not((tosize in [OS_S16,OS_16]) and
             (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]))) then
        begin
          { one store instruction is enough }
          result:=handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { no halfword store available: first byte ... }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      { ... then bits 8..15 to the next address }
      hibyte:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibyte);
      hiref:=result;
      inc(hiref.offset);
      handle_load_store(list,A_STR,PF_B,hibyte,hiref);
    end;
  { Loads "reg" from "ref" and returns the reference which was used by (the
    first) load instruction.

    The load postfix is derived from "fromsize"; sizes other than 8/16/32 bit
    integers raise InternalError 200308291.  On CPUs without
    CPUARM_HAS_ALL_MEM two cases are handled separately, both selected by
    "tosize": OS_S8 is loaded as unsigned byte and sign extended afterwards,
    16 bit values are assembled from two byte loads (the byte at the following
    address is or'ed in shifted left by 8). }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shifter : tshifterop;
      hibyte : TRegister;
      hiref : treference;
      restricted_mem : boolean;
    begin
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308291);
      end;

      restricted_mem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if restricted_mem and (tosize=OS_S8) then
        begin
          { no signed byte load: load unsigned and sign extend in the register }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if restricted_mem and (tosize in [OS_S16,OS_16]) then
        begin
          { no halfword load: combine two byte loads }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          hibyte:=getintregister(list,OS_INT);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_LDR,PF_B,hibyte,hiref);
          shifterop_reset(shifter);
          shifter.shiftmode:=SM_LSL;
          shifter.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibyte,shifter));
        end
      else
        result:=handle_load_store(list,A_LDR,postfix,reg,ref);
    end;
  { Copies reg1 to reg2, zero or sign extending the value if fromsize and
    tosize differ.

    If the target size is not bigger than the source size, the value is
    extended from the target size.  Before ARMv6 the extension is done with
    shifts (AND for OS_8), from ARMv6 on with UXTB/UXTH/SXTB/SXTH/SXTB16.
    If no extension code is generated and the registers differ, a plain MOV
    is emitted and reported to the register allocator via
    add_move_instruction. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      shifter : tshifterop;

    { reg2 := reg shifted by shiftimm using shiftmode }
    procedure emit_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not(GenerateThumbCode) then
          begin
            shifter.shiftmode:=shiftmode;
            shifter.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,shifter));
            exit;
          end;
        { thumb: use the generic shift operations }
        case shiftmode of
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    { extension for cpus older than ARMv6; returns false if fromsize needs
      no extension code }
    function extend_with_shifts : boolean;
      begin
        result:=true;
        case fromsize of
          OS_8:
            if GenerateThumbCode then
              a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
            else
              list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
          OS_S8:
            begin
              emit_shift(SM_LSL,24,reg1);
              if tosize=OS_16 then
                begin
                  { sign extend to 16 bit, then clear the upper halfword }
                  emit_shift(SM_ASR,8,reg2);
                  emit_shift(SM_LSR,16,reg2);
                end
              else
                emit_shift(SM_ASR,24,reg2);
            end;
          OS_16:
            begin
              emit_shift(SM_LSL,16,reg1);
              emit_shift(SM_LSR,16,reg2);
            end;
          OS_S16:
            begin
              emit_shift(SM_LSL,16,reg1);
              emit_shift(SM_ASR,16,reg2);
            end;
          else
            result:=false;
        end;
      end;

    { extension for ARMv6 and later; returns false if fromsize needs no
      extension code }
    function extend_with_armv6_ops : boolean;
      begin
        result:=true;
        case fromsize of
          OS_8:
            if GenerateThumbCode then
              list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
            else
              list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
          OS_S8:
            if tosize=OS_16 then
              begin
                shifter.shiftmode:=SM_ROR;
                shifter.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,shifter));
                emit_shift(SM_LSR,16,reg2);
              end
            else
              list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
          OS_16:
            list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
          OS_S16:
            list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
          else
            result:=false;
        end;
      end;

    var
      movinstr: taicpu;
      converted: boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(shifter);
          { a narrowing move extends from the target size }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            converted:=extend_with_shifts()
          else
            converted:=extend_with_armv6_ops();
        end;

      if converted or (reg1=reg2) then
        exit;

      { same size, only a register mov required }
      movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
      list.Concat(movinstr);
      { Notify the register allocator that we have written a move instruction so
        it can try to eliminate it. }
      add_move_instruction(movinstr);
    end;
  { Loads a floating point value from memory reference "ref" into the
    parameter location(s) described by "paraloc".

    list    - instruction list the code is appended to
    size    - size of the floating point value
    ref     - memory location of the value
    paraloc - parameter to fill; each of its locations is handled in turn:
              FPU registers get an FPU load, integer registers an integer
              load (64 bit locations are delegated to
              cg64.a_load64_ref_cgpara), stack locations are filled using
              g_concatcopy; any other location kind raises
              internalerror 200408241

    href walks through the source while the locations are processed, so every
    branch has to read from href and allocate the location it is actually
    loading (hloc), not the head of the list. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,hloc);
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,hloc);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { advance the source by the amount consumed by this location }
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Copies FPU register reg1 to reg2 with an MVF instruction; the precision
    postfix is looked up in cgsize2fpuoppostfix using "tosize". }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register "reg" from "ref" using LDF.

    The precision postfix follows "fromsize" (32 bit: PF_S, 64 bit: PF_D,
    OS_F80: PF_E); other sizes raise InternalError 200309021.  If "tosize"
    differs, the loaded value is converted by a register to register move
    afterwards. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      postfix : toppostfix;
    begin
      if fromsize in [OS_32,OS_F32] then
        postfix:=PF_S
      else if fromsize in [OS_64,OS_F64] then
        postfix:=PF_D
      else if fromsize=OS_F80 then
        postfix:=PF_E
      else
        InternalError(200309021);

      handle_load_store(list,A_LDF,postfix,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores FPU register "reg" to "ref" using STF.

    The precision postfix follows "tosize" (OS_F32: PF_S, OS_F64: PF_D,
    OS_F80: PF_E); other sizes raise InternalError 200309022. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      postfix : toppostfix;
    begin
      if tosize=OS_F32 then
        postfix:=PF_S
      else if tosize=OS_F64 then
        postfix:=PF_D
      else if tosize=OS_F80 then
        postfix:=PF_E
      else
        InternalError(200309022);

      handle_load_store(list,A_STF,postfix,reg,ref);
    end;
  { Emits a test of the FPSCR exception bits which calls
    FPC_THROWFPUEXCEPTION if one of them is set.

    list  - instruction list the code is appended to
    force - emit the check even if current_procinfo.FPUExceptionCheckNeeded
            is not set
    clear - reset current_procinfo.FPUExceptionCheckNeeded after emitting

    Nothing is generated unless cs_check_fpu_exceptions is active.  The whole
    sequence, including the call, goes into "list": the branch, the label and
    the register allocation markers are placed there, so the call must not
    end up in a different list. }
  procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
    var
      r : TRegister;
      ai: taicpu;
      l: TAsmLabel;
    begin
      if ((cs_check_fpu_exceptions in current_settings.localswitches) and
          (force or current_procinfo.FPUExceptionCheckNeeded)) then
        begin
          { read FPSCR and mask it with $9f, setting the flags }
          r:=getintregister(list,OS_INT);
          list.concat(taicpu.op_reg_reg(A_FMRX,r,NR_FPSCR));
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,r,r,$9f),PF_S));
          { skip the call if none of the masked bits is set }
          current_asmdata.getjumplabel(l);
          ai:=taicpu.op_sym(A_B,l);
          ai.is_jmp:=true;
          ai.condition:=C_EQ;
          list.concat(ai);
          alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
          cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
          dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
          a_label(list,l);
          if clear then
            current_procinfo.FPUExceptionCheckNeeded:=false;
        end;
    end;
  1568. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.

    list   : assembler list receiving the generated instructions
    size   : size used when the constant has to be loaded into a temp register
    cmp_op : comparison the conditional jump is based on
    a      : constant to compare with
    reg    : register to compare
    l      : jump target }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      direct_imm : boolean;
    begin
      { the flags stay allocated from the compare up to the conditional jump }
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { can a be used as immediate operand of CMP in the current instruction set? }
      if GenerateThumbCode then
        direct_imm:=is_thumb_imm(a)
      else
        direct_imm:=is_shifter_const(a,rot);
      if direct_imm then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual,
        so these constants are not compared through the negated value }
      else if not(GenerateThumbCode) and (a<>-1) and (a<>$7fffffff) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { no immediate form available: compare against a temp register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Generates a bit scan of src with the result in dst.

    list    : assembler list receiving the generated instructions
    reverse : true selects the CLZ based sequence (31 minus the CLZ result,
              masked with 255), false the RBIT/CLZ based sequence
    srcsize,
    dstsize : not evaluated by this implementation
    src,dst : source and destination register }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { whether this code is used at all is decided while the system unit
            is compiled, so the availability of RBIT is not checked here }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          { a count of 32 is replaced by $ff }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { Thumb-2 needs an IT instruction in front of the conditional move }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          { dst:=(31-clz(src)) and 255 }
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Compares two registers and jumps to l if cmp_op holds.

    The compare is emitted as CMP reg2,reg1; size is not evaluated here. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      compare : taicpu;
    begin
      { keep the flags allocated until the conditional jump has used them }
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s.

    A far jump is generated (BL when generating Thumb code, B otherwise);
    shortening it is left to the optimizer. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jump : taicpu;
      jumpop : tasmop;
    begin
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to the label l.

    A far jump is generated (BL when generating Thumb code, B otherwise);
    shortening it is left to the optimizer. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jump : taicpu;
      jumpop : tasmop;
    begin
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to l that is taken if the flags f are set.

    Outside of Thumb code this is a single conditional branch. For Thumb
    code a conditional branch with the inverted condition skips an
    unconditional jump to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jump : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not(GenerateThumbCode) then
        begin
          jump:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          jump.is_jmp:=true;
          list.concat(jump);
          exit;
        end;
      negated:=f;
      inverse_flags(negated);
      { the optimizer has to simplify this if the jump range turns out to be
        short enough }
      current_asmdata.getjumplabel(skiplabel);
      jump:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      jump.is_jmp:=true;
      list.concat(jump);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materializes the flags f in reg: a MOV of 1 executed under the condition
    belonging to f followed by a MOV of 0 executed under the inverse
    condition. size is not evaluated here. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Emits the profiling hook: on arm-linux R14 is pushed and __gnu_mcount_nc
    is called. Every other target ends in internalerror(2014091201). }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Emits the entry code (prologue) of the current procedure into list.

    list         : assembler list receiving the generated instructions
    localsize    : size of the local area in bytes, rounded up to a multiple of 4 first
    nostackframe : if true no code is generated at all; only the padding
                   register of the procinfo is reset

    With a stack frame the following is generated in this order:
    - the non-volatile integer registers used by the procedure (plus the
      frame/link related ones) are stored with STM..FD through SP,
    - the frame pointer is set up if one is used,
    - SP is lowered by the (alignment corrected) size of the locals,
    - the non-volatile floating point/vfp registers are stored at
      floatregstart relative to the frame pointer. }
  1685. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1686. var
  1687. ref : treference;
  1688. shift : byte;
  1689. firstfloatreg,lastfloatreg,
  1690. r : byte;
  1691. mmregs,
  1692. regs, saveregs : tcpuregisterset;
  1693. registerarea,
  1694. r7offset,
  1695. stackmisalignment : pint;
  1696. imm1, imm2: DWord;
  1697. stack_parameters : Boolean;
  1698. begin
  1699. LocalSize:=align(LocalSize,4);
  1700. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1701. { call instruction does not put anything on the stack }
  1702. registerarea:=0;
  { High(TSuperRegister) stands for "no padding register"; it is replaced
    below if an extra register is stored only to keep the stack aligned }
  1703. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1704. lastfloatreg:=RS_NO;
  1705. if not(nostackframe) then
  1706. begin
  1707. firstfloatreg:=RS_NO;
  1708. mmregs:=[];
  { find out which non-volatile floating point registers are used by the
    procedure and therefore have to be preserved }
  1709. case current_settings.fputype of
  1710. fpu_fpa,
  1711. fpu_fpa10,
  1712. fpu_fpa11:
  1713. begin
  1714. { save floating point registers? }
  1715. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1716. for r:=RS_F0 to RS_F7 do
  1717. if r in regs then
  1718. begin
  1719. if firstfloatreg=RS_NO then
  1720. firstfloatreg:=r;
  1721. lastfloatreg:=r;
  1722. inc(registerarea,12);
  1723. end;
  1724. end;
  1725. fpu_vfpv2,
  1726. fpu_vfpv3,
  1727. fpu_vfpv4,
  1728. fpu_vfpv3_d16:
  1729. begin;
  1730. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1731. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1732. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1733. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1734. end;
  1735. end;
  1736. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1737. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1738. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1739. { save int registers }
  { ref describes a pre-indexed access through SP, it is the memory operand
    of all STM..FD instructions below }
  1740. reference_reset(ref,4,[]);
  1741. ref.index:=NR_STACK_POINTER_REG;
  1742. ref.addressmode:=AM_PREINDEXED;
  1743. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1744. if not(target_info.system in systems_darwin) then
  1745. begin
  1746. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1747. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1748. begin
  { R12 keeps the value SP had on entry, the frame pointer is derived from it
    after the registers have been stored }
  1749. a_reg_alloc(list,NR_R12);
  1750. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1751. end;
  1752. { the (old) ARM APCS requires saving both the stack pointer (to
  1753. crawl the stack) and the PC (to identify the function this
  1754. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1755. and R15 -- still needs updating for EABI and Darwin, they don't
  1756. need that }
  1757. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1758. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1759. else
  1760. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1761. include(regs,RS_R14);
  1762. if regs<>[] then
  1763. begin
  1764. for r:=RS_R0 to RS_R15 do
  1765. if r in regs then
  1766. inc(registerarea,4);
  1767. { if the stack is not 8 byte aligned, try to add an extra register,
  1768. so we can avoid the extra sub/add ...,#4 later (KB) }
  1769. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1770. for r:=RS_R3 downto RS_R0 do
  1771. if not(r in regs) then
  1772. begin
  1773. regs:=regs+[r];
  1774. inc(registerarea,4);
  1775. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1776. break;
  1777. end;
  1778. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1779. end;
  1780. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1781. begin
  1782. { the framepointer now points to the saved R15, so the saved
  1783. framepointer is at R11-12 (for get_caller_frame) }
  1784. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1785. a_reg_dealloc(list,NR_R12);
  1786. end;
  1787. end
  1788. else
  1789. begin
  1790. { always save r14 if we use r7 as the framepointer, because
  1791. the parameter offsets are hardcoded in advance and always
  1792. assume that r14 sits on the stack right behind the saved r7
  1793. }
  1794. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1795. include(regs,RS_FRAME_POINTER_REG);
  1796. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1797. include(regs,RS_R14);
  1798. if regs<>[] then
  1799. begin
  1800. { on Darwin, you first have to save [r4-r7,lr], and then
  1801. [r8,r10,r11] and make r7 point to the previously saved
  1802. r7 so that you can perform a stack crawl based on it
  1803. ([r7] is previous stack frame, [r7+4] is return address
  1804. }
  1805. include(regs,RS_FRAME_POINTER_REG);
  1806. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset sums up 4 bytes for every register of the first group that is
    numbered below the frame pointer register }
  1807. r7offset:=0;
  1808. for r:=RS_R0 to RS_R15 do
  1809. if r in saveregs then
  1810. begin
  1811. inc(registerarea,4);
  1812. if r<RS_FRAME_POINTER_REG then
  1813. inc(r7offset,4);
  1814. end;
  1815. { save the registers }
  1816. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1817. { make r7 point to the saved r7 (regardless of whether this
  1818. frame uses the framepointer, for backtrace purposes) }
  1819. if r7offset<>0 then
  1820. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1821. else
  1822. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1823. { now save the rest (if any) }
  1824. saveregs:=regs-saveregs;
  1825. if saveregs<>[] then
  1826. begin
  1827. for r:=RS_R8 to RS_R11 do
  1828. if r in saveregs then
  1829. inc(registerarea,4);
  1830. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1831. end;
  1832. end;
  1833. end;
  { remainder of the register save area with respect to the maximum local
    alignment }
  1834. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1835. if (LocalSize<>0) or
  1836. ((stackmisalignment<>0) and
  1837. ((pi_do_call in current_procinfo.flags) or
  1838. (po_assembler in current_procinfo.procdef.procoptions))) then
  1839. begin
  { enlarge the local area so that localsize+stackmisalignment is a multiple
    of the maximum local alignment }
  1840. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  1841. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1842. begin
  1843. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1844. internalerror(2014030901)
  1845. else
  1846. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1847. end;
  { lower SP by localsize: with a single SUB if the value can be encoded as
    immediate, with two SUBs if it can be split into two immediates and
    through R12 otherwise }
  1848. if is_shifter_const(localsize,shift) then
  1849. begin
  1850. a_reg_dealloc(list,NR_R12);
  1851. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1852. end
  1853. else if split_into_shifter_const(localsize, imm1, imm2) then
  1854. begin
  1855. a_reg_dealloc(list,NR_R12);
  1856. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1857. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1858. end
  1859. else
  1860. begin
  1861. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1862. a_reg_alloc(list,NR_R12);
  1863. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1864. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1865. a_reg_dealloc(list,NR_R12);
  1866. end;
  1867. end;
  { store the floating point/vfp registers selected above in the area at
    floatregstart relative to the frame pointer }
  1868. if (mmregs<>[]) or
  1869. (firstfloatreg<>RS_NO) then
  1870. begin
  1871. reference_reset(ref,4,[]);
  1872. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1873. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1874. begin
  { the address of the save area is calculated in R12.
    NOTE(review): the encodability test looks at floatregstart while the
    immediate used in the else branch is -floatregstart - confirm that this
    is intended }
  1875. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1876. begin
  1877. a_reg_alloc(list,NR_R12);
  1878. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1879. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1880. a_reg_dealloc(list,NR_R12);
  1881. end
  1882. else
  1883. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1884. ref.base:=NR_R12;
  1885. end
  1886. else
  1887. begin
  1888. ref.base:=current_procinfo.framepointer;
  1889. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1890. end;
  1891. case current_settings.fputype of
  1892. fpu_fpa,
  1893. fpu_fpa10,
  1894. fpu_fpa11:
  1895. begin
  { one SFM stores the whole range firstfloatreg..lastfloatreg }
  1896. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1897. lastfloatreg-firstfloatreg+1,ref));
  1898. end;
  1899. fpu_vfpv2,
  1900. fpu_vfpv3,
  1901. fpu_vfpv4,
  1902. fpu_vfpv3_d16:
  1903. begin
  { the VSTM below gets the address register in ref.index instead of ref.base }
  1904. ref.index:=ref.base;
  1905. ref.base:=NR_NO;
  1906. { FSTMX is deprecated on ARMv6 and later }
  1907. {if (current_settings.cputype<cpu_armv6) then
  1908. postfix:=PF_IAX
  1909. else
  1910. postfix:=PF_IAD;}
  1911. if mmregs<>[] then
  1912. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1913. end;
  1914. end;
  1915. end;
  1916. end;
  1917. end;
  { Emits the exit code (epilogue) of the current procedure into list.

    list         : assembler list receiving the generated instructions
    parasize     : not evaluated by this implementation
    nostackframe : if true only the return instruction is generated

    With a stack frame the saved floating point/vfp registers are reloaded
    first, then the local area is released (only if SP itself is the frame
    pointer or on Darwin) and finally the integer registers are reloaded
    with LDM. If the LDM register set contains R15 it also performs the
    return; if there is nothing to reload, BX R14 (or MOV PC,R14 on CPUs
    without BX) is generated. }
  1918. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1919. var
  1920. ref : treference;
  1921. LocalSize : longint;
  1922. firstfloatreg,lastfloatreg,
  1923. r,
  1924. shift : byte;
  1925. mmregs,
  1926. saveregs,
  1927. regs : tcpuregisterset;
  1928. registerarea,
  1929. stackmisalignment: pint;
  1930. paddingreg: TSuperRegister;
  1931. imm1, imm2: DWord;
  1932. begin
  1933. if not(nostackframe) then
  1934. begin
  1935. registerarea:=0;
  1936. firstfloatreg:=RS_NO;
  1937. lastfloatreg:=RS_NO;
  1938. mmregs:=[];
  1939. saveregs:=[];
  { determine the floating point registers to reload with the same
    expressions g_proc_entry uses to select the ones it stores }
  1940. case current_settings.fputype of
  1941. fpu_fpa,
  1942. fpu_fpa10,
  1943. fpu_fpa11:
  1944. begin
  1945. { restore floating point registers? }
  1946. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1947. for r:=RS_F0 to RS_F7 do
  1948. if r in regs then
  1949. begin
  1950. if firstfloatreg=RS_NO then
  1951. firstfloatreg:=r;
  1952. lastfloatreg:=r;
  1953. { floating point register space is already included in
  1954. localsize below by calc_stackframe_size
  1955. inc(registerarea,12);
  1956. }
  1957. end;
  1958. end;
  1959. fpu_vfpv2,
  1960. fpu_vfpv3,
  1961. fpu_vfpv4,
  1962. fpu_vfpv3_d16:
  1963. begin;
  1964. { restore vfp registers? }
  1965. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1966. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1967. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1968. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1969. end;
  1970. end;
  { reload the floating point/vfp registers from the area at floatregstart
    relative to the frame pointer }
  1971. if (firstfloatreg<>RS_NO) or
  1972. (mmregs<>[]) then
  1973. begin
  1974. reference_reset(ref,4,[]);
  1975. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1976. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1977. begin
  { the address of the save area is calculated in R12.
    NOTE(review): the encodability test looks at floatregstart while the
    immediate used in the else branch is -floatregstart - confirm that this
    is intended }
  1978. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1979. begin
  1980. a_reg_alloc(list,NR_R12);
  1981. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1982. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1983. a_reg_dealloc(list,NR_R12);
  1984. end
  1985. else
  1986. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1987. ref.base:=NR_R12;
  1988. end
  1989. else
  1990. begin
  1991. ref.base:=current_procinfo.framepointer;
  1992. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1993. end;
  1994. case current_settings.fputype of
  1995. fpu_fpa,
  1996. fpu_fpa10,
  1997. fpu_fpa11:
  1998. begin
  { one LFM reloads the whole range firstfloatreg..lastfloatreg }
  1999. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2000. lastfloatreg-firstfloatreg+1,ref));
  2001. end;
  2002. fpu_vfpv2,
  2003. fpu_vfpv3,
  2004. fpu_vfpv4,
  2005. fpu_vfpv3_d16:
  2006. begin
  { the VLDM below gets the address register in ref.index instead of ref.base }
  2007. ref.index:=ref.base;
  2008. ref.base:=NR_NO;
  2009. { FLDMX is deprecated on ARMv6 and later }
  2010. {if (current_settings.cputype<cpu_armv6) then
  2011. mmpostfix:=PF_IAX
  2012. else
  2013. mmpostfix:=PF_IAD;}
  2014. if mmregs<>[] then
  2015. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2016. end;
  2017. end;
  2018. end;
  { integer registers to reload, starting from the same set g_proc_entry
    starts from }
  2019. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2020. if (pi_do_call in current_procinfo.flags) or
  2021. (regs<>[]) or
  2022. ((target_info.system in systems_darwin) and
  2023. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2024. begin
  { R15 takes the place of R14 in the register set, so the LDM below loads
    into PC instead of LR }
  2025. exclude(regs,RS_R14);
  2026. include(regs,RS_R15);
  2027. if (target_info.system in systems_darwin) then
  2028. include(regs,RS_FRAME_POINTER_REG);
  2029. end;
  2030. if not(target_info.system in systems_darwin) then
  2031. begin
  2032. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2033. The saved PC came after that but is discarded, since we restore
  2034. the stack pointer }
  2035. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2036. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2037. end
  2038. else
  2039. begin
  2040. { restore R8-R11 already if necessary (they've been stored
  2041. before the others) }
  2042. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2043. if saveregs<>[] then
  2044. begin
  { ref is only prepared here; the LDM that uses it is emitted further down,
    after the local area has been released }
  2045. reference_reset(ref,4,[]);
  2046. ref.index:=NR_STACK_POINTER_REG;
  2047. ref.addressmode:=AM_PREINDEXED;
  2048. for r:=RS_R8 to RS_R11 do
  2049. if r in saveregs then
  2050. inc(registerarea,4);
  2051. regs:=regs-saveregs;
  2052. end;
  2053. end;
  2054. for r:=RS_R0 to RS_R15 do
  2055. if r in regs then
  2056. inc(registerarea,4);
  2057. { reapply the stack padding reg, in case there was one, see the complementary
  2058. comment in g_proc_entry() (KB) }
  2059. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
  { g_proc_entry only ever selects R0..R3 as padding register and resets the
    field to High(TSuperRegister) otherwise, so any value from RS_R4 upwards
    means that no padding register is in use }
  2060. if paddingreg < RS_R4 then
  2061. if paddingreg in regs then
  2062. internalerror(201306190)
  2063. else
  2064. begin
  2065. regs:=regs+[paddingreg];
  2066. inc(registerarea,4);
  2067. end;
  2068. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  2069. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2070. (target_info.system in systems_darwin) then
  2071. begin
  2072. LocalSize:=current_procinfo.calc_stackframe_size;
  2073. if (LocalSize<>0) or
  2074. ((stackmisalignment<>0) and
  2075. ((pi_do_call in current_procinfo.flags) or
  2076. (po_assembler in current_procinfo.procdef.procoptions))) then
  2077. begin
  2078. if pi_estimatestacksize in current_procinfo.flags then
  2079. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2080. else
  2081. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { release the local area: with a single ADD if the value can be encoded as
    immediate, with two ADDs if it can be split into two immediates and
    through R12 otherwise }
  2082. if is_shifter_const(LocalSize,shift) then
  2083. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2084. else if split_into_shifter_const(localsize, imm1, imm2) then
  2085. begin
  2086. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2087. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2088. end
  2089. else
  2090. begin
  2091. a_reg_alloc(list,NR_R12);
  2092. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2093. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2094. a_reg_dealloc(list,NR_R12);
  2095. end;
  2096. end;
  { Darwin: reload the second register group first, it was stored last by
    g_proc_entry }
  2097. if (target_info.system in systems_darwin) and
  2098. (saveregs<>[]) then
  2099. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  { without registers to reload the return is done through R14 }
  2100. if regs=[] then
  2101. begin
  2102. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2103. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2104. else
  2105. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2106. end
  2107. else
  2108. begin
  2109. reference_reset(ref,4,[]);
  2110. ref.index:=NR_STACK_POINTER_REG;
  2111. ref.addressmode:=AM_PREINDEXED;
  2112. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2113. end;
  2114. end
  2115. else
  2116. begin
  2117. { restore int registers and return }
  { the LDM is addressed through the frame pointer and has no pre-indexed
    address mode; the register set contains the frame pointer, R13 and R15 }
  2118. reference_reset(ref,4,[]);
  2119. ref.index:=NR_FRAME_POINTER_REG;
  2120. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2121. end;
  2122. end
  { no stack frame: nothing was saved, just return through R14 }
  2123. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2124. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2125. else
  2126. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2127. end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.

    Code is only generated if PIC code is created, the current procedure
    needs the GOT and the target uses a GOT for PIC. The address is computed
    in R12 from a pc relative constant placed in the local data of the
    procedure and then moved to the GOT register. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      gotref : treference;
      poollabel,
      piclabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sr : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;
      { Procedure parameters are not initialized at this stage.
        Before the GOT initialization code, allocate the registers used for
        procedure parameters to prevent usage of these registers for temp
        operations in later stages of code generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sr:=RS_R0 to RS_R3 do
        if sr in usedregs then
          a_reg_alloc(list,newreg(R_INTREGISTER,sr,R_SUBWHOLE));
      { Allocate the scratch register R12 and use it for the GOT calculations
        directly. Otherwise the init code can be distorted in later stages of
        code generation. }
      a_reg_alloc(list,NR_R12);
      { load the constant stored behind poollabel in the local data, pc relative }
      reference_reset(gotref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      gotref.symbol:=poollabel;
      gotref.base:=NR_PC;
      gotref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,gotref));
      { the constant is _GLOBAL_OFFSET_TABLE_ relative to piclabel with an
        additional offset of -8; piclabel marks the ADD below }
      current_asmdata.getaddrlabel(piclabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,piclabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,piclabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));
      { release the registers again, in reverse order of their allocation }
      a_reg_dealloc(list,NR_R12);
      for sr:=RS_R3 downto RS_R0 do
        if sr in usedregs then
          a_reg_dealloc(list,newreg(R_INTREGISTER,sr,R_SUBWHOLE));
    end;
  { Loads the address described by ref into the register r.

    list : assembler list receiving the generated instructions
    ref  : reference whose address is taken; it must use AM_OFFSET
           addressing, otherwise internalerror(200309071) is raised
    r    : destination register

    References with a symbol, or with an offset where neither the offset nor
    its negation passes is_shifter_const, are first simplified by fixref.
    A shifted index is not supported (internal errors 2014020702 and
    200312021), nor is a negative index without a base (200312023). }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      b : byte;
      tmpref : treference;
      instr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      tmpref:=ref;
      { Be sure to have a base register }
      if (tmpref.base=NR_NO) then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(2014020702);
          if tmpref.signindex<0 then
            internalerror(200312023);
          tmpref.base:=tmpref.index;
          tmpref.index:=NR_NO;
        end;
      if assigned(tmpref.symbol) or
         not((is_shifter_const(tmpref.offset,b)) or
             (is_shifter_const(-tmpref.offset,b))
            ) then
        fixref(list,tmpref);
      { expect a base here if there is an index }
      if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
        internalerror(200312022);
      if tmpref.index<>NR_NO then
        begin
          if tmpref.shiftmode<>SM_None then
            internalerror(200312021);
          if tmpref.signindex<0 then
            { a_op_reg_reg_reg calculates dst:=src2 op src1, so the index has
              to be passed as first source operand to get base-index; the
              previous operand order calculated index-base }
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.index,tmpref.base,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
          if tmpref.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
        end
      else
        begin
          if tmpref.base=NR_NO then
            { neither base nor index: the address is the plain offset }
            a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
          else
            if tmpref.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
            else
              begin
                { the address is just the base register: emit a move and
                  register it through add_move_instruction }
                instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
                list.concat(instr);
                add_move_instruction(instr);
              end;
        end;
    end;
  { Simplifies ref so that it no longer contains a symbol or an offset.

    list : assembler list receiving the generated instructions
    ref  : reference to simplify, modified in place

    The symbol address and/or offset is written to a constant pool entry in
    the local data of the current procedure (or obtained through
    g_indirect_sym_load on iOS for externally bound symbols) and loaded into
    a temp register. That register then becomes the base or the index of ref,
    or it is added to the existing base if ref already has both. On return
    ref.offset is 0 and ref.symbol is nil. }
  2222. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2223. var
  2224. tmpreg, tmpreg2 : tregister;
  2225. tmpref : treference;
  2226. l, piclabel : tasmlabel;
  2227. indirection_done : boolean;
  2228. begin
  2229. { absolute symbols can't be handled directly, we've to store the symbol reference
  2230. in the text segment and access it pc relative
  2231. For now, we assume that references where base or index equals to PC are already
  2232. relative, all other references are assumed to be absolute and thus they need
  2233. to be handled extra.
  2234. A proper solution would be to change refoptions to a set and store the information
  2235. if the symbol is absolute or relative there.
  2236. }
  2237. { create consts entry }
  2238. reference_reset(tmpref,4,[]);
  2239. current_asmdata.getjumplabel(l);
  2240. cg.a_label(current_procinfo.aktlocaldata,l);
  2241. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2242. piclabel:=nil;
  2243. tmpreg:=NR_NO;
  2244. indirection_done:=false;
  { the constant following the label l depends on the kind of reference }
  2245. if assigned(ref.symbol) then
  2246. begin
  2247. if (target_info.system=system_arm_ios) and
  2248. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2249. begin
  { iOS, externally bound symbol: the address comes from g_indirect_sym_load,
    no constant is written and the pool load below is skipped }
  2250. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2251. if ref.offset<>0 then
  2252. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2253. indirection_done:=true;
  2254. end
  2255. else if (cs_create_pic in current_settings.moduleswitches) then
  2256. if (tf_pic_uses_got in target_info.flags) then
  { PIC through the GOT: the pool holds an aitconst_got constant for the symbol }
  2257. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2258. else
  2259. begin
  2260. { ideally, we would want to generate
  2261. ldr r1, LPICConstPool
  2262. LPICLocal:
  2263. ldr/str r2,[pc,r1]
  2264. ...
  2265. LPICConstPool:
  2266. .long _globsym-(LPICLocal+8)
  2267. However, we cannot be sure that the ldr/str will follow
  2268. right after the call to fixref, so we have to load the
  2269. complete address already in a register.
  2270. }
  2271. current_asmdata.getaddrlabel(piclabel);
  2272. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2273. end
  2274. else
  { no PIC: the pool holds the symbol plus the offset }
  2275. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2276. end
  2277. else
  { no symbol: the pool holds only the offset }
  2278. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2279. { load consts entry }
  2280. if not indirection_done then
  2281. begin
  2282. tmpreg:=getintregister(list,OS_INT);
  2283. tmpref.symbol:=l;
  2284. tmpref.base:=NR_PC;
  2285. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2286. if (cs_create_pic in current_settings.moduleswitches) and
  2287. (tf_pic_uses_got in target_info.flags) and
  2288. assigned(ref.symbol) then
  2289. begin
  { the value loaded from the pool is used as index relative to the GOT
    register to load the address of the symbol; the offset of the original
    reference is added afterwards }
  2290. reference_reset(tmpref,4,[]);
  2291. tmpref.base:=current_procinfo.got;
  2292. tmpref.index:=tmpreg;
  2293. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2294. if ref.offset<>0 then
  2295. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2296. end;
  2297. end;
  { PIC without GOT: the pool value is relative to piclabel, so PC is added
    right at piclabel to get the final address }
  2298. if assigned(piclabel) then
  2299. begin
  2300. cg.a_label(list,piclabel);
  2301. tmpreg2:=getaddressregister(list);
  2302. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2303. tmpreg:=tmpreg2
  2304. end;
  2305. { This routine can be called with PC as base/index in case the offset
  2306. was too large to encode in a load/store. In that case, the entire
  2307. absolute expression has been re-encoded in a new constpool entry, and
  2308. we have to remove the use of PC from the original reference (the code
  2309. above made everything relative to the value loaded from the new
  2310. constpool entry) }
  2311. if is_pc(ref.base) then
  2312. ref.base:=NR_NO;
  2313. if is_pc(ref.index) then
  2314. ref.index:=NR_NO;
  { merge tmpreg into the reference: added to the base if base and index are
    both in use, as index if only a base is present, as base otherwise }
  2315. if (ref.base<>NR_NO) then
  2316. begin
  2317. if ref.index<>NR_NO then
  2318. begin
  2319. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2320. ref.base:=tmpreg;
  2321. end
  2322. else
  2323. if ref.base<>NR_PC then
  2324. begin
  2325. ref.index:=tmpreg;
  2326. ref.shiftimm:=0;
  2327. ref.signindex:=1;
  2328. ref.shiftmode:=SM_None;
  2329. end
  2330. else
  2331. ref.base:=tmpreg;
  2332. end
  2333. else
  2334. ref.base:=tmpreg;
  2335. ref.offset:=0;
  2336. ref.symbol:=nil;
  2337. end;
  { Copies len bytes from source to dest by calling the move helper of the
    system unit.

    The parameter locations are taken from the system procedure MOVE; the
    call itself goes to FPC_MOVE. The parameters are loaded in the order
    length, destination address, source address. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,lenpara : TCGPara;
      moveproc : tprocdef;
      volatile_int,
      volatile_fpu : tcpuregisterset;
    begin
      moveproc:=search_system_proc('MOVE');
      srcpara.init;
      dstpara.init;
      lenpara.init;
      paramanager.getintparaloc(list,moveproc,1,srcpara);
      paramanager.getintparaloc(list,moveproc,2,dstpara);
      paramanager.getintparaloc(list,moveproc,3,lenpara);
      { load the parameters, last one first }
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);
      { the volatile integer and fpu registers are allocated around the call }
      volatile_int:=paramanager.get_volatile_registers_int(pocall_default);
      volatile_fpu:=paramanager.get_volatile_registers_fpu(pocall_default);
      alloccpuregisters(list,R_INTREGISTER,volatile_int);
      alloccpuregisters(list,R_FPUREGISTER,volatile_fpu);
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,volatile_fpu);
      dealloccpuregisters(list,R_INTREGISTER,volatile_int);
      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list    - assembler list the instructions are appended to
    source  - reference to the first byte to read
    dest    - reference to the first byte to write
    len     - number of bytes to copy; nothing is emitted for len=0
    aligned - true if word/halfword accesses may be used on both references

    Strategy, in the order the cases are tested:
      * aligned and len=4 or len=2: a single load/store pair;
      * aligned and len<=helpsize: up to maxtmpreg words are loaded into
        registers first and stored afterwards, the remainder is copied in
        pieces of 4, 2 or 1 bytes;
      * len<=4 (not aligned): byte by byte copy;
      * everything else: a byte wise copy loop (genloop/genloop_thumb). }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if it might conflict with reserved ones; also +2 is used because of the regs required for referencing}
      maxtmpreg_thumb = 5;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop for ARM/Thumb-2 code using post-indexed addressing.
      Expects srcref/dstref to be based on srcreg/destreg and countreg to be
      allocated by the caller.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        { post-indexed accesses advance both pointers by size per iteration }
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { decrement the counter and set the flags for the branch below }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the bytes which did not fit into a whole element }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { Copy loop for Thumb code: the pointers are advanced by explicit ADD
      instructions instead of post-indexed addressing.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { advance by the element size, so the loop is also correct when it
          is used with size>1 (all current callers pass size=1) }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the bytes which did not fit into a whole element }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest size which is still copied without a loop }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          srcreg:=getintregister(list,OS_ADDR);

          { explicit pc relative addressing, could be
            e.g. a floating point constant }
          if source.base=NR_PC then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { first load as many words as possible into registers ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          destreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,dest,destreg);
          reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

          { ... then store them in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy whatever is left in pieces of 4, 2 or 1 bytes }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the references returned by the internal load/store helpers
                are reused for the following byte accesses }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference must carry the properties of source,
                as in the aligned path above (dest.temppos was used here) }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
              genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without relying on the alignment
    of either reference: forwards to g_concatcopy_internal with
    aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    begin
      g_concatcopy_internal(list,
                            source,
                            dest,
                            len,
                            false);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    as soon as one of the two references reports an alignment of 1 or 3,
    otherwise the aligned code path of g_concatcopy_internal is used. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      odd_alignment : boolean;
    begin
      odd_alignment:=(source.alignment in [1,3]) or
                     (dest.alignment in [1,3]);
      g_concatcopy_internal(list,source,dest,len,not odd_alignment);
    end;
  { Emits an overflow check for the last operation when no explicit
    overflow location is available: calls g_overflowCheck_loc with an
    overflow location of kind LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      voidloc : tlocation;
    begin
      voidloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,voidloc);
    end;
  { Emits the overflow check sequence: a conditional branch over a call to
    FPC_OVERFLOW which is taken when no overflow happened.

    Nothing is emitted unless cs_check_overflow is active in the local
    switches.

    ovloc.loc=LOC_VOID:  the branch condition is derived from def and, for
                         pointer/unsigned/boolean types, from the opcode of
                         the last entry of List;
    ovloc.loc=LOC_FLAGS: ovloc.resflags is inverted and used as branch
                         condition, afterwards the flags are released;
    any other location raises internalerror 200409281. }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      branch : TAiCpu;
      okflags : tresflags;
      carry_based : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_VOID:
          begin
            branch:=taicpu.op_sym(A_B,skiplabel);
            branch.is_jmp:=true;
            { pointers and these ordinal types are checked through the carry
              flag, everything else through the overflow flag }
            carry_based:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if carry_based then
              begin
                { after a subtraction the sense of the carry is reversed }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  branch.SetCondition(C_CS)
                else
                  branch.SetCondition(C_CC);
              end
            else
              branch.SetCondition(C_VC);
            list.concat(branch);
          end;
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally empty: on this target the registers are saved as part of
    g_proc_entry. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { nothing to emit here, see g_proc_entry }
    end;
  { Intentionally empty: on this target the registers are restored as part
    of g_proc_exit. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { nothing to emit here, see g_proc_exit }
    end;
  { Emits a jump to l which is taken when the comparison condition cond
    holds.

    Non-Thumb code gets a single conditional branch. For Thumb code the
    inverse condition branches over an unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
        end
      else
        begin
          { the optimizer has to fix this if the jump range is sufficiently short }
          current_asmdata.getjumplabel(skiplabel);
          branch:=Taicpu.Op_sym(A_B,skiplabel);
          branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
          branch.is_jmp:=true;
          list.concat(branch);
          a_jmp_always(list,l);
          a_label(list,skiplabel);
        end;
    end;
  { Returns the opcode for a scalar move between two floating point sizes,
    looked up as optable[fromsize,tosize]: A_VMOV when both sizes are equal
    (OS_F32 or OS_F64), A_VCVT for OS_F32<->OS_F64. Every other combination
    raises internalerror 200312205. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      optable : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=optable[fromsize,tosize];
      { A_NONE marks an unsupported size combination }
      if result=A_NONE then
        internalerror(200312205);
    end;
  { Returns the instruction postfix belonging to a scalar move between two
    floating point sizes, looked up as postfixtable[fromsize,tosize].
    Combinations other than OS_F32/OS_F64 yield PF_None; unlike
    get_scalar_mm_op no error is raised for them. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      postfixtable : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      result:=postfixtable[fromsize,tosize];
    end;
  { Moves or converts a scalar value from mm register reg1 to mm register
    reg2. Only scalar shuffles (or no shuffle at all) are supported, anything
    else raises internalerror 2009112407. A plain VMOV is additionally
    registered as a move instruction. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407);
      { note: the lookup helpers are called with tosize first, fromsize second }
      mov:=taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1);
      mov:=setoppostfix(mov,get_scalar_mm_prefix(tosize,fromsize));
      list.concat(mov);
      if mov.opcode=A_VMOV then
        add_move_instruction(mov);
      maybe_check_for_fpu_exception(list);
    end;
  { Loads a scalar value from memory (ref) into the mm register reg,
    converting from fromsize to tosize if they differ.

    Integer source sizes (32/64 bit) are treated as the float size of the
    same width and must then match tosize. References with an alignment of
    1 or 2 are loaded through integer registers, all others with VLDR.
    Non-scalar shuffles raise internalerror 2009112413. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      wordreg,
      loadreg : tregister;
      pair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      if fromsize in [OS_32,OS_S32] then
        begin
          fromsize:=OS_F32;
          { since we are loading an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112801);
        end
      else if fromsize in [OS_64,OS_S64] then
        begin
          fromsize:=OS_F64;
          { since we are loading an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112901);
        end;

      { a conversion needs an intermediate register of the source size }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        begin
          handle_load_store(list,A_VLDR,PF_None,loadreg,ref);
        end
      else
        begin
          { go through integer registers for these alignments }
          case fromsize of
            OS_F32:
              begin
                wordreg:=getintregister(list,OS_32);
                a_load_ref_reg(list,OS_32,OS_32,ref,wordreg);
                a_loadmm_intreg_reg(list,OS_32,OS_F32,wordreg,loadreg,mms_movescalar);
              end;
            OS_F64:
              begin
                pair.reglo:=getintregister(list,OS_32);
                pair.reghi:=getintregister(list,OS_32);
                cg64.a_load64_ref_reg(list,ref,pair);
                cg64.a_loadmm_intreg64_reg(list,OS_F64,pair,loadreg);
              end;
            else
              internalerror(2009112412);
          end;
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
      maybe_check_for_fpu_exception(list);
    end;
  { Stores the scalar value held in mm register reg to memory (ref),
    converting from fromsize to tosize first if they differ.

    Integer destination sizes (32/64 bit) are treated as the float size of
    the same width and must then match fromsize. References with an
    alignment of 1 or 2 are written through integer registers, all others
    with VSTR. Non-scalar shuffles raise internalerror 2009112416. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      wordreg,
      storereg : tregister;
      pair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      if tosize in [OS_32,OS_S32] then
        begin
          tosize:=OS_F32;
          { since we are storing an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112801);
        end
      else if tosize in [OS_64,OS_S64] then
        begin
          tosize:=OS_F64;
          { since we are storing an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112901);
        end;

      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          { convert into a temporary register of the destination size }
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        begin
          handle_load_store(list,A_VSTR,PF_None,storereg,ref);
        end
      else
        begin
          { go through integer registers for these alignments }
          case tosize of
            OS_F32:
              begin
                wordreg:=getintregister(list,OS_32);
                a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,wordreg,shuffle);
                a_load_reg_ref(list,OS_32,OS_32,wordreg,ref);
              end;
            OS_F64:
              begin
                pair.reglo:=getintregister(list,OS_32);
                pair.reghi:=getintregister(list,OS_32);
                cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,pair);
                cg64.a_load64_reg_ref(list,pair,ref);
              end;
            else
              internalerror(2009112417);
          end;
        end;
      maybe_check_for_fpu_exception(list);
    end;
  { Transfers the raw 32 bit contents of the integer register intreg into
    the mm register mmreg using VMOV. No value conversion is performed:
    tosize has to be OS_F32, fromsize OS_32 or OS_S32 and shuffle, if
    assigned, scalar; otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { raw data transfer only, so reject anything needing a conversion }
      if tosize<>OS_F32 then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112516);
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
      maybe_check_for_fpu_exception(list);
    end;
  { Transfers the raw 32 bit contents of the mm register mmreg into the
    integer register intreg using VMOV. No value conversion is performed:
    fromsize has to be OS_F32, tosize OS_32 or OS_S32 and shuffle, if
    assigned, scalar; otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { raw data transfer only, so reject anything needing a conversion }
      if fromsize<>OS_F32 then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112514);
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
      maybe_check_for_fpu_exception(list);
    end;
  { Only supports clearing an mm register: op has to be OP_XOR with
    src=dst, a register of the given size and no shuffle. The register is
    set to zero by moving a zeroed integer register into it (one word for
    OS_F32, the same word twice for OS_F64). Everything else raises an
    internalerror. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op<>OP_XOR then
        internalerror(2009112906);

      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);

      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      if size=OS_F32 then
        list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
      else if size=OS_F64 then
        list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
      else
        internalerror(2009112908);
    end;
  { After a MUL, SHL, ADD, SUB or NEG on an 8 or 16 bit operand, reloads
    dst onto itself with a_load_reg_reg from OS_32 to size. For any other
    operation or size nothing is emitted.
    NOTE(review): presumably this re-normalises the upper bits of the
    32 bit register - confirm against a_load_reg_reg. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      widening_ops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
    begin
      if not(op in widening_ops) then
        exit;
      if size in [OS_8,OS_S8,OS_16,OS_S16] then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits "MLA op1,op2,op3,op4" after replacing every operand which is R15,
    or R13 when generating Thumb/Thumb-2 code, by a freshly allocated
    integer register holding a copy of it. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { swaps reg for a temporary copy if it is one of the special registers }
    procedure replace_special(var reg : TRegister);
      var
        copyreg : TRegister;
      begin
        if (getsupreg(reg)=RS_R15) or
           ((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) then
          begin
            copyreg:=getintregister(list,OS_INT);
            a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
            reg:=copyreg;
          end;
      end;

    begin
      replace_special(op1);
      replace_special(op2);
      replace_special(op3);
      replace_special(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { 64 bit operation with register source and destination.
    OP_NEG is emitted as RSBS/RSC against 0, OP_NOT as two 32 bit NOTs on
    the low and high halves; every other operation is forwarded to
    a_op64_reg_reg_reg with regdst as second source and as destination. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation of a constant on a register pair in place: forwards to
    a_op64_const_reg_reg with reg as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,
                           op,
                           size,
                           value,
                           reg,
                           reg);
    end;
  { 64 bit operation of a constant on regsrc with the result in regdst.
    Forwards to the overflow checking variant with setflags=false; the
    overflow location it reports is discarded. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      ignoredloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,ignoredloc);
    end;
  { 64 bit three operand operation on register pairs.
    Forwards to the overflow checking variant with setflags=false; the
    overflow location it reports is discarded. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      ignoredloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,ignoredloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into the
    mm register mmreg with a single three operand VMOV (low word first).
    mmsize has to be OS_F64, otherwise internalerror 2009112405 is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { raw data transfer only, conversions are not possible here }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      list.concat(
        taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
      cg.maybe_check_for_fpu_exception(list);
    end;
  { Transfers the raw contents of the mm register mmreg into the integer
    register pair intreg with a single three operand VMOV (low word first).
    mmsize has to be OS_F64, otherwise internalerror 2009112406 is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { raw data transfer only, conversions are not possible here }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      list.concat(
        taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
      cg.maybe_check_for_fpu_exception(list);
    end;
  { 64 bit operation regdst := regsrc op value with optional overflow
    reporting.

    list     - assembler list the instructions are appended to
    op       - OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG and OP_NOT
               raise internalerror 2012022501
    size     - operand size; only for OS_64 an overflow location is reported
    value    - the 64 bit constant, processed as low and high 32 bit halves
    setflags - request that the flags reflect the result of ADD/SUB
    ovloc    - set to LOC_VOID, or to LOC_FLAGS with the flag to test when
               flags were requested and size is OS_64

    Halves of the constant which are not valid shifter operands are loaded
    into a temporary register first.

    When flags were requested (setflags or cgsetflags) the flags register
    stays allocated on return. In the other ADD/SUB path the flags are only
    needed to carry between the low and the high word, so they are released
    again here, like in a_op64_reg_reg_reg_checkoverflow. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag setting variant: both instructions get the S postfix }
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the arm has a weird opinion how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { logical operations work on both halves independently }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed by ADC, release the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the borrow has been consumed by SBC, release the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { 64 bit operation on register pairs with optional overflow reporting.
    ADD computes regsrc1+regsrc2, SUB computes regsrc2-regsrc1; the result
    is written to regdst.

    op       - OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG and OP_NOT
               raise internalerror 2012022502, any other operation
               internalerror 2003083101
    setflags - request that the flags reflect the result of ADD/SUB
    ovloc    - set to LOC_VOID, or to LOC_FLAGS with the flag to test when
               flags were requested and size is OS_64

    When flags were requested (setflags or cgsetflags) the flags register
    stays allocated on return; otherwise it is released after the high
    word has been computed. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      if (op in [OP_ADD,OP_SUB]) and (setflags or tbasecgarm(cg).cgsetflags) then
        begin
          { flag setting variant: low and high word operation both get the
            S postfix }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          if op=OP_ADD then
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
            end
          else
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
            end;
          if size=OS_64 then
            begin
              { the arm has a weird opinion how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              if op=OP_ADD then
                ovloc.resflags:=F_CS
              else
                ovloc.resflags:=F_CC;
            end;
        end
      else if op in [OP_AND,OP_OR,OP_XOR] then
        begin
          { logical operations work on both halves independently }
          cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
          cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
        end
      else if op=OP_ADD then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
          list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_SUB then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        internalerror(2003083101);
    end;
  { Creates the Thumb integer register allocator after running the
    inherited initialisation. R0..R7 are made available, except that R7 is
    left out when the current procedure uses it as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo) and (current_procinfo.framepointer=NR_R7)) then
        { R7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 serves as frame pointer, keep it away from the allocator }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Frees the integer, FPU and MM register allocator objects and then lets the
    inherited implementation finish the clean-up. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      { release order: integer, FPU, MM }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits the Thumb procedure prologue into list.

    list         - assembler list receiving the instructions
    localsize    - requested size of the local stack area (rounded up to 4)
    nostackframe - when true no prologue code is generated at all

    The prologue pushes the used non-volatile integer registers plus R14
    (and R7 when a frame pointer is used), lowers the stack pointer by the
    final local size and, if a frame pointer is in use, copies SP into it. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);

          { save int registers: everything used by the procedure that is not
            volatile under the stdcall convention }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

          { disabled code kept from the original prologue (frame pointer case):
            //!!!! a_reg_alloc(list,NR_R12);
            //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); }

          { with a frame pointer R7 and R14 are saved as well; without one R14
            is still always saved }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);

          { safely estimate stack size: a large frame needs R4 as scratch
            register for the stack adjustment below, so make sure it is saved }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;

          { every pushed register occupies 4 bytes }
          registerarea:=0;
          if regs<>[] then
            begin
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      { diagnostic output before aborting }
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

              { 508 is the largest amount handled with a single SUB here; use
                <= so that a frame of exactly 508 bytes does not take the
                two-instruction path with a useless second "SUB sp,sp,#0"
                (g_proc_exit uses the same <=508 bound) }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4 }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  { regs is not read again after this point, so this does not
                    influence the PUSH emitted above }
                  include(regs,RS_R4);

                  //!!!! if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                  //!!!!   a_reg_alloc(list,NR_R12);
                  //!!!! a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  //!!!! list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  //!!!! a_reg_dealloc(list,NR_R12);
                end;
            end;

          { establish the frame pointer as a copy of the final stack pointer }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
            end;
        end;
    end;
  { Emits the Thumb procedure epilogue into list.

    Without a stack frame only a return through R14 is generated. Otherwise
    the local stack area is released and the saved registers are popped with
    R15 in the list, which performs the return. The stack release and the POP
    are only emitted when the frame pointer is the stack pointer or the target
    is a Darwin system. parasize is not used by this implementation. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      sr : byte;
      popregs : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      uses_stack_paras : Boolean;

    { return to the caller through the link register: BX if the selected cpu
      has it, MOV PC,R14 otherwise }
    procedure emit_return_via_lr;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      if nostackframe then
        begin
          emit_return_via_lr;
          exit;
        end;

      uses_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore; R15 is always part of the set so the POP below
        also loads the program counter }
      popregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(popregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(popregs,getsupreg(current_procinfo.framepointer));

      { four bytes per register in the set }
      savedbytes:=0;
      for sr:=RS_R0 to RS_R15 do
        if sr in popregs then
          inc(savedbytes,4);

      misalign:=savedbytes mod current_settings.alignment.localalignmax;
      framesize:=current_procinfo.calc_stackframe_size;
      if uses_stack_paras then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      { nothing more is emitted for a separate frame pointer on non-Darwin
        targets }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the locals; a zero-sized area needs no instruction at all }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      { popregs always contains R15 (see above), the empty case is kept for
        completeness }
      if popregs=[] then
        emit_return_via_lr
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,popregs));
    end;
  { Loads a value of size fromsize from Ref into reg (Thumb).

    If the source is at least as wide as the destination, the destination size
    is used for the load. References whose alignment (1 or 2) is smaller than
    the access size are read piecewise (bytes or halfwords) and combined with
    LSL/ORR; everything else goes through handle_load_store. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg : tregister;
      step,i : integer;

    { only complicated references need an extra loadaddr; a reference whose
      base or index is the destination register counts as complicated too,
      because the first partial load would overwrite it }
    function needs_address_load: boolean;
      begin
        result:=assigned(ref.symbol) or
          (ref.index<>NR_NO) or
          (ref.offset<-124) or
          (ref.offset>124) or
          { sometimes the compiler reused registers }
          (reg=ref.index) or
          (reg=ref.base);
      end;

    { makes partref either a copy of Ref or a plain [register] reference to
      the address of Ref }
    procedure setup_partref;
      var
        addrreg : tregister;
      begin
        if needs_address_load then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { advances partref, loads the next piece into partreg, shifts it into
      position and ORs it into reg }
    procedure merge_part(loadsize: tcgsize; offsetstep,shift: longint);
      begin
        inc(partref.offset,offsetstep);
        a_internal_load_ref_reg(list,loadsize,loadsize,partref,partreg);
        list.concat(taicpu.op_reg_const(A_LSL,partreg,shift));
        list.concat(taicpu.op_reg_reg(A_ORR,reg,partreg));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { instruction postfix selecting width and signedness of the load }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { the least significant piece is read first; on big endian targets
            it sits at the highest address, so walk backwards there }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                setup_partref;
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                { low byte, always unsigned }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { high byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  merge_part(OS_8,step,8)
                else
                  merge_part(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                setup_partref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_part(OS_16,step*2,16);
                  end
                else
                  begin
                    { four single bytes, shifted by 8, 16 and 24 bits }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for i:=1 to 3 do
                      merge_part(OS_8,step,8*i);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg (Thumb).
    Values accepted by is_thumb_imm are moved directly; any other value is
    stored as a 32 bit constant in the procedure's local data and fetched with
    a PC-relative LDR. Only 8, 16 and 32 bit sizes are accepted. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      { simple case: the value fits an immediate MOV }
      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { place the value behind a fresh label in the local data ... }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      { ... and load it relative to the program counter }
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef, wherever the
    caller-side parameter location places it (register or memory).
    Offsets that is_thumb_imm rejects are loaded into R4 from the local data;
    R4 is pushed before and popped after that use. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      ploc : Pcgparalocation;

    { pushes R4 and loads ioffset into it through a PC-relative LDR from an
      aligned 32 bit constant appended to the local data; the caller has to
      pop R4 again }
    procedure push_r4_and_load_offset;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(200305251);

      { walk all locations the self parameter is spread over }
      ploc:=tparavarsym(selfsym).paraloc[callerside].location;
      while ploc<>nil do
        begin
          case ploc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,ploc^.size,ioffset,ploc^.register)
                else
                  begin
                    push_r4_and_load_offset;
                    a_op_reg_reg(list,OP_SUB,ploc^.size,NR_R4,ploc^.register);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,ploc^.reference.index,ploc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,ploc^.size,ioffset,selfref)
                else
                  begin
                    { NOTE(review): if the parameter reference is relative to
                      the stack pointer, the PUSH below has already moved SP
                      when selfref is used - confirm the offset is still right }
                    push_r4_and_load_offset;
                    a_op_reg_ref(list,OP_SUB,ploc^.size,NR_R4,selfref);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end
            else
              internalerror(200309189);
          end;
          ploc:=ploc^.next;
        end;
    end;
  { Thumb front end for the inherited handle_load_store.
    When the reference falls outside the limits checked below, its address is
    first computed into a fresh register and a plain [register] reference is
    passed on instead. Returns whatever the inherited implementation returns. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      href : treference;
      addrreg : TRegister;
      offs_magnitude : tcgint;
      is_word,is_byte,is_half,touches_sp,via_register : boolean;
    begin
      href:=ref;
      offs_magnitude:=abs(ref.offset);

      { classify the access by opcode/postfix combination }
      is_word:=(oppostfix=PF_None) and ((op=A_LDR) or (op=A_STR));
      is_byte:=((oppostfix=PF_B) and ((op=A_LDR) or (op=A_STR))) or
               ((oppostfix=PF_None) and ((op=A_LDRB) or (op=A_STRB)));
      is_half:=((oppostfix=PF_H) and ((op=A_LDR) or (op=A_STR))) or
               ((oppostfix=PF_None) and ((op=A_LDRH) or (op=A_STRH)));
      touches_sp:=(ref.base=NR_STACK_POINTER_REG) or
                  (ref.index=NR_STACK_POINTER_REG);

      via_register:=
        { LDR/STR limitations }
        (is_word and
         (ref.base<>NR_STACK_POINTER_REG) and
         (offs_magnitude>124)) or
        { LDRB/STRB limitations }
        (is_byte and
         (touches_sp or (offs_magnitude>31))) or
        { LDRH/STRH limitations }
        (is_half and
         (touches_sp or
          (offs_magnitude>62) or
          ((offs_magnitude mod 2)<>0))) or
        { stack pointer relative word loads }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (offs_magnitude>1020)) or
        { sign extending loads, and any remaining LDR with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (offs_magnitude>124)));

      if via_register then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(href,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  { Emits "dst := dst op src" for Thumb (for OP_NEG/OP_NOT: dst := op src).

    list - assembler list receiving the instructions
    op   - operation; OP_DIV/OP_IDIV are not supported and raise an
           internal error
    size - operand size; OP_ROL is only supported for 32 bit operands
    src  - source register
    dst  - destination register (also left operand for binary operations)

    The result is passed to maybeadjustresult afterwards. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value: tmpreg := 32 - src }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the computed complement; rotating by src itself would
              perform a ROR instead of the requested ROL and leave tmpreg
              unused }
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            { the generic two-operand instruction is emitted with the postfix
              from op_reg_postfix, so mark the flags register as in use }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := dst op a" for Thumb.

    ADD/SUB whose negated constant is a Thumb immediate are flipped to the
    opposite operation; ADD/SUB with an immediate constant are emitted
    directly. A few trivial constants (multiply/divide by 1, multiply by 0,
    signed multiply/divide by -1, AND with 0 or with all bits set) and shifts
    get special treatment; everything else loads the constant into a temporary
    register and uses a_op_reg_reg. The sections guarded by DUMMY are disabled
    code and kept unchanged. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;

      { x+(-c) -> x-c and x-(-c) -> x+c when c is an immediate
        (disabled extra condition: not setflags) }
      if {$ifopt R+}(a<>-2147483648) and{$endif} is_thumb_imm(-a) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));

          { overflow location tracking is disabled here:
            //!!! if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
            //!!!   ovloc.loc:=LOC_FLAGS;
            //!!!   OP_ADD: ovloc.resflags:=F_CS;
            //!!!   OP_SUB: ovloc.resflags:=F_CC; }
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (a=1) and (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (a=0) and (op in [OP_MUL,OP_IMUL]) then
            a_load_const_reg(list,size,0,dst)
          else if (a=-1) and (op in [OP_IMUL,OP_IDIV]) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF leaves the register unchanged, so emit nothing }
          else if (op = OP_AND) and (not(dword(a))=0) then
            // do nothing
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              { shifts take the amount as immediate operand }
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { general case: constant goes through a temporary register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg(list,op,size,constreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src op a" for Thumb.
    An addition of a positive multiple of 4 (at most 1020) to R13 with a
    destination other than R13 is emitted as a single three-operand ADD; all
    other cases are left to the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_plus_const : boolean;
    begin
      sp_plus_const:=(op=OP_ADD) and
        (src=NR_R13) and (dst<>NR_R13) and
        ((a mod 4)=0) and (a>0) and (a<=1020);

      if not(sp_plus_const) then
        begin
          inherited a_op_const_reg_reg(list,op,size,a,src,dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition described by f in reg: reg becomes 1 when the
    condition holds and 0 otherwise. Implemented with a conditional branch
    around two MOV instructions; size is not used. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      lbl_true,lbl_done : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(lbl_true);
      current_asmdata.getjumplabel(lbl_done);

      { branch to lbl_true if the condition is fulfilled }
      condjump:=setcondition(taicpu.op_sym(A_B,lbl_true),flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition false: reg:=0, skip the other assignment }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,lbl_done));

      { condition true: reg:=1 }
      cg.a_label(list,lbl_true);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lbl_done);
    end;
  { Creates the integer, FPU and MM register allocators for Thumb-2 code.
    The integer set omits R9 on iOS; the MM register set depends on the
    selected FPU type. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_ios then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      case current_settings.fputype of
        fpu_vfpv3,
        fpu_vfpv4:
          { D0..D31, with D8..D15 listed last }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        fpu_fpv4_s16,
        fpu_vfpv3_d16:
          { only D0..D15 }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        else
          { NOTE(review): this list mixes RS_S* and RS_R* constants exactly as
            before - confirm that the RS_R* entries are intended }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
      end;
    end;
  { Frees the three register allocators created by init_register_allocators
    and then calls the inherited clean-up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { release order: integer, FPU, MM }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX) and records in the current
    procedure's flags that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      list.concat(taicpu.op_reg(A_BLX, reg));

      { the compiler does not properly set pi_do_call anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); hence the flag is
        set here instead of being checked:

          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg (Thumb-2).
    Tried in this order: MOV with an immediate accepted by is_thumb32_imm,
    MVN with the complemented value, MOVW for values that fit in 16 bits, and
    finally a PC-relative LDR from a 32 bit constant placed in the procedure's
    local data. Only 8, 16 and 32 bit sizes are accepted. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { no bits set above the lower 16 }
      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { fall back to a literal in the local data }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg (Thumb-2).

    If the source is at least as wide as the destination, the destination size
    is used for the load. References whose alignment (1 or 2) is smaller than
    the access size are read piecewise and combined with ORR using an LSL
    shifter operand; everything else goes through handle_load_store. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg : tregister;
      step,i : integer;

    { makes partref either a copy of Ref or, for complicated references, a
      plain [register] reference to the address of Ref; maxoffset is the
      largest positive offset still used directly }
    procedure setup_partref(maxoffset: longint);
      var
        addrreg : tregister;
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-255) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { advances partref, loads the next piece into partreg and ORs it,
      shifted left by shift bits, into reg }
    procedure merge_part(loadsize: tcgsize; offsetstep: longint; shift: byte);
      var
        so : tshifterop;
      begin
        inc(partref.offset,offsetstep);
        a_internal_load_ref_reg(list,loadsize,loadsize,partref,partreg);
        shifterop_reset(so);
        so.shiftmode:=SM_LSL;
        so.shiftimm:=shift;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,so));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { instruction postfix selecting width and signedness of the load }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { the least significant piece is read first; on big endian targets
            it sits at the highest address, so walk backwards there }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                setup_partref(4094);
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                { low byte, always unsigned }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { high byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  merge_part(OS_8,step,8)
                else
                  merge_part(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                setup_partref(4092);
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_part(OS_16,step*2,16);
                  end
                else
                  begin
                    { four single bytes, shifted by 8, 16 and 24 bits }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for i:=1 to 3 do
                      merge_part(OS_8,step,8*i);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Emits "dst := dst op src" for Thumb-2.
    Only OP_NOT is handled here: MVN followed by a zero or sign extension of
    the result for 8 and 16 bit sizes. All other operations are delegated to
    the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { bring the inverted value back into the range of the operand size;
        other sizes need no adjustment }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
      end;
    end;
  // Thumb-2: emits code for "dst := src <op> a" where a is a constant.
  //   list     - instruction list the code is appended to
  //   op       - operation; OP_NEG/OP_NOT/OP_DIV/OP_IDIV raise internalerror(200308285)
  //              when a is an encodable shifter constant
  //   size     - operand size, also used for temporaries and the final maybeadjustresult
  //   a        - the constant operand
  //   setflags - request the flag-setting instruction form (cgsetflags has the same effect)
  //   ovloc    - reset to LOC_VOID on entry; set to LOC_FLAGS only on the immediate
  //              ADD/SUB path when flags were requested and size is OS_8/OS_16/OS_32
  //              (the register fallback forwards ovloc to a_op_reg_reg_reg_checkoverflow)
  4062. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4063. var
  4064. shift, width : byte;
  4065. tmpreg : tregister;
  4066. so : tshifterop;
  4067. l1 : longint;
  4068. begin
  4069. ovloc.loc:=LOC_VOID;
  // if -a is encodable as a shifter constant, swap ADD<->SUB and negate the constant;
  // with range checking enabled the value -2147483648 is excluded from this rewrite
  4070. if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
  4071. case op of
  4072. OP_ADD:
  4073. begin
  4074. op:=OP_SUB;
  4075. a:=aint(dword(-a));
  4076. end;
  4077. OP_SUB:
  4078. begin
  4079. op:=OP_ADD;
  4080. a:=aint(dword(-a));
  4081. end
  4082. end;
  // first main path: a is an encodable shifter constant and op is not a multiplication
  4083. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4084. case op of
  4085. OP_NEG,OP_NOT,
  4086. OP_DIV,OP_IDIV:
  4087. internalerror(200308285);
  // shifts and rotates are emitted as "MOV dst,src" with a shifter operand;
  // a shift count of 0 degenerates to a plain register MOV
  4088. OP_SHL:
  4089. begin
  4090. if a>32 then
  4091. internalerror(2014020703);
  4092. if a<>0 then
  4093. begin
  4094. shifterop_reset(so);
  4095. so.shiftmode:=SM_LSL;
  4096. so.shiftimm:=a;
  4097. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4098. end
  4099. else
  4100. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4101. end;
  // rotate left by a is emitted as rotate right by 32-a
  // NOTE(review): a=32 passes the range check and yields shiftimm 0 here - confirm this is intended
  4102. OP_ROL:
  4103. begin
  4104. if a>32 then
  4105. internalerror(2014020704);
  4106. if a<>0 then
  4107. begin
  4108. shifterop_reset(so);
  4109. so.shiftmode:=SM_ROR;
  4110. so.shiftimm:=32-a;
  4111. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4112. end
  4113. else
  4114. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4115. end;
  4116. OP_ROR:
  4117. begin
  4118. if a>32 then
  4119. internalerror(2014020705);
  4120. if a<>0 then
  4121. begin
  4122. shifterop_reset(so);
  4123. so.shiftmode:=SM_ROR;
  4124. so.shiftimm:=a;
  4125. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4126. end
  4127. else
  4128. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4129. end;
  4130. OP_SHR:
  4131. begin
  4132. if a>32 then
  4133. internalerror(200308292);
  4134. shifterop_reset(so);
  4135. if a<>0 then
  4136. begin
  4137. so.shiftmode:=SM_LSR;
  4138. so.shiftimm:=a;
  4139. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4140. end
  4141. else
  4142. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4143. end;
  4144. OP_SAR:
  4145. begin
  4146. if a>32 then
  4147. internalerror(200308295);
  4148. if a<>0 then
  4149. begin
  4150. shifterop_reset(so);
  4151. so.shiftmode:=SM_ASR;
  4152. so.shiftimm:=a;
  4153. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4154. end
  4155. else
  4156. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4157. end;
  // all remaining operations: ADD/SUB with a<0 or a>4095 load the constant into a
  // temporary register first, everything else uses the immediate instruction form;
  // the PF_S postfix is applied when cgsetflags or setflags is true
  4158. else
  4159. if (op in [OP_SUB, OP_ADD]) and
  4160. ((a < 0) or
  4161. (a > 4095)) then
  4162. begin
  4163. tmpreg:=getintregister(list,size);
  4164. a_load_const_reg(list, size, a, tmpreg);
  4165. if cgsetflags or setflags then
  4166. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4167. list.concat(setoppostfix(
  4168. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4169. end
  4170. else
  4171. begin
  4172. if cgsetflags or setflags then
  4173. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4174. list.concat(setoppostfix(
  4175. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4176. end;
  // report the result flags to the caller: F_CS after ADD, F_CC after SUB;
  // for other operations only ovloc.loc is set here
  4177. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4178. begin
  4179. ovloc.loc:=LOC_FLAGS;
  4180. case op of
  4181. OP_ADD:
  4182. ovloc.resflags:=F_CS;
  4183. OP_SUB:
  4184. ovloc.resflags:=F_CC;
  4185. end;
  4186. end;
  4187. end
  // second main path: the constant is not encodable, or op is a multiplication
  4188. else
  4189. begin
  4190. { there could be added some more sophisticated optimizations }
  4191. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4192. a_load_reg_reg(list,size,size,src,dst)
  4193. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4194. a_load_const_reg(list,size,0,dst)
  4195. else if (op in [OP_IMUL]) and (a=-1) then
  4196. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4197. { we do this here instead in the peephole optimizer because
  4198. it saves us a register }
  4199. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4200. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4201. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4202. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4203. begin
  4204. if l1>32 then{roozbeh does this ever happen?}
  4205. internalerror(200308296);
  4206. shifterop_reset(so);
  4207. so.shiftmode:=SM_LSL;
  4208. so.shiftimm:=l1;
  4209. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4210. end
  4211. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4212. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4213. begin
  4214. if l1>32 then{does this ever happen?}
  4215. internalerror(201205181);
  4216. shifterop_reset(so);
  4217. so.shiftmode:=SM_LSL;
  4218. so.shiftimm:=l1;
  4219. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4220. end
  // other multiplications without flag requirements are offered to
  // try_optimized_mul32_const_reg_reg, which emits the code itself when it returns true
  4221. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4222. begin
  4223. { nothing to do on success }
  4224. end
  4225. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4226. Just using mov x, #0 might allow some easier optimizations down the line. }
  4227. else if (op = OP_AND) and (dword(a)=0) then
  4228. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4229. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4230. else if (op = OP_AND) and (not(dword(a))=0) then
  4231. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4232. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4233. broader range of shifterconstants.}
  4234. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4235. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  // AND with a constant accepted by is_thumb32_imm is emitted directly
  4236. else if (op = OP_AND) and is_thumb32_imm(a) then
  4237. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  // AND with $FFFF is emitted as UXTH
  4238. else if (op = OP_AND) and (a = $FFFF) then
  4239. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  // if the inverted mask is accepted by is_thumb32_imm, use BIC with the inverted constant
  4240. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4241. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  // if the inverted mask is one contiguous run of bits, copy src to dst and clear that run with BFC
  4242. else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
  4243. begin
  4244. a_load_reg_reg(list,size,size,src,dst);
  4245. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4246. end
  // general fallback: load the constant into a temporary and use the register/register form
  4247. else
  4248. begin
  4249. tmpreg:=getintregister(list,size);
  4250. a_load_const_reg(list,size,a,tmpreg);
  4251. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4252. end;
  4253. end;
  4254. maybeadjustresult(list,op,size,dst);
  4255. end;
  // Lookup table from a TOpCG value to the Thumb-2 opcode that
  // tthumb2cgarm.a_op_reg_reg_reg_checkoverflow emits for the plain
  // three-register form of that operation.
  // NOTE(review): the entries must follow the declaration order of TOpCG, which
  // is not visible here; A_NONE presumably marks operations without a direct
  // opcode - confirm against the TOpCG declaration.
  4256. const
  4257. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4258. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4259. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Thumb-2: emits code for "dst := src2 <op> src1" on registers.

      list      - instruction list the code is appended to
      op        - operation; OP_NEG and OP_NOT raise internalerror(200308286)
      size      - operand size; OP_ROL/OP_ROR accept only OS_32/OS_S32
      src1,src2 - source registers; they are only written when they alias dst
      dst       - destination register
      setflags  - request the flag-setting form (cgsetflags has the same effect)
      ovloc     - reset to LOC_VOID on entry; for OP_MUL/OP_IMUL with flags
                  requested it becomes LOC_FLAGS/F_NE after the high word of the
                  64 bit product has been compared

    maybeadjustresult is called on dst at the end. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing by (32-src1); the rotate count is built in
              the scratch register so that the caller's src1 is not overwritten }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { flags requested: compute the full 64 bit product, the high
                  word goes to overflowreg and is checked below }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: the operand has to be copied into the
                          scratch register that is used as rm below }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word is compared with dst asr 31 }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word is compared with 0 }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: copy the operand into the scratch
                          register that is used as rm below }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { plain three-register form; PF_S is applied when flags were requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Converts the flag condition f into a 0/1 value in reg.

    An ITE instruction for the condition is followed by two conditional moves:
    "MOV reg,#1" under the condition itself and "MOV reg,#0" under the inverse
    condition. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      set_one,
      set_zero : taicpu;
    begin
      list.concat(taicpu.op_cond(A_ITE, flags_to_cond(f)));
      { "then" slot of the IT block }
      set_one:=taicpu.op_reg_const(A_MOV,reg,1);
      list.concat(setcondition(set_one,flags_to_cond(f)));
      { "else" slot of the IT block }
      set_zero:=taicpu.op_reg_const(A_MOV,reg,0);
      list.concat(setcondition(set_zero,inverse_cond(flags_to_cond(f))));
    end;
  // Emits the procedure prologue for Thumb-2 into list.
  //   localsize    - size of the local area; aligned to 4 on entry and later padded
  //                  to current_settings.alignment.localalignmax
  //   nostackframe - when true nothing is emitted at all
  // Steps, in order: determine the non-volatile FPU registers used, set up R12 and
  // the frame pointer when a frame pointer is in use, store the integer registers
  // with STM (PF_FD) through the stack pointer, subtract the local size from the
  // stack pointer and finally store the FPU registers with SFM.
  4375. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4376. var
  4377. ref : treference;
  4378. shift : byte;
  4379. firstfloatreg,lastfloatreg,
  4380. r : byte;
  4381. regs : tcpuregisterset;
  4382. stackmisalignment: pint;
  4383. begin
  4384. LocalSize:=align(LocalSize,4);
  4385. { call instruction does not put anything on the stack }
  4386. stackmisalignment:=0;
  4387. if not(nostackframe) then
  4388. begin
  4389. firstfloatreg:=RS_NO;
  4390. lastfloatreg:=RS_NO;
  4391. { save floating point registers? }
  // find the first and last used, non-volatile FPU register; each one found
  // adds 12 bytes to the running stack misalignment
  4392. for r:=RS_F0 to RS_F7 do
  4393. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4394. begin
  4395. if firstfloatreg=RS_NO then
  4396. firstfloatreg:=r;
  4397. lastfloatreg:=r;
  4398. inc(stackmisalignment,12);
  4399. end;
  4400. a_reg_alloc(list,NR_STACK_POINTER_REG);
  // with a separate frame pointer, R12 receives a copy of the incoming stack pointer
  4401. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4402. begin
  4403. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4404. a_reg_alloc(list,NR_R12);
  4405. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4406. end;
  4407. { save int registers }
  4408. reference_reset(ref,4,[]);
  4409. ref.index:=NR_STACK_POINTER_REG;
  4410. ref.addressmode:=AM_PREINDEXED;
  // registers to store: used non-volatile integer registers, plus frame pointer and R14
  // when a frame pointer is used, or plus R14 when anything is saved or calls are made
  4411. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4412. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4413. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4414. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4415. include(regs,RS_R14);
  4416. if regs<>[] then
  4417. begin
  // every stored register adds 4 bytes to the stack misalignment
  4418. for r:=RS_R0 to RS_R15 do
  4419. if (r in regs) then
  4420. inc(stackmisalignment,4);
  4421. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4422. end;
  4423. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4424. begin
  4425. { the framepointer now points to the saved R15, so the saved
  4426. framepointer is at R11-12 (for get_caller_frame) }
  4427. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4428. a_reg_dealloc(list,NR_R12);
  4429. end;
  4430. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  // reserve the locals; with LocalSize=0 this still runs for a misaligned stack when the
  // procedure makes calls or is an assembler procedure
  4431. if (LocalSize<>0) or
  4432. ((stackmisalignment<>0) and
  4433. ((pi_do_call in current_procinfo.flags) or
  4434. (po_assembler in current_procinfo.procdef.procoptions))) then
  4435. begin
  4436. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  // a size that is not an encodable shifter constant is loaded into R12 first
  // NOTE(review): with a frame pointer R12 was already deallocated above and is not
  // re-allocated on this path before being used - confirm this is intended
  4437. if not(is_shifter_const(localsize,shift)) then
  4438. begin
  4439. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4440. a_reg_alloc(list,NR_R12);
  4441. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4442. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4443. a_reg_dealloc(list,NR_R12);
  4444. end
  4445. else
  4446. begin
  // NOTE(review): R12 is deallocated here although nothing on this path holds an
  // allocation of it (never allocated without a frame pointer, already released
  // above with one) - confirm this is intended
  4447. a_reg_dealloc(list,NR_R12);
  4448. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4449. end;
  4450. end;
  // store the FPU registers found above with one SFM; when
  // tg.direction*floatregstart is 1023 or more the address is computed into R12,
  // otherwise frame pointer + floatregstart is used directly
  4451. if firstfloatreg<>RS_NO then
  4452. begin
  4453. reference_reset(ref,4,[]);
  4454. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4455. begin
  4456. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4457. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4458. ref.base:=NR_R12;
  4459. end
  4460. else
  4461. begin
  4462. ref.base:=current_procinfo.framepointer;
  4463. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4464. end;
  4465. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4466. lastfloatreg-firstfloatreg+1,ref));
  4467. end;
  4468. end;
  4469. end;
  // Emits the procedure epilogue for Thumb-2 into list.
  //   parasize     - not referenced in this routine
  //   nostackframe - when true only "BX R14" is emitted
  // Steps, in order: reload the non-volatile FPU registers with LFM, add the local
  // size back onto the stack pointer, then either return with "BX R14" (nothing was
  // saved) or reload the saved integer registers with LDM (PF_FD), with R15 in the
  // register set in place of R14.
  4470. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4471. var
  4472. ref : treference;
  4473. firstfloatreg,lastfloatreg,
  4474. r : byte;
  4475. shift : byte;
  4476. regs : tcpuregisterset;
  4477. LocalSize : longint;
  4478. stackmisalignment: pint;
  4479. begin
  4480. if not(nostackframe) then
  4481. begin
  4482. stackmisalignment:=0;
  4483. { restore floating point register }
  4484. firstfloatreg:=RS_NO;
  4485. lastfloatreg:=RS_NO;
  4486. { save floating point registers? }
  // find the first and last used, non-volatile FPU register
  4487. for r:=RS_F0 to RS_F7 do
  4488. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4489. begin
  4490. if firstfloatreg=RS_NO then
  4491. firstfloatreg:=r;
  4492. lastfloatreg:=r;
  4493. { floating point register space is already included in
  4494. localsize below by calc_stackframe_size
  4495. inc(stackmisalignment,12);
  4496. }
  4497. end;
  // reload the FPU registers with one LFM; when tg.direction*floatregstart is 1023
  // or more the address is computed into R12, otherwise frame pointer +
  // floatregstart is used directly
  4498. if firstfloatreg<>RS_NO then
  4499. begin
  4500. reference_reset(ref,4,[]);
  4501. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4502. begin
  4503. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4504. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4505. ref.base:=NR_R12;
  4506. end
  4507. else
  4508. begin
  4509. ref.base:=current_procinfo.framepointer;
  4510. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4511. end;
  4512. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4513. lastfloatreg-firstfloatreg+1,ref));
  4514. end;
  // registers to reload: used non-volatile integer registers; when anything was saved
  // or calls are made, R14 is replaced by R15 in the set
  4515. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4516. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4517. begin
  4518. exclude(regs,RS_R14);
  4519. include(regs,RS_R15);
  4520. end;
  4521. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4522. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
  // every register in the set adds 4 bytes to the stack misalignment
  4523. for r:=RS_R0 to RS_R15 do
  4524. if (r in regs) then
  4525. inc(stackmisalignment,4);
  4526. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4527. LocalSize:=current_procinfo.calc_stackframe_size;
  // release the locals, using the same condition and the same size computation as g_proc_entry
  4528. if (LocalSize<>0) or
  4529. ((stackmisalignment<>0) and
  4530. ((pi_do_call in current_procinfo.flags) or
  4531. (po_assembler in current_procinfo.procdef.procoptions))) then
  4532. begin
  4533. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  // a size that is not an encodable shifter constant is loaded into R12 first
  4534. if not(is_shifter_const(LocalSize,shift)) then
  4535. begin
  4536. a_reg_alloc(list,NR_R12);
  4537. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4538. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4539. a_reg_dealloc(list,NR_R12);
  4540. end
  4541. else
  4542. begin
  // NOTE(review): R12 is deallocated here without a matching a_reg_alloc in this
  // routine - confirm this is intended
  4543. a_reg_dealloc(list,NR_R12);
  4544. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4545. end;
  4546. end;
  // nothing to reload: return through R14; otherwise the LDM below reloads the
  // register set built above through the stack pointer
  4547. if regs=[] then
  4548. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4549. else
  4550. begin
  4551. reference_reset(ref,4,[]);
  4552. ref.index:=NR_STACK_POINTER_REG;
  4553. ref.addressmode:=AM_PREINDEXED;
  4554. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4555. end;
  4556. end
  4557. else
  4558. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4559. end;
  // Emits the load/store instruction op (with postfix oppostfix) for register reg and
  // reference ref. ref is a by-value copy that is rewritten step by step before the
  // instruction is emitted (symbol or out-of-range offset moved into a temporary
  // register, base+index+offset folded, PC base replaced, index folded for FPU/VFP
  // instructions).
  // Returns the rewritten reference that was used in the emitted instruction.
  4560. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4561. var
  4562. tmpreg : tregister;
  4563. tmpref : treference;
  4564. l : tasmlabel;
  4565. begin
  4566. tmpreg:=NR_NO;
  4567. { Be sure to have a base register }
  // an index-only reference is turned into a base-only one; a shifted index without
  // a base is rejected with an internal error
  4568. if (ref.base=NR_NO) then
  4569. begin
  4570. if ref.shiftmode<>SM_None then
  4571. internalerror(2014020706);
  4572. ref.base:=ref.index;
  4573. ref.index:=NR_NO;
  4574. end;
  4575. { absolute symbols can't be handled directly, we've to store the symbol reference
  4576. in the text segment and access it pc relative
  4577. For now, we assume that references where base or index equals to PC are already
  4578. relative, all other references are assumed to be absolute and thus they need
  4579. to be handled extra.
  4580. A proper solution would be to change refoptions to a set and store the information
  4581. if the symbol is absolute or relative there.
  4582. }
  // the rewrite below is taken when any of these holds: a symbol without PC as base
  // or index; neither base nor index; offset below -255 or above 4095; offset outside
  // -255..255 for the SB/H/SH postfixes; for the FPU/VFP opcodes an offset outside
  // -1020..1020, an offset that is not a multiple of 4, or any symbol
  4583. if (assigned(ref.symbol) and
  4584. not(is_pc(ref.base)) and
  4585. not(is_pc(ref.index))
  4586. ) or
  4587. { [#xxx] isn't a valid address operand }
  4588. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4589. //(ref.offset<-4095) or
  4590. (ref.offset<-255) or
  4591. (ref.offset>4095) or
  4592. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4593. ((ref.offset<-255) or
  4594. (ref.offset>255)
  4595. )
  4596. ) or
  4597. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4598. ((ref.offset<-1020) or
  4599. (ref.offset>1020) or
  4600. ((abs(ref.offset) mod 4)<>0) or
  4601. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4602. assigned(ref.symbol)
  4603. )
  4604. ) then
  4605. begin
  4606. reference_reset(tmpref,4,[]);
  4607. { load symbol }
  4608. tmpreg:=getintregister(list,OS_INT);
  // with a symbol: a labelled constant holding symbol+offset is appended to
  // current_procinfo.aktlocaldata and loaded into tmpreg relative to R15
  4609. if assigned(ref.symbol) then
  4610. begin
  4611. current_asmdata.getjumplabel(l);
  4612. cg.a_label(current_procinfo.aktlocaldata,l);
  4613. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4614. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4615. { load consts entry }
  4616. tmpref.symbol:=l;
  4617. tmpref.base:=NR_R15;
  4618. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4619. { in case of LDF/STF, we got rid of the NR_R15 }
  4620. if is_pc(ref.base) then
  4621. ref.base:=NR_NO;
  4622. if is_pc(ref.index) then
  4623. ref.index:=NR_NO;
  4624. end
  // without a symbol: only the offset is loaded into tmpreg
  4625. else
  4626. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
  // merge tmpreg into the reference: added to the base when an index exists, used as
  // the (unshifted, positive) index when only a base exists, or used as the base itself
  4627. if (ref.base<>NR_NO) then
  4628. begin
  4629. if ref.index<>NR_NO then
  4630. begin
  4631. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4632. ref.base:=tmpreg;
  4633. end
  4634. else
  4635. begin
  4636. ref.index:=tmpreg;
  4637. ref.shiftimm:=0;
  4638. ref.signindex:=1;
  4639. ref.shiftmode:=SM_None;
  4640. end;
  4641. end
  4642. else
  4643. ref.base:=tmpreg;
  4644. ref.offset:=0;
  4645. ref.symbol:=nil;
  4646. end;
  // base, index and a non-zero offset together: the offset is added into a register
  // so that the final reference carries no offset
  4647. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4648. begin
  4649. if tmpreg<>NR_NO then
  4650. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4651. else
  4652. begin
  4653. tmpreg:=getintregister(list,OS_ADDR);
  4654. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4655. ref.base:=tmpreg;
  4656. end;
  4657. ref.offset:=0;
  4658. end;
  4659. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  // R15 as base together with a shifted index: R15 is copied into a fresh register
  // which then replaces the base
  4660. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4661. begin
  4662. tmpreg:=getintregister(list,OS_ADDR);
  4663. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4664. ref.base := tmpreg;
  4665. end;
  4666. { floating point operations have only limited references
  4667. we expect here, that a base is already set }
  // for the FPU/VFP opcodes a remaining index is folded into the base register with
  // ADD or, for a negative signindex, SUB; a shifted index raises an internal error
  4668. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4669. begin
  4670. if ref.shiftmode<>SM_none then
  4671. internalerror(200309121);
  4672. if tmpreg<>NR_NO then
  4673. begin
  4674. if ref.base=tmpreg then
  4675. begin
  4676. if ref.signindex<0 then
  4677. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4678. else
  4679. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4680. ref.index:=NR_NO;
  4681. end
  4682. else
  4683. begin
  4684. if ref.index<>tmpreg then
  4685. internalerror(200403161);
  4686. if ref.signindex<0 then
  4687. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4688. else
  4689. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4690. ref.base:=tmpreg;
  4691. ref.index:=NR_NO;
  4692. end;
  4693. end
  4694. else
  4695. begin
  // NOTE(review): unlike the two branches above, this one always uses ADD and does
  // not look at ref.signindex - confirm a negative index cannot reach this point
  4696. tmpreg:=getintregister(list,OS_ADDR);
  4697. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4698. ref.base:=tmpreg;
  4699. ref.index:=NR_NO;
  4700. end;
  4701. end;
  // emit the actual load/store with the rewritten reference and hand that reference back
  4702. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4703. Result := ref;
  4704. end;
  { Copies one multimedia register into another (reg2 := reg1).

    Only the OS_F32 -> OS_F32 case emits code: a VMOV with the F32 postfix,
    which is also registered via add_move_instruction and followed by
    maybe_check_for_fpu_exception.  The OS_F64 -> OS_F64 and OS_F32 -> OS_F64
    cases are present in the original only as disabled VMOV/VCVT code and,
    like every other size combination, emit nothing. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      move_instr: taicpu;
    begin
      { anything but single -> single is currently a silent no-op }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;
      move_instr:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
      move_instr:=setoppostfix(move_instr, PF_F32);
      list.Concat(move_instr);
      add_move_instruction(move_instr);
      maybe_check_for_fpu_exception(list);
    end;
  { Loads multimedia register reg from memory reference ref.

    The work is done by handle_load_store with the VLDR opcode and no postfix;
    fromsize, tosize and shuffle are not consulted, and the reference returned
    by handle_load_store is discarded. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list, A_VLDR, PF_None, reg, ref);
    end;
  { Stores multimedia register reg to memory reference ref.

    The store is emitted by handle_load_store with the VSTR opcode and no
    postfix, followed by maybe_check_for_fpu_exception; fromsize, tosize and
    shuffle are not consulted. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(list, A_VSTR, PF_None, reg, ref);
      maybe_check_for_fpu_exception(list);
    end;
  { Moves integer register intreg into multimedia register mmreg with VMOV.

    Only tosize=OS_F32 is supported; any other target size raises
    internalerror(2012100813).  The shuffle argument is not checked (the
    "shuffle=nil" test is disabled in the original as well). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves multimedia register mmreg into integer register intreg with VMOV,
    followed by maybe_check_for_fpu_exception.

    Only fromsize=OS_F32 is supported; any other source size raises
    internalerror(2012100814).  The shuffle argument is not checked (the
    "shuffle=nil" test is disabled in the original as well). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
          maybe_check_for_fpu_exception(list);
        end;
    end;
  { 64 bit register/register operation for Thumb-2.

    OP_NEG is emitted here: "RSB(S) regdst.reglo,regsrc.reglo,#0" for the low
    word, then a scratch register is set to 0 and "SBC regdst.reghi,scratch,
    regsrc.reghi" produces the high word.  The flags are marked allocated
    around the sequence.  Every other operation is handled by the inherited
    implementation. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;
      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: 0 - lo, setting the flags for the SBC below }
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { high word: 0 - hi, taking the flags of the RSB into account }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation for Thumb, built from two-operand
    instructions on the low and high halves of the register pairs.

      list   - instruction list the code is appended to
      op     - OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; anything
               else raises internalerror(2003083101)
      size   - not consulted here
      regsrc - source register pair
      regdst - destination register pair (also the left operand for the
               two-operand ADD/ADC and SUB/SBC forms)

    Every sequence that passes a carry/borrow from the low to the high word
    brackets itself with a_reg_alloc/a_reg_dealloc on NR_DEFAULTFLAGS. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { regdst := 0 - regsrc.
              NOTE(review): regdst is zeroed before regsrc is read, so this
              sequence presumably requires regdst and regsrc to be different
              registers - confirm against the callers }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations need no interaction between the two halves }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { release the flags again once the carry has been consumed }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { release the flags again once the borrow has been consumed }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { 64 bit "reg := reg <op> value" for Thumb with a constant operand.

      list  - instruction list the code is appended to
      op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; anything else raises
              internalerror(2003083101)
      size  - not consulted here
      value - 64 bit constant, split with lo()/hi() into two 32 bit halves
      reg   - register pair that is both operand and destination

    For ADD/SUB the low half uses the immediate instruction form when it lies
    in 0..255 and a temporary register otherwise; the high half always goes
    through a temporary register and is combined with ADC/SBC.  The high
    constant is loaded before the flag-setting instruction so that only the
    ADD/ADC (SUB/SBC) pair lies inside the a_reg_alloc/a_reg_dealloc bracket on
    NR_DEFAULTFLAGS and the result does not depend on a_load_const_reg leaving
    the carry untouched. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg,
      hireg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations need no interaction between the two halves }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),hireg);
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            list.concat(taicpu.op_reg_reg(A_ADC,reg.reghi,hireg));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            { aint() as in the OP_ADD branch: hi() yields an unsigned value that
              must be reinterpreted, not range-converted, for the constant load }
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),hireg);
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            list.concat(taicpu.op_reg_reg(A_SBC,reg.reghi,hireg));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Creates the code generator objects for the selected instruction set.

    GenerateThumb2Code is tested first, then GenerateThumbCode, otherwise the
    ARM generators are used.  In each case cg and cg64 are assigned; the
    assembler optimizer class casmoptimizer is assigned for Thumb-2 and ARM
    only (the Thumb assignment is disabled). }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;
      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  4884. end.