cgcpu.pas 215 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm: code generator base class shared by all ARM architectures.
    tcgarm and tthumbcgarm in this unit derive from it directly. }
  tbasecgarm = class(tcg)
    { when true, the next arithmetic operation should also modify the flags }
    cgsetflags : boolean;

    { --- parameter loading and calls --- }
    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
    procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { --- integer move instructions ---
      the a_internal_* variants are functions returning a treference }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;

    { --- fpu move instructions --- }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;

    { --- comparison operations and jumps --- }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure entry/exit and related helpers --- }
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;
    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

    { --- block copies --- }
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);

    { --- overflow checking and register saving --- }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;

    { --- ARM specific helpers; handle_load_store is virtual and is
          overridden by the Thumb and Thumb-2 generators declared in this unit --- }
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;

    { --- multimedia (mm) register moves and operations --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;

    { unsupported methods are turned into internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { tries to generate an optimized 32 bit multiplication by the constant a;
      returns true if such code was generated }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;

    { clears potential overflow bits left by 8 or 16 bit operations, i.e. the
      upper 24 respectively 16 bits of the register after the operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla on thumb requires that none of the registers equals r13/r15;
      this method ensures that }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);

    procedure g_maybe_tls_init(list : TAsmList); override;
  end;
  { tcgarm: code generator part shared between normal ARM and Thumb-2
    (tarmcgarm and tthumb2cgarm both derive from it) }
  tcgarm = class(tbasecgarm)
    { --- integer operations --- }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    { --- loads --- }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    { multiplies two 32-bit registers into a lo and a hi 32-bit register }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { tarmcgarm: code generator for normal ARM; on top of tcgarm it only
    sets up and tears down the register allocators }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { tbasecg64farm: 64 bit code generator base for all ARM flavours;
    adds nothing to tcg64f32 itself }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm: 64 bit code generator part shared between normal ARM and Thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { --- 64 bit integer operations --- }
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;

    { --- moves between a 64 bit integer register pair and an mm register --- }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { tarmcg64farm: adds nothing to tcg64farm
    (going by its name, the 64 bit counterpart of tarmcgarm) }
  tarmcg64farm = class(tcg64farm)
  end;
  { tthumbcgarm: going by its name the Thumb code generator; it derives
    directly from tbasecgarm rather than from tcgarm }
  tthumbcgarm = class(tbasecgarm)
    { --- register allocator setup --- }
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    { --- procedure entry/exit --- }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- loads --- }
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { tthumbcg64farm: going by its name the 64 bit code generator for Thumb;
    overrides the two-operand 64 bit operations of its ancestors }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { tthumb2cgarm: going by its name the Thumb-2 code generator; derives from
    tcgarm and overrides the routines listed here }
  tthumb2cgarm = class(tcgarm)
    { --- register allocator setup --- }
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { --- loads --- }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure entry/exit --- }
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;

    { --- multimedia (mm) register moves --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { tthumb2cg64farm: going by its name the 64 bit code generator for Thumb-2;
    only a_op64_reg_reg differs from tcg64farm }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { ARM condition code for every generic comparison operator;
      the entries follow the declaration order of topcmp }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (
      C_NONE,
      C_EQ,C_GT,C_LT,C_GE,C_LE,C_NE,
      C_LS,C_CC,C_CS,C_HI
    );

    { NOTE(review): presumably the stack page size assumed for Windows
      targets; not used in the visible part of the unit -- confirm }
    winstackpagesize = 4096;

  { returns the instruction postfix matching the float type described by def }
  function get_fpu_postfix(def : tdef) : toppostfix;

  { implemented further down in the unit (not part of the visible excerpt) }
  procedure create_codegen;
  172. implementation
  173. uses
  174. globals,verbose,systems,cutils,
  175. aopt,aoptcpu,
  176. fmodule,
  177. symconst,symsym,symtable,
  178. tgobj,
  179. procinfo,cpupi,
  180. paramgr;
  181. { Range check must be disabled explicitly as conversions between signed and unsigned
  182. 32-bit values are done without explicit typecasts }
  183. {$R-}
  { Maps a floating point type definition to the matching instruction postfix:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Raises internalerror 200401271 if def is not a floatdef and
    internalerror 200401272 for any other float type. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      { reject everything that is not a float definition first }
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the integer, FPU and MM register allocators for normal ARM.
    The order of the registers inside each set constructor is kept exactly
    as it was. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { integer registers; currently, we always save R14, so we can use it }
      if target_info.system=system_arm_darwin then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure: hand it to
          the allocator as well }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { no current procinfo, or R11 is the frame pointer: leave R11 out }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { fpu registers F0..F7 }
      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if not(current_settings.fputype in [fpu_vfpv3,fpu_vfpv4]) then
        { D0..D15 only }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[])
      else
        { vfpv3/vfpv4: D16..D31 are listed as well, ahead of D8..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
             RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Frees the three register allocators created by init_register_allocators
    and then lets the inherited implementation run. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { integer, fpu and mm allocator, in this order }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg using the first strategy that applies:
      1. a single MOV, if is_shifter_const accepts a
      2. a single MVN, if is_shifter_const accepts not(a)
      3. MOV + ORR, if a can be split by split_into_shifter_const
      4. MVN + BIC, if not(a) can be split by split_into_shifter_const
      5. an LDR relative to PC from a 32 bit constant that is appended to
         the local data of the current procedure
    Only 8, 16 and 32 bit sizes are accepted (internalerror 2002090902
    otherwise). }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      shiftval : byte;
      constlabel : tasmlabel;
      constref : treference;
      part1, part2: DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      { 1. plain mov }
      if is_shifter_const(a,shiftval) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { 2. mvn of the inverted value }
      if is_shifter_const(not(a),shiftval) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { 3. loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { 4. loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { 5. put the constant behind a fresh label in the local data of the
           procedure and load it from there }
      reference_reset(constref,4,[]);
      current_asmdata.getjumplabel(constlabel);
      cg.a_label(current_procinfo.aktlocaldata,constlabel);
      { the label just emitted is the last entry of the local data list }
      constref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      constref.symbol:=constlabel;
      constref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,constref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    fromsize is first clamped to tosize when it is not smaller. The load is
    normally a single LDR with the postfix matching fromsize. It is split
    into narrower loads that are merged with ORR and LSL shifts when
      - the reference alignment is 1 or 2 and smaller than the access size, or
      - the cpu lacks CPUARM_HAS_ALL_MEM and a halfword load would be needed.
    Without CPUARM_HAS_ALL_MEM a signed byte is loaded unsigned and then
    extended through a_load_reg_reg. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      opsuffix : toppostfix;
      partref : treference;
      partreg,partreg2 : tregister;
      shiftop : tshifterop;
      offsstep : integer;
      limitedmem : boolean;

    { Sets up partref for the partial loads. Only complicated references
      (symbol, index register, offset outside -4095..maxoffset) or references
      using reg itself need an extra loadaddr into a fresh register
      (partreg2); everything else is used as is. }
    procedure init_partref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-4095) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            partreg2:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,partreg2);
            reference_reset_base(partref,partreg2,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      { postfix of the load instruction }
      case FromSize of
        OS_8:
          opsuffix:=PF_B;
        OS_S8:
          opsuffix:=PF_SB;
        OS_16:
          opsuffix:=PF_H;
        OS_S16:
          opsuffix:=PF_SH;
        OS_32,
        OS_S32:
          opsuffix:=PF_None;
        else
          InternalError(200308297);
      end;

      limitedmem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      { without the full set of memory instructions a signed byte is loaded
        unsigned here and sign extended at the end of this routine }
      if limitedmem and (fromsize=OS_S8) then
        opsuffix:=PF_B;

      if not(((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
             (limitedmem and (opsuffix in [PF_SH,PF_H]))) then
        { the simple case: one load instruction does the job }
        handle_load_store(list,A_LDR,opsuffix,reg,ref)
      else
        begin
          { direction in which the offset moves from one part to the next;
            on big endian the code starts at the highest offset and walks
            downwards }
          if target_info.endian=endian_big then
            offsstep:=-1
          else
            offsstep:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref(4094);
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                shiftop.shiftimm:=8;
                partreg:=getintregister(list,OS_INT);
                { first byte goes straight into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,offsstep);
                { second byte: loaded signed for OS_S16, so the value ORed
                  in at bit 8 carries the sign }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref(4092);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,offsstep*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end
                else
                  begin
                    { four byte loads, alternating between the two helper
                      registers }
                    partreg2:=getintregister(list,OS_INT);
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    inc(partref.offset,offsstep);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    inc(partref.offset,offsstep);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg2);
                    shiftop.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                    inc(partref.offset,offsstep);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg2,shiftop));
                    shiftop.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end;
              end
            else
              { byte sized accesses never need to be split }
              handle_load_store(list,A_LDR,opsuffix,reg,ref);
          end;
        end;

      { extend the result where the load itself could not do it }
      if limitedmem and (fromsize=OS_S8) then
        a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
      else if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Subtracts ioffset from the 'self' parameter of procdef at every caller
    side location of that parameter. If ioffset is not accepted by
    is_shifter_const it is loaded into R12 first.
    Raises internalerror 200305251 if procdef has no 'self' parameter symbol
    and internalerror 200309189 for locations that are neither a register
    nor a reference. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      hsym : tsym;
      href : treference;
      paraloc : Pcgparalocation;
      shift : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      hsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(hsym) and
             (hsym.typ=paravarsym)) then
        internalerror(200305251);

      { walk over all locations the parameter is passed in }
      paraloc:=tparavarsym(hsym).paraloc[callerside].location;
      while assigned(paraloc) do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,shift) then
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R12,paraloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(href,paraloc^.reference.index,
                  paraloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,shift) then
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,href)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R12,href);
                  end;
              end
            else
              internalerror(200309189);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  { Passes the constant a as parameter: it is loaded into the register of
    paraloc or stored to its stack reference. paraloc must be a simple
    location; other location kinds raise internalerror 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      destref: treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { build a base+offset reference from the parameter location }
            reference_reset(destref,paraloc.alignment,[]);
            destref.base:=paraloc.location^.reference.index;
            destref.offset:=paraloc.location^.reference.offset;
            a_load_const_ref(list,size,a,destref);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);
        else
          internalerror(2002081101);
      end;
    end;
  { Passes the value at reference r as parameter. paraloc may consist of a
    chain of locations; they are filled one after the other while the source
    offset advances by the size of each location.
    Internal errors: 2005010710 (a non 32 bit stack location that is not the
    last one), 2002072801 (fpu register location of a size other than
    OS_F32/OS_F64), 2002081103 (unsupported location kind). }
  procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
    var
      srcref, destref: treference;
      curloc: pcgparalocation;
      bytesleft: aint;
    begin
      srcref:=r;
      bytesleft:=paraloc.intsize;
      curloc:=paraloc.location;
      while assigned(curloc) do
        begin
          paramanager.allocparaloc(list,curloc);
          case curloc^.loc of
            LOC_REGISTER,LOC_CREGISTER:
              a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              case curloc^.size of
                OS_F32, OS_F64:
                  a_loadfpu_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
                else
                  internalerror(2002072801);
              end;
            LOC_REFERENCE:
              begin
                reference_reset_base(destref,curloc^.reference.index,curloc^.reference.offset,
                  ctempposinvalid,paraloc.alignment,[]);
                { doubles in softemu mode have a strange order of registers and references }
                if curloc^.size<>OS_32 then
                  begin
                    { everything that is left is copied in one go, so this
                      has to be the last location }
                    g_concatcopy(list,srcref,destref,bytesleft);
                    if assigned(curloc^.next) then
                      internalerror(2005010710);
                  end
                else
                  g_concatcopy(list,srcref,destref,4);
              end;
            LOC_VOID:
              begin
                // nothing to do
              end;
            else
              internalerror(2002081103);
          end;
          { continue behind the part that was just handled }
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          dec(bytesleft,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Passes the address of reference r as parameter. For a register location
    the address is computed straight into that register; for a stack
    location it goes through a temporary address register and is stored.
    paraloc must be a simple location; other location kinds raise
    internalerror 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      destref: treference;
      addrreg: tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            reference_reset(destref,paraloc.alignment,[]);
            destref.base:=paraloc.location^.reference.index;
            destref.offset:=paraloc.location^.reference.offset;
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,destref);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a BL to the function symbol named s (a weak reference if weak is
    set) and records in the current procinfo that the procedure does a call. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callref : treference;
      callsym : TAsmSymbol;
    begin
      if weak then
        callsym:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callsym:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(callref,callsym,0,sizeof(pint),[]);

      { PIC code on targets using a GOT gets a pic reference, everything
        else a full address }
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_pic
      else
        callref.refaddr:=addr_full;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,callref));
{
      the compiler does not properly set this flag anymore in pass 1, and
      for now we only need it after pass 2 (I hope) (JM)
        if not(pi_do_call in current_procinfo.flags) then
          internalerror(2003060703);
}
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits an indirect call through reg: BLX if the selected cpu type has it,
    otherwise "mov r14,pc" followed by "mov pc,reg". Also records in the
    current procinfo that the procedure does a call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          { no BLX: set up the link register by hand, then jump }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;
{
      the compiler does not properly set this flag anymore in pass 1, and
      for now we only need it after pass 2 (I hope) (JM)
        if not(pi_do_call in current_procinfo.flags) then
          internalerror(2003060703);
}
      include(current_procinfo.flags,pi_do_call);
    end;
  { reg := reg op a; forwards to the three operand variant with reg as both
    source and destination }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  { Applies "op a" to the value stored at ref: the value is loaded into a
    temporary register, the operation puts its result into a second one and
    that result is stored back. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg,resultreg : tregister;
      loadedref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      { the reference returned by the load is reused for the store
        (presumably the reference as adjusted for the load -- confirm
        against a_internal_load_ref_reg) }
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { dst := dst op src. The unary operations are handled here:
      OP_NEG  rsb dst,src,#0 followed by maybeadjustresult
      OP_NOT  mvn; for 8 and 16 bit sizes the operand is first shifted to
              the top of the register and the result shifted back down
    All other operations go to a_op_reg_reg_reg. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                { move the significant bits to the top of the register
                  while inverting ... }
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if size in [OS_8, OS_S8] then
                  shiftop.shiftimm:=24
                else
                  shiftop.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
                { ... and bring them back, arithmetically for the signed
                  sizes and logically for the unsigned ones.
                  Using a shift here allows this to be folded into another
                  instruction }
                if size in [OS_S8, OS_S16] then
                  shiftop.shiftmode:=SM_ASR
                else
                  shiftop.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  const
    { Lookup tables indexed by TOpCG; the entries follow the declaration
      order of that enumeration, which is not visible in this excerpt.
      A_NONE marks operations without an entry in the respective table. }

    { table without entries for the shift/rotate operations }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,A_NONE,
      A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE
    );

    { same table, but with A_ASR, A_LSL, A_LSR and A_ROR filled in }
    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,A_ASR,
      A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
    );

    { instruction postfix per operation; currently PF_None throughout }
    op_reg_postfix: array[TOpCG] of TOpPostfix = (
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None
    );
  { dst := src op a; forwards to the overflow checking variant with
    setflags=false. The overflow location is required by that signature
    and its value is not used afterwards. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,unusedovloc);
    end;
  { dst := src2 op src1, without overflow tracking: delegates to the
    overflow-checking variant with setflags=false and discards the reported
    overflow location. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      unused_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,unused_ovloc);
    end;
  { Maps a generic shift/rotate operation to the shifter operand mode.
    Any operation that is not a shift or rotate raises an internal error. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        { both rotate directions map to ROR; the users in this unit convert
          a rotate left into a rotate right by (32 - count) }
        OP_ROR,
        OP_ROL:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace the 32 bit multiplication dst := src * a by a short
    sequence of shifted MOV/ADD/RSB/SUB instructions.
    Returns true if a replacement sequence was emitted, false if the caller
    has to generate a real multiplication.
    NOTE(review): sequences longer than one instruction read src after dst
    has been written, so this presumably expects src<>dst - confirm with the
    callers. }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      factor    : dword;
      power     : longint;
      so        : tshifterop;
      addcount  : byte;
      isneg     : boolean;
      isfirst   : boolean;
      dummy,
      extracost : byte;
      budget    : byte;
    begin
      result:=true;
      isneg:=a<0;
      so.rs:=NR_NO;
      so.shiftmode:=SM_LSL;
      { a negative factor needs one additional RSB at the end }
      if isneg then
        extracost:=1
      else
        extracost:=0;
      factor:=dword(abs(a));
      { number of set bits, not counting the least significant one }
      addcount:=popcnt(factor and $fffffffe);

      { heuristics to estimate how many instructions are reasonable to replace
        the mul, this is currently based on XScale timings }

      { in the simplest case, we need a mov to load the constant and a mul to
        carry out the actual multiplication, this requires min. 1+4 cycles

        because the first shift imm. might cause a stall and because we need
        more instructions when replacing the mul we generate max. 3
        instructions to replace this mul }
      budget:=3;

      { if the constant is not a shifter op, we need either some
        mov/mvn/bic/or sequence or a ldr, so generating one more operation to
        replace this is beneficial }
      if not(is_shifter_const(dword(a),dummy)) and not(is_shifter_const(not(dword(a)),dummy)) then
        inc(budget);

      { if the upper 5 bits are all set or clear, mul is one cycle faster }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(budget);

      { if the upper 17 bits are all set or clear, mul is another cycle faster }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(budget);

      { most simple cases }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { add up ?

        basically, one add is needed for each bit being set in the constant
        factor, however, the least significant bit is for free, it can be
        hidden in the initial instruction }
      else if (addcount+extracost<=budget) and
        (addcount<=popcnt(dword(nextpowerof2(factor,power)-factor) and $fffffffe)) then
        begin
          isfirst:=true;
          while factor<>0 do
            begin
              { handle the highest bit still set }
              so.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  { covers the highest bit and bit 0 in one instruction }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
                  dec(factor);
                end
              else if isfirst then
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
              else
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,so));
              isfirst:=false;
              dec(factor,1 shl so.shiftimm);
            end;
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { subtract from the next greater power of two? }
      else if popcnt(dword(nextpowerof2(factor,power)-factor) and $fffffffe)+extracost+1<=budget then
        begin
          isfirst:=true;
          while factor<>0 do
            begin
              if isfirst then
                begin
                  { continue with the distance to the next power of two }
                  factor:=(1 shl power)-factor;
                  so.shiftimm:=power;
                end
              else
                so.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
                  dec(factor);
                end
              else if isfirst then
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
              else
                begin
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,so));
                  dec(factor,1 shl so.shiftimm);
                end;
              isfirst:=false;
            end;
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits dst := src op a for a constant a.

    list     - instruction list the code is appended to
    op       - operation; it may be rewritten by optimize_op_const
    size     - operand size, used for the final result adjustment
    a        - constant operand
    src, dst - source and destination register
    setflags - if true (or if cgsetflags is set) the flag setting form of
               the instruction is used where one is emitted directly
    ovloc    - receives the location of the overflow condition; it is
               LOC_VOID unless a flag setting instruction was emitted for
               an unsigned 8/16/32 bit operation

    The routine tries, in this order: immediate operands, negated
    immediates for add/sub, shift based replacements for multiplications,
    BIC/shift/extend sequences for AND masks, split immediates, and finally
    loading the constant into a temporary register. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, lsb, width : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
      imm1, imm2: DWord;
    begin
      { reset the overflow location before anything else so that it is also
        defined when one of the early exits below is taken; this matches
        a_op_reg_reg_reg_checkoverflow }
      ovloc.loc:=LOC_VOID;
      optimize_op_const(size, op, a);
      case op of
        OP_NONE:
          begin
            if src <> dst then
              a_load_reg_reg(list, size, size, src, dst);
            exit;
          end;
        OP_MOVE:
          begin
            a_load_const_reg(list, size, a, dst);
            exit;
          end;
      end;
      { add/sub of a constant whose negation is an immediate: flip the
        operation; not done when the flags are requested explicitly }
      if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(so);
              so.shiftmode:=opshift2shiftmode(op);
              { a rotate left is emitted as rotate right by 32-a }
              if op = OP_ROL then
                so.shiftimm:=32-a
              else
                so.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
            end;
          else
            begin
              { the constant can be used directly as immediate operand }
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC
            allows to use a broader range of shifter constants }
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { doing two shifts instead of two bics might allow the peephole
            optimizer to fold the second shift into the following instruction }
          else if (op = OP_AND) and
            is_continuous_mask(aword(a), lsb, width) and
            ((lsb = 0) or ((lsb + width) = 32)) then
            begin
              shifterop_reset(so);
              if (width = 16) and
                (lsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (width = 8) and
                (lsb = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if lsb = 0 then
                begin
                  { mask covers the low bits: shift the upper bits out and back }
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=32-width;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end
              else
                begin
                  { mask covers the high bits: shift the lower bits out and back }
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=lsb;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                  so.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                end;
            end
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
          else
            begin
              { last resort: materialise the constant in a register }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src2 op src1 for register operands.

    list       - instruction list the code is appended to
    op         - operation; NEG, NOT, DIV and IDIV are rejected with an
                 internal error
    size       - operand size, used for temporaries and the final result
                 adjustment
    src1, src2 - source registers
    dst        - destination register
    setflags   - if true (or if cgsetflags is set) the flag setting form is
                 emitted for plain data-processing operations, and
                 multiplications use a long multiply so that overflow can
                 be detected
    ovloc      - receives the location of the overflow condition; LOC_VOID
                 unless a checked multiplication was emitted }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { shift src2 by the amount held in src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
              (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { 32x32->64 multiply; the high word ends up in overflowreg
                  and is inspected below }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: the
                          operand has to be copied into the temporary, which
                          then serves as rm }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension of
                      the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { see above: copy the operand into the temporary
                          that is used as rm }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result in the register pair
    dstlo/dsthi. Either destination may be NR_NO if the caller is not
    interested in that half. Without long multiply support only a request
    without high word (dsthi=NR_NO) can be served; anything else raises an
    internal error. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              { plain 32x32=32 bit multiplication }
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_32:
          mulop:=A_UMULL;
        OS_S32:
          mulop:=A_SMULL;
        else
          InternalError(2014060802);
      end;
      { The caller might omit dstlo or dsthi, when he is not interested in it,
        we still need valid registers everywhere. In case of dsthi = NR_NO we
        could fall back to 32x32=32 bit multiplication }
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load/store instruction op (with postfix oppostfix) for register
    reg and memory reference ref. Before the instruction is emitted, the
    reference is brought into a form the instruction can encode: it is passed
    to fixref if the checks below reject it, Thumb register restrictions are
    honoured, and base+index+offset references are folded.
    Returns the reference that was actually used by the instruction. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      { tmpreg1 tracks a temporary register that is part of ref (as base or
        index), NR_NO if there is none }
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020701);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol
        reference in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC
        are already relative, all other references are assumed to be absolute
        and thus they need to be handled extra.

        A proper solution would be to change refoptions to a set and store
        the information if the symbol is absolute or relative there. }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { The offset is always added to the base and the sum becomes the
            new base. The result has to be stored back into ref, otherwise
            the offset is lost when ref.offset is cleared below. }
          tmpreg2:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg2);
          { keep tmpreg1 referring to a temporary that is part of ref: if it
            was the old base (or unset) it follows the new base, if it is the
            index it stays untouched }
          if (tmpreg1=NR_NO) or (tmpreg1=ref.base) then
            tmpreg1:=tmpreg2;
          ref.base:=tmpreg2;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { the base is a temporary already: fold the index into it }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { otherwise the temporary has to be the index }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register reg to the memory reference ref.
    If the reference is less aligned than the store size, or if a halfword
    store is required on a cpu without CPUARM_HAS_ALL_MEM, the value is
    written in smaller pieces, honouring the target endianness. All other
    stores are emitted as a single STR with the matching postfix. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      partref : treference;
      hireg : tregister;
      step : integer;
      i : integer;
      bigendian,
      needsplit : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;

      needsplit:=((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
         (postfix =PF_H));
      if not needsplit then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { the least significant part is stored first; on big endian targets it
        lives at the highest address, so we walk downwards there }
      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;
      case FromSize of
        OS_16,OS_S16:
          begin
            hireg:=getintregister(list,OS_INT);
            partref:=ref;
            if bigendian then
              inc(partref.offset,1);
            { low byte }
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            { high byte }
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hireg);
            a_internal_load_reg_ref(list,OS_8,OS_8,hireg,partref);
          end;
        OS_32,OS_S32:
          begin
            hireg:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                if bigendian then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,hireg);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,hireg,partref);
              end
            else
              begin
                { four byte stores }
                if bigendian then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hireg);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,hireg,partref);
                { the remaining two bytes are shifted down in place }
                for i:=1 to 2 do
                  begin
                    a_op_const_reg(list,OP_SHR,OS_INT,8,hireg);
                    inc(partref.offset,step);
                    a_internal_load_reg_ref(list,OS_8,OS_8,hireg,partref);
                  end;
              end;
          end
        else
          handle_load_store(list,A_STR,postfix,reg,ref);
      end;
    end;
  { Stores reg to ref with the postfix selected by tosize and returns the
    reference used by the (first) store instruction. On cpus without
    CPUARM_HAS_ALL_MEM a 16 bit store is split into two byte stores. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      hiref : treference;
      hireg : TRegister;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      if not(tosize in [OS_S16,OS_16]) or
        (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        begin
          result:=handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { no halfword store available: write the low byte, then the high byte
        at the following address }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      hireg:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hireg);
      hiref:=result;
      inc(hiref.offset);
      handle_load_store(list,A_STR,PF_B,hireg,hiref);
    end;
  { Loads reg from ref with the postfix selected by fromsize and returns the
    reference used by the (first) load instruction. On cpus without
    CPUARM_HAS_ALL_MEM signed byte loads are done as unsigned load plus
    extension, and 16 bit loads are assembled from two byte loads. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shifter : tshifterop;
      hireg : TRegister;
      hiref : treference;
      limited_mem : boolean;
    begin
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limited_mem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      if limited_mem and (tosize=OS_S8) then
        begin
          { load the byte unsigned and sign extend it in the register }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limited_mem and (tosize in [OS_S16,OS_16]) then
        begin
          { low byte into reg, high byte into a temporary, then merge them }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          hireg:=getintregister(list,OS_INT);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_LDR,PF_B,hireg,hiref);
          shifterop_reset(shifter);
          shifter.shiftmode:=SM_LSL;
          shifter.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hireg,shifter));
        end
      else
        result:=handle_load_store(list,A_LDR,postfix,reg,ref);
    end;
  { Copies reg1 to reg2, converting from fromsize to tosize.
    If the sizes differ, the value is zero or sign extended according to the
    smaller of the two sizes; before ARMv6 this is done with shifts or an
    AND, from ARMv6 on with the extend instructions. If no conversion code
    was emitted and the registers differ, a plain MOV is generated and
    reported to the register allocator. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      shifter : tshifterop;

    { emits reg2 := reg shifted by shiftimm using shiftmode }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            shifter.shiftmode:=shiftmode;
            shifter.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,shifter));
          end
        else
          begin
            { thumb code goes through the generic shift operations }
            case shiftmode of
              SM_ASR:
                a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
              SM_LSR:
                a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
              SM_LSL:
                a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
              else
                internalerror(2013090301);
            end;
          end;
      end;

    var
      movinstr : taicpu;
      converted : boolean;
    begin
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=tosize<>fromsize;
      if converted then
        begin
          shifterop_reset(shifter);
          { the smaller size decides which extension is needed }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            begin
              case fromsize of
                OS_8:
                  begin
                    if GenerateThumbCode then
                      a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                    else
                      list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                  end;
                OS_S8:
                  begin
                    do_shift(SM_LSL,24,reg1);
                    if tosize=OS_16 then
                      begin
                        do_shift(SM_ASR,8,reg2);
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      do_shift(SM_ASR,24,reg2);
                  end;
                OS_16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_LSR,16,reg2);
                  end;
                OS_S16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_ASR,16,reg2)
                  end;
                else
                  converted:=false;
              end;
            end
          else
            begin
              case fromsize of
                OS_8:
                  begin
                    if GenerateThumbCode then
                      list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                    else
                      list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                  end;
                OS_S8:
                  begin
                    if tosize=OS_16 then
                      begin
                        shifter.shiftmode:=SM_ROR;
                        shifter.shiftimm:=16;
                        list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,shifter));
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                  end;
                OS_16:
                  list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
                OS_S16:
                  list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
                else
                  converted:=false;
              end;
            end;
        end;

      if not converted and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move
            instruction so it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.
    The parameter may consist of several locations; they are processed one
    after another while href advances through the source value by the size
    of each location.

    list    - instruction list the code is appended to
    size    - size of the floating point value
    ref     - memory location of the value
    paraloc - parameter description with the chain of target locations }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { allocate and load the location that is processed right now,
                  from the current position in the source value }
                paramanager.allocparaloc(list,hloc);
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,hloc);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue behind the part that has just been passed }
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Copies fpu register reg1 to reg2 with an MVF instruction whose postfix is
    selected by tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads fpu register reg from ref with an LDF instruction whose precision
    postfix is selected by fromsize. If tosize differs, a register to
    register move with the target size follows. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_32,
        OS_F32:
          precision:=PF_S;
        OS_64,
        OS_F64:
          precision:=PF_D;
        OS_F80:
          precision:=PF_E;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores fpu register reg to ref with an STF instruction whose precision
    postfix is selected by tosize. Only the float sizes OS_F32, OS_F64 and
    OS_F80 are accepted; anything else raises an internal error. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      case tosize of
        OS_F32:
          precision:=PF_S;
        OS_F64:
          precision:=PF_D;
        OS_F80:
          precision:=PF_E;
        else
          InternalError(200309022);
      end;
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  1577. { comparison operations }
  { Compares reg with the constant a and emits a jump to l for condition
    cmp_op.  The default flags register is allocated around the whole
    compare-and-branch sequence. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      imm_ok : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { can a be used directly as the immediate operand of CMP? }
      if GenerateThumbCode then
        imm_ok:=is_thumb_imm(a)
      else
        imm_ok:=is_shifter_const(a,rot);
      if imm_ok then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      else if not(GenerateThumbCode) and (a<>$7fffffff) and (a<>-1) and is_shifter_const(-a,rot) then
        { the negated constant is usable, so compare with CMN instead;
          CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
          and CMP reg,$7fffffff regarding the flags according to the ARM
          manual, hence the two excluded values }
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { fall back to materialising the constant in a register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.
    reverse=false: RBIT followed by CLZ; if the CLZ result equals 32 the
                   destination is overwritten with $ff.
    reverse=true : CLZ, then dst:=31-dst, then dst is masked with 255.
    srcsize and dstsize are not consulted. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this
            code is used or not, so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { Thumb-2 needs an IT instruction in front of the conditional MOV }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a jump to l for condition cmp_op; the
    default flags register is allocated for the duration of the sequence.
    size is not consulted. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s.  A far jump
    is generated (BL when generating Thumb code, B otherwise) and marked as
    a jump; it is left to the optimizer to get rid of it. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jmpop : tasmop;
      jmp : taicpu;
    begin
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmp:=taicpu.op_sym(jmpop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jmp.is_jmp:=true;
      list.concat(jmp);
    end;
  { Emits an unconditional jump to label l.  A far jump is generated (BL
    when generating Thumb code, B otherwise) and marked as a jump; it is
    left to the optimizer to get rid of it. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jmpop : tasmop;
      jmp : taicpu;
    begin
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmp:=taicpu.op_sym(jmpop,l);
      jmp.is_jmp:=true;
      list.concat(jmp);
    end;
  { Emits a jump to l that is taken when the flags f hold.
    Outside Thumb mode this is a single conditional B.  In Thumb mode a
    conditional B with the inverted condition skips over an unconditional
    jump to l (emitted through a_jmp_always). }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jmp : taicpu;
      negflags : TResFlags;
      skip : TAsmLabel;
    begin
      if not(GenerateThumbCode) then
        begin
          jmp:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          jmp.is_jmp:=true;
          list.concat(jmp);
        end
      else
        begin
          negflags:=f;
          inverse_flags(negflags);
          { the optimizer has to fix this if jump range is sufficient short }
          current_asmdata.getjumplabel(skip);
          jmp:=setcondition(taicpu.op_sym(A_B,skip),flags_to_cond(negflags));
          jmp.is_jmp:=true;
          list.concat(jmp);
          a_jmp_always(list,l);
          a_label(list,skip);
        end;
    end;
  { Materialises the flags f in reg with two conditional MOVs: reg:=1 under
    the condition matching f, reg:=0 under the inverse condition.  size is
    not consulted. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      settrue,setfalse : taicpu;
    begin
      settrue:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      list.concat(settrue);
      setfalse:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
      list.concat(setfalse);
    end;
  { Emits the profiling call sequence.  Only system_arm_linux is handled:
    R14 is pushed and '__gnu_mcount_nc' is called.  Every other target
    raises internal error 2014091201. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { g_proc_entry: emits the procedure prologue into list.
    localsize    - number of bytes requested for locals; it is first rounded
                   up to a multiple of 4 and later adjusted for the stack
                   alignment.
    nostackframe - when true no code is emitted; only stackpaddingreg of the
                   current procinfo is reset to High(TSuperRegister).
    Otherwise the routine
      1. collects the non-volatile FPA or VFP registers used by the procedure,
      2. stores the non-volatile integer registers with STM (Darwin and
         non-Darwin targets use different layouts),
      3. lowers the stack pointer by the aligned local size,
      4. stores the collected FPA (SFM) or VFP (VSTM) registers at an address
         derived from current_procinfo.framepointer and floatregstart. }
  1694. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1695. var
  1696. ref : treference;
  1697. shift : byte;
  1698. firstfloatreg,lastfloatreg,
  1699. r : byte;
  1700. mmregs,
  1701. regs, saveregs : tcpuregisterset;
  1702. registerarea,
  1703. r7offset,
  1704. stackmisalignment : pint;
  1705. imm1, imm2: DWord;
  1706. stack_parameters : Boolean;
  1707. begin
  1708. LocalSize:=align(LocalSize,4);
  1709. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1710. { call instruction does not put anything on the stack }
  1711. registerarea:=0;
  { High(TSuperRegister) acts as "no padding register"; g_proc_exit only
    re-adds the padding register when the stored value is below RS_R4 }
  1712. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1713. lastfloatreg:=RS_NO;
  1714. if not(nostackframe) then
  1715. begin
  1716. firstfloatreg:=RS_NO;
  1717. mmregs:=[];
  1718. case current_settings.fputype of
  1719. fpu_fpa,
  1720. fpu_fpa10,
  1721. fpu_fpa11:
  1722. begin
  1723. { save floating point registers? }
  1724. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  { remember the first and last used register; 12 bytes are accounted
    for each register found }
  1725. for r:=RS_F0 to RS_F7 do
  1726. if r in regs then
  1727. begin
  1728. if firstfloatreg=RS_NO then
  1729. firstfloatreg:=r;
  1730. lastfloatreg:=r;
  1731. inc(registerarea,12);
  1732. end;
  1733. end;
  1734. fpu_vfpv2,
  1735. fpu_vfpv3,
  1736. fpu_vfpv4,
  1737. fpu_vfpv3_d16:
  1738. begin;
  1739. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1740. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1741. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1742. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1743. end;
  1744. end;
  1745. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1746. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1747. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1748. { save int registers }
  1749. reference_reset(ref,4,[]);
  1750. ref.index:=NR_STACK_POINTER_REG;
  1751. ref.addressmode:=AM_PREINDEXED;
  { regs = integer registers used in this procedure that are not volatile
    under pocall_stdcall }
  1752. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1753. if not(target_info.system in systems_darwin) then
  1754. begin
  1755. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1756. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1757. begin
  { keep a copy of the entry stack pointer in R12; it is stored by the
    STM below and used to set up the frame pointer afterwards }
  1758. a_reg_alloc(list,NR_R12);
  1759. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1760. end;
  1761. { the (old) ARM APCS requires saving both the stack pointer (to
  1762. crawl the stack) and the PC (to identify the function this
  1763. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1764. and R15 -- still needs updating for EABI and Darwin, they don't
  1765. need that }
  1766. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1767. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1768. else
  1769. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1770. include(regs,RS_R14);
  1771. if regs<>[] then
  1772. begin
  1773. for r:=RS_R0 to RS_R15 do
  1774. if r in regs then
  1775. inc(registerarea,4);
  1776. { if the stack is not 8 byte aligned, try to add an extra register,
  1777. so we can avoid the extra sub/add ...,#4 later (KB) }
  1778. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1779. for r:=RS_R3 downto RS_R0 do
  1780. if not(r in regs) then
  1781. begin
  1782. regs:=regs+[r];
  1783. inc(registerarea,4);
  1784. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1785. break;
  1786. end;
  1787. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1788. end;
  1789. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1790. begin
  1791. { the framepointer now points to the saved R15, so the saved
  1792. framepointer is at R11-12 (for get_caller_frame) }
  1793. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1794. a_reg_dealloc(list,NR_R12);
  1795. end;
  1796. end
  1797. else
  1798. begin
  1799. { always save r14 if we use r7 as the framepointer, because
  1800. the parameter offsets are hardcoded in advance and always
  1801. assume that r14 sits on the stack right behind the saved r7
  1802. }
  1803. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1804. include(regs,RS_FRAME_POINTER_REG);
  1805. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1806. include(regs,RS_R14);
  1807. if regs<>[] then
  1808. begin
  1809. { on Darwin, you first have to save [r4-r7,lr], and then
  1810. [r8,r10,r11] and make r7 point to the previously saved
  1811. r7 so that you can perform a stack crawl based on it
  1812. ([r7] is previous stack frame, [r7+4] is return address
  1813. }
  1814. include(regs,RS_FRAME_POINTER_REG);
  1815. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset counts 4 bytes for every register in the first group that is
    numbered below the frame pointer register }
  1816. r7offset:=0;
  1817. for r:=RS_R0 to RS_R15 do
  1818. if r in saveregs then
  1819. begin
  1820. inc(registerarea,4);
  1821. if r<RS_FRAME_POINTER_REG then
  1822. inc(r7offset,4);
  1823. end;
  1824. { save the registers }
  1825. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1826. { make r7 point to the saved r7 (regardless of whether this
  1827. frame uses the framepointer, for backtrace purposes) }
  1828. if r7offset<>0 then
  1829. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1830. else
  1831. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1832. { now save the rest (if any) }
  1833. saveregs:=regs-saveregs;
  1834. if saveregs<>[] then
  1835. begin
  1836. for r:=RS_R8 to RS_R11 do
  1837. if r in saveregs then
  1838. inc(registerarea,4);
  1839. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1840. end;
  1841. end;
  1842. end;
  { registerarea holds the byte count accumulated above (4 per integer
    register, 12 per FPA register); derive its misalignment relative to
    localalignmax }
  1843. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1844. if (LocalSize<>0) or
  1845. ((stackmisalignment<>0) and
  1846. ((pi_do_call in current_procinfo.flags) or
  1847. (po_assembler in current_procinfo.procdef.procoptions))) then
  1848. begin
  1849. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { with an estimated stack size the precomputed stackframesize is
    authoritative; a larger computed size is an internal error }
  1850. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1851. begin
  1852. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1853. internalerror(2014030901)
  1854. else
  1855. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1856. end;
  { lower SP: one SUB if the size passes is_shifter_const, two SUBs if it
    can be split into two such constants, otherwise the size is loaded
    into R12 first }
  1857. if is_shifter_const(localsize,shift) then
  1858. begin
  1859. a_reg_dealloc(list,NR_R12);
  1860. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1861. end
  1862. else if split_into_shifter_const(localsize, imm1, imm2) then
  1863. begin
  1864. a_reg_dealloc(list,NR_R12);
  1865. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1866. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1867. end
  1868. else
  1869. begin
  1870. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1871. a_reg_alloc(list,NR_R12);
  1872. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1873. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1874. a_reg_dealloc(list,NR_R12);
  1875. end;
  1876. end;
  { store the FPA or VFP registers gathered at the top of the routine }
  1877. if (mmregs<>[]) or
  1878. (firstfloatreg<>RS_NO) then
  1879. begin
  1880. reference_reset(ref,4,[]);
  { the address is computed into R12 when tg.direction*floatregstart is
    >= 1023 or a VFP fpu type is selected; otherwise the frame pointer
    plus floatregstart is used directly as base+offset }
  1881. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1882. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1883. begin
  1884. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1885. begin
  1886. a_reg_alloc(list,NR_R12);
  1887. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1888. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1889. a_reg_dealloc(list,NR_R12);
  1890. end
  1891. else
  1892. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1893. ref.base:=NR_R12;
  1894. end
  1895. else
  1896. begin
  1897. ref.base:=current_procinfo.framepointer;
  1898. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1899. end;
  1900. case current_settings.fputype of
  1901. fpu_fpa,
  1902. fpu_fpa10,
  1903. fpu_fpa11:
  1904. begin
  { one SFM covers the whole range firstfloatreg..lastfloatreg }
  1905. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1906. lastfloatreg-firstfloatreg+1,ref));
  1907. end;
  1908. fpu_vfpv2,
  1909. fpu_vfpv3,
  1910. fpu_vfpv4,
  1911. fpu_vfpv3_d16:
  1912. begin
  { VSTM takes the address register in the index field of the reference }
  1913. ref.index:=ref.base;
  1914. ref.base:=NR_NO;
  1915. { FSTMX is deprecated on ARMv6 and later }
  1916. {if (current_settings.cputype<cpu_armv6) then
  1917. postfix:=PF_IAX
  1918. else
  1919. postfix:=PF_IAD;}
  1920. if mmregs<>[] then
  1921. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1922. end;
  1923. end;
  1924. end;
  1925. end;
  1926. end;
  { g_proc_exit: emits the procedure epilogue into list.
    parasize     - not referenced by this implementation.
    nostackframe - when true only the return instruction is emitted
                   (BX R14 if the CPU has CPUARM_HAS_BX, otherwise
                   MOV PC,R14).
    Otherwise the routine
      1. reloads the non-volatile FPA (LFM) or VFP (VLDM) registers,
      2. recomputes the set of saved integer registers, replacing R14 by R15
         when registers were saved so that the final load also returns,
      3. if the stack pointer is the frame pointer or the target is Darwin,
         releases the local stack area and pops the registers with LDM FD
         (or returns through R14 when the set is empty),
      4. otherwise reloads the registers relative to the frame pointer with
         LDM EA. }
  1927. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1928. var
  1929. ref : treference;
  1930. LocalSize : longint;
  1931. firstfloatreg,lastfloatreg,
  1932. r,
  1933. shift : byte;
  1934. mmregs,
  1935. saveregs,
  1936. regs : tcpuregisterset;
  1937. registerarea,
  1938. stackmisalignment: pint;
  1939. paddingreg: TSuperRegister;
  1940. imm1, imm2: DWord;
  1941. begin
  1942. if not(nostackframe) then
  1943. begin
  1944. registerarea:=0;
  1945. firstfloatreg:=RS_NO;
  1946. lastfloatreg:=RS_NO;
  1947. mmregs:=[];
  1948. saveregs:=[];
  1949. case current_settings.fputype of
  1950. fpu_fpa,
  1951. fpu_fpa10,
  1952. fpu_fpa11:
  1953. begin
  1954. { restore floating point registers? }
  1955. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1956. for r:=RS_F0 to RS_F7 do
  1957. if r in regs then
  1958. begin
  1959. if firstfloatreg=RS_NO then
  1960. firstfloatreg:=r;
  1961. lastfloatreg:=r;
  1962. { floating point register space is already included in
  1963. localsize below by calc_stackframe_size
  1964. inc(registerarea,12);
  1965. }
  1966. end;
  1967. end;
  1968. fpu_vfpv2,
  1969. fpu_vfpv3,
  1970. fpu_vfpv4,
  1971. fpu_vfpv3_d16:
  1972. begin;
  1973. { restore vfp registers? }
  1974. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1975. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1976. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1977. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1978. end;
  1979. end;
  1980. if (firstfloatreg<>RS_NO) or
  1981. (mmregs<>[]) then
  1982. begin
  1983. reference_reset(ref,4,[]);
  { same address computation as used for the store in g_proc_entry: R12 is
    used when tg.direction*floatregstart is >= 1023 or a VFP fpu type is
    selected, otherwise frame pointer plus floatregstart }
  1984. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1985. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1986. begin
  1987. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1988. begin
  1989. a_reg_alloc(list,NR_R12);
  1990. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1991. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1992. a_reg_dealloc(list,NR_R12);
  1993. end
  1994. else
  1995. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1996. ref.base:=NR_R12;
  1997. end
  1998. else
  1999. begin
  2000. ref.base:=current_procinfo.framepointer;
  2001. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  2002. end;
  2003. case current_settings.fputype of
  2004. fpu_fpa,
  2005. fpu_fpa10,
  2006. fpu_fpa11:
  2007. begin
  2008. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2009. lastfloatreg-firstfloatreg+1,ref));
  2010. end;
  2011. fpu_vfpv2,
  2012. fpu_vfpv3,
  2013. fpu_vfpv4,
  2014. fpu_vfpv3_d16:
  2015. begin
  2016. ref.index:=ref.base;
  2017. ref.base:=NR_NO;
  2018. { FLDMX is deprecated on ARMv6 and later }
  2019. {if (current_settings.cputype<cpu_armv6) then
  2020. mmpostfix:=PF_IAX
  2021. else
  2022. mmpostfix:=PF_IAD;}
  2023. if mmregs<>[] then
  2024. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2025. end;
  2026. end;
  2027. end;
  { recompute the set of non-volatile integer registers used in the
    procedure }
  2028. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2029. if (pi_do_call in current_procinfo.flags) or
  2030. (regs<>[]) or
  2031. ((target_info.system in systems_darwin) and
  2032. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2033. begin
  { load R15 instead of R14, so the register load performs the return }
  2034. exclude(regs,RS_R14);
  2035. include(regs,RS_R15);
  2036. if (target_info.system in systems_darwin) then
  2037. include(regs,RS_FRAME_POINTER_REG);
  2038. end;
  2039. if not(target_info.system in systems_darwin) then
  2040. begin
  2041. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2042. The saved PC came after that but is discarded, since we restore
  2043. the stack pointer }
  2044. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2045. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2046. end
  2047. else
  2048. begin
  2049. { restore R8-R11 already if necessary (they've been stored
  2050. before the others) }
  2051. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2052. if saveregs<>[] then
  2053. begin
  { ref is prepared here and consumed by the LDM of saveregs further down }
  2054. reference_reset(ref,4,[]);
  2055. ref.index:=NR_STACK_POINTER_REG;
  2056. ref.addressmode:=AM_PREINDEXED;
  2057. for r:=RS_R8 to RS_R11 do
  2058. if r in saveregs then
  2059. inc(registerarea,4);
  2060. regs:=regs-saveregs;
  2061. end;
  2062. end;
  2063. for r:=RS_R0 to RS_R15 do
  2064. if r in regs then
  2065. inc(registerarea,4);
  2066. { reapply the stack padding reg, in case there was one, see the complimentary
  2067. comment in g_proc_entry() (KB) }
  2068. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
  { only R0..R3 can be padding registers; a padding register that is also
    in regs is an internal error }
  2069. if paddingreg < RS_R4 then
  2070. if paddingreg in regs then
  2071. internalerror(201306190)
  2072. else
  2073. begin
  2074. regs:=regs+[paddingreg];
  2075. inc(registerarea,4);
  2076. end;
  2077. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  2078. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2079. (target_info.system in systems_darwin) then
  2080. begin
  2081. LocalSize:=current_procinfo.calc_stackframe_size;
  2082. if (LocalSize<>0) or
  2083. ((stackmisalignment<>0) and
  2084. ((pi_do_call in current_procinfo.flags) or
  2085. (po_assembler in current_procinfo.procdef.procoptions))) then
  2086. begin
  2087. if pi_estimatestacksize in current_procinfo.flags then
  2088. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2089. else
  2090. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { raise SP again: one ADD, two ADDs, or an ADD via R12, chosen the same
    way as the SUBs in g_proc_entry }
  2091. if is_shifter_const(LocalSize,shift) then
  2092. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2093. else if split_into_shifter_const(localsize, imm1, imm2) then
  2094. begin
  2095. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2096. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2097. end
  2098. else
  2099. begin
  2100. a_reg_alloc(list,NR_R12);
  2101. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2102. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2103. a_reg_dealloc(list,NR_R12);
  2104. end;
  2105. end;
  2106. if (target_info.system in systems_darwin) and
  2107. (saveregs<>[]) then
  2108. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  { nothing left to pop: return through R14 directly }
  2109. if regs=[] then
  2110. begin
  2111. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2112. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2113. else
  2114. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2115. end
  2116. else
  2117. begin
  2118. reference_reset(ref,4,[]);
  2119. ref.index:=NR_STACK_POINTER_REG;
  2120. ref.addressmode:=AM_PREINDEXED;
  2121. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2122. end;
  2123. end
  2124. else
  2125. begin
  2126. { restore int registers and return }
  2127. reference_reset(ref,4,[]);
  2128. ref.index:=NR_FRAME_POINTER_REG;
  2129. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2130. end;
  2131. end
  { no stack frame: plain return through R14 }
  2132. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2133. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2134. else
  2135. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2136. end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got.  Nothing is emitted unless PIC code is being
    created, the current procedure needs the GOT and the target uses a GOT
    for PIC. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      poolref : treference;
      poollabel,
      piclabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sr : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before the GOT initialization code, allocate the registers used for
        procedure parameters to prevent usage of these registers for temp
        operations in later stages of code generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sr:=RS_R0 to RS_R3 do
        if sr in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for the GOT calculations
        directly. Otherwise the init code can be distorted in later stages
        of code generation. }
      a_reg_alloc(list,NR_R12);

      { load the constant pool entry PC relative into R12 }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));

      { the pool entry holds the GOT symbol relative to piclabel, biased by -8 }
      current_asmdata.getaddrlabel(piclabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,piclabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,piclabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { release the registers again, in reverse order }
      a_reg_dealloc(list,NR_R12);
      for sr:=RS_R3 downto RS_R0 do
        if sr in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.
    Only references with addressmode AM_OFFSET are accepted.  References
    that carry a symbol, or whose offset and negated offset both fail
    is_shifter_const, are first simplified by fixref.  A shifted index, or a
    negative index without a base register, raises an internal error. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      rot : byte;
      href : treference;
      mov : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      href:=ref;

      { Be sure to have a base register: promote a lone index to base }
      if href.base=NR_NO then
        begin
          if href.shiftmode<>SM_None then
            internalerror(2014020702);
          if href.signindex<0 then
            internalerror(200312023);
          href.base:=href.index;
          href.index:=NR_NO;
        end;

      if assigned(href.symbol) or
         (not(is_shifter_const(href.offset,rot)) and
          not(is_shifter_const(-href.offset,rot))) then
        fixref(list,href);

      { expect a base here if there is an index }
      if (href.index<>NR_NO) and (href.base=NR_NO) then
        internalerror(200312022);

      if href.index=NR_NO then
        begin
          if href.base=NR_NO then
            { neither base nor index: the address is just the offset }
            a_load_const_reg(list,OS_ADDR,href.offset,r)
          else if href.offset=0 then
            begin
              { plain register copy, registered as a move instruction }
              mov:=taicpu.op_reg_reg(A_MOV,r,href.base);
              list.concat(mov);
              add_move_instruction(mov);
            end
          else
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r);
        end
      else
        begin
          if href.shiftmode<>SM_None then
            internalerror(200312021);
          { combine base and index according to the sign of the index }
          if href.signindex>=0 then
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.base,href.index,r)
          else
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,href.base,href.index,r);
          if href.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,r,r);
        end;
    end;
  { fixref: rewrites ref so that it no longer carries a symbol or an offset.
    The symbol/offset is turned into a constant entry appended to
    current_procinfo.aktlocaldata (or, on arm-darwin for external-like
    symbols, loaded through g_indirect_sym_load); the resulting value is
    brought into a temporary register, which then becomes the base or the
    index of ref.  On return ref.offset is 0 and ref.symbol is nil; a PC
    base or index in the incoming reference is dropped. }
  2231. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2232. var
  2233. tmpreg, tmpreg2 : tregister;
  2234. tmpref : treference;
  2235. l, piclabel : tasmlabel;
  2236. indirection_done : boolean;
  2237. begin
  2238. { absolute symbols can't be handled directly, we've to store the symbol reference
  2239. in the text segment and access it pc relative
  2240. For now, we assume that references where base or index equals to PC are already
  2241. relative, all other references are assumed to be absolute and thus they need
  2242. to be handled extra.
  2243. A proper solution would be to change refoptions to a set and store the information
  2244. if the symbol is absolute or relative there.
  2245. }
  2246. { create consts entry }
  2247. reference_reset(tmpref,4,[]);
  2248. current_asmdata.getjumplabel(l);
  2249. cg.a_label(current_procinfo.aktlocaldata,l);
  2250. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2251. piclabel:=nil;
  2252. tmpreg:=NR_NO;
  2253. indirection_done:=false;
  { choose the kind of constant that follows label l in aktlocaldata }
  2254. if assigned(ref.symbol) then
  2255. begin
  2256. if (target_info.system=system_arm_darwin) and
  2257. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2258. begin
  { Darwin: the address comes from g_indirect_sym_load; no constant is
    appended and the "load consts entry" step below is skipped }
  2259. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2260. if ref.offset<>0 then
  2261. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2262. indirection_done:=true;
  2263. end
  2264. else if ref.refaddr=addr_gottpoff then
  2265. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2266. else if (cs_create_pic in current_settings.moduleswitches) then
  2267. if (tf_pic_uses_got in target_info.flags) then
  2268. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2269. else
  2270. begin
  2271. { ideally, we would want to generate
  2272. ldr r1, LPICConstPool
  2273. LPICLocal:
  2274. ldr/str r2,[pc,r1]
  2275. ...
  2276. LPICConstPool:
  2277. .long _globsym-(LPICLocal+8)
  2278. However, we cannot be sure that the ldr/str will follow
  2279. right after the call to fixref, so we have to load the
  2280. complete address already in a register.
  2281. }
  2282. current_asmdata.getaddrlabel(piclabel);
  2283. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2284. end
  2285. else
  2286. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2287. end
  2288. else
  { no symbol: the constant is just the offset }
  2289. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2290. { load consts entry }
  2291. if not indirection_done then
  2292. begin
  2293. tmpreg:=getintregister(list,OS_INT);
  2294. tmpref.symbol:=l;
  2295. tmpref.base:=NR_PC;
  2296. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  { with a GOT, the loaded constant is used as index relative to
    current_procinfo.got for a second load; ref.offset is added afterwards }
  2297. if (cs_create_pic in current_settings.moduleswitches) and
  2298. (tf_pic_uses_got in target_info.flags) and
  2299. assigned(ref.symbol) then
  2300. begin
  2301. reference_reset(tmpref,4,[]);
  2302. tmpref.base:=current_procinfo.got;
  2303. tmpref.index:=tmpreg;
  2304. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2305. if ref.offset<>0 then
  2306. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2307. end;
  2308. end;
  { PIC without GOT: the constant was created relative to piclabel (with
    -8 applied to the offset), so PC is added at piclabel into a fresh
    address register }
  2309. if assigned(piclabel) then
  2310. begin
  2311. cg.a_label(list,piclabel);
  2312. tmpreg2:=getaddressregister(list);
  2313. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2314. tmpreg:=tmpreg2
  2315. end;
  2316. { This routine can be called with PC as base/index in case the offset
  2317. was too large to encode in a load/store. In that case, the entire
  2318. absolute expression has been re-encoded in a new constpool entry, and
  2319. we have to remove the use of PC from the original reference (the code
  2320. above made everything relative to the value loaded from the new
  2321. constpool entry) }
  2322. if is_pc(ref.base) then
  2323. ref.base:=NR_NO;
  2324. if is_pc(ref.index) then
  2325. ref.index:=NR_NO;
  { merge tmpreg into what is left of ref: with base and index present the
    base is added into tmpreg and tmpreg becomes the new base; with only a
    base tmpreg becomes an unshifted positive index; otherwise tmpreg
    becomes the base }
  2326. if (ref.base<>NR_NO) then
  2327. begin
  2328. if ref.index<>NR_NO then
  2329. begin
  2330. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2331. ref.base:=tmpreg;
  2332. end
  2333. else
  2334. if ref.base<>NR_PC then
  2335. begin
  2336. ref.index:=tmpreg;
  2337. ref.shiftimm:=0;
  2338. ref.signindex:=1;
  2339. ref.shiftmode:=SM_None;
  2340. end
  2341. else
  2342. ref.base:=tmpreg;
  2343. end
  2344. else
  2345. ref.base:=tmpreg;
  2346. ref.offset:=0;
  2347. ref.symbol:=nil;
  2348. end;
  { Copies len bytes from source to dest by calling the run time helper
    'FPC_MOVE'.  The three parameter locations are taken from the system
    procedure 'MOVE'; the parameters are loaded in the order length,
    destination address, source address.  The volatile integer and FPU
    registers of pocall_default are allocated around the call. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,lenpara : TCGPara;
      movedef : tprocdef;
      volint,volfpu : tcpuregisterset;
    begin
      movedef:=search_system_proc('MOVE');
      srcpara.init;
      dstpara.init;
      lenpara.init;
      paramanager.getintparaloc(list,movedef,1,srcpara);
      paramanager.getintparaloc(list,movedef,2,dstpara);
      paramanager.getintparaloc(list,movedef,3,lenpara);

      { load the parameters, last one first }
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);

      { the call may overwrite every volatile register }
      volint:=paramanager.get_volatile_registers_int(pocall_default);
      volfpu:=paramanager.get_volatile_registers_fpu(pocall_default);
      alloccpuregisters(list,R_INTREGISTER,volint);
      alloccpuregisters(list,R_FPUREGISTER,volfpu);
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,volfpu);
      dealloccpuregisters(list,R_INTREGISTER,volint);

      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list    : assembler list the instructions are appended to
    source  : reference to the first byte to read
    dest    : reference to the first byte to write
    len     : number of bytes to copy; nothing is emitted for len=0
    aligned : selects the word-wise strategies; when false only byte
              accesses are generated by this routine

    Strategy selection as implemented below:
      - aligned and len=4 / len=2 : one load/store pair of that size
      - aligned and len<=helpsize : unrolled copy, up to maxtmpreg words are
                                    loaded first and stored afterwards, the
                                    rest is copied in 4/2/1 byte pieces
      - not aligned and len<=4    : up to four byte load/store pairs
      - everything else           : a counted byte copy loop }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if it might conflict with reserved ones; also +2 is used because of the regs required for referencing}
      maxtmpreg_thumb = 5;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop for ARM/Thumb-2 using post-indexed addressing on srcref and
      dstref; count div size elements of size bytes are copied in the loop,
      the count mod size remaining bytes afterwards.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { the flag-setting decrement is placed between the load and the store }
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remainder }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { Copy loop for Thumb code: no post-indexed addressing is used, the
      base registers of srcref/dstref are advanced with explicit adds.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { advance by the element size; the only current caller passes
          size=1, for which this emits the same code as before }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remainder }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest length handled by the unrolled copy }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          srcreg:=getintregister(list,OS_ADDR);

          { explicit pc relative addressing, could be
            e.g. a floating point constant }
          if source.base=NR_PC then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { first load as many words as there are temp registers ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          destreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,dest,destreg);
          reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
          { ... then store them in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy what is left in pieces of 4, 2 or 1 byte }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the references returned by the internal load/store helpers
                are reused for the following bytes }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference carries the temppos of the source
                (this used to pass dest.temppos by mistake) }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                 genloop(len,4)
               else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    as soon as one of the two references has an alignment of 1 or 3;
    otherwise the aligned variant of g_concatcopy_internal is used. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      treat_as_aligned : boolean;
    begin
      treat_as_aligned:=not((source.alignment in [1,3]) or
                            (dest.alignment in [1,3]));
      g_concatcopy_internal(list,source,dest,len,treat_as_aligned);
    end;
  { Overflow check without an explicit overflow location: calls
    g_overflowCheck_loc with a location whose loc field is LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      no_ovloc : tlocation;
    begin
      { only the loc field is initialised here }
      no_ovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,no_ovloc);
    end;
  { Emits an overflow check: a conditional branch over a call to
    FPC_OVERFLOW. Nothing is emitted unless cs_check_overflow is set in the
    current local switches.

    ovloc.loc=LOC_VOID  : the branch condition is derived from def and, for
                          pointer/unsigned/boolean/char types, from the
                          opcode of the last instruction in the list
    ovloc.loc=LOC_FLAGS : the branch is taken on the inverse of
                          ovloc.resflags; the flags are deallocated afterwards
    anything else       : internalerror(200409281) }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      oklabel : tasmlabel;
      skipjmp : TAiCpu;
      okflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(oklabel);
      case ovloc.loc of
        LOC_VOID:
          begin
            skipjmp:=taicpu.op_sym(A_B,oklabel);
            skipjmp.is_jmp:=true;
            { these types are checked via the carry flag, all others via
              the overflow flag }
            carrybased:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if carrybased then
              begin
                { the no-overflow condition depends on whether the last
                  emitted instruction was a subtraction }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  skipjmp.SetCondition(C_CS)
                else
                  skipjmp.SetCondition(C_CC);
              end
            else
              skipjmp.SetCondition(C_VC);
            list.concat(skipjmp);
          end;
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,oklabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,oklabel);
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { nothing to do here: the registers are saved by g_proc_entry }
    end;
  { Intentionally emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { nothing to do here: the registers are restored by g_proc_exit }
    end;
  { Emits a jump to l that is taken when the comparison condition cond
    holds. For Thumb code the conditional branch is emitted with the
    inverted condition around an unconditional jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          { plain conditional branch to the target }
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Returns the opcode for a scalar mm move/conversion between the two
    given float sizes: A_VMOV for equal OS_F32/OS_F64 sizes, A_VCVT between
    OS_F32 and OS_F64. Every other combination has no entry and raises
    internalerror(200312205). }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      { first index: fromsize, second index: tosize }
      optable : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    var
      op : tasmop;
    begin
      op:=optable[fromsize,tosize];
      result:=op;
      if op=A_NONE then
        internalerror(200312205);
    end;
  { Returns the opcode postfix belonging to the instruction selected by
    get_scalar_mm_op for the same pair of sizes; PF_None is returned for
    combinations without an entry. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      { first index: fromsize, second index: tosize }
      postfixtable : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      get_scalar_mm_prefix:=postfixtable[fromsize,tosize];
    end;
  { Emits a scalar mm register to mm register move or conversion from reg1
    to reg2. Only scalar shuffles (or no shuffle) are supported, anything
    else raises internalerror(2009112407). A resulting VMOV is registered
    as a move instruction.
    Note that the lookup helpers are called with (tosize,fromsize), i.e.
    in the opposite order of their parameter names. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407);

      movinstr:=setoppostfix(
        taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),
        get_scalar_mm_prefix(tosize,fromsize));
      list.concat(movinstr);

      if movinstr.opcode=A_VMOV then
        add_move_instruction(movinstr);
    end;
  { Loads a scalar value of size fromsize from ref into the mm register reg,
    converting it to tosize if the sizes differ.

    Integer sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as the float
    size of the same width and must then match tosize. References with an
    alignment of 1 or 2 are loaded through integer registers and moved to
    the mm register afterwards; all other references use VLDR. Non-scalar
    shuffles are rejected with internalerror(2009112413). }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpreg,
      loadreg : tregister;
      gppair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      case fromsize of
        OS_32,OS_S32:
          begin
            fromsize:=OS_F32;
            { an integer is loaded as raw data, so no conversion is possible }
            if (fromsize<>tosize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            fromsize:=OS_F64;
            { an integer is loaded as raw data, so no conversion is possible }
            if (fromsize<>tosize) then
              internalerror(2009112901);
          end;
      end;

      { with a conversion pending, the value is loaded into a temporary
        mm register first }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        begin
          handle_load_store(list,A_VLDR,PF_None,loadreg,ref);
        end
      else
        begin
          case fromsize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_load_ref_reg(list,OS_32,OS_32,ref,gpreg);
                a_loadmm_intreg_reg(list,OS_32,OS_F32,gpreg,loadreg,mms_movescalar);
              end;
            OS_F64:
              begin
                gppair.reglo:=getintregister(list,OS_32);
                gppair.reghi:=getintregister(list,OS_32);
                cg64.a_load64_ref_reg(list,ref,gppair);
                cg64.a_loadmm_intreg64_reg(list,OS_F64,gppair,loadreg);
              end;
            else
              internalerror(2009112412);
          end;
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar value held in the mm register reg to ref, converting
    it from fromsize to tosize first if the sizes differ.

    Integer target sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as the
    float size of the same width and must then match fromsize. References
    with an alignment of 1 or 2 are written through integer registers; all
    other references use VSTR. Non-scalar shuffles are rejected with
    internalerror(2009112416). }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpreg,
      storereg : tregister;
      gppair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      case tosize of
        OS_32,OS_S32:
          begin
            tosize:=OS_F32;
            { an integer is stored as raw data, so no conversion is possible }
            if (fromsize<>tosize) then
              internalerror(2009112801);
          end;
        OS_64,OS_S64:
          begin
            tosize:=OS_F64;
            { an integer is stored as raw data, so no conversion is possible }
            if (fromsize<>tosize) then
              internalerror(2009112901);
          end;
      end;

      { convert into a temporary mm register when the sizes differ }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        begin
          handle_load_store(list,A_VSTR,PF_None,storereg,ref);
        end
      else
        begin
          case tosize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpreg,shuffle);
                a_load_reg_ref(list,OS_32,OS_32,gpreg,ref);
              end;
            OS_F64:
              begin
                gppair.reglo:=getintregister(list,OS_32);
                gppair.reghi:=getintregister(list,OS_32);
                cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gppair);
                cg64.a_load64_reg_ref(list,gppair,ref);
              end;
            else
              internalerror(2009112417);
          end;
        end;
    end;
  { Moves the contents of the integer register intreg to the mm register
    mmreg with a VMOV. Only a raw transfer is supported: tosize has to be
    OS_F32, fromsize OS_32 or OS_S32, and a shuffle (if any) has to be
    scalar; otherwise an internal error is raised. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { raw data transfer only, no conversions }
      if (tosize<>OS_F32) then
        internalerror(2009112419);
      if not(fromsize in [OS_32,OS_S32]) then
        internalerror(2009112420);
      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);

      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves the contents of the mm register mmreg to the integer register
    intreg with a VMOV. Only a raw transfer is supported: fromsize has to
    be OS_F32, tosize OS_32 or OS_S32, and a shuffle (if any) has to be
    scalar; otherwise an internal error is raised. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { raw data transfer only, no conversions }
      if (fromsize<>OS_F32) then
        internalerror(2009112430);
      if not(tosize in [OS_32,OS_S32]) then
        internalerror(2009112420);
      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);

      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { mm register operation. The vfp doesn't support xor nor any other
    logical operation, but this routine is used to initialise global mm
    regvars, so the only accepted form is "xor reg,reg", which is
    implemented by moving an integer zero into the mm register.

    internalerror(2009112906) : op is not OP_XOR
    internalerror(2009112907) : src<>dst, the register size does not match
                                size, or a shuffle was passed
    internalerror(2009112908) : size is neither OS_F32 nor OS_F64 }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op<>OP_XOR then
        internalerror(2009112906);

      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);

      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      case size of
        OS_F32:
          list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg));
        OS_F64:
          { both source operands of the move are the zero register }
          list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg));
        else
          internalerror(2009112908);
      end;
    end;
  { For 8 and 16 bit sizes, re-loads dst from OS_32 to size after one of
    the operations MUL, SHL, ADD, SUB or NEG; nothing is emitted for other
    operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      ops_needing_adjust = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      small_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in ops_needing_adjust) then
        exit;
      if size in small_sizes then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits an MLA with the four given operands. Before that, every operand
    that is R15 (or R13 when generating Thumb or Thumb-2 code) is copied
    into a fresh integer register which is used instead. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces operand by a copy if it is one of the registers above }
    procedure substitute_special(var operand : TRegister);
      var
        copyreg : TRegister;
        is_sp_in_thumb,
        is_pc : boolean;
      begin
        is_sp_in_thumb:=(GenerateThumbCode or GenerateThumb2Code) and
                        (getsupreg(operand)=RS_R13);
        is_pc:=getsupreg(operand)=RS_R15;
        if is_sp_in_thumb or is_pc then
          begin
            copyreg:=getintregister(list,OS_INT);
            a_load_reg_reg(list,OS_INT,OS_INT,operand,copyreg);
            operand:=copyreg;
          end;
      end;

    begin
      substitute_special(op1);
      substitute_special(op2);
      substitute_special(op3);
      substitute_special(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { Emits a call to fpc_read_tp and copies R0 into
    current_procinfo.tlsoffset afterwards; R0 is marked as allocated
    around the sequence. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    begin
      list.concat(tai_regalloc.alloc(NR_R0,nil));

      a_call_name(list,'fpc_read_tp',false);
      { the value is taken from R0 after the call }
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);

      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation "regdst := regdst op regsrc", respectively
    "regdst := op regsrc" for the unary operations.

    OP_NEG : RSBS on the low words followed by RSC on the high words
    OP_NOT : 32 bit NOT on each half
    other  : forwarded to a_op64_reg_reg_reg with regdst as second source }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation "reg := reg op value": the two-operand form is
    forwarded to a_op64_const_reg_reg with reg as source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      { source and destination are the same register pair }
      a_op64_const_reg_reg(list,op,size,value,reg,reg);
    end;
  { 64 bit operation "regdst := regsrc op value" without overflow
    reporting: calls the checkoverflow variant with setflags=false and
    discards the returned overflow location. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { 64 bit three register operation without overflow reporting: calls the
    checkoverflow variant with setflags=false and discards the returned
    overflow location. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Moves the integer register pair intreg (low word, high word) to the mm
    register mmreg with a single VMOV. Only raw OS_F64 transfers are
    supported; any other mmsize raises internalerror(2009112405). }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    var
      transfer : taicpu;
    begin
      { raw data transfer only, no conversions }
      if (mmsize<>OS_F64) then
        internalerror(2009112405);
      transfer:=taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi);
      list.concat(transfer);
    end;
  { Moves the mm register mmreg to the integer register pair intreg (low
    word, high word) with a single VMOV. Only raw OS_F64 transfers are
    supported; any other mmsize raises internalerror(2009112406). }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    var
      transfer : taicpu;
    begin
      { raw data transfer only, no conversions }
      if (mmsize<>OS_F64) then
        internalerror(2009112406);
      transfer:=taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg);
      list.concat(transfer);
    end;
  { 64 bit operation "regdst := regsrc op value" with optional overflow
    reporting.

    list     : assembler list the instructions are appended to
    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG and OP_NOT
               raise internalerror(2012022501), any other operation
               raises internalerror(2003083101)
    size     : operand size; overflow flags are only reported for OS_64
    value    : the constant operand, processed as low and high 32 bit half
    setflags : request that the flags reflect the result of the whole
               64 bit operation (also enabled by tbasecgarm(cg).cgsetflags)
    ovloc    : set to LOC_VOID, or to LOC_FLAGS with the flag to test when
               flags were requested for an OS_64 ADD/SUB

    When flags are requested, NR_DEFAULTFLAGS is left allocated on return.
    In all other cases the flags are only needed to pass the carry from
    the low to the high word and are deallocated again before returning. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { both halves set the flags, so that they describe the complete
            64 bit result }
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    { the constant is loaded before the flags get allocated }
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the ARM handles the carry flag differently for SUB and ADD }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { the halves are independent of each other }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed, nobody reads the flags
                  afterwards (same as in a_op64_reg_reg_reg_checkoverflow) }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the borrow has been consumed, nobody reads the flags
                  afterwards (same as in a_op64_reg_reg_reg_checkoverflow) }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { 64 bit operation on register pairs with optional overflow reporting:
    regdst := regsrc1 + regsrc2 for OP_ADD, regdst := regsrc2 - regsrc1 for
    OP_SUB, and the 32 bit operation applied to both halves for
    OP_AND/OP_OR/OP_XOR.

    OP_NEG and OP_NOT raise internalerror(2012022502), any other
    unsupported operation raises internalerror(2003083101).

    If setflags or tbasecgarm(cg).cgsetflags is set, ADD/SUB set the flags
    on both halves and leave NR_DEFAULTFLAGS allocated; for size=OS_64
    ovloc then becomes LOC_FLAGS with the flag to test. In every other
    case ovloc.loc is LOC_VOID and the flags are released again. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      wantflags : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      wantflags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);
      if not wantflags then
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                { the flags only carry from the low to the high word }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                { the flags only pass the borrow from the low to the high word }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
          exit;
        end;

      { flag setting variant: both halves use the S postfix and the flags
        stay allocated for the consumer of ovloc }
      case op of
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
          end;
        else
          internalerror(2003083101);
      end;

      if size=OS_64 then
        begin
          { the ARM handles the carry flag differently for SUB and ADD }
          ovloc.loc:=LOC_FLAGS;
          if op=OP_ADD then
            ovloc.resflags:=F_CS
          else if op=OP_SUB then
            ovloc.resflags:=F_CC;
        end;
    end;
  { Creates the integer register allocator for Thumb code after calling
    the inherited initialisation. R0..R7 are made available, except that
    R7 is left out when the current procedure uses it as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or
         (current_procinfo.framepointer<>NR_R7) then
        { R7 is free for allocation }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 serves as frame pointer }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocators (in that order)
    and then lets the inherited implementation finish the clean-up. }
  procedure tthumbcgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      i : integer;
    begin
      for i:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[i]].free;
      inherited done_register_allocators;
    end;
  { Emits the procedure prologue for Thumb code.

    list         - instruction list the prologue is appended to
    localsize    - size of the local area in bytes; rounded up to a multiple
                   of 4 before use
    nostackframe - when true, no instruction is emitted at all

    Steps performed when a stack frame is wanted:
      1. push the used non-volatile integer registers together with R14
         (and R7 when the frame pointer is not the stack pointer),
      2. lower the stack pointer by the (aligned) local size,
      3. copy the stack pointer into the frame pointer if one is in use.

    Changes compared to the previous revision:
      - a local size of exactly 508 bytes is now handled by the single
        SUB (as g_proc_exit already does with its matching ADD) instead of
        "sub sp,#508" followed by a useless "sub sp,#0";
      - the never-used local reference variable has been removed. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);

          { registers that have to survive the call: everything used by the
            procedure minus the registers the calling convention treats as
            volatile }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              //!!!! a_reg_alloc(list,NR_R12);
              //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
            end;

          { the return address is always saved; with a separate frame
            pointer R7 is saved as well }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);

          { safely estimate stack size: a large frame needs R4 as scratch
            register for the stack pointer adjustment below, so it has to be
            saved as well }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;

          { count the bytes occupied by the pushed registers }
          registerarea:=0;
          if regs<>[] then
            begin
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
            end;

          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;

          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

              { one SUB handles up to 508 bytes, two SUBs up to 1016 bytes,
                anything larger goes through R4 }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  include(regs,RS_R4);

                  //!!!! if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                  //!!!!   a_reg_alloc(list,NR_R12);
                  //!!!! a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  //!!!! list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  //!!!! a_reg_dealloc(list,NR_R12);
                end;
            end;

          { establish the frame pointer as a copy of the final stack pointer }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
            end;
        end;
    end;
  { Emits the procedure epilogue for Thumb code.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true only a plain return through R14 is emitted

    With a stack frame, the local area is released and the saved registers
    are popped with the program counter in place of the link register, which
    performs the return.  This is only emitted when the frame pointer is the
    stack pointer or the target is a Darwin system; in every other
    configuration nothing is emitted. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      framesize : longint;
      supreg : byte;
      restoreregs : tcpuregisterset;
      savedbytes : DWord;
      misalignment : pint;
      has_stack_paras : Boolean;

    { return via the link register: BX where the CPU has it, otherwise a
      move into the program counter }
    procedure emit_lr_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      if nostackframe then
        begin
          emit_lr_return;
          exit;
        end;

      has_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { same register set as saved on entry, but with R15 included so that
        the pop also performs the return }
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(restoreregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(restoreregs,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for supreg:=RS_R0 to RS_R15 do
        if supreg in restoreregs then
          inc(savedbytes,4);
      misalignment:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if has_stack_paras then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalignment,current_settings.alignment.localalignmax)-misalignment;

      { nothing is generated for a separate frame pointer on non-Darwin
        targets }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; an empty area needs no instruction }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if restoreregs=[] then
        emit_lr_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,restoreregs));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg
    (Thumb version).

    If the source is at least as large as the destination, only tosize bytes
    are loaded.  References whose alignment (1 or 2) is smaller than the
    load size are read piecewise - byte by byte or halfword by halfword,
    walking in the direction given by the target endianness - and the pieces
    are combined with LSL/ORR.  Everything else goes through
    handle_load_store. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      pieceref : treference;
      piecereg,addrreg : tregister;
      step : integer;

    { makes pieceref address the same memory as Ref; complicated references
      (symbol, index register, offset outside -124..124, or a register that
      is also the destination) are first materialised in a new register }
    procedure init_pieceref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(pieceref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          pieceref:=ref;
      end;

    { advances pieceref by advance bytes, loads the next piece into piecereg,
      shifts it left by shift bits and ORs it into reg }
    procedure merge_piece(piecesize : tcgsize;advance,shift : integer);
      begin
        inc(pieceref.offset,advance);
        a_internal_load_ref_reg(list,piecesize,piecesize,pieceref,piecereg);
        list.concat(taicpu.op_reg_const(A_LSL,piecereg,shift));
        list.concat(taicpu.op_reg_reg(A_ORR,reg,piecereg));
      end;

    begin
      if TCGSize2Size[ToSize]<=TCGSize2Size[FromSize] then
        FromSize:=ToSize;

      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { the least significant piece is read first; on big endian
            targets it lives at the highest address }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                init_pieceref;
                if target_info.endian=endian_big then
                  inc(pieceref.offset,1);
                piecereg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                { the upper byte carries the sign of a signed halfword }
                if FromSize=OS_16 then
                  merge_piece(OS_8,step,8)
                else
                  merge_piece(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                piecereg:=getintregister(list,OS_INT);
                init_pieceref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,reg);
                    merge_piece(OS_16,step*2,16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                    merge_piece(OS_8,step,8);
                    merge_piece(OS_8,step,16);
                    merge_piece(OS_8,step,24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg (Thumb version).

    Constants accepted by is_thumb_imm are loaded with a MOV; any other
    value is stored as a 32 bit literal in the local data of the current
    procedure and fetched with a PC relative LDR.  Only 8, 16 and 32 bit
    sizes are accepted. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      literal_label : tasmlabel;
      literal_ref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { emit "label: .long a" into the local data ... }
      current_asmdata.getjumplabel(literal_label);
      cg.a_label(current_procinfo.aktlocaldata,literal_label);
      reference_reset(literal_ref,4,[]);
      { ... remembering the label entry that was just appended }
      literal_ref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      { ... and load it relative to the program counter }
      literal_ref.symbol:=literal_label;
      literal_ref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,literal_ref));
    end;
  { Subtracts ioffset from the "self" parameter of procdef, wherever the
    caller side parameter location puts it (register or memory).

    When ioffset is accepted by is_thumb_imm the subtraction uses the
    constant directly.  Otherwise R4 is saved on the stack, loaded with
    ioffset from a literal placed in the local data of the current
    procedure, used as the subtrahend and restored afterwards. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;

    { pushes R4 and loads ioffset into it through an aligned literal }
    procedure push_r4_and_load_ioffset;
      var
        literal_ref : treference;
        literal_label : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(literal_ref,4,[]);
        current_asmdata.getjumplabel(literal_label);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,literal_label);
        literal_ref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        literal_ref.symbol:=literal_label;
        literal_ref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,literal_ref));
      end;

    { restores the R4 saved by push_r4_and_load_ioffset }
    procedure pop_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(200305251);

      { visit every location the self parameter is made of }
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if not(is_thumb_imm(ioffset)) then
                  begin
                    push_r4_and_load_ioffset;
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R4,curloc^.register);
                    pop_r4;
                  end
                else
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register);
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if not(is_thumb_imm(ioffset)) then
                  begin
                    push_r4_and_load_ioffset;
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R4,selfref);
                    pop_r4;
                  end
                else
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref);
              end
            else
              internalerror(200309189);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Thumb front end for the inherited handle_load_store.

    References that the checks below reject for the requested load/store
    are replaced by a plain [register] reference: the address is computed
    into a newly allocated register with a_loadaddr_ref_reg first.  The
    (possibly rewritten) reference is then passed to the inherited
    implementation, whose result is returned. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      finalref : treference;
      addrreg : TRegister;
      word_access,
      byte_access,
      half_access,
      sp_is_base,
      sp_involved,
      via_register : boolean;
    begin
      { classify the access by opcode/postfix combination }
      word_access:=(oppostfix=PF_None) and ((op=A_LDR) or (op=A_STR));
      byte_access:=((oppostfix=PF_B) and ((op=A_LDR) or (op=A_STR))) or
                   ((oppostfix=PF_None) and ((op=A_LDRB) or (op=A_STRB)));
      half_access:=((oppostfix=PF_H) and ((op=A_LDR) or (op=A_STR))) or
                   ((oppostfix=PF_None) and ((op=A_LDRH) or (op=A_STRH)));

      sp_is_base:=ref.base=NR_STACK_POINTER_REG;
      sp_involved:=sp_is_base or (ref.index=NR_STACK_POINTER_REG);

      via_register:=
        { LDR/STR limitations }
        (word_access and not(sp_is_base) and (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and (sp_involved or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (sp_involved or (abs(ref.offset)>62) or ((abs(ref.offset) mod 2)<>0))) or
        { stack pointer relative word load with a large offset }
        ((op=A_LDR) and (oppostfix in [PF_None]) and sp_is_base and
         (abs(ref.offset)>1020)) or
        { sign extending loads and any remaining load with a large offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or (abs(ref.offset)>124)));

      finalref:=ref;
      if via_register then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(finalref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, finalref);
    end;
  { Emits "dst := dst op src" using two-operand Thumb instructions.

    list - instruction list to append to
    op   - operation; OP_DIV/OP_IDIV are not supported and raise an
           internal error
    size - operand size; OP_ROL accepts 32 bit operands only
    src  - second operand (the operand of OP_NEG/OP_NOT)
    dst  - first operand and destination

    Fixes compared to the previous revision:
      - OP_ROL computed the complementary count (32-src) but then rotated by
        src itself, i.e. it generated a rotate right by the original count.
        The rotate now uses the computed count.
      - the generic branch allocated the flags register without releasing
        it again; it is now released directly after the instruction, the way
        other flag clobbering sequences in this unit pair a_reg_alloc with
        a_reg_dealloc. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing by (32 - count):
                tmpreg := 32
                tmpreg := tmpreg - src
                dst    := dst ror tmpreg }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            { mark the flags as clobbered for the duration of the
              instruction only }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
            a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := dst op a" for a constant a (Thumb version).

    An ADD/SUB whose negated constant is accepted by is_thumb_imm is first
    turned into the opposite operation with the negated constant.  ADD/SUB
    with an immediate that fits is emitted directly; a few trivial cases
    (multiply/divide by 1, multiply by 0, multiply/divide by -1, AND with 0
    or with all bits set) are simplified; shifts use the immediate form;
    everything else loads the constant into a temporary register and
    defers to a_op_reg_reg.

    The sections guarded by DUMMY are inactive code kept for reference. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          { x + (-c) -> x - c   and   x - (-c) -> x + c }
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
          { reporting the overflow location is not implemented here yet:
            //!!! if cgsetflags (or setflags) and size in [OS_8,OS_16,OS_32]:
            //!!!   ovloc.loc:=LOC_FLAGS;
            //!!!   OP_ADD: ovloc.resflags:=F_CS;
            //!!!   OP_SUB: ovloc.resflags:=F_CC; }
        end
      { there could be added some more sophisticated optimizations }
      else if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
        a_load_reg_reg(list,size,size,dst,dst)
      else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
        a_load_const_reg(list,size,0,dst)
      else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
        a_op_reg_reg(list,OP_NEG,size,dst,dst)
      { we do this here instead in the peephole optimizer because
        it saves us a register }
{$ifdef DUMMY}
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
        a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
      { for example : b=a*5 -> b=a*4+a with add instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{roozbeh does this ever happen?}
            internalerror(200308296);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
        end
      { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{does this ever happen?}
            internalerror(201205181);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
        end
      else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
        begin
          { nothing to do on success }
        end
{$endif DUMMY}
      { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
        Just using mov x, #0 might allow some easier optimizations down the line. }
      else if (op = OP_AND) and (dword(a)=0) then
        list.concat(taicpu.op_reg_const(A_MOV,dst,0))
      else if (op = OP_AND) and (not(dword(a))=0) then
        begin
          { x := x AND $FFFFFFFF leaves the register as it is:
            no instruction needed }
        end
      { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
        broader range of shifterconstants.}
{$ifdef DUMMY}
      else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
        list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
      else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
        end
      else if (op in [OP_ADD, OP_SUB, OP_OR]) and
              not(cgsetflags or setflags) and
              split_into_shifter_const(a, imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
        end
{$endif DUMMY}
      else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
        list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a))
      else
        begin
          { general case: materialise the constant and use the register form }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg(list,op,size,constreg,dst);
        end;

      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src op a" (Thumb version).

    The only case handled here is adding a positive multiple of 4, up to
    1020, to R13 with a destination other than R13, which is emitted as a
    single three-operand ADD.  Everything else is delegated to the inherited
    implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_plus_imm : boolean;
    begin
      sp_plus_imm:=(op=OP_ADD) and
                   (src=NR_R13) and
                   (dst<>NR_R13) and
                   ((a mod 4)=0) and
                   (a>0) and
                   (a<=1020);
      if not(sp_plus_imm) then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Converts the condition described by f into 0 or 1 in reg, using a
    conditional branch around two MOVs:

          b<cond> is_true
          mov     reg,#0
          b       done
      is_true:
          mov     reg,#1
      done:

    The flags register is released after the "mov reg,#1". }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      is_true,done : tasmlabel;
      condbranch : taicpu;
    begin
      current_asmdata.getjumplabel(is_true);
      current_asmdata.getjumplabel(done);

      { conditional jump to the "true" part }
      condbranch:=taicpu.op_sym(A_B,is_true);
      condbranch:=setcondition(condbranch,flags_to_cond(f));
      condbranch.is_jmp:=true;
      list.concat(condbranch);

      { condition not met }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,done));

      { condition met }
      cg.a_label(list,is_true);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));
      a_reg_dealloc(list,NR_DEFAULTFLAGS);

      cg.a_label(list,done);
    end;
  { Creates the integer, FPU and MM register allocators for Thumb-2 code.

    Integer registers: R0..R10, R12 and R14, without R9 on Darwin.
    FPU registers:     F0..F7.
    MM registers:      chosen by the selected FPU type (see below). }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_darwin then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      case current_settings.fputype of
        fpu_vfpv3,
        fpu_vfpv4:
          { 32 double registers; D8..D15 are listed last }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        fpu_fpv4_s16,
        fpu_vfpv3_d16:
          { 16 double registers }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        else
          { NOTE(review): this list mixes RS_S* and RS_R* constants -
            kept exactly as it was, confirm that this is intended }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
      end;
    end;
  { Releases the integer, FPU and MM register allocators (in that order)
    before handing over to the inherited implementation. }
  procedure tthumb2cgarm.done_register_allocators;
    const
      owned_allocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      i : integer;
    begin
      for i:=low(owned_allocators) to high(owned_allocators) do
        rg[owned_allocators[i]].free;
      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX) and records in the flags of
    the current procedure that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinstr);
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg (Thumb-2 version).

    Tried in this order:
      1. MOV, if a is accepted by is_thumb32_imm
      2. MVN, if the complement of a is accepted by is_thumb32_imm
      3. MOVW, if a fits into the lower 16 bits
      4. PC relative LDR from a 32 bit literal placed in the local data of
         the current procedure
    Only 8, 16 and 32 bit sizes are accepted. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      literal_label : tasmlabel;
      literal_ref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: go through a literal }
      current_asmdata.getjumplabel(literal_label);
      cg.a_label(current_procinfo.aktlocaldata,literal_label);
      reference_reset(literal_ref,4,[]);
      literal_ref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      literal_ref.symbol:=literal_label;
      literal_ref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,literal_ref));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg
    (Thumb-2 version).

    If the source is at least as large as the destination, only tosize bytes
    are loaded.  References whose alignment (1 or 2) is smaller than the
    load size are read piecewise, walking in the direction given by the
    target endianness; each further piece is merged with
    "orr reg,reg,piece,lsl #n".  Everything else goes through
    handle_load_store. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      pieceref : treference;
      piecereg,addrreg : tregister;
      shiftop : tshifterop;
      step : integer;

    { makes pieceref address the same memory as Ref; complicated references
      (symbol, index register, offset outside -255..maxoffset, or a register
      that is also the destination) are first materialised in a new
      register }
    procedure init_pieceref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-255) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(pieceref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          pieceref:=ref;
      end;

    { advances pieceref by advance bytes, loads the next piece into piecereg
      and ORs it, shifted left by shift bits, into reg }
    procedure merge_piece(piecesize : tcgsize;advance,shift : integer);
      begin
        inc(pieceref.offset,advance);
        a_internal_load_ref_reg(list,piecesize,piecesize,pieceref,piecereg);
        shifterop_reset(shiftop);
        shiftop.shiftmode:=SM_LSL;
        shiftop.shiftimm:=shift;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,piecereg,shiftop));
      end;

    begin
      if TCGSize2Size[ToSize]<=TCGSize2Size[FromSize] then
        FromSize:=ToSize;

      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { the least significant piece is read first; on big endian
            targets it lives at the highest address }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                init_pieceref(4094);
                if target_info.endian=endian_big then
                  inc(pieceref.offset,1);
                piecereg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                { the upper byte carries the sign of a signed halfword }
                if FromSize=OS_16 then
                  merge_piece(OS_8,step,8)
                else
                  merge_piece(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                piecereg:=getintregister(list,OS_INT);
                init_pieceref(4092);
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,pieceref,reg);
                    merge_piece(OS_16,step*2,16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(pieceref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,pieceref,reg);
                    merge_piece(OS_8,step,8);
                    merge_piece(OS_8,step,16);
                    merge_piece(OS_8,step,24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Emits "dst := op src" / "dst := dst op src" (Thumb-2 version).

    Only OP_NOT is handled here: the value is complemented with MVN and, for
    8 and 16 bit sizes, zero or sign extended again with UXTB/SXTB/UXTH/SXTH
    to match the operand size.  All other operations are passed to the
    inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { bring the upper bits back in line with the operand size;
        32 bit operands need no correction }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
      end;
    end;
  { Emits code for "dst := src <op> a" with a constant operand (Thumb-2).

    list     - instruction list the code is appended to
    op       - operation; OP_ADD/OP_SUB may be swapped internally when the
               negated constant is a shifter constant
    size     - operand size
    a        - constant operand
    src, dst - source and destination register
    setflags - request the flag setting form of the instruction (the
               cgsetflags field has the same effect)
    ovloc    - set to LOC_VOID on entry; set to LOC_FLAGS with the carry
               condition for flag setting ADD/SUB on OS_8/OS_16/OS_32

    Constants which are not shifter constants (and all multiplications) are
    handled by a chain of special cases; the last resort loads the constant
    into a register and calls a_op_reg_reg_reg_checkoverflow. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);

    { emits "MOV dst,src,<mode> #imm" }
    procedure emit_shifted_move(mode : tshiftmode; imm : tcgint);
      var
        movshift : tshifterop;
      begin
        shifterop_reset(movshift);
        movshift.shiftmode:=mode;
        movshift.shiftimm:=imm;
        list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,movshift));
      end;

    { emits "MOV dst,src" }
    procedure emit_plain_move;
      begin
        list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
      end;

    var
      rot, maskwidth : byte;
      scratch : tregister;
      shiftop : tshifterop;
      shiftcount : longint;
      viascratch : boolean;
    begin
      ovloc.loc:=LOC_VOID;

      { if the negated constant is a shifter constant, turn an addition into
        a subtraction of the negated value and vice versa }
      if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,rot) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
        end;

      if is_shifter_const(a,rot) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              if a<>0 then
                emit_shifted_move(SM_LSL,a)
              else
                emit_plain_move;
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              { a rotate left by a is emitted as a rotate right by 32-a }
              if a<>0 then
                emit_shifted_move(SM_ROR,32-a)
              else
                emit_plain_move;
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              if a<>0 then
                emit_shifted_move(SM_ROR,a)
              else
                emit_plain_move;
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              if a<>0 then
                emit_shifted_move(SM_LSR,a)
              else
                emit_plain_move;
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              if a<>0 then
                emit_shifted_move(SM_ASR,a)
              else
                emit_plain_move;
            end;
          else
            begin
              { ADD/SUB with a constant outside 0..4095 go through a
                scratch register, everything else uses the immediate form }
              viascratch:=(op in [OP_SUB, OP_ADD]) and
                ((a < 0) or
                 (a > 4095));
              if viascratch then
                begin
                  scratch:=getintregister(list,size);
                  a_load_const_reg(list, size, a, scratch);
                end;
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              if viascratch then
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,scratch),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))))
              else
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));

              { report where the overflow information of the flag setting
                instruction can be found }
              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,shiftcount) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,shiftcount,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,shiftcount) and not(cgsetflags or setflags) then
            begin
              if shiftcount>32 then { roozbeh: does this ever happen? }
                internalerror(200308296);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=shiftcount;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,shiftcount) and not(cgsetflags or setflags) then
            begin
              if shiftcount>32 then { does this ever happen? }
                internalerror(201205181);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=shiftcount;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
          else if (op = OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
          else if (op = OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          { masking with $FFFF is emitted as UXTH }
          else if (op = OP_AND) and (a = $FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { the inverted mask is one continuous run of bits: copy the
            register and clear that bit field with BFC }
          else if (op = OP_AND) and is_continuous_mask(aword(not(a)), rot, maskwidth) then
            begin
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,rot,maskwidth))
            end
          else
            begin
              { last resort: load the constant and use the register form }
              scratch:=getintregister(list,size);
              a_load_const_reg(list,size,a,scratch);
              a_op_reg_reg_reg_checkoverflow(list,op,size,scratch,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 opcode used by a_op_reg_reg_reg_checkoverflow for each TOpCG
      value.  The table is indexed directly by the operation.
      NOTE(review): the entries have to follow the declaration order of
      TOpCG, which is not visible in this unit - confirm against cgbase
      when changing either of them. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
       A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);

  { Emits code for "dst := src2 <op> src1" (Thumb-2).

    list     - instruction list the code is appended to
    op       - operation; OP_NEG and OP_NOT are rejected with an internal
               error, OP_ROL/OP_ROR are only accepted for 32 bit sizes
    size     - operand size
    src1     - second operand (for shifts/rotates: the count)
    src2     - first operand
    dst      - destination register
    setflags - request overflow information (the cgsetflags field has the
               same effect)
    ovloc    - set to LOC_VOID on entry; for multiplications with overflow
               checking it is set to LOC_FLAGS/F_NE

    Multiplications with overflow checking use UMULL/SMULL and compare the
    high word against the expected value (0 resp. the sign extension of the
    low word). }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value; the rotate count is
              calculated in a scratch register so that the input register
              src1 keeps its value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { the high word of the 64 bit product ends up here }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register:
                          copy the operand into the scratch register (not
                          into dst) so that the multiply reads a defined
                          value from tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the low word
                      shifted arithmetically right by 31 }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { see above: the copy has to go into tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
  {$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
  {$endif}
            { plain three register form; the S postfix is added when flags
              were requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the condition described by f in reg: an ITE block is
    emitted which moves 1 into reg when the condition holds and 0 when the
    inverse condition holds.  The size parameter is not used here. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,cond));
      { "then" part of the IT block }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      { "else" part of the IT block }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Generates the procedure prologue (Thumb-2).

    list         - instruction list the prologue is appended to
    localsize    - size of the local stack area, rounded up to 4 here
    nostackframe - when true no code is generated at all

    The prologue saves the used non-volatile integer registers (plus the
    frame pointer/R14 where required) with STM, sets up the frame pointer
    via R12 when one is used, reserves the local stack area and finally
    stores the used non-volatile FPU registers with SFM. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      memref : treference;
      unusedshift : byte;
      firstfpureg,lastfpureg,
      regidx : byte;
      savedregs : tcpuregisterset;
      misalign: pint;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      misalign:=0;
      if nostackframe then
        exit;

      { find the range of FPU registers which have to be saved; every one
        found adds 12 bytes to the misalignment bookkeeping }
      firstfpureg:=RS_NO;
      lastfpureg:=RS_NO;
      for regidx:=RS_F0 to RS_F7 do
        if regidx in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfpureg=RS_NO then
              firstfpureg:=regidx;
            lastfpureg:=regidx;
            inc(misalign,12);
          end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { remember the incoming stack pointer in R12, it is needed below
            to set up the frame pointer }
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(memref,4,[]);
      memref.index:=NR_STACK_POINTER_REG;
      memref.addressmode:=AM_PREINDEXED;
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        savedregs:=savedregs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (savedregs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(savedregs,RS_R14);
      if savedregs<>[] then
        begin
          { every pushed register takes 4 bytes }
          for regidx:=RS_R0 to RS_R15 do
            if (regidx in savedregs) then
              inc(misalign,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,savedregs),PF_FD));
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      { reserve the local stack area, padded so that the stack pointer
        ends up aligned to localalignmax }
      misalign:=misalign mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(localsize,unusedshift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { the size cannot be encoded as immediate: go through R12 }
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      { store the FPU registers found above }
      if firstfpureg<>RS_NO then
        begin
          reference_reset(memref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the reference: calculate the address
                in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfpureg,R_SUBWHOLE),
            lastfpureg-firstfpureg+1,memref));
        end;
    end;
  { Generates the procedure epilogue (Thumb-2).

    list         - instruction list the epilogue is appended to
    parasize     - not used here
    nostackframe - when true only "BX R14" is emitted

    The epilogue reloads the saved FPU registers with LFM, releases the
    local stack area and returns either with "BX R14" (no integer registers
    to restore) or with an LDM which reloads the saved registers. }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      memref : treference;
      firstfpureg,lastfpureg,
      regidx : byte;
      unusedshift : byte;
      restoreregs : tcpuregisterset;
      framesize : longint;
      misalign: pint;
    begin
      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      misalign:=0;
      { restore floating point register }
      firstfpureg:=RS_NO;
      lastfpureg:=RS_NO;
      { find the range of FPU registers saved by the prologue }
      for regidx:=RS_F0 to RS_F7 do
        if regidx in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfpureg=RS_NO then
              firstfpureg:=regidx;
            lastfpureg:=regidx;
            { floating point register space is already included in
              localsize below by calc_stackframe_size
            inc(stackmisalignment,12);
            }
          end;

      if firstfpureg<>RS_NO then
        begin
          reference_reset(memref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the reference: calculate the address
                in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfpureg,R_SUBWHOLE),
            lastfpureg-firstfpureg+1,memref));
        end;

      { determine the integer registers to reload; R15 is loaded in place
        of R14 }
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (restoreregs<>[]) then
        begin
          exclude(restoreregs,RS_R14);
          include(restoreregs,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        restoreregs:=restoreregs+[RS_FRAME_POINTER_REG,RS_R15];

      { every register in the set accounts for 4 bytes on the stack }
      for regidx:=RS_R0 to RS_R15 do
        if (regidx in restoreregs) then
          inc(misalign,4);
      misalign:=misalign mod current_settings.alignment.localalignmax;

      { release the local stack area }
      framesize:=current_procinfo.calc_stackframe_size;
      if (framesize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;
          if is_shifter_const(framesize,unusedshift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,framesize));
            end
          else
            begin
              { the size cannot be encoded as immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      { return }
      if restoreregs=[] then
        list.concat(taicpu.op_reg(A_BX,NR_R14))
      else
        begin
          reference_reset(memref,4,[]);
          memref.index:=NR_STACK_POINTER_REG;
          memref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,memref,R_INTREGISTER,R_SUBWHOLE,restoreregs),PF_FD));
        end;
    end;
  { Emits the load/store instruction "op<oppostfix> reg,ref" (Thumb-2) and
    first rewrites ref into a form which the instruction can take.

    list      - instruction list the code is appended to
    op        - load/store opcode
    oppostfix - postfix attached to the emitted instruction
    reg       - register which is loaded or stored
    ref       - memory reference; passed by value and modified locally

    Returns the reference as it was finally used by the instruction.

    Symbols, references without any register and offsets outside the
    accepted range are loaded resp. calculated in a scratch register.  For
    the FPU/VFP opcodes an index register is folded into the base. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      scratch : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      isfpuop : boolean;
    begin
      scratch:=NR_NO;
      { the FPU/VFP loads and stores have a more limited reference form }
      isfpuop:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         //(ref.offset<-4095) or
         (ref.offset<-255) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (isfpuop and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0) or
           { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
           assigned(ref.symbol)
          )
         ) then
        begin
          reference_reset(poolref,4,[]);

          { load symbol }
          scratch:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              { place symbol+offset in the local data of the procedure and
                load it pc relative through a label }
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;

              if ref.refaddr=addr_gottpoff then
                current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
              else
                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));

              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,scratch,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            a_load_const_reg(list,OS_ADDR,ref.offset,scratch);

          { merge the scratch register into the reference }
          if (ref.base<>NR_NO) then
            begin
              if ref.index<>NR_NO then
                begin
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
                  ref.base:=scratch;
                end
              else
                begin
                  ref.index:=scratch;
                  ref.shiftimm:=0;
                  ref.signindex:=1;
                  ref.shiftmode:=SM_None;
                end;
            end
          else
            ref.base:=scratch;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base, index and offset cannot be used together: add the offset to
        the base }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if scratch<>NR_NO then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,scratch,scratch)
          else
            begin
              scratch:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,scratch);
              ref.base:=scratch;
            end;
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          scratch:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, scratch, NR_R15));
          ref.base := scratch;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if isfpuop and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if scratch<>NR_NO then
            begin
              if ref.base=scratch then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,scratch,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,scratch,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>scratch then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,ref.base,scratch))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
                  ref.base:=scratch;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              scratch:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,ref.index));
              ref.base:=scratch;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Moves a value between two multimedia (VFP) registers.

    Only the OS_F32 -> OS_F32 case generates code: a VMOV.F32 reg2,reg1
    which is also registered as a move instruction.  For every other size
    combination nothing is emitted.
    NOTE(review): the OS_F64 -> OS_F64 and OS_F32 -> OS_F64 cases only ever
    contained disabled code; confirm that they cannot be reached, otherwise
    the move is silently dropped.  The disabled code was:
      F64 -> F64:
        //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
        //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
      F32 -> F64:
        //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32))
        //list.concat(nil);
    The shuffle parameter is not used. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      moveinstr: taicpu;
    begin
      if (fromsize<>OS_F32) or
         (tosize<>OS_F32) then
        exit;

      moveinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
      list.Concat(moveinstr);
      add_move_instruction(moveinstr);
    end;
  { Loads the multimedia register reg from memory at ref with VLDR.
    The reference is legalised by handle_load_store; fromsize, tosize and
    shuffle are not evaluated here. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VLDR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Stores the multimedia register reg to memory at ref with VSTR.
    The reference is legalised by handle_load_store; fromsize, tosize and
    shuffle are not evaluated here. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VSTR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Copies the integer register intreg into the multimedia register mmreg
    with VMOV.  Only a target size of OS_F32 is supported, anything else
    raises internal error 2012100813.  The shuffle parameter is currently
    not checked (the test is disabled). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if //(shuffle<>nil) or
         (tosize<>OS_F32) then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Copies the multimedia register mmreg into the integer register intreg
    with VMOV.  Only a source size of OS_F32 is supported, anything else
    raises internal error 2012100814.  The shuffle parameter is currently
    not checked (the test is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if //(shuffle<>nil) or
         (fromsize<>OS_F32) then
        internalerror(2012100814)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is implemented here: the low word is negated with a flag
    setting RSB against 0 and the high word is calculated with SBC from a
    register holding 0.  All other operations are handled by the inherited
    implementation. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      { the flags carry the borrow from the low to the high word }
      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation "regdst := regdst <op> regsrc" for
    Thumb.

    list           - instruction list the code is appended to
    op             - OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or
                     OP_SUB; anything else raises internal error 2003083101
    size           - not evaluated here
    regsrc, regdst - source and destination register pair

    The two-instruction sequences which pass the carry from the low to the
    high word (NEG, ADD, SUB) keep NR_DEFAULTFLAGS allocated for exactly the
    duration of the sequence. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { calculated as 0-regsrc.
              NOTE(review): regdst is cleared before regsrc is read, so this
              sequence presumably requires regdst and regsrc to be different
              registers - confirm against the callers }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { the bitwise operations work on both halves independently }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { the carry has been consumed, release the flags again as it is
              done for OP_NEG }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { the borrow has been consumed, release the flags again as it
              is done for OP_NEG }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { 64 bit operation "reg := reg <op> value" with a constant operand for
    Thumb.

    list  - instruction list the code is appended to
    op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; anything else raises
            internal error 2003083101
    size  - not evaluated here
    value - 64 bit constant operand
    reg   - register pair which is modified in place

    For ADD/SUB the low word uses the immediate form when the low half of
    the constant is in 0..255 and a temporary register otherwise; the high
    word is always added/subtracted with carry from a temporary register. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmplo,tmphi : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { the bitwise operations work on both halves independently }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            { load the high word before the flags become live, so that
              whatever instructions a_load_const_reg emits cannot disturb
              the carry between ADD and ADC }
            tmphi:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmphi);

            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmplo:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmplo);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmplo));
              end;

            list.concat(taicpu.op_reg_reg(A_ADC,reg.reghi,tmphi));
            { the carry has been consumed }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            { see OP_ADD: the high word is loaded before the flags become
              live }
            tmphi:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmphi);

            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))))
              end
            else
              begin
                tmplo:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmplo);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmplo));
              end;

            list.concat(taicpu.op_reg_reg(A_SBC,reg.reghi,tmphi));
            { the borrow has been consumed }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Instantiates the code generator objects (cg, cg64) and selects the
    assembler optimizer class matching the instruction set which is
    generated: Thumb-2 is checked first, then Thumb, ARM is the fallback.
    For Thumb no optimizer class is assigned (the assignment is disabled). }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
          exit;
        end;

      if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
          exit;
        end;

      { plain ARM code }
      cg:=tarmcgarm.create;
      cg64:=tarmcg64farm.create;
      casmoptimizer:=TCpuAsmOptimizer;
    end;
  4893. end.