cgcpu.pas 216 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm is shared between all arm architectures }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;

    { loading of values and addresses into parameter locations, calls }
    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
    procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { move instructions }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    { non-virtual load/store helpers; unlike the a_load_* procedures above
      they return a treference to the caller }
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;

    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;

    { comparison operations }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
      l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;

    { jumps and flag handling }
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { procedure entry/exit and related support code }
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;

    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

    { block copies; g_concatcopy_move and g_concatcopy_internal are not
      overrides, they are introduced by this class }
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);

    { overflow checking }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;

    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;

    { helpers introduced by this class }
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    { virtual: the Thumb and Thumb-2 code generators declared in this unit
      override it }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;

    { multimedia (mm) register moves and operations }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;

    { Transform unsupported methods into internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { try to generate an optimized 32 bit multiplication by a constant,
      returns true if the code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;

    { clear potential overflow bits left by 8 or 16 bit operations, i.e.
      the upper 24/16 bits of a register after an operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);

    procedure g_maybe_tls_init(list : TAsmList); override;
  end;
  { tcgarm is shared between normal arm and thumb-2 }
  tcgarm = class(tbasecgarm)
    { integer operations; in the implementation the two- and three-operand
      forms forward to the *_checkoverflow variants with setflags=false }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; src1, src2, dst: tregister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    { loading of constants and memory operands into integer registers }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    { subtracts ioffset from the 'self' parameter of procdef, wherever the
      caller-side parameter location places it }
    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    { Multiply two 32-bit registers into lo and hi 32-bit registers }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { normal arm cg; it only adds the creation and destruction of the
    register allocators to tcgarm }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { 64 bit cg for all arm flavours; declares no members of its own and
    serves as the common ancestor of the 64 bit code generators below }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm is shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { 64 bit integer operations on register pairs (tregister64) }
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    { same operations with an additional setflags request and an overflow
      location (ovloc) passed by reference }
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    { transfers between a 64 bit integer register pair and an mm register }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { 64 bit code generator class derived from tcg64farm; it adds no
    members of its own }
  tarmcg64farm = class(tcg64farm)
  end;
  { Thumb code generator; it derives directly from tbasecgarm (not from
    tcgarm) and therefore provides its own integer operations, loads and
    procedure entry/exit code }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    { replaces the virtual handle_load_store of tbasecgarm }
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { 64 bit code generator for Thumb; overrides only the two-operand
    64 bit operations }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { Thumb-2 code generator; derives from tcgarm and overrides the methods
    listed below }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    { replaces the virtual handle_load_store of tbasecgarm }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;

    { multimedia (mm) register moves }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { 64 bit code generator for Thumb-2; inherits from tcg64farm and
    overrides only a_op64_reg_reg }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { translation table from a generic comparison (topcmp) to an ARM
      condition code; the entries have to stay in the declaration order of
      the topcmp enumeration, which is declared outside this unit }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
      C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
    { NOTE(review): presumably the stack page size in bytes used for stack
      probing on Windows targets -- no use is visible in this part of the
      file, confirm at the use sites }
    winstackpagesize = 4096;
  { returns the instruction postfix (PF_S, PF_D or PF_E) matching the
    float type of def; raises an internal error for any other definition }
  function get_fpu_postfix(def : tdef) : toppostfix;
  { NOTE(review): presumably instantiates the code generator objects for
    the selected ARM flavour -- the implementation is not visible in this
    part of the file }
  procedure create_codegen;
  172. implementation
  173. uses
  174. globals,verbose,systems,cutils,
  175. aopt,aoptcpu,
  176. fmodule,
  177. symconst,symsym,symtable,
  178. tgobj,
  179. procinfo,cpupi,
  180. paramgr;
  181. { Range check must be disabled explicitly as conversions between signed and unsigned
  182. 32-bit values are done without explicit typecasts }
  183. {$R-}
  { Maps a floating point definition to the matching instruction postfix:
    s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Any other float type raises internal error 200401272, a definition that
    is not a floatdef at all raises internal error 200401271. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the integer, FPU and MM register allocators for plain ARM code.
    The order of the registers inside each list is passed on unchanged to
    the allocator constructors. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we always save R14, so we can use it }
      if target_info.system=system_arm_darwin then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure, so it is
          handed to the allocator as well }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { same list without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if not(current_settings.fputype in [fpu_vfpv3,fpu_vfpv4]) then
        { D0..D15 only }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[])
      else
        { vfpv3/vfpv4: D16..D31 are available too and are listed before
          D8..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Releases the three register allocators created by
    init_register_allocators, then lets the inherited implementation run. }
  procedure tarmcgarm.done_register_allocators;
    var
      regtype : tregistertype;
    begin
      for regtype in [R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER] do
        rg[regtype].free;
      inherited done_register_allocators;
    end;
  { Loads the constant a into reg using the first strategy that applies:
      1. MOV, if is_shifter_const accepts a
      2. MVN, if is_shifter_const accepts not(a)
      3. MOV+ORR, if a can be split into two shifter constants
      4. MVN+BIC, if not(a) can be split into two shifter constants
      5. LDR from a 32 bit constant emitted into the local data of the
         current procedure
    Only 8, 16 and 32 bit integer sizes are accepted, anything else raises
    internal error 2002090902. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rotation : byte;
      inverted : tcgint;
      poollabel : tasmlabel;
      poolref : treference;
      firstpart, secondpart : DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);
      inverted:=not(a);

      { one instruction is enough }
      if is_shifter_const(a,rotation) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;
      if is_shifter_const(inverted,rotation) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,inverted));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,firstpart,secondpart) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,firstpart));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,secondpart));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(inverted,firstpart,secondpart) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,firstpart));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,secondpart));
          exit;
        end;

      { last resort: emit the value as labelled 32 bit data and load it
        relative to the PC }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg.

    If fromsize is not smaller than tosize, the load is done with tosize.
    Accesses that are not sufficiently aligned (alignment 1 or 2 and smaller
    than the access size), as well as halfword accesses on CPUs without
    CPUARM_HAS_ALL_MEM, are assembled from smaller loads combined with
    ORR ...,LSL #n. Everything else is passed to handle_load_store.
    A signed byte load on a CPU without CPUARM_HAS_ALL_MEM is done unsigned
    and sign extended afterwards. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      ldrpostfix : toppostfix;
      partref : treference;
      partreg, auxreg : tregister;
      lslop : tshifterop;
      step : integer;
      limitedmem, bigendian : boolean;

    { Prepares partref for the piecewise loads: a simple reference is used
      as it is; a reference with a symbol, an index register, an offset
      outside -4095..maxoffset or one that uses reg itself is first
      resolved into the freshly allocated auxreg. }
    procedure init_partref(maxoffset : longint);
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-4095) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            auxreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,auxreg);
            reference_reset_base(partref,auxreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;
      limitedmem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      bigendian:=target_info.endian=endian_big;

      case FromSize of
        OS_8:
          ldrpostfix:=PF_B;
        OS_S8:
          { without CPUARM_HAS_ALL_MEM the byte is loaded unsigned and sign
            extended at the end of this procedure }
          if limitedmem then
            ldrpostfix:=PF_B
          else
            ldrpostfix:=PF_SB;
        OS_16:
          ldrpostfix:=PF_H;
        OS_S16:
          ldrpostfix:=PF_SH;
        OS_32,
        OS_S32:
          ldrpostfix:=PF_None;
        else
          InternalError(200308297);
      end;

      if not(((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
             (limitedmem and (ldrpostfix in [PF_SH,PF_H]))) then
        { plain load }
        handle_load_store(list,A_LDR,ldrpostfix,reg,ref)
      else
        begin
          { direction in which the offset moves from one part to the next }
          if bigendian then
            step:=-1
          else
            step:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref(4094);
                if bigendian then
                  inc(partref.offset,1);
                shifterop_reset(lslop);
                lslop.shiftmode:=SM_LSL;
                lslop.shiftimm:=8;
                partreg:=getintregister(list,OS_INT);
                { first byte goes directly into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                { the second byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,lslop));
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref(4092);
                shifterop_reset(lslop);
                lslop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfword loads }
                    if bigendian then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                    lslop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,lslop));
                  end
                else
                  begin
                    { four byte loads }
                    auxreg:=getintregister(list,OS_INT);
                    if bigendian then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,auxreg);
                    lslop.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,lslop));
                    inc(partref.offset,step);
                    { partreg is free again and receives the fourth byte }
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    lslop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,auxreg,lslop));
                    lslop.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,lslop));
                  end;
              end
            else
              handle_load_store(list,A_LDR,ldrpostfix,reg,ref);
          end;
        end;

      { fix up the result of signed byte loads }
      if fromsize=OS_S8 then
        begin
          if limitedmem then
            a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
          else if tosize=OS_16 then
            a_load_reg_reg(list,OS_16,OS_32,reg,reg);
        end;
    end;
  { Subtracts ioffset from the 'self' parameter of procdef.

    Every caller-side location of the parameter is visited. A register
    location is adjusted in place. A stack location is adjusted through a
    reference whose offset is increased by sizeof(aint).
    If ioffset is rejected by is_shifter_const it is loaded into R12 first.
    Raises internal error 200305251 if no 'self' parameter symbol is found
    and 200309189 for any other kind of location. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref : treference;
      curloc : Pcgparalocation;
      rotation : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(200305251);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              if is_shifter_const(ioffset,rotation) then
                a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
              else
                begin
                  a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                  a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,rotation) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,stackref);
                  end;
              end
            else
              internalerror(200309189);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Stores the constant a in the parameter location paraloc, which has to
    pass check_simple_location. Register locations are loaded directly,
    stack locations are written through a reference built from the
    location's index register and offset. Any other location kind raises
    internal error 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      stackref : treference;
      ploc : pcgparalocation;
    begin
      paraloc.check_simple_location;
      ploc:=paraloc.location;
      paramanager.allocparaloc(list,ploc);
      case ploc^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,ploc^.register);
        LOC_REFERENCE:
          begin
            reference_reset(stackref,paraloc.alignment,[]);
            stackref.base:=ploc^.reference.index;
            stackref.offset:=ploc^.reference.offset;
            a_load_const_ref(list,size,a,stackref);
          end;
        else
          internalerror(2002081101);
      end;
    end;
  { Copies the value at reference r into the parameter paraloc, which may
    consist of several chained locations. After each location the source
    reference advances, and the remaining size shrinks, by the size of
    that location.
    Internal errors: 2005010710 if a stack location whose size is not
    OS_32 is followed by another location, 2002072801 for an FPU register
    location whose size is neither OS_F32 nor OS_F64, 2002081103 for an
    unsupported location kind. }
  procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
    var
      srcref, dstref : treference;
      curloc : pcgparalocation;
      remaining : aint;
    begin
      srcref:=r;
      remaining:=paraloc.intsize;
      curloc:=paraloc.location;
      while assigned(curloc) do
        begin
          paramanager.allocparaloc(list,curloc);
          case curloc^.loc of
            LOC_REGISTER,
            LOC_CREGISTER:
              a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
            LOC_REFERENCE:
              begin
                reference_reset_base(dstref,curloc^.reference.index,curloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { doubles in softemu mode have a strange order of registers and references }
                if curloc^.size<>OS_32 then
                  begin
                    { everything that is left goes to the stack, so this has
                      to be the last location }
                    g_concatcopy(list,srcref,dstref,remaining);
                    if assigned(curloc^.next) then
                      internalerror(2005010710);
                  end
                else
                  g_concatcopy(list,srcref,dstref,4);
              end;
            LOC_FPUREGISTER,
            LOC_CFPUREGISTER:
              if curloc^.size in [OS_F32,OS_F64] then
                a_loadfpu_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register)
              else
                internalerror(2002072801);
            LOC_VOID:
              begin
                { nothing to do }
              end;
            else
              internalerror(2002081103);
          end;
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          dec(remaining,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Passes the address of reference r in the parameter location paraloc,
    which has to pass check_simple_location. For a stack location the
    address is computed into a temporary register and then stored.
    Any other location kind raises internal error 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      stackref : treference;
      addrreg : tregister;
      ploc : pcgparalocation;
    begin
      paraloc.check_simple_location;
      ploc:=paraloc.location;
      paramanager.allocparaloc(list,ploc);
      case ploc^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,ploc^.register);
        LOC_REFERENCE:
          begin
            reference_reset(stackref,paraloc.alignment,[]);
            stackref.base:=ploc^.reference.index;
            stackref.offset:=ploc^.reference.offset;
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,stackref);
          end;
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a BL to the symbol named s and marks the current procedure as one
    that performs calls (pi_do_call). With weak=true the symbol is
    referenced through WeakRefAsmSymbol. The reference is marked addr_pic
    when the target uses a GOT for PIC and PIC code is requested, addr_full
    otherwise. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callref : treference;
      callee : TAsmSymbol;
    begin
      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(callref,callee,0,sizeof(pint),[]);

      if not((tf_pic_uses_got in target_info.flags) and
             (cs_create_pic in current_settings.moduleswitches)) then
        callref.refaddr:=addr_full
      else
        callref.refaddr:=addr_pic;

      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      list.concat(taicpu.op_ref(A_BL,callref));
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits an indirect call through reg and marks the current procedure as
    one that performs calls (pi_do_call). CPUs with CPUARM_HAS_BLX get a
    single BLX; all others get MOV R14,PC followed by MOV PC,reg. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Applies op with the constant a to reg in place; implemented as the
    three-operand form with reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  { Applies op with the constant a to the memory operand ref: the value is
    loaded into a temporary register, the operation writes its result into
    a second temporary, and that result is stored back. The store uses the
    reference returned by a_internal_load_ref_reg, not ref itself. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valuereg, resultreg : tregister;
      loadedref : treference;
    begin
      valuereg:=getintregister(list,size);
      resultreg:=getintregister(list,size);
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
      a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
      a_load_reg_ref(list,size,size,resultreg,loadedref);
    end;
  { Two-operand integer operation dst := dst op src.

    OP_NEG is emitted as RSB dst,src,#0 followed by maybeadjustresult.
    OP_NOT is emitted as MVN; for 8 and 16 bit sizes the source is shifted
    to the top of the register first and the result is shifted back down
    (ASR for signed sizes, LSR for unsigned ones).
    All other operations are passed to a_op_reg_reg_reg with dst as second
    source and as destination. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shiftop : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
            list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
          else
            begin
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              if size in [OS_16, OS_S16] then
                shiftop.shiftimm:=16
              else
                shiftop.shiftimm:=24;
              list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shiftop));
              {Using a shift here allows this to be folded into another instruction}
              if size in [OS_S8, OS_S16] then
                shiftop.shiftmode:=SM_ASR
              else
                shiftop.shiftmode:=SM_LSR;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shiftop));
            end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  { Translation tables indexed by the generic operation TOpCG. The entries
    have to stay in the declaration order of the TOpCG enumeration, which is
    declared outside this unit; A_NONE marks an operation without an entry
    in the respective table. }
  const
    { opcodes for the register/register form; the three positions that hold
      A_ASR,A_LSL,A_LSR in op_reg_opcg2asmop and the last position (A_ROR
      there) are A_NONE here }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
    { same table including the shift and rotate opcodes }
    op_reg_opcg2asmop: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
       A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
    { instruction postfix per operation; currently PF_None for every entry }
    op_reg_postfix: array[TOpCG] of TOpPostfix =
      (PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
       PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None);
  { Three-operand operation with a constant: dst := src op a.
    Forwards to the overflow checking variant with setflags=false; the
    overflow location it fills in is discarded. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      ignoredovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,ignoredovloc);
    end;
  { Three-register operation without requesting a flag update. Thin wrapper
    around a_op_reg_reg_reg_checkoverflow; the overflow location it reports
    is discarded. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      ignored_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,ignored_ovloc);
    end;
  { Maps a generic shift/rotate operation to the ARM shifter operand mode.
    Any operation that is not a shift or rotate raises an internal error. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        { both rotate directions map to ROR }
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace the 32 bit multiplication dst := src * a by a short
    sequence of MOV/ADD/SUB/RSB instructions using shifted operands.
    Returns true if such a sequence was written to list; returns false
    (without emitting anything) if the estimated effort is too high, in which
    case the caller has to generate the multiplication itself. }
  680. function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  681. var
  682. multiplier : dword;
  683. power : longint;
  684. shifterop : tshifterop;
  685. bitsset : byte;
  686. negative : boolean;
  687. first : boolean;
  688. b,
  689. cycles : byte;
  690. maxeffort : byte;
  691. begin
  692. result:=true;
  693. cycles:=0;
  694. negative:=a<0;
  695. shifterop.rs:=NR_NO;
  696. shifterop.shiftmode:=SM_LSL;
  { a negative factor needs one extra instruction (the final RSB) }
  697. if negative then
  698. inc(cycles);
  { work on the magnitude; bit 0 is masked out of the population count
    because the lowest bit comes for free (see the comment further below) }
  699. multiplier:=dword(abs(a));
  700. bitsset:=popcnt(multiplier and $fffffffe);
  701. { heuristics to estimate how much instructions are reasonable to replace the mul,
  702. this is currently based on XScale timings }
  703. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  704. actual multiplication, this requires min. 1+4 cycles
  705. because the first shift imm. might cause a stall and because we need more instructions
  706. when replacing the mul we generate max. 3 instructions to replace this mul }
  707. maxeffort:=3;
  708. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  709. a ldr, so generating one more operation to replace this is beneficial }
  710. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  711. inc(maxeffort);
  712. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  713. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  714. dec(maxeffort);
  715. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  716. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  717. dec(maxeffort);
  718. { most simple cases }
  719. if a=1 then
  720. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  721. else if a=0 then
  722. a_load_const_reg(list,OS_32,0,dst)
  723. else if a=-1 then
  724. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  725. { add up ?
  726. basically, one add is needed for each bit being set in the constant factor
  727. however, the least significant bit is for free, it can be hidden in the initial
  728. instruction
  729. }
  730. else if (bitsset+cycles<=maxeffort) and
  731. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  732. begin
  733. first:=true;
  { consume the set bits of multiplier from the most significant one downwards }
  734. while multiplier<>0 do
  735. begin
  736. shifterop.shiftimm:=BsrDWord(multiplier);
  737. if odd(multiplier) then
  738. begin
  { dst := src + (src shl shiftimm); this also accounts for bit 0 }
  739. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  740. dec(multiplier);
  741. end
  742. else
  743. if first then
  744. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  745. else
  746. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  { the two statements below follow the if statement and therefore run on
    every iteration: the bit that was just handled is removed }
  747. first:=false;
  748. dec(multiplier,1 shl shifterop.shiftimm);
  749. end;
  750. if negative then
  751. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  752. end
  753. { subtract from the next greater power of two? }
  754. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  755. begin
  756. first:=true;
  757. while multiplier<>0 do
  758. begin
  759. if first then
  760. begin
  { from here on multiplier holds the amount that has to be subtracted
    from src shl power }
  761. multiplier:=(1 shl power)-multiplier;
  762. shifterop.shiftimm:=power;
  763. end
  764. else
  765. shifterop.shiftimm:=BsrDWord(multiplier);
  766. if odd(multiplier) then
  767. begin
  { dst := (src shl shiftimm) - src, which removes bit 0 of the remainder }
  768. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  769. dec(multiplier);
  770. end
  771. else
  772. if first then
  773. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  774. else
  775. begin
  776. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  777. dec(multiplier,1 shl shifterop.shiftimm);
  778. end;
  779. first:=false;
  780. end;
  781. if negative then
  782. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  783. end
  784. else
  785. result:=false;
  786. end;
  { Emits code for dst := src op a, where a is a constant.
    If setflags (or the cgsetflags field) is set, the flag-setting form of the
    instruction is used where this routine emits the instruction itself, and
    ovloc is set to the flag that signals overflow for OP_ADD/OP_SUB.
    Constants that cannot be encoded directly are handled by a number of
    special cases (shifts instead of multiplications, BIC instead of AND,
    splitting into two immediates) before falling back to loading the
    constant into a temporary register. }
  787. procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  788. var
  789. shift, lsb, width : byte;
  790. tmpreg : tregister;
  791. so : tshifterop;
  792. l1 : longint;
  793. imm1, imm2: DWord;
  794. begin
  { optimize_op_const may rewrite op and a; the OP_NONE and OP_MOVE cases
    below exit early and therefore leave ovloc untouched }
  795. optimize_op_const(size, op, a);
  796. case op of
  797. OP_NONE:
  798. begin
  799. if src <> dst then
  800. a_load_reg_reg(list, size, size, src, dst);
  801. exit;
  802. end;
  803. OP_MOVE:
  804. begin
  805. a_load_const_reg(list, size, a, dst);
  806. exit;
  807. end;
  808. end;
  809. ovloc.loc:=LOC_VOID;
  { if the negated constant is encodable, turn ADD into SUB and vice versa.
    NOTE(review): only setflags is tested here, not cgsetflags - confirm
    that the swap is also safe when cgsetflags is set }
  810. if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
  811. case op of
  812. OP_ADD:
  813. begin
  814. op:=OP_SUB;
  815. a:=aint(dword(-a));
  816. end;
  817. OP_SUB:
  818. begin
  819. op:=OP_ADD;
  820. a:=aint(dword(-a));
  821. end
  822. end;
  { constant can be encoded as immediate operand and the operation is not a multiplication }
  823. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  824. case op of
  825. OP_NEG,OP_NOT:
  826. internalerror(200308281);
  827. OP_SHL,
  828. OP_SHR,
  829. OP_ROL,
  830. OP_ROR,
  831. OP_SAR:
  832. begin
  833. if a>32 then
  834. internalerror(200308294);
  { shifts and rotates are emitted as MOV with a shifted operand;
    a rotate left by a is expressed as a rotate right by 32-a }
  835. shifterop_reset(so);
  836. so.shiftmode:=opshift2shiftmode(op);
  837. if op = OP_ROL then
  838. so.shiftimm:=32-a
  839. else
  840. so.shiftimm:=a;
  841. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  842. end;
  843. else
  844. {if (op in [OP_SUB, OP_ADD]) and
  845. ((a < 0) or
  846. (a > 4095)) then
  847. begin
  848. tmpreg:=getintregister(list,size);
  849. list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
  850. list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
  851. list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
  852. ));
  853. end
  854. else}
  855. begin
  { the postfix expression below evaluates to PF_S if flags are requested
    and to the postfix with ordinal value 0 otherwise }
  856. if cgsetflags or setflags then
  857. a_reg_alloc(list,NR_DEFAULTFLAGS);
  858. list.concat(setoppostfix(
  859. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  860. end;
  { report the flag which signals the overflow; only OP_ADD and OP_SUB set resflags here }
  861. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  862. begin
  863. ovloc.loc:=LOC_FLAGS;
  864. case op of
  865. OP_ADD:
  866. ovloc.resflags:=F_CS;
  867. OP_SUB:
  868. ovloc.resflags:=F_CC;
  869. end;
  870. end;
  871. end
  872. else
  873. begin
  874. { there could be added some more sophisticated optimizations }
  875. if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  876. a_op_reg_reg(list,OP_NEG,size,src,dst)
  877. { we do this here instead in the peephole optimizer because
  878. it saves us a register }
  879. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  880. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  881. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  882. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  883. begin
  884. if l1>32 then{roozbeh does this ever happen?}
  885. internalerror(200308296);
  886. shifterop_reset(so);
  887. so.shiftmode:=SM_LSL;
  888. so.shiftimm:=l1;
  889. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  890. end
  891. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  892. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  893. begin
  894. if l1>32 then{does this ever happen?}
  895. internalerror(201205181);
  896. shifterop_reset(so);
  897. so.shiftmode:=SM_LSL;
  898. so.shiftimm:=l1;
  899. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  900. end
  901. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  902. begin
  903. { nothing to do on success }
  904. end
  905. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  906. broader range of shifterconstants.}
  907. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  908. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  909. { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
  910. into the following instruction}
  911. else if (op = OP_AND) and
  912. is_continuous_mask(aword(a), lsb, width) and
  913. ((lsb = 0) or ((lsb + width) = 32)) then
  914. begin
  915. shifterop_reset(so);
  { masks of the low 16 or 8 bits use UXTH/UXTB on ARMv6 and later }
  916. if (width = 16) and
  917. (lsb = 0) and
  918. (current_settings.cputype >= cpu_armv6) then
  919. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  920. else if (width = 8) and
  921. (lsb = 0) and
  922. (current_settings.cputype >= cpu_armv6) then
  923. list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
  924. else if lsb = 0 then
  925. begin
  { mask at the bottom: shift the unwanted upper bits out to the left and
    shift back logically by the same amount }
  926. so.shiftmode:=SM_LSL;
  927. so.shiftimm:=32-width;
  928. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  929. so.shiftmode:=SM_LSR;
  930. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  931. end
  932. else
  933. begin
  { mask at the top: shift the unwanted lower bits out to the right and
    shift back by the same amount }
  934. so.shiftmode:=SM_LSR;
  935. so.shiftimm:=lsb;
  936. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  937. so.shiftmode:=SM_LSL;
  938. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  939. end;
  940. end
  941. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  942. begin
  943. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
  944. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  945. end
  { apply the operation twice, once for each half of the split constant }
  946. else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
  947. not(cgsetflags or setflags) and
  948. split_into_shifter_const(a, imm1, imm2) then
  949. begin
  950. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
  951. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  952. end
  953. else
  954. begin
  { general fallback: load the constant into a register and use the
    register variant, which also takes care of ovloc }
  955. tmpreg:=getintregister(list,size);
  956. a_load_const_reg(list,size,a,tmpreg);
  957. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  958. end;
  959. end;
  960. maybeadjustresult(list,op,size,dst);
  961. end;
  { Emits code for the three-register integer operation dst := src2 op src1.

    list     - assembler list the instructions are appended to
    op       - operation; OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported
               here and raise an internal error
    size     - operand size; passed to maybeadjustresult after the operation
    src1     - right-hand operand (shift/rotate count for shift operations)
    src2     - left-hand operand
    dst      - destination register
    setflags - request a flag-setting instruction (the cgsetflags field has
               the same effect)
    ovloc    - set to LOC_VOID; for multiplications with flags requested on
               CPUs providing UMULL/SMULL it is set to LOC_FLAGS with F_NE
               signalling the overflow }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { MOV dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { 32x32->64 bit multiplication; the upper half ends up in
                  overflowreg and is inspected afterwards }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: copy the
                          operand into a fresh register so that rm differs from
                          rd. The copy has to go into tmpreg; copying into dst
                          would be a self-move and leave tmpreg uninitialized }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper half is compared with the lower half
                      shifted arithmetically right by 31 }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper half is compared with zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { same situation as above: the copy must go into tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { plain data processing instruction taken from the opcode table;
              the postfix expression evaluates to PF_S if flags are requested }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result in the register pair
    dstlo/dsthi. On CPUs with UMULL/SMULL the long multiplication is used
    (unsigned for OS_32, signed for OS_S32). Without it only the case
    dsthi=NR_NO is supported, which is done with a plain MUL. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
      has_long_mul: boolean;
    begin
      has_long_mul:=CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype];
      if has_long_mul then
        begin
          list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
          if size=OS_32 then
            mulop:=A_UMULL
          else if size=OS_S32 then
            mulop:=A_SMULL
          else
            InternalError(2014060802);
          { The caller may leave out dstlo or dsthi if it does not need that
            half, but the instruction still requires valid registers for both.
            For dsthi = NR_NO a 32x32=32 bit multiplication could be used
            instead. }
          if (dstlo = NR_NO) then
            dstlo:=getintregister(list,size);
          if (dsthi = NR_NO) then
            dsthi:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
        end
      else
        begin
          if dsthi=NR_NO then
            begin
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end
          else
            internalerror(2015083022);
        end;
    end;
  { Emits the load/store instruction op (with postfix oppostfix) for register
    reg and the memory reference ref, after rewriting the reference into a
    form the instruction can encode.
    ref is passed by value, so the caller's reference is not modified; the
    reference which was finally used for the instruction is returned. }
  1096. function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  1097. var
  1098. tmpreg1,tmpreg2 : tregister;
  1099. begin
  { tmpreg1 <> NR_NO later on means that a temporary register was already
    introduced by one of the Thumb fix-ups below }
  1100. tmpreg1:=NR_NO;
  1101. { Be sure to have a base register }
  1102. if (ref.base=NR_NO) then
  1103. begin
  1104. if ref.shiftmode<>SM_None then
  1105. internalerror(2014020701);
  1106. ref.base:=ref.index;
  1107. ref.index:=NR_NO;
  1108. end;
  1109. { absolute symbols can't be handled directly, we've to store the symbol reference
  1110. in the text segment and access it pc relative
  1111. For now, we assume that references where base or index equals to PC are already
  1112. relative, all other references are assumed to be absolute and thus they need
  1113. to be handled extra.
  1114. A proper solution would be to change refoptions to a set and store the information
  1115. if the symbol is absolute or relative there.
  1116. }
  { fixref is called if any of the following holds: a symbol is referenced
    without a PC base/index, there is neither base nor index, or the offset is
    outside the range or alignment accepted below for the given
    instruction/postfix (with separate limits when generating Thumb code) }
  1117. if (assigned(ref.symbol) and
  1118. not(is_pc(ref.base)) and
  1119. not(is_pc(ref.index))
  1120. ) or
  1121. { [#xxx] isn't a valid address operand }
  1122. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  1123. (ref.offset<-4095) or
  1124. (ref.offset>4095) or
  1125. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  1126. ((ref.offset<-255) or
  1127. (ref.offset>255)
  1128. )
  1129. ) or
  1130. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1131. ((ref.offset<-1020) or
  1132. (ref.offset>1020) or
  1133. ((abs(ref.offset) mod 4)<>0)
  1134. )
  1135. ) or
  1136. ((GenerateThumbCode) and
  1137. (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
  1138. ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
  1139. ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
  1140. ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
  1141. ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
  1142. )
  1143. ) then
  1144. begin
  1145. fixref(list,ref);
  1146. end;
  1147. if GenerateThumbCode then
  1148. begin
  1149. { certain thumb load require base and index }
  1150. if (oppostfix in [PF_SB,PF_SH]) and
  1151. (ref.base<>NR_NO) and (ref.index=NR_NO) then
  1152. begin
  { supply an index register holding zero }
  1153. tmpreg1:=getintregister(list,OS_ADDR);
  1154. a_load_const_reg(list,OS_ADDR,0,tmpreg1);
  1155. ref.index:=tmpreg1;
  1156. end;
  1157. { "hi" registers cannot be used as base or index }
  1158. if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
  1159. ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
  1160. begin
  1161. tmpreg1:=getintregister(list,OS_ADDR);
  1162. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
  1163. ref.base:=tmpreg1;
  1164. end;
  1165. if getsupreg(ref.index) in [RS_R8..RS_R14] then
  1166. begin
  1167. tmpreg1:=getintregister(list,OS_ADDR);
  1168. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
  1169. ref.index:=tmpreg1;
  1170. end;
  1171. end;
  1172. { fold if there is base, index and offset, however, don't fold
  1173. for vfp memory instructions because we later fold the index }
  1174. if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  1175. (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  1176. begin
  1177. if tmpreg1<>NR_NO then
  1178. begin
  { NOTE(review): the sum is written to tmpreg2, but neither ref.base nor
    ref.index is updated to tmpreg2 in this branch while ref.offset is
    cleared below - confirm that this is intended }
  1179. tmpreg2:=getintregister(list,OS_ADDR);
  1180. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
  1181. tmpreg1:=tmpreg2;
  1182. end
  1183. else
  1184. begin
  1185. tmpreg1:=getintregister(list,OS_ADDR);
  1186. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
  1187. ref.base:=tmpreg1;
  1188. end;
  1189. ref.offset:=0;
  1190. end;
  1191. { floating point operations have only limited references
  1192. we expect here, that a base is already set }
  1193. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  1194. begin
  1195. if ref.shiftmode<>SM_none then
  1196. internalerror(200309121);
  { the index register is folded into the base so that only a base
    (plus offset) remains }
  1197. if tmpreg1<>NR_NO then
  1198. begin
  1199. if ref.base=tmpreg1 then
  1200. begin
  1201. if ref.signindex<0 then
  1202. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
  1203. else
  1204. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
  1205. ref.index:=NR_NO;
  1206. end
  1207. else
  1208. begin
  1209. if ref.index<>tmpreg1 then
  1210. internalerror(200403161);
  1211. if ref.signindex<0 then
  1212. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
  1213. else
  1214. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
  1215. ref.base:=tmpreg1;
  1216. ref.index:=NR_NO;
  1217. end;
  1218. end
  1219. else
  1220. begin
  { NOTE(review): unlike the two branches above, this one always adds the
    index and does not look at ref.signindex - confirm that a negative
    signindex cannot occur here }
  1221. tmpreg1:=getintregister(list,OS_ADDR);
  1222. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
  1223. ref.base:=tmpreg1;
  1224. ref.index:=NR_NO;
  1225. end;
  1226. end;
  1227. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  1228. Result := ref;
  1229. end;
  { Stores register reg to the memory reference ref with size tosize.
    If the reference is less aligned than the store requires, or if a
    halfword store is needed on a CPU without CPUARM_HAS_ALL_MEM, the value
    is written in smaller pieces (bytes or halfwords), taking the target
    endianness into account. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step : integer;
      i : integer;
      split_store : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        { byte sized store }
        OS_8,
        OS_S8:
          postfix:=PF_B;
        { halfword sized store }
        OS_16,
        OS_S16:
          postfix:=PF_H;
        { word sized store; OS_F32 is a vfp value held in an integer register }
        OS_32,
        OS_S32,
        OS_F32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;

      split_store:=
        ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
        ((postfix=PF_H) and
         not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]));

      if not split_store then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { direction in which the address moves from the least significant
        piece to the more significant ones }
      if target_info.endian=endian_big then
        step:=-1
      else
        step:=1;

      case FromSize of
        OS_16,OS_S16:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if target_info.endian=endian_big then
              inc(partref.offset,1);
            { low byte first, then the high byte }
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
            a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          end;
        OS_32,OS_S32:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                if target_info.endian=endian_big then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
              end
            else
              begin
                { four byte stores }
                if target_info.endian=endian_big then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                { the two remaining bytes are handled the same way }
                for i:=1 to 2 do
                  begin
                    a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                    inc(partref.offset,step);
                    a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                  end;
              end;
          end
        else
          handle_load_store(list,A_STR,postfix,reg,ref);
      end;
    end;
  { Stores reg to ref with size tosize and returns the reference as it was
    used by handle_load_store. On CPUs without CPUARM_HAS_ALL_MEM a 16 bit
    store is carried out as two byte stores; in that case the returned
    reference is the one of the first byte store. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      hiref : treference;
      hibyte : TRegister;
      split16 : boolean;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      split16:=(tosize in [OS_S16,OS_16]) and
        not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if not split16 then
        result:=handle_load_store(list,A_STR,postfix,reg,ref)
      else
        begin
          { first byte at the given address ... }
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          { ... bits 8 and above of reg at the following address }
          hibyte:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibyte);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_STR,PF_B,hibyte,hiref);
        end;
    end;
  { Loads reg from ref and returns the reference as it was used by
    handle_load_store. The load postfix is derived from fromsize. On CPUs
    without CPUARM_HAS_ALL_MEM, tosize=OS_S8 is loaded as an unsigned byte
    followed by a register conversion, and 16 bit values are assembled from
    two byte loads. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shifter : tshifterop;
      hibyte : TRegister;
      hiref : treference;
      limited_mem : boolean;
    begin
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limited_mem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if limited_mem and (tosize=OS_S8) then
        begin
          { plain byte load, then extend in the register }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limited_mem and (tosize in [OS_S16,OS_16]) then
        begin
          { byte at the given address goes to reg, the following byte is
            or'ed in shifted left by 8 }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          hibyte:=getintregister(list,OS_INT);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_LDR,PF_B,hibyte,hiref);
          shifterop_reset(shifter);
          shifter.shiftmode:=SM_LSL;
          shifter.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibyte,shifter));
        end
      else
        result:=handle_load_store(list,A_LDR,postfix,reg,ref);
    end;
  { Copies reg1 to reg2, converting from fromsize to tosize.
    If the sizes differ, the value is zero or sign extended according to the
    smaller of the two sizes: with shifts (or an AND for unsigned bytes)
    before ARMv6, with the extension instructions from ARMv6 on. If no
    conversion instruction was emitted and the registers differ, a plain MOV
    is generated and registered as a move instruction. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { emits reg2 := reg shifted by shiftimm using shiftmode }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
          end
        else
          begin
            { Thumb: go through the generic constant shift operations }
            case shiftmode of
              SM_LSL:
                a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
              SM_LSR:
                a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
              SM_ASR:
                a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
              else
                internalerror(2013090301);
            end;
          end;
      end;

    var
      movinstr: taicpu;
      plain_move: boolean;
    begin
      if (fromsize=OS_NO) or (tosize=OS_NO) or
         (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) then
        internalerror(2002090901);

      { stays true as long as no conversion instruction has been emitted }
      plain_move:=true;
      if tosize<>fromsize then
        begin
          shifterop_reset(so);
          plain_move:=false;
          { the smaller of both sizes decides about the kind of extension }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            begin
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    do_shift(SM_LSL,24,reg1);
                    if tosize=OS_16 then
                      begin
                        do_shift(SM_ASR,8,reg2);
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      do_shift(SM_ASR,24,reg2);
                  end;
                OS_16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_LSR,16,reg2);
                  end;
                OS_S16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_ASR,16,reg2)
                  end;
                else
                  plain_move:=true;
              end;
            end
          else
            begin
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    if tosize=OS_16 then
                      begin
                        so.shiftmode:=SM_ROR;
                        so.shiftimm:=16;
                        list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                  end;
                OS_16:
                  list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
                OS_S16:
                  list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
                else
                  plain_move:=true;
              end;
            end;
        end;

      if plain_move and (reg1<>reg2) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Loads the floating point value stored at ref into the parameter
    location(s) described by paraloc.
    The linked list of locations is walked from paraloc.location on; href is
    a running copy of ref which is advanced by the size of each location, so
    that every location is filled from its own part of the source value.

    list    - assembler list the instructions are appended to
    size    - size of the floating point value
    ref     - memory reference of the source value
    paraloc - parameter description; FPU register, integer register and
              memory locations are supported, anything else raises an
              internal error }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,paraloc.location);
                { use the running reference like all other branches do; for
                  the first location href equals ref }
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          { continue with the part of the source belonging to the next location }
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Copies FPU register reg1 to reg2 with an MVF instruction whose postfix is
    looked up from tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register reg from ref with an LDF instruction. The precision
    postfix is chosen from fromsize; if tosize differs, a register to
    register conversion is appended. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      if fromsize in [OS_32,OS_F32] then
        precision:=PF_S
      else if fromsize in [OS_64,OS_F64] then
        precision:=PF_D
      else if fromsize=OS_F80 then
        precision:=PF_E
      else
        InternalError(200309021);
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores FPU register reg to ref with an STF instruction. The precision
    postfix is chosen from tosize; only OS_F32, OS_F64 and OS_F80 are
    accepted. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      if tosize=OS_F32 then
        precision:=PF_S
      else if tosize=OS_F64 then
        precision:=PF_D
      else if tosize=OS_F80 then
        precision:=PF_E
      else
        InternalError(200309022);
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  1577. { comparison operations }
  1578. procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  1579. l : tasmlabel);
  1580. var
  1581. tmpreg : tregister;
  1582. b : byte;
  1583. begin
  1584. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1585. if (not(GenerateThumbCode) and is_shifter_const(a,b)) or
  1586. ((GenerateThumbCode) and is_thumb_imm(a)) then
  1587. list.concat(taicpu.op_reg_const(A_CMP,reg,a))
  1588. { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
  1589. and CMP reg,$7fffffff regarding the flags according to the ARM manual }
  1590. else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,b) then
  1591. list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
  1592. else
  1593. begin
  1594. tmpreg:=getintregister(list,size);
  1595. a_load_const_reg(list,size,a,tmpreg);
  1596. list.concat(taicpu.op_reg_reg(A_CMP,reg,tmpreg));
  1597. end;
  1598. a_jmp_cond(list,cmp_op,l);
  1599. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  1600. end;
  1601. procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
  1602. begin
  1603. if reverse then
  1604. begin
  1605. list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
  1606. list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
  1607. list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
  1608. end
  1609. { it is decided during the compilation of the system unit if this code is used or not
  1610. so no additional check for rbit is needed }
  1611. else
  1612. begin
  1613. list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
  1614. list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
  1615. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1616. list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
  1617. if GenerateThumb2Code then
  1618. list.Concat(taicpu.op_cond(A_IT, C_EQ));
  1619. list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
  1620. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  1621. end;
  1622. end;
  1623. procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
  1624. begin
  1625. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1626. list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
  1627. a_jmp_cond(list,cmp_op,l);
  1628. a_reg_dealloc(list,NR_DEFAULTFLAGS);
  1629. end;
  1630. procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
  1631. var
  1632. ai : taicpu;
  1633. begin
  1634. { generate far jump, leave it to the optimizer to get rid of it }
  1635. if GenerateThumbCode then
  1636. ai:=taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s,AT_FUNCTION))
  1637. else
  1638. ai:=taicpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
  1639. ai.is_jmp:=true;
  1640. list.concat(ai);
  1641. end;
  1642. procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
  1643. var
  1644. ai : taicpu;
  1645. begin
  1646. { generate far jump, leave it to the optimizer to get rid of it }
  1647. if GenerateThumbCode then
  1648. ai:=taicpu.op_sym(A_BL,l)
  1649. else
  1650. ai:=taicpu.op_sym(A_B,l);
  1651. ai.is_jmp:=true;
  1652. list.concat(ai);
  1653. end;
  1654. procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
  1655. var
  1656. ai : taicpu;
  1657. inv_flags : TResFlags;
  1658. hlabel : TAsmLabel;
  1659. begin
  1660. if GenerateThumbCode then
  1661. begin
  1662. inv_flags:=f;
  1663. inverse_flags(inv_flags);
  1664. { the optimizer has to fix this if jump range is sufficient short }
  1665. current_asmdata.getjumplabel(hlabel);
  1666. ai:=setcondition(taicpu.op_sym(A_B,hlabel),flags_to_cond(inv_flags));
  1667. ai.is_jmp:=true;
  1668. list.concat(ai);
  1669. a_jmp_always(list,l);
  1670. a_label(list,hlabel);
  1671. end
  1672. else
  1673. begin
  1674. ai:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
  1675. ai.is_jmp:=true;
  1676. list.concat(ai);
  1677. end;
  1678. end;
  1679. procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
  1680. begin
  1681. list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f)));
  1682. list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f))));
  1683. end;
  1684. procedure tbasecgarm.g_profilecode(list : TAsmList);
  1685. begin
  1686. if target_info.system = system_arm_linux then
  1687. begin
  1688. list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
  1689. a_call_name(list,'__gnu_mcount_nc',false);
  1690. end
  1691. else
  1692. internalerror(2014091201);
  1693. end;
{ Emits the procedure prologue into list.

  localsize is the size of the local area; it is aligned to 4 on entry and
  may be enlarged below to keep the stack aligned. If nostackframe is true
  no instructions are generated at all.

  Steps, in order:
    1. determine which FPA/VFP registers have to be saved,
    2. push the non-volatile integer registers (two different layouts:
       Darwin and everything else),
    3. lower the stack pointer by the local size,
    4. store the floating point / VFP registers.
  The padding register chosen in step 2 (if any) is recorded in
  tcpuprocinfo(current_procinfo).stackpaddingreg so that g_proc_exit can
  mirror it. }
procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  var
    ref : treference;
    shift : byte;
    firstfloatreg,lastfloatreg,
    r : byte;
    mmregs,
    regs, saveregs : tcpuregisterset;
    registerarea,
    r7offset,
    stackmisalignment : pint;
    imm1, imm2: DWord;
    stack_parameters : Boolean;
  begin
    LocalSize:=align(LocalSize,4);
    stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
    { call instruction does not put anything on the stack }
    registerarea:=0;
    { High(TSuperRegister) acts as "no padding register";
      g_proc_exit only honours values below RS_R4 }
    tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
    lastfloatreg:=RS_NO;
    if not(nostackframe) then
      begin
        firstfloatreg:=RS_NO;
        mmregs:=[];
        case current_settings.fputype of
          fpu_fpa,
          fpu_fpa10,
          fpu_fpa11:
            begin
              { save floating point registers? }
              regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
              { remember the first and last used register; 12 bytes are
                accounted for each used register }
              for r:=RS_F0 to RS_F7 do
                if r in regs then
                  begin
                    if firstfloatreg=RS_NO then
                      firstfloatreg:=r;
                    lastfloatreg:=r;
                    inc(registerarea,12);
                  end;
            end;
          fpu_vfpv2,
          fpu_vfpv3,
          fpu_vfpv4,
          fpu_vfpv3_d16:
            begin;
              { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                they have numbers>$1f which is not really correct as they should simply have the same numbers
                as the even ones but with a different subtype as it is done on x86 with al/ah }
              mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
            end;
        end;
        a_reg_alloc(list,NR_STACK_POINTER_REG);
        if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
        { save int registers }
        reference_reset(ref,4,[]);
        ref.index:=NR_STACK_POINTER_REG;
        ref.addressmode:=AM_PREINDEXED;
        regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
        if not(target_info.system in systems_darwin) then
          begin
            { NOTE(review): the stack pointer was already allocated a few
              lines above; this second allocation looks redundant -- confirm }
            a_reg_alloc(list,NR_STACK_POINTER_REG);
            if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
              begin
                { keep a copy of the entry stack pointer in R12 }
                a_reg_alloc(list,NR_R12);
                list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
              end;
            { the (old) ARM APCS requires saving both the stack pointer (to
              crawl the stack) and the PC (to identify the function this
              stack frame belongs to) -> also save R12 (= copy of R13 on entry)
              and R15 -- still needs updating for EABI and Darwin, they don't
              need that }
            if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
              regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
            else
              if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                include(regs,RS_R14);
            if regs<>[] then
              begin
                for r:=RS_R0 to RS_R15 do
                  if r in regs then
                    inc(registerarea,4);
                { if the register area is not a multiple of the maximum local
                  alignment, try to add an extra register (the highest free
                  one of R3..R0), so we can avoid the extra sub/add ...,#4
                  later (KB) }
                if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
                  for r:=RS_R3 downto RS_R0 do
                    if not(r in regs) then
                      begin
                        regs:=regs+[r];
                        inc(registerarea,4);
                        tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
                        break;
                      end;
                list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
              end;
            if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
              begin
                { the framepointer now points to the saved R15, so the saved
                  framepointer is at R11-12 (for get_caller_frame) }
                list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                a_reg_dealloc(list,NR_R12);
              end;
          end
        else
          begin
            { always save r14 if we use r7 as the framepointer, because
              the parameter offsets are hardcoded in advance and always
              assume that r14 sits on the stack right behind the saved r7
            }
            if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
              include(regs,RS_FRAME_POINTER_REG);
            if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
              include(regs,RS_R14);
            if regs<>[] then
              begin
                { on Darwin, you first have to save [r4-r7,lr], and then
                  [r8,r10,r11] and make r7 point to the previously saved
                  r7 so that you can perform a stack crawl based on it
                  ([r7] is previous stack frame, [r7+4] is return address)
                }
                include(regs,RS_FRAME_POINTER_REG);
                saveregs:=regs-[RS_R8,RS_R10,RS_R11];
                { r7offset counts the bytes taken by the saved registers
                  that are numbered below the frame pointer register }
                r7offset:=0;
                for r:=RS_R0 to RS_R15 do
                  if r in saveregs then
                    begin
                      inc(registerarea,4);
                      if r<RS_FRAME_POINTER_REG then
                        inc(r7offset,4);
                    end;
                { save the registers }
                list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                { make r7 point to the saved r7 (regardless of whether this
                  frame uses the framepointer, for backtrace purposes) }
                if r7offset<>0 then
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                else
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                { now save the rest (if any) }
                saveregs:=regs-saveregs;
                if saveregs<>[] then
                  begin
                    for r:=RS_R8 to RS_R11 do
                      if r in saveregs then
                        inc(registerarea,4);
                    list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                  end;
              end;
          end;
        { allocate the local area; this is also entered when only padding is
          needed, i.e. the register area is misaligned and the procedure
          performs calls or is an assembler procedure }
        stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
        if (LocalSize<>0) or
           ((stackmisalignment<>0) and
            ((pi_do_call in current_procinfo.flags) or
             (po_assembler in current_procinfo.procdef.procoptions))) then
          begin
            localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
            if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
              begin
                { with an estimated stack size the frame size is fixed in
                  advance; the real size must not exceed it }
                if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                  internalerror(2014030901)
                else
                  localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
              end;
            { sub sp,sp,#localsize: one immediate, two immediates, or via R12 }
            if is_shifter_const(localsize,shift) then
              begin
                a_reg_dealloc(list,NR_R12);
                list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
              end
            else if split_into_shifter_const(localsize, imm1, imm2) then
              begin
                a_reg_dealloc(list,NR_R12);
                list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
              end
            else
              begin
                if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                  a_reg_alloc(list,NR_R12);
                a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                a_reg_dealloc(list,NR_R12);
              end;
          end;
        { store the floating point / VFP registers at floatregstart
          relative to the frame pointer }
        if (mmregs<>[]) or
           (firstfloatreg<>RS_NO) then
          begin
            reference_reset(ref,4,[]);
            if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
              begin
                { compute the address in R12 first.
                  NOTE(review): is_shifter_const is tested on floatregstart
                  while the immediate emitted in the else branch is
                  -floatregstart -- confirm the check should not use the
                  negated value.
                  NOTE(review): R12 is deallocated here although it is used
                  as base of ref below -- confirm }
                if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                  begin
                    a_reg_alloc(list,NR_R12);
                    a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                    a_reg_dealloc(list,NR_R12);
                  end
                else
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                ref.base:=NR_R12;
              end
            else
              begin
                ref.base:=current_procinfo.framepointer;
                ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
              end;
            case current_settings.fputype of
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                begin
                  { one SFM covering firstfloatreg..lastfloatreg }
                  list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                    lastfloatreg-firstfloatreg+1,ref));
                end;
              fpu_vfpv2,
              fpu_vfpv3,
              fpu_vfpv4,
              fpu_vfpv3_d16:
                begin
                  { VSTM takes the address register in the index field }
                  ref.index:=ref.base;
                  ref.base:=NR_NO;
                  { FSTMX is deprecated on ARMv6 and later }
                  {if (current_settings.cputype<cpu_armv6) then
                    postfix:=PF_IAX
                  else
                    postfix:=PF_IAD;}
                  if mmregs<>[] then
                    list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                end;
            end;
          end;
      end;
  end;
{ Emits the procedure epilogue into list.

  parasize is not used by this implementation. If nostackframe is true only
  the return instruction is generated ("bx lr", or "mov pc,lr" on CPUs
  without BX).

  Otherwise, in order:
    1. reload the saved FPA/VFP registers,
    2. recompute the set of integer registers pushed by g_proc_entry
       (including the stack padding register, if one was used),
    3. either release the local area and pop the registers relative to the
       stack pointer, or pop everything relative to the frame pointer.
  In the register sets LR is replaced by PC, so the load multiple also
  performs the return. }
procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  var
    ref : treference;
    LocalSize : longint;
    firstfloatreg,lastfloatreg,
    r,
    shift : byte;
    mmregs,
    saveregs,
    regs : tcpuregisterset;
    registerarea,
    stackmisalignment: pint;
    paddingreg: TSuperRegister;
    imm1, imm2: DWord;
  begin
    if not(nostackframe) then
      begin
        registerarea:=0;
        firstfloatreg:=RS_NO;
        lastfloatreg:=RS_NO;
        mmregs:=[];
        saveregs:=[];
        case current_settings.fputype of
          fpu_fpa,
          fpu_fpa10,
          fpu_fpa11:
            begin
              { restore floating point registers? }
              regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
              for r:=RS_F0 to RS_F7 do
                if r in regs then
                  begin
                    if firstfloatreg=RS_NO then
                      firstfloatreg:=r;
                    lastfloatreg:=r;
                    { floating point register space is already included in
                      localsize below by calc_stackframe_size
                      inc(registerarea,12);
                    }
                  end;
            end;
          fpu_vfpv2,
          fpu_vfpv3,
          fpu_vfpv4,
          fpu_vfpv3_d16:
            begin;
              { restore vfp registers? }
              { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                they have numbers>$1f which is not really correct as they should simply have the same numbers
                as the even ones but with a different subtype as it is done on x86 with al/ah }
              mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
            end;
        end;
        { reload the floating point / VFP registers from floatregstart
          relative to the frame pointer }
        if (firstfloatreg<>RS_NO) or
           (mmregs<>[]) then
          begin
            reference_reset(ref,4,[]);
            if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
              begin
                { compute the address in R12 first.
                  NOTE(review): is_shifter_const is tested on floatregstart
                  while the immediate emitted in the else branch is
                  -floatregstart -- confirm the check should not use the
                  negated value.
                  NOTE(review): R12 is deallocated here although it is used
                  as base of ref below -- confirm }
                if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                  begin
                    a_reg_alloc(list,NR_R12);
                    a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                    a_reg_dealloc(list,NR_R12);
                  end
                else
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
                ref.base:=NR_R12;
              end
            else
              begin
                ref.base:=current_procinfo.framepointer;
                ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
              end;
            case current_settings.fputype of
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                begin
                  { one LFM covering firstfloatreg..lastfloatreg }
                  list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                    lastfloatreg-firstfloatreg+1,ref));
                end;
              fpu_vfpv2,
              fpu_vfpv3,
              fpu_vfpv4,
              fpu_vfpv3_d16:
                begin
                  { VLDM takes the address register in the index field }
                  ref.index:=ref.base;
                  ref.base:=NR_NO;
                  { FLDMX is deprecated on ARMv6 and later }
                  {if (current_settings.cputype<cpu_armv6) then
                    mmpostfix:=PF_IAX
                  else
                    mmpostfix:=PF_IAD;}
                  if mmregs<>[] then
                    list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                end;
            end;
          end;
        regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
        if (pi_do_call in current_procinfo.flags) or
           (regs<>[]) or
           ((target_info.system in systems_darwin) and
            (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
          begin
            { load PC instead of LR, so the register restore also returns }
            exclude(regs,RS_R14);
            include(regs,RS_R15);
            if (target_info.system in systems_darwin) then
              include(regs,RS_FRAME_POINTER_REG);
          end;
        if not(target_info.system in systems_darwin) then
          begin
            { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
              The saved PC came after that but is discarded, since we restore
              the stack pointer }
            if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
              regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
          end
        else
          begin
            { restore R8-R11 already if necessary (they've been stored
              before the others) }
            saveregs:=regs*[RS_R8,RS_R10,RS_R11];
            if saveregs<>[] then
              begin
                { ref is only prepared here; the LDM that uses it is emitted
                  further below, after the local area has been released }
                reference_reset(ref,4,[]);
                ref.index:=NR_STACK_POINTER_REG;
                ref.addressmode:=AM_PREINDEXED;
                for r:=RS_R8 to RS_R11 do
                  if r in saveregs then
                    inc(registerarea,4);
                regs:=regs-saveregs;
              end;
          end;
        for r:=RS_R0 to RS_R15 do
          if r in regs then
            inc(registerarea,4);
        { reapply the stack padding reg, in case there was one, see the complementary
          comment in g_proc_entry() (KB) }
        paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
        if paddingreg < RS_R4 then
          if paddingreg in regs then
            internalerror(201306190)
          else
            begin
              regs:=regs+[paddingreg];
              inc(registerarea,4);
            end;
        stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
        if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
           (target_info.system in systems_darwin) then
          begin
            { stack pointer relative restore: release the local area first,
              using the same size computation as g_proc_entry }
            LocalSize:=current_procinfo.calc_stackframe_size;
            if (LocalSize<>0) or
               ((stackmisalignment<>0) and
                ((pi_do_call in current_procinfo.flags) or
                 (po_assembler in current_procinfo.procdef.procoptions))) then
              begin
                if pi_estimatestacksize in current_procinfo.flags then
                  LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
                else
                  localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                { add sp,sp,#localsize: one immediate, two immediates, or via R12 }
                if is_shifter_const(LocalSize,shift) then
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                else if split_into_shifter_const(localsize, imm1, imm2) then
                  begin
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                  end
                else
                  begin
                    a_reg_alloc(list,NR_R12);
                    a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                    a_reg_dealloc(list,NR_R12);
                  end;
              end;
            { Darwin: pop the separately saved high registers, using the
              reference prepared above }
            if (target_info.system in systems_darwin) and
               (saveregs<>[]) then
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
            if regs=[] then
              begin
                { nothing to pop: plain return through lr }
                if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                else
                  list.concat(taicpu.op_reg(A_BX,NR_R14))
              end
            else
              begin
                reference_reset(ref,4,[]);
                ref.index:=NR_STACK_POINTER_REG;
                ref.addressmode:=AM_PREINDEXED;
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
              end;
          end
        else
          begin
            { restore int registers and return }
            reference_reset(ref,4,[]);
            ref.index:=NR_FRAME_POINTER_REG;
            list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
          end;
      end
    { no stack frame: just return }
    else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
      list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
    else
      list.concat(taicpu.op_reg(A_BX,NR_R14))
  end;
  2137. procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
  2138. var
  2139. ref : treference;
  2140. l : TAsmLabel;
  2141. regs : tcpuregisterset;
  2142. r: byte;
  2143. begin
  2144. if (cs_create_pic in current_settings.moduleswitches) and
  2145. (pi_needs_got in current_procinfo.flags) and
  2146. (tf_pic_uses_got in target_info.flags) then
  2147. begin
  2148. { Procedure parametrs are not initialized at this stage.
  2149. Before GOT initialization code, allocate registers used for procedure parameters
  2150. to prevent usage of these registers for temp operations in later stages of code
  2151. generation. }
  2152. regs:=rg[R_INTREGISTER].used_in_proc;
  2153. for r:=RS_R0 to RS_R3 do
  2154. if r in regs then
  2155. a_reg_alloc(list, newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2156. { Allocate scratch register R12 and use it for GOT calculations directly.
  2157. Otherwise the init code can be distorted in later stages of code generation. }
  2158. a_reg_alloc(list,NR_R12);
  2159. reference_reset(ref,4,[]);
  2160. current_asmdata.getglobaldatalabel(l);
  2161. cg.a_label(current_procinfo.aktlocaldata,l);
  2162. ref.symbol:=l;
  2163. ref.base:=NR_PC;
  2164. ref.symboldata:=current_procinfo.aktlocaldata.last;
  2165. list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,ref));
  2166. current_asmdata.getaddrlabel(l);
  2167. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,l,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
  2168. cg.a_label(list,l);
  2169. list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
  2170. list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));
  2171. { Deallocate registers }
  2172. a_reg_dealloc(list,NR_R12);
  2173. for r:=RS_R3 downto RS_R0 do
  2174. if r in regs then
  2175. a_reg_dealloc(list, newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2176. end;
  2177. end;
  2178. procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
  2179. var
  2180. b : byte;
  2181. tmpref : treference;
  2182. instr : taicpu;
  2183. begin
  2184. if ref.addressmode<>AM_OFFSET then
  2185. internalerror(200309071);
  2186. tmpref:=ref;
  2187. { Be sure to have a base register }
  2188. if (tmpref.base=NR_NO) then
  2189. begin
  2190. if tmpref.shiftmode<>SM_None then
  2191. internalerror(2014020702);
  2192. if tmpref.signindex<0 then
  2193. internalerror(200312023);
  2194. tmpref.base:=tmpref.index;
  2195. tmpref.index:=NR_NO;
  2196. end;
  2197. if assigned(tmpref.symbol) or
  2198. not((is_shifter_const(tmpref.offset,b)) or
  2199. (is_shifter_const(-tmpref.offset,b))
  2200. ) then
  2201. fixref(list,tmpref);
  2202. { expect a base here if there is an index }
  2203. if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
  2204. internalerror(200312022);
  2205. if tmpref.index<>NR_NO then
  2206. begin
  2207. if tmpref.shiftmode<>SM_None then
  2208. internalerror(200312021);
  2209. if tmpref.signindex<0 then
  2210. a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.base,tmpref.index,r)
  2211. else
  2212. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
  2213. if tmpref.offset<>0 then
  2214. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
  2215. end
  2216. else
  2217. begin
  2218. if tmpref.base=NR_NO then
  2219. a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
  2220. else
  2221. if tmpref.offset<>0 then
  2222. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
  2223. else
  2224. begin
  2225. instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
  2226. list.concat(instr);
  2227. add_move_instruction(instr);
  2228. end;
  2229. end;
  2230. end;
{ Rewrites ref so that it no longer contains a symbol or an offset.

  The symbol/offset part is placed in a constant pool entry in
  current_procinfo.aktlocaldata (or, for external symbols on Darwin, loaded
  through g_indirect_sym_load), its value is loaded into a temporary
  register and that register is merged into ref as base or index.
  On return ref.symbol is nil and ref.offset is 0; a PC base/index has been
  removed from ref. }
procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  var
    tmpreg, tmpreg2 : tregister;
    tmpref : treference;
    l, piclabel : tasmlabel;
    indirection_done : boolean;
  begin
    { absolute symbols can't be handled directly, we have to store the symbol reference
      in the text segment and access it pc relative

      For now, we assume that references where base or index equals to PC are already
      relative, all other references are assumed to be absolute and thus they need
      to be handled extra.

      A proper solution would be to change refoptions to a set and store the information
      if the symbol is absolute or relative there.
    }
    { create consts entry }
    reference_reset(tmpref,4,[]);
    current_asmdata.getjumplabel(l);
    cg.a_label(current_procinfo.aktlocaldata,l);
    tmpref.symboldata:=current_procinfo.aktlocaldata.last;
    piclabel:=nil;
    tmpreg:=NR_NO;
    indirection_done:=false;
    { emit the data that follows label l; which form is used depends on the
      kind of symbol and on the PIC settings }
    if assigned(ref.symbol) then
      begin
        if (target_info.system=system_arm_darwin) and
           (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
          begin
            { external symbol on Darwin: load it indirectly, no constant
              pool entry is emitted in this case }
            tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
            if ref.offset<>0 then
              a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
            indirection_done:=true;
          end
        else if ref.refaddr=addr_gottpoff then
          current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
        else if (cs_create_pic in current_settings.moduleswitches) then
          if (tf_pic_uses_got in target_info.flags) then
            { GOT entry; ref.offset is added after the GOT load below }
            current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
          else
            begin
              { ideally, we would want to generate

                ldr r1, LPICConstPool
                LPICLocal:
                ldr/str r2,[pc,r1]

                ...
                LPICConstPool:
                .long _globsym-(LPICLocal+8)

                However, we cannot be sure that the ldr/str will follow
                right after the call to fixref, so we have to load the
                complete address already in a register.
              }
              current_asmdata.getaddrlabel(piclabel);
              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
            end
        else
          current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
      end
    else
      { no symbol: only the offset has to be loaded }
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
    { load consts entry }
    if not indirection_done then
      begin
        tmpreg:=getintregister(list,OS_INT);
        tmpref.symbol:=l;
        tmpref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
        if (cs_create_pic in current_settings.moduleswitches) and
           (tf_pic_uses_got in target_info.flags) and
           assigned(ref.symbol) then
          begin
            { the value loaded above is used as index relative to the GOT
              register; fetch the address from the GOT and add the offset }
            reference_reset(tmpref,4,[]);
            tmpref.base:=current_procinfo.got;
            tmpref.index:=tmpreg;
            list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
            if ref.offset<>0 then
              a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
          end;
      end;
    if assigned(piclabel) then
      begin
        { PIC without GOT: the pool entry is relative to piclabel, so add
          the PC at that label to get the complete address }
        cg.a_label(list,piclabel);
        tmpreg2:=getaddressregister(list);
        a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
        tmpreg:=tmpreg2
      end;
    { This routine can be called with PC as base/index in case the offset
      was too large to encode in a load/store. In that case, the entire
      absolute expression has been re-encoded in a new constpool entry, and
      we have to remove the use of PC from the original reference (the code
      above made everything relative to the value loaded from the new
      constpool entry) }
    if is_pc(ref.base) then
      ref.base:=NR_NO;
    if is_pc(ref.index) then
      ref.index:=NR_NO;
    { merge tmpreg into the reference }
    if (ref.base<>NR_NO) then
      begin
        if ref.index<>NR_NO then
          begin
            { base and index are both in use: fold the old base into tmpreg
              and use tmpreg as the new base }
            list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
            ref.base:=tmpreg;
          end
        else
          if ref.base<>NR_PC then
            begin
              { index slot is free: use tmpreg as plain, unshifted,
                positive index }
              ref.index:=tmpreg;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end
          else
            ref.base:=tmpreg;
      end
    else
      ref.base:=tmpreg;
    { symbol and offset are now contained in tmpreg }
    ref.offset:=0;
    ref.symbol:=nil;
  end;
  2349. procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  2350. var
  2351. paraloc1,paraloc2,paraloc3 : TCGPara;
  2352. pd : tprocdef;
  2353. begin
  2354. pd:=search_system_proc('MOVE');
  2355. paraloc1.init;
  2356. paraloc2.init;
  2357. paraloc3.init;
  2358. paramanager.getintparaloc(list,pd,1,paraloc1);
  2359. paramanager.getintparaloc(list,pd,2,paraloc2);
  2360. paramanager.getintparaloc(list,pd,3,paraloc3);
  2361. a_load_const_cgpara(list,OS_SINT,len,paraloc3);
  2362. a_loadaddr_ref_cgpara(list,dest,paraloc2);
  2363. a_loadaddr_ref_cgpara(list,source,paraloc1);
  2364. paramanager.freecgpara(list,paraloc3);
  2365. paramanager.freecgpara(list,paraloc2);
  2366. paramanager.freecgpara(list,paraloc1);
  2367. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2368. alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2369. a_call_name(list,'FPC_MOVE',false);
  2370. dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  2371. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2372. paraloc3.done;
  2373. paraloc2.done;
  2374. paraloc1.done;
  2375. end;
  { Emits inline code that copies len bytes from source to dest.

    list    - assembler list the instructions are appended to
    source  - reference to the first byte to read
    dest    - reference to the first byte to write
    len     - number of bytes to copy; nothing is emitted for len=0
    aligned - true if word/halfword sized accesses may be used

    Strategy, in the order the cases are tested:
      * aligned and len=4 or len=2: one load and one store of that size
      * aligned and len<=helpsize: up to maxtmpreg 32 bit loads into
        temporary registers, followed by the matching stores; whatever is
        left is copied in 4/2/1 byte steps
      * not aligned and len<=4: byte by byte copy
      * everything else: a byte copy loop (genloop/genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved registers; the +2 in helpsize accounts for the registers
        required for referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop for ARM mode using post-indexed addressing; expects srcref,
      dstref, srcreg, destreg and countreg to be set up by the caller.
      Will never be called with count<=4. }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the decrement sets the flags tested by the conditional jump }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether a separate reference has to be created or
      whether the original reference can be kept while copying }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr=addr_full)) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Copy loop for Thumb mode: the base registers are advanced by explicit
      additions instead of post-indexed addressing.
      Will never be called with count<=4. }
    procedure genloop_thumb(count : aword;size : byte);

      { advances the base register of ref by value bytes }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        refincofs(srcref);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the decrement has to be marked as flag setting: its result is
          consumed by the conditional jump below, exactly as in genloop }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the remaining count mod size bytes }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest size that is still copied without a loop;
        52 with maxtmpreg=10 }
      helpsize:=12+maxtmpreg*4;
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { first all word loads ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;

          { ... then all word stores }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy whatever is left in 4, 2 and 1 byte steps }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference inherits all its properties, including
                temppos, from source (not from dest) }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is generated as an
    unaligned one if the alignment of either reference is 1 or 3, and as an
    aligned one otherwise. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      treat_as_aligned : boolean;
    begin
      treat_as_aligned:=not((source.alignment in [1,3]) or
                            (dest.alignment in [1,3]));
      g_concatcopy_internal(list,source,dest,len,treat_as_aligned);
    end;
  { Emits an overflow check for the operation that produced l when no
    explicit overflow location is available: g_overflowCheck_loc is called
    with a location whose loc field is LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      no_ovloc : tlocation;
    begin
      { only the loc field is initialised, exactly as much as the callee
        needs to pick its LOC_VOID branch }
      no_ovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,no_ovloc);
    end;
  { Emits a conditional jump over a call to FPC_OVERFLOW.

    Nothing is generated unless overflow checking is enabled in the local
    switches. With ovloc.loc=LOC_VOID the branch condition is derived from
    the result type def and, for pointer/unsigned/boolean types, from the
    opcode of the last instruction in List. With ovloc.loc=LOC_FLAGS the
    inverse of the given flags is used and the flags register is released.
    Any other location is an internal error. }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      no_overflow : tasmlabel;
      skipjmp : TAiCpu;
      okflags : tresflags;
      carry_based : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(no_overflow);
      case ovloc.loc of
        LOC_VOID:
          begin
            skipjmp:=taicpu.op_sym(A_B,no_overflow);
            skipjmp.is_jmp:=true;
            { pointers and these ordinal types are checked via the carry
              flag, everything else via the overflow flag }
            carry_based:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]));
            if carry_based then
              begin
                { the carry sense depends on whether the preceding
                  instruction was a subtraction }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  skipjmp.SetCondition(C_CS)
                else
                  skipjmp.SetCondition(C_CC);
              end
            else
              skipjmp.SetCondition(C_VC);
            list.concat(skipjmp);
          end;
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,no_overflow);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,no_overflow);
    end;
  { Intentionally empty: nothing is emitted here. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { the registers are saved by g_proc_entry }
    end;
  { Intentionally empty: nothing is emitted here. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { the registers are restored by g_proc_exit }
    end;
  { Emits a jump to l that is taken when the comparison result cond holds.

    Outside Thumb mode this is a single conditional branch. In Thumb mode a
    conditional branch with the inverted condition skips an unconditional
    jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;

      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Returns the opcode selected by the size pair (fromsize,tosize): A_VMOV
    if both are the same of OS_F32/OS_F64, A_VCVT if they differ. Every
    other combination inside OS_F32..OS_F128 is an internal error. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      scalar_ops : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=scalar_ops[fromsize,tosize];
      if result<>A_NONE then
        exit;
      internalerror(200312205);
    end;
  { Returns the opcode postfix selected by the size pair (fromsize,tosize);
    PF_None for every combination that involves a size other than OS_F32 or
    OS_F64. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      scalar_postfixes : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      result:=scalar_postfixes[fromsize,tosize];
    end;
  { Emits a scalar move or conversion from mm register reg1 to mm register
    reg2. Opcode and postfix are looked up with (tosize,fromsize). A
    non-scalar shuffle is an internal error. A resulting A_VMOV is
    registered through add_move_instruction. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      op : tasmop;
      postfix : TOpPostfix;
      mov : taicpu;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407);

      op:=get_scalar_mm_op(tosize,fromsize);
      postfix:=get_scalar_mm_prefix(tosize,fromsize);
      mov:=setoppostfix(taicpu.op_reg_reg(op,reg2,reg1),postfix);
      list.concat(mov);

      if mov.opcode=A_VMOV then
        add_move_instruction(mov);
    end;
  { Loads a scalar value of size fromsize from ref into the mm register reg,
    converting it to tosize if the sizes differ.

    Integer sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as the float size
    of the same width and must then match tosize. References with alignment
    1 or 2 are loaded through integer registers, all others with A_VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpr,
      loadreg : tregister;
      gprpair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      if fromsize in [OS_32,OS_S32] then
        begin
          fromsize:=OS_F32;
          { since we are loading an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112801);
        end
      else if fromsize in [OS_64,OS_S64] then
        begin
          fromsize:=OS_F64;
          { since we are loading an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112901);
        end;

      { a conversion needs an intermediate register of the source size }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        case fromsize of
          OS_F32:
            begin
              gpr:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,ref,gpr);
              a_loadmm_intreg_reg(list,OS_32,OS_F32,gpr,loadreg,mms_movescalar);
            end;
          OS_F64:
            begin
              gprpair.reglo:=getintregister(list,OS_32);
              gprpair.reghi:=getintregister(list,OS_32);
              cg64.a_load64_ref_reg(list,ref,gprpair);
              cg64.a_loadmm_intreg64_reg(list,OS_F64,gprpair,loadreg);
            end;
          else
            internalerror(2009112412);
        end;

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar value in mm register reg to ref, converting it from
    fromsize to tosize first if the sizes differ.

    Integer target sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as the
    float size of the same width and must then match fromsize. References
    with alignment 1 or 2 are written through integer registers, all others
    with A_VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpr,
      storereg : tregister;
      gprpair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      if tosize in [OS_32,OS_S32] then
        begin
          tosize:=OS_F32;
          { since we are storing an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112801);
        end
      else if tosize in [OS_64,OS_S64] then
        begin
          tosize:=OS_F64;
          { since we are storing an integer, no conversion may be required }
          if (fromsize<>tosize) then
            internalerror(2009112901);
        end;

      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          { convert into a temporary register of the target size first }
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        case tosize of
          OS_F32:
            begin
              gpr:=getintregister(list,OS_32);
              a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpr,shuffle);
              a_load_reg_ref(list,OS_32,OS_32,gpr,ref);
            end;
          OS_F64:
            begin
              gprpair.reglo:=getintregister(list,OS_32);
              gprpair.reghi:=getintregister(list,OS_32);
              cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gprpair);
              cg64.a_load64_reg_ref(list,gprpair,ref);
            end;
          else
            internalerror(2009112417);
        end;
    end;
  { Transfers the raw 32 bit contents of integer register intreg into mm
    register mmreg with A_VMOV. tosize must be OS_F32, fromsize must be
    OS_32 or OS_S32 and a shuffle, if given, must be scalar; anything else
    is an internal error. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      bad_shuffle : boolean;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(tosize=OS_F32) then
        internalerror(2009112419);
      if not(fromsize in [OS_32,OS_S32]) then
        internalerror(2009112420);

      bad_shuffle:=assigned(shuffle) and
                   not shufflescalar(shuffle);
      if bad_shuffle then
        internalerror(2009112516);

      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of mm register mmreg into integer
    register intreg with A_VMOV. fromsize must be OS_F32, tosize must be
    OS_32 or OS_S32 and a shuffle, if given, must be scalar; anything else
    is an internal error. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      bad_shuffle : boolean;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(fromsize=OS_F32) then
        internalerror(2009112430);
      if not(tosize in [OS_32,OS_S32]) then
        internalerror(2009112420);

      bad_shuffle:=assigned(shuffle) and
                   not shufflescalar(shuffle);
      if bad_shuffle then
        internalerror(2009112514);

      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Only OP_XOR of a register with itself is implemented: the register is
    cleared by moving a zeroed integer register into it.

    The vfp doesn't support xor nor any other logical operation, but this
    routine is used to initialise global mm regvars, and an mm register can
    easily be initialised with 0. Every other operation, a mismatch between
    src and dst or their size, or a shuffle is an internal error. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op<>OP_XOR then
        internalerror(2009112906);

      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);

      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);

      if size=OS_F32 then
        list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
      else if size=OS_F64 then
        { the same zero register supplies both halves of the double }
        list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
      else
        internalerror(2009112908);
    end;
  { After one of the operations OP_MUL, OP_SHL, OP_ADD, OP_SUB or OP_NEG on
    an 8 or 16 bit size, reloads dst into itself as an OS_32 to size move.
    Does nothing for other operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      adjusted_ops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      narrow_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in adjusted_ops) then
        exit;
      if not(size in narrow_sizes) then
        exit;
      a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits A_MLA op1,op2,op3,op4. Before that, every operand that is R15, or
    R13 when generating Thumb or Thumb-2 code, is copied into a freshly
    allocated integer register which is then used in its place. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new register if it is one of the
      registers described above }
    procedure substitute_if_needed(var reg : TRegister);
      var
        copyreg : TRegister;
        needs_copy : boolean;
      begin
        needs_copy:=(getsupreg(reg)=RS_R15);
        if not needs_copy then
          needs_copy:=(GenerateThumbCode or GenerateThumb2Code) and
                      (getsupreg(reg)=RS_R13);
        if not needs_copy then
          exit;

        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      substitute_if_needed(op1);
      substitute_if_needed(op2);
      substitute_if_needed(op3);
      substitute_if_needed(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { Emits a call to fpc_read_tp and copies R0 afterwards into
    current_procinfo.tlsoffset; R0 is marked as allocated for the duration
    of that sequence. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    var
      tpreg : tregister;
    begin
      tpreg:=NR_R0;
      list.concat(tai_regalloc.alloc(tpreg,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,tpreg,current_procinfo.tlsoffset);
      list.concat(tai_regalloc.dealloc(tpreg,nil));
    end;
  { 64 bit operation regdst := regdst op regsrc on register pairs.

    OP_NEG is emitted as a flag setting A_RSB on the low words followed by
    A_RSC on the high words, OP_NOT as two 32 bit OP_NOT operations; all
    other operations are forwarded to a_op64_reg_reg_reg. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { In-place 64 bit operation with a constant: forwards to
    a_op64_const_reg_reg with reg as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,
                           op,size,value,
                           reg,   { source }
                           reg);  { destination }
    end;
  { 64 bit operation with a constant operand and separate source and
    destination: forwards to the checkoverflow variant with setflags=false
    and discards the reported overflow location. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      ignored_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,ignored_ovloc);
    end;
  { 64 bit three operand operation on register pairs: forwards to the
    checkoverflow variant with setflags=false and discards the reported
    overflow location. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      ignored_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,ignored_ovloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into mm
    register mmreg with a three operand A_VMOV (low word first). mmsize
    must be OS_F64, anything else is an internal error. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      list.concat(
        taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers the raw contents of mm register mmreg into the integer
    register pair intreg with a three operand A_VMOV (low word first).
    mmsize must be OS_F64, anything else is an internal error. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      list.concat(
        taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
    end;
  { 64 bit operation regdst := regsrc op value on register pairs.

    list     - assembler list the instructions are appended to
    op       - operation; OP_NEG and OP_NOT are internal errors here
    size     - operation size; the overflow location is only reported for
               OS_64
    value    - constant operand, split into its low and high 32 bits
    setflags - request a flag setting high word instruction
    ovloc    - set to LOC_VOID on entry; for flag setting OP_ADD/OP_SUB of
               size OS_64 it is changed to LOC_FLAGS with F_CS (add) or
               F_CC (sub)

    Constants that are not valid shifter operands are loaded into a
    temporary register first.

    In the flag setting variant NR_DEFAULTFLAGS stays allocated on exit. In
    the plain OP_ADD/OP_SUB variant the flags only carry from the low to
    the high word and are released again after the high word instruction,
    as a_op64_reg_reg_reg_checkoverflow does. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the ARM has a weird opinion on how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed, nobody else uses the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the borrow has been consumed, nobody else uses the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { 64 bit operation on register pairs: regdst := regsrc1 op regsrc2 for
    OP_ADD/OP_AND/OP_OR/OP_XOR and regdst := regsrc2 - regsrc1 for OP_SUB.

    OP_NEG and OP_NOT are internal errors here. ovloc is set to LOC_VOID on
    entry. If setflags or the cgsetflags field of the code generator is set,
    OP_ADD/OP_SUB also make the high word instruction flag setting, leave
    NR_DEFAULTFLAGS allocated and, for size OS_64, report the overflow
    condition in ovloc as LOC_FLAGS with F_CS (add) or F_CC (sub). }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      flag_setting : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      flag_setting:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);

      if not flag_setting then
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                { the flags only carry from the low to the high word }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                { the flags only carry from the low to the high word }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
          exit;
        end;

      { flag setting variant: op is OP_ADD or OP_SUB from here on }
      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      if op=OP_ADD then
        begin
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
        end
      else
        begin
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
        end;

      if size<>OS_64 then
        exit;

      { the ARM has a weird opinion on how flags for SUB/ADD are handled }
      ovloc.loc:=LOC_FLAGS;
      if op=OP_ADD then
        ovloc.resflags:=F_CS
      else
        ovloc.resflags:=F_CC;
    end;
  { Creates the integer register allocator for Thumb code.  R0..R6 are
    always offered to the allocator; R7 is only offered when it is not
    the frame pointer of the current procedure. }
  procedure tthumbcgarm.init_register_allocators;
    var
      r7_is_framepointer : boolean;
    begin
      inherited init_register_allocators;
      r7_is_framepointer:=assigned(current_procinfo) and
        (current_procinfo.framepointer=NR_R7);
      if not r7_is_framepointer then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocators (in that order)
    and then lets the inherited implementation finish the cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    const
      ownedallocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      idx : integer;
    begin
      for idx:=low(ownedallocators) to high(ownedallocators) do
        rg[ownedallocators[idx]].free;
      inherited done_register_allocators;
    end;
  { Generates the Thumb procedure prologue.

    list         - assembler list the prologue is appended to
    localsize    - size of the local variables; rounded up to 4 and then
                   adjusted for the alignment of the saved register area
    nostackframe - when true, no prologue code is generated at all

    The prologue pushes the used non-volatile integer registers plus LR
    (and R7 when a separate frame pointer is used), reserves the local
    area and finally initialises the frame pointer. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          { disabled in the original code when a frame pointer is used:
            //!!!! a_reg_alloc(list,NR_R12);
            //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); }
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);
          { safely estimate stack size: a large frame needs R4 as scratch
            register below, so it has to be saved as well }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;
          { every pushed register occupies four bytes }
          registerarea:=0;
          if regs<>[] then
            begin
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
            end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                    begin
                      { dump both sizes before aborting with an internal error }
                      writeln(localsize);
                      writeln(tcpuprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              { a single SUB covers up to 508 bytes; this matches the <=508
                limit used by g_proc_exit and avoids a redundant "sub sp,#0"
                for a frame of exactly 508 bytes }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { too large for immediates: add the negated size via R4,
                    which has been pushed above }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  include(regs,RS_R4);
                  //!!!! if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                  //!!!! a_reg_alloc(list,NR_R12);
                  //!!!! a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  //!!!! list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  //!!!! a_reg_dealloc(list,NR_R12);
                end;
            end;
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { frame pointer := stack pointer }
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
            end;
        end;
    end;
  { Generates the Thumb procedure epilogue.

    list         - assembler list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true, only a plain return through LR is emitted

    With a stack frame the local area is released and the saved registers
    are popped, with PC taking the place of the saved LR.  Code is only
    generated when the stack pointer is the frame pointer or when
    targeting Darwin. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);

    { return through LR: BX when the CPU supports it, MOV PC,LR otherwise }
    procedure emit_plain_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    var
      framesize : longint;
      supreg : byte;
      restoreset : tcpuregisterset;
      savedbytes : DWord;
      misalignment : pint;
      usesstackparas : Boolean;
    begin
      if nostackframe then
        begin
          emit_plain_return;
          exit;
        end;

      usesstackparas:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { rebuild the register set pushed by the prologue, PC replacing LR }
      restoreset:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(restoreset,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(restoreset,getsupreg(current_procinfo.framepointer));

      { four bytes per saved register }
      savedbytes:=0;
      for supreg:=RS_R0 to RS_R15 do
        if supreg in restoreset then
          inc(savedbytes,4);
      misalignment:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if usesstackparas then
        framesize:=tcpuprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalignment,current_settings.alignment.localalignmax)-misalignment;

      { NOTE(review): nothing is emitted for a non-SP frame pointer on
        non-Darwin targets - confirm this is intended }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; an empty area needs no instruction at all }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if restoreset=[] then
        emit_plain_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,restoreset));
    end;
  { Loads a value of size fromsize from memory into reg (Thumb).

    list     - assembler list the code is appended to
    fromsize - size of the memory operand; replaced by tosize when it is
               not smaller than tosize
    tosize   - size of the destination
    Ref      - memory location to read
    reg      - destination register

    References whose alignment (1 or 2) is smaller than the access size
    are assembled from byte or halfword loads; everything else goes
    through handle_load_store. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      step : integer;
      bigendian : boolean;

    { Points partref at the first byte of the operand.  Only complicated
      references (symbol, index, offset outside -124..124, or a base/index
      that is also the destination) get their address loaded into a fresh
      register first. }
    procedure init_partref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { Advances partref, loads the next part into partreg, shifts it into
      position and ORs it into reg. }
    procedure merge_part(partsize : tcgsize;advance,shift : longint);
      begin
        inc(partref.offset,advance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        list.concat(taicpu.op_reg_const(A_LSL,partreg,shift));
        list.concat(taicpu.op_reg_reg(A_ORR,reg,partreg));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308298);
      end;
      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { the parts are always read starting with the least significant one }
          bigendian:=target_info.endian=endian_big;
          if bigendian then
            step:=-1
          else
            step:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref;
                if bigendian then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { the upper byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  merge_part(OS_8,step,8)
                else
                  merge_part(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if bigendian then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_part(OS_16,step*2,16);
                  end
                else
                  begin
                    { four single bytes }
                    if bigendian then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    merge_part(OS_8,step,8);
                    merge_part(OS_8,step,16);
                    merge_part(OS_8,step,24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;
      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg (Thumb).

    Values accepted by is_thumb_imm are moved directly; every other value
    is appended to the local data of the current procedure and loaded
    PC-relative.  Sizes other than 8, 16 or 32 bit raise an internal
    error. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);
      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;
      { constant pool entry: label followed by the 32 bit value }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the "self" parameter of procdef, wherever the
    caller-side parameter locations place it (register or stack).

    Offsets accepted by is_thumb_imm are subtracted directly; other
    offsets are loaded from the local data into R4, which is saved and
    restored around the operation. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      stackref,
      poolref : treference;
      curloc : Pcgparalocation;
      poollabel : TAsmLabel;

    { saves R4 and loads ioffset into it from an aligned pool entry }
    procedure load_ioffset_into_r4;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    { restores the R4 saved by load_ioffset_into_r4 }
    procedure restore_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
        (selfsym.typ=paravarsym)) then
        internalerror(200305251);
      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    load_ioffset_into_r4;
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R4,curloc^.register);
                    restore_r4;
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(stackref,curloc^.reference.index,curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,stackref)
                else
                  begin
                    load_ioffset_into_r4;
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R4,stackref);
                    restore_r4;
                  end;
              end
            else
              internalerror(200309189);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Thumb front end of handle_load_store.  When the reference cannot be
    used directly with the given opcode/postfix, its address is loaded
    into a temporary register and a plain [tmpreg] reference is passed to
    the inherited implementation, whose result is returned. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      effectiveref : treference;
      addrreg : TRegister;
      touchessp,
      wordlimit,bytelimit,halflimit,
      spwordlimit,ldrlimit : boolean;
    begin
      effectiveref:=ref;
      touchessp:=(ref.base=NR_STACK_POINTER_REG) or
        (ref.index=NR_STACK_POINTER_REG);

      { LDR/STR limitations }
      wordlimit:=
        (((op=A_LDR) and (oppostfix=PF_None)) or
         ((op=A_STR) and (oppostfix=PF_None))) and
        (ref.base<>NR_STACK_POINTER_REG) and
        (abs(ref.offset)>124);

      { LDRB/STRB limitations }
      bytelimit:=
        (((op=A_LDR) and (oppostfix=PF_B)) or
         ((op=A_LDRB) and (oppostfix=PF_None)) or
         ((op=A_STR) and (oppostfix=PF_B)) or
         ((op=A_STRB) and (oppostfix=PF_None))) and
        (touchessp or
         (abs(ref.offset)>31));

      { LDRH/STRH limitations }
      halflimit:=
        (((op=A_LDR) and (oppostfix=PF_H)) or
         ((op=A_LDRH) and (oppostfix=PF_None)) or
         ((op=A_STR) and (oppostfix=PF_H)) or
         ((op=A_STRH) and (oppostfix=PF_None))) and
        (touchessp or
         (abs(ref.offset)>62) or
         ((abs(ref.offset) mod 2)<>0));

      { SP relative word loads with an offset above 1020 }
      spwordlimit:=
        (op=A_LDR) and
        (oppostfix in [PF_None]) and
        (ref.base=NR_STACK_POINTER_REG) and
        (abs(ref.offset)>1020);

      { sign extending loads and remaining loads with an offset above 124 }
      ldrlimit:=
        (op=A_LDR) and
        ((oppostfix in [PF_SH,PF_SB]) or
         (abs(ref.offset)>124));

      if wordlimit or bytelimit or halflimit or spwordlimit or ldrlimit then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(effectiveref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;
      Result:=inherited handle_load_store(list, op, oppostfix, reg, effectiveref);
    end;
  { Emits the two-operand Thumb operation dst := dst op src
    (dst := op src for OP_NEG and OP_NOT).

    list - assembler list the code is appended to
    op   - operation; OP_DIV and OP_IDIV raise an internal error
    size - operand size; OP_ROL only accepts OS_32/OS_S32
    src  - source register
    dst  - destination (and first operand) register

    maybeadjustresult is called on dst after the instruction. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value: tmpreg := 32 - src }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the complemented count held in tmpreg; rotating by
              src itself would perform a ROR instead of the requested ROL }
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := dst op a for a constant operand (Thumb).

    list - assembler list the code is appended to
    op   - operation to perform
    size - operand size
    a    - constant operand
    dst  - destination (and first operand) register

    ADD/SUB with a constant accepted by is_thumb_imm (directly or after
    negation) become a single immediate instruction.  Trivial
    multiplications, divisions and ANDs are simplified, shifts use the
    immediate form, and everything else loads the constant into a
    temporary register.  maybeadjustresult is called on dst at the end.
    The sections guarded by DUMMY are kept exactly as found. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      tmpreg : tregister;
  {$ifdef DUMMY}
      l1 : longint;
  {$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { turn ADD/SUB of a negative constant into the opposite operation
        when the negated constant fits an immediate }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
          { no overflow location is reported here; the original code only
            carries disabled (//!!!) assignments of ovloc.resflags
            (F_CS for OP_ADD, F_CC for OP_SUB) at this point }
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
  {$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
  {$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := x AND $FFFFFFFF leaves the register unchanged: emit nothing }
          else if (op = OP_AND) and (not(dword(a))=0) then
            { do nothing }
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
  {$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
  {$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { generic fallback: materialise the constant in a register }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg(list,op,size,tmpreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src op a (Thumb).  Adding a positive multiple of 4 up to
    1020 to R13 with a destination other than R13 is emitted as a single
    three-operand ADD; every other combination is delegated to the
    inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_relative_add : boolean;
    begin
      sp_relative_add:=(op=OP_ADD) and (src=NR_R13) and (dst<>NR_R13) and
        ((a mod 4)=0) and (a>0) and (a<=1020);
      if not sp_relative_add then
        begin
          inherited a_op_const_reg_reg(list,op,size,a,src,dst);
          exit;
        end;
      list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition f as 0 or 1 in reg, using a conditional
    branch around two MOV instructions:

        b<cond> settrue
        mov     reg,#0
        b       done
      settrue:
        mov     reg,#1
      done:

    The flags register is deallocated right before the final label. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      settrue,done : tasmlabel;
      condbranch : taicpu;
    begin
      current_asmdata.getjumplabel(settrue);
      current_asmdata.getjumplabel(done);

      condbranch:=setcondition(taicpu.op_sym(A_B,settrue),flags_to_cond(f));
      condbranch.is_jmp:=true;
      list.concat(condbranch);

      { condition false }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,done));

      { condition true }
      cg.a_label(list,settrue);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,done);
    end;
  { Creates the integer, FPU and MM register allocators for Thumb-2 code.
    The integer set omits R9 on Darwin; the MM set depends on the
    selected FPU type. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_darwin then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      case current_settings.fputype of
        fpu_vfpv3,
        fpu_vfpv4:
          { D0..D7 and D16..D31 are listed ahead of D8..D15 }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        fpu_fpv4_s16,
        fpu_vfpv3_d16:
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        else
          { NOTE(review): this list mixes RS_S* and RS_R* super registers -
            kept exactly as found, confirm it is intended }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
      end;
    end;
  { Releases the integer, FPU and MM register allocators (in that order)
    and then lets the inherited implementation finish the cleanup. }
  procedure tthumb2cgarm.done_register_allocators;
    const
      ownedallocators : array[0..2] of tregistertype =
        (R_INTREGISTER,R_FPUREGISTER,R_MMREGISTER);
    var
      idx : integer;
    begin
      for idx:=low(ownedallocators) to high(ownedallocators) do
        rg[ownedallocators[idx]].free;
      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX) and records in the current
    procedure's flags that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinstr);
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
        if not(pi_do_call in current_procinfo.flags) then
          internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg (Thumb-2).  Tried in this order:
    MOV with a Thumb-2 immediate, MVN with the inverted value, MOVW for
    values that fit into 16 bits, and finally a PC-relative load from the
    local data of the current procedure.  Sizes other than 8, 16 or 32
    bit raise an internal error. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;
      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;
      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { constant pool entry: label followed by the 32 bit value }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from memory into reg (Thumb-2).

    list     - assembler list the code is appended to
    fromsize - size of the memory operand; replaced by tosize when it is
               not smaller than tosize
    tosize   - size of the destination
    Ref      - memory location to read
    reg      - destination register

    References whose alignment (1 or 2) is smaller than the access size
    are assembled from byte or halfword loads that are merged with
    ORR ...,LSL #n; everything else goes through handle_load_store. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      shiftop : tshifterop;
      step : integer;
      bigendian : boolean;

    { Points partref at the first byte of the operand.  Only complicated
      references (symbol, index, offset outside -255..maxoffset, or a
      base/index that is also the destination) get their address loaded
      into a fresh register first. }
    procedure init_partref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-255) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { Advances partref, loads the next part into partreg and ORs it,
      shifted left by shift bits, into reg. }
    procedure merge_part(partsize : tcgsize;advance,shift : longint);
      begin
        inc(partref.offset,advance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        shiftop.shiftimm:=shift;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;
      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,postfix,reg,ref)
      else
        begin
          { the parts are always read starting with the least significant one }
          bigendian:=target_info.endian=endian_big;
          if bigendian then
            step:=-1
          else
            step:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref(4094);
                if bigendian then
                  inc(partref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                partreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { the upper byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  merge_part(OS_8,step,8)
                else
                  merge_part(OS_S8,step,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref(4092);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if bigendian then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_part(OS_16,step*2,16);
                  end
                else
                  begin
                    { four single bytes }
                    if bigendian then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    merge_part(OS_8,step,8);
                    merge_part(OS_8,step,16);
                    merge_part(OS_8,step,24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end;
      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Register/register operation "dst := op(src, dst)" for Thumb-2.

    Only OP_NOT is treated here: the value is inverted with MVN and, for
    8 and 16 bit sizes, extended again (UXTB/SXTB/UXTH/SXTH) so that dst
    holds a properly zero- or sign-extended result.  Every other operation
    is forwarded unchanged to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: tcgsize; src, dst: TRegister);
    begin
      { anything but NOT: let the ancestor deal with it }
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { MVN inverted all 32 bits, so re-extend sub-word results }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
      end;
    end;
  { Generates Thumb-2 code for "dst := src <op> a" with a constant operand.

    list     - instruction list the code is appended to
    op       - operation; ADD/SUB may be swapped internally when -a is encodable
    size     - operand size
    a        - the constant operand
    src, dst - source and destination registers
    setflags - request a flag setting instruction (cgsetflags has the same effect)
    ovloc    - reset to LOC_VOID on entry; set to LOC_FLAGS with F_CS (ADD) or
               F_CC (SUB) when flags were requested, the constant was handled
               by the shifter-constant path and size is OS_8, OS_16 or OS_32.
               When the generic fallback is used, ovloc is whatever
               a_op_reg_reg_reg_checkoverflow reports.

    maybeadjustresult is always called on dst at the end. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      scratch : tregister;
      shiftop : tshifterop;
      power : longint;
    begin
      ovloc.loc:=LOC_VOID;

      { when the negated constant is a shifter constant, turn ADD into SUB
        (and vice versa) and continue with the negated value }
      if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);

          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_LSL;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  { rotate left by a == rotate right by 32-a }
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_ROR;
                  shiftop.shiftimm:=32-a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_ROR;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              shifterop_reset(shiftop);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shiftop.shiftmode:=SM_LSR;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_ASR;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          else
            { remaining operations: use the immediate form, except for
              ADD/SUB constants outside 0..4095 which go through a register }
            if (op in [OP_SUB, OP_ADD]) and
               ((a < 0) or
                (a > 4095)) then
              begin
                scratch:=getintregister(list,size);
                a_load_const_reg(list, size, a, scratch);
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,scratch),
                  toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end
            else
              begin
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),
                  toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end;

            { tell the caller which flag carries the result }
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,power) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,power,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,power) and not(cgsetflags or setflags) then
            begin
              if power>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=power;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,power) and not(cgsetflags or setflags) then
            begin
              if power>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=power;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
          else if (op = OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
          {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
          else if (op = OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          else if (op = OP_AND) and (a = $FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
            begin
              { the cleared bits form one run: copy, then clear the bit field }
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
            end
          else
            begin
              { generic fallback: constant into a register, then reg/reg/reg }
              scratch:=getintregister(list,size);
              a_load_const_reg(list,size,a,scratch);
              a_op_reg_reg_reg_checkoverflow(list,op,size,scratch,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Thumb-2 opcode for each generic TOpCG operation, indexed by TOpCG.
    NOTE(review): entries presumably follow the declaration order of TOpCG
    (declared elsewhere) - confirm before reordering. }
  const
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop = (
      A_NONE,
      A_MOV,
      A_ADD,
      A_AND,
      A_UDIV,
      A_SDIV,
      A_MUL,
      A_MUL,
      A_NONE,
      A_MVN,
      A_ORR,
      A_ASR,
      A_LSL,
      A_LSR,
      A_SUB,
      A_EOR,
      A_NONE,
      A_ROR
    );
  { Generates Thumb-2 code for "dst := src2 <op> src1".

    list       - instruction list the code is appended to
    op         - operation; OP_NEG and OP_NOT are rejected with an internal error
    size       - operand size (rotates require OS_32/OS_S32)
    src1, src2 - source registers; neither is modified by the generated code
    dst        - destination register
    setflags   - request flag setting code (cgsetflags has the same effect)
    ovloc      - reset to LOC_VOID on entry.  For OP_MUL/OP_IMUL with flags
                 requested a long multiply is emitted, its high word is
                 compared and ovloc is set to LOC_FLAGS/F_NE.

    maybeadjustresult is always called on dst at the end. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value.  The rotate count is built
              in the temporary so that the caller's src1 register keeps its
              value (it used to be overwritten with 32-src1). }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: low word in dst, high word in overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand.
                          The copy must go into tmpreg; copying into dst
                          left tmpreg undefined. }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: high word must equal the sign extension of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: see the long multiply case above }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the condition described by f as 1 or 0 in reg, using an
    ITE block followed by two conditional MOVs: reg:=1 under the condition
    and reg:=0 under the inverse condition.  The size parameter is not
    used by this implementation. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE, cond));
      { "then" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      { "else" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Emits the procedure prologue.

    list         - instruction list the prologue is appended to
    localsize    - size of the local area; rounded up to a multiple of 4 and
                   further adjusted for stack alignment before use
    nostackframe - when true no prologue code is generated at all

    Steps, in emission order: optionally remember SP in R12 (when a separate
    frame pointer is used), push the non-volatile integer registers with
    STMFD, set up the frame pointer, lower SP by the local size and finally
    store the used FPU registers F0..F7 with SFM. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      memref : treference;
      dummyshift : byte;
      firstfloatreg,lastfloatreg,
      regno : byte;
      saveset : tcpuregisterset;
      stackmisalignment: pint;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if nostackframe then
        exit;

      firstfloatreg:=RS_NO;
      lastfloatreg:=RS_NO;
      { save floating point registers? }
      for regno:=RS_F0 to RS_F7 do
        if regno in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfloatreg=RS_NO then
              firstfloatreg:=regno;
            lastfloatreg:=regno;
            inc(stackmisalignment,12);
          end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { keep the entry value of SP in R12 for the frame pointer setup below }
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(memref,4,[]);
      memref.index:=NR_STACK_POINTER_REG;
      memref.addressmode:=AM_PREINDEXED;
      saveset:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        saveset:=saveset+[RS_FRAME_POINTER_REG,RS_R14]
      else if (saveset<>[]) or (pi_do_call in current_procinfo.flags) then
        include(saveset,RS_R14);
      if saveset<>[] then
        begin
          { each pushed register moves the stack by 4 bytes }
          for regno:=RS_R0 to RS_R15 do
            if (regno in saveset) then
              inc(stackmisalignment,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,saveset),PF_FD));
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(localsize,dummyshift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size not encodable as an immediate: go through R12 }
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if firstfloatreg<>RS_NO then
        begin
          reference_reset(memref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the store: compute the address in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
            lastfloatreg-firstfloatreg+1,memref));
        end;
    end;
  { Emits the procedure epilogue.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true only "BX R14" is generated

    With a stack frame: reload the used FPU registers F0..F7 with LFM,
    release the local area by adding its size to SP, then either return
    with "BX R14" (no integer registers to restore) or pop the saved
    integer registers with LDMFD, the set containing R15 in place of R14. }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      memref : treference;
      firstfloatreg,lastfloatreg,
      regno : byte;
      dummyshift : byte;
      restoreset : tcpuregisterset;
      LocalSize : longint;
      stackmisalignment: pint;
    begin
      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      stackmisalignment:=0;
      { restore floating point register }
      firstfloatreg:=RS_NO;
      lastfloatreg:=RS_NO;
      { save floating point registers? }
      for regno:=RS_F0 to RS_F7 do
        if regno in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfloatreg=RS_NO then
              firstfloatreg:=regno;
            lastfloatreg:=regno;
            { floating point register space is already included in
              localsize below by calc_stackframe_size
              inc(stackmisalignment,12);
            }
          end;

      if firstfloatreg<>RS_NO then
        begin
          reference_reset(memref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the load: compute the address in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
            lastfloatreg-firstfloatreg+1,memref));
        end;

      restoreset:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (restoreset<>[]) then
        begin
          { pop into R15 instead of R14 }
          exclude(restoreset,RS_R14);
          include(restoreset,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        restoreset:=restoreset+[RS_FRAME_POINTER_REG,RS_R15];

      for regno:=RS_R0 to RS_R15 do
        if (regno in restoreset) then
          inc(stackmisalignment,4);
      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;

      LocalSize:=current_procinfo.calc_stackframe_size;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(LocalSize,dummyshift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { size not encodable as an immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if restoreset=[] then
        list.concat(taicpu.op_reg(A_BX,NR_R14))
      else
        begin
          reference_reset(memref,4,[]);
          memref.index:=NR_STACK_POINTER_REG;
          memref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,memref,R_INTREGISTER,R_SUBWHOLE,restoreset),PF_FD));
        end;
    end;
  { Emits the load/store instruction "op<oppostfix> reg, ref" after rewriting
    ref into a form this routine considers directly usable.

    list      - instruction list the code is appended to
    op        - load/store opcode
    oppostfix - postfix applied to the final instruction
    reg       - register that is loaded or stored
    ref       - memory reference (by value; the local copy is modified)

    Returns the possibly rewritten reference that was used in the final
    instruction.

    Rewrites performed, in order:
      1. a reference with only an index gets that index as its base;
      2. symbols and offsets outside the accepted range are loaded into a
         temporary register (symbols through a constant placed in
         current_procinfo.aktlocaldata and read PC relative);
      3. base+index+offset is reduced by adding the offset into a register;
      4. a shifted index combined with R15 as base gets R15 copied first;
      5. FPU/VFP accesses with an index get base and index added up, as
         these only take base+offset. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      scratch : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      is_fpu_access : boolean;
    begin
      scratch:=NR_NO;
      { op never changes below, so classify it once }
      is_fpu_access:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         //(ref.offset<-4095) or
         (ref.offset<-255) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (is_fpu_access and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0) or
           { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
           assigned(ref.symbol)
          )
         ) then
        begin
          reference_reset(poolref,4,[]);

          { load symbol }
          scratch:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;

              if ref.refaddr=addr_gottpoff then
                current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
              else
                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));

              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,scratch,poolref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            a_load_const_reg(list,OS_ADDR,ref.offset,scratch);

          { hook the temporary into the reference }
          if (ref.base<>NR_NO) then
            begin
              if ref.index<>NR_NO then
                begin
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
                  ref.base:=scratch;
                end
              else
                begin
                  ref.index:=scratch;
                  ref.shiftimm:=0;
                  ref.signindex:=1;
                  ref.shiftmode:=SM_None;
                end;
            end
          else
            ref.base:=scratch;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base+index+offset: fold the offset into a register }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if scratch<>NR_NO then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,scratch,scratch)
          else
            begin
              scratch:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,scratch);
              ref.base:=scratch;
            end;
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          scratch:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, scratch, NR_R15));
          ref.base := scratch;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if is_fpu_access and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if scratch<>NR_NO then
            begin
              if ref.base=scratch then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,scratch,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,scratch,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>scratch then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,ref.base,scratch))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
                  ref.base:=scratch;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              scratch:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,ref.index));
              ref.base:=scratch;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Moves a value between two multimedia/VFP registers (reg1 -> reg2).

    Only the OS_F32 -> OS_F32 case generates code: a VMOV.F32 that is also
    registered through add_move_instruction.  For OS_F64 -> OS_F64,
    OS_F32 -> OS_F64 and every other size combination nothing is emitted.
    The shuffle parameter is not used. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
    begin
      if (fromsize=OS_F32) and (tosize=OS_F32) then
        begin
          mov:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
          list.Concat(mov);
          add_move_instruction(mov);
        end;
      { OS_F64 -> OS_F64 is currently a no-op; disabled code kept for reference: }
      //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
      //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
      { OS_F32 -> OS_F64 is currently a no-op; disabled code kept for reference: }
      //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32))
      //list.concat(nil);
    end;
  { Loads multimedia register reg from memory reference ref with VLDR.
    Reference fix-ups are left to handle_load_store; fromsize, tosize and
    shuffle are not used. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VLDR,
        PF_None,
        reg,
        ref);
    end;
  { Stores multimedia register reg to memory reference ref with VSTR.
    Reference fix-ups are left to handle_load_store; fromsize, tosize and
    shuffle are not used. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(
        list,
        A_VSTR,
        PF_None,
        reg,
        ref);
    end;
  { Copies integer register intreg into multimedia register mmreg with VMOV.
    Only tosize=OS_F32 is supported; anything else raises internal error
    2012100813.  The shuffle check is disabled and fromsize is not used. }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if //not(shuffle=nil) or
         (tosize<>OS_F32) then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Copies multimedia register mmreg into integer register intreg with VMOV.
    Only fromsize=OS_F32 is supported; anything else raises internal error
    2012100814.  The shuffle check is disabled and tosize is not used. }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if //not(shuffle=nil) or
         (fromsize<>OS_F32) then
        internalerror(2012100814)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is implemented here, as 0 - regsrc over both halves:
    RSBS on the low words, then SBC of the high word from a zeroed
    temporary, with the flags marked as allocated around the pair.
    All other operations go to the inherited implementation. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: 0 - lo, setting the flags for the high word }
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { high word: 0 - hi with borrow }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation "regdst := regdst <op> regsrc" for Thumb.

    list   - instruction list the code is appended to
    op     - OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB;
             anything else raises internal error 2003083101
    size   - not used by this implementation
    regsrc - source register pair
    regdst - destination register pair

    ADD/SUB/NEG work on the low words first and propagate carry/borrow into
    the high words with ADC/SBC.  The flags are marked as allocated for
    exactly that instruction pair and released again afterwards. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { NOTE(review): regdst is zeroed before regsrc is read, so this
              sequence presumably relies on regdst and regsrc being
              different registers - confirm against callers }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { the carry has been consumed: release the flags, as OP_NEG does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { the borrow has been consumed: release the flags, as OP_NEG does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { 64 bit constant/register operation "reg := reg <op> value" for Thumb.

    list  - instruction list the code is appended to
    op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; anything else raises
            internal error 2003083101
    size  - not used by this implementation
    value - 64 bit constant operand
    reg   - register pair that is both source and destination

    AND/OR/XOR are done per 32 bit half through cg.a_op_const_reg.
    ADD/SUB handle the low word first (as an immediate when it lies in
    0..255, otherwise through a temporary) and then apply the high word
    with ADC/SBC.  The flags are marked as allocated from the low word
    instruction up to the ADC/SBC and released afterwards. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            { NOTE(review): the high word constant is loaded while the carry
              is live; this presumably requires a_load_const_reg to leave
              the carry flag alone - confirm }
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(taicpu.op_reg_reg(A_ADC,reg.reghi,tmpreg));
            { carry consumed: release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))))
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            { see the note in OP_ADD about loading a constant while the
              borrow is live }
            tmpreg:=cg.getintregister(list,OS_32);
            { converted with aint() exactly like the OP_ADD path, so high
              words with bit 31 set are passed as the same signed value }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg));
            { borrow consumed: release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Instantiates the global code generator objects (cg, cg64) and selects
    the assembler optimizer class for the instruction set being generated.
    Thumb-2 is tested first, then Thumb, and plain ARM is the fallback.
    For Thumb no optimizer class is assigned here (the assignment is
    commented out). }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          { Thumb-2 }
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end
      else
        if GenerateThumbCode then
          begin
            { Thumb }
            cg:=tthumbcgarm.create;
            cg64:=tthumbcg64farm.create;
            // casmoptimizer:=TCpuThumbAsmOptimizer;
          end
        else
          begin
            { ARM }
            cg:=tarmcgarm.create;
            cg64:=tarmcg64farm.create;
            casmoptimizer:=TCpuAsmOptimizer;
          end;
    end;
  4912. end.