cgcpu.pas 214 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm: code generator functionality shared between all arm architectures }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags : boolean;

    { --- parameter passing and calls --- }
    procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
    procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
    procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg: tregister);override;

    { --- integer move instructions --- }
    procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
    { non-virtual variants that return a reference as function result }
    function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;

    { --- fpu move instructions --- }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;

    { --- comparisons, jumps and flags --- }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
      l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    procedure a_jmp_name(list : TAsmList;const s : string); override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure level code --- }
    procedure g_profilecode(list : TAsmList); override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
    procedure g_maybe_got_init(list : TAsmList); override;

    { --- addresses and block copies --- }
    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
    procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);

    { --- overflow checks and register saving --- }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
    procedure g_save_registers(list : TAsmList);override;
    procedure g_restore_registers(list : TAsmList);override;

    { --- arm specific helpers --- }
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure fixref(list : TAsmList;var ref : treference);
    { virtual: the thumb and thumb-2 generators declared in this unit override it }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;

    { --- multimedia (mm) register moves and operations --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;

    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { try to generate an optimized 32 bit multiplication by a constant,
      returns true if the code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;

    { clear out potential overflow bits from 8 or 16 bit operations, i.e.
      the upper 24/16 bits of a register after an operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);

    { mla for thumb requires that none of the registers is equal to r13/r15,
      this method ensures this }
    procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  end;
  { tcgarm: code generator part shared between normal arm and thumb-2 }
  tcgarm = class(tbasecgarm)
    { --- integer operations --- }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;

    { three operand forms }
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
      size: tcgsize; src1, src2, dst: tregister); override;

    { three operand forms which additionally take setflags and an overflow location }
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;

    { --- loads --- }
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    { Multiply two 32-bit registers into lo and hi 32-bit registers }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  end;
  { tarmcgarm: code generator for normal (non-thumb) arm; it only adds the
    set-up and tear-down of the register allocators }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
  end;
  { tbasecg64farm: common ancestor of the 64 bit code generators of all arm
    flavours; it adds nothing to tcg64f32 itself }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm: 64 bit code generator shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { --- 64 bit integer operations --- }
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;

    { variants which additionally take setflags and an overflow location }
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;

    { --- moves between a 64 bit integer register pair and an mm register --- }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;
  { tarmcg64farm: descendant of tcg64farm that declares no members of its own;
    by its name the 64 bit counterpart of tarmcgarm }
  tarmcg64farm = class(tcg64farm)
  end;
  { tthumbcgarm: code generator derived directly from tbasecgarm (not from
    tcgarm); by its name the one used for thumb code }
  tthumbcgarm = class(tbasecgarm)
    { --- register allocators --- }
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    { --- procedure entry/exit --- }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- loads --- }
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;

    procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;

    { replaces the load/store handling of tbasecgarm }
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { tthumbcg64farm: 64 bit code generator derived from tbasecg64farm; by its
    name the counterpart of tthumbcgarm. Only two operations are overridden }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  end;
  { tthumb2cgarm: code generator derived from tcgarm; by its name the one used
    for thumb-2 code }
  tthumb2cgarm = class(tcgarm)
    { --- register allocators --- }
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;

    { --- calls and loads --- }
    procedure a_call_reg(list : TAsmList;reg: tregister);override;
    procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;

    { --- integer operations --- }
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { --- procedure entry/exit --- }
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

    { replaces the load/store handling of tbasecgarm }
    function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;

    { --- multimedia (mm) register moves --- }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  end;
  { tthumb2cg64farm: 64 bit code generator derived from tcg64farm; by its name
    the counterpart of tthumb2cgarm. Only a_op64_reg_reg is overridden }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  end;
  const
    { assembler condition code for every topcmp value; the entries have to
      stay in the declaration order of topcmp }
    OpCmp2AsmCond : Array[topcmp] of TAsmCond = (
      C_NONE,C_EQ,C_GT,C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);

    { NOTE(review): presumably the stack page size (in bytes) used for stack
      probing on Windows targets - confirm against the users of this constant }
    winstackpagesize = 4096;

  { returns the instruction postfix (PF_S/PF_D/PF_E) belonging to a float definition }
  function get_fpu_postfix(def : tdef) : toppostfix;

  { NOTE(review): presumably instantiates the code generator objects for the
    selected instruction set - the implementation is outside this excerpt }
  procedure create_codegen;
  171. implementation
  172. uses
  173. globals,verbose,systems,cutils,
  174. aopt,aoptcpu,
  175. fmodule,
  176. symconst,symsym,symtable,
  177. tgobj,
  178. procinfo,cpupi,
  179. paramgr;
  180. { Range check must be disabled explicitly as conversions between signed and unsigned
  181. 32-bit values are done without explicit typecasts }
  182. {$R-}
  { Maps a float type definition to the matching instruction postfix:
    s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Raises internalerror 200401271 if def is no float definition and
    internalerror 200401272 for any other float type. }
  function get_fpu_postfix(def : tdef) : toppostfix;
  begin
    if def.typ<>floatdef then
      internalerror(200401271)
    else
      case tfloatdef(def).floattype of
        s32real:
          result:=PF_S;
        s64real:
          result:=PF_D;
        s80real:
          result:=PF_E;
        else
          internalerror(200401272);
      end;
  end;
  { Creates the integer, fpu and mm register allocators for normal arm code.
    The order of the registers inside each set literal is kept exactly as it
    was written. }
  procedure tarmcgarm.init_register_allocators;
  begin
    inherited init_register_allocators;

    { --- integer registers ---
      currently, we always save R14, so it can be handed out as well }
    if target_info.system=system_arm_darwin then
      { r7 is not available on Darwin, it's used as frame pointer (always,
        for backtrace support -- also in gcc/clang -> R11 can be used).
        r9 is volatile }
      rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
         RS_R10,RS_R11,RS_R14],first_int_imreg,[])
    else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
      { R11 does not serve as frame pointer of the current procedure, so it
        may be allocated }
      rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
         RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
    else
      { no procedure info available or R11 is the frame pointer: leave R11 out }
      rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
         RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

    { --- fpu registers --- }
    rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
      [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

    { --- mm registers ---
      The register allocator currently cannot deal with multiple
      non-overlapping subregs per register, so we can only use
      half the single precision registers for now (as sub registers of the
      double precision ones). }
    if current_settings.fputype in [fpu_vfpv3,fpu_vfpv4] then
      { for these fpu types D16..D31 are handed to the allocator as well }
      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
        [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
         RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
         RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
        ],first_mm_imreg,[])
    else
      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
        [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[]);
  end;
  { Releases the three register allocators created by init_register_allocators
    and then lets the inherited implementation run. }
  procedure tarmcgarm.done_register_allocators;
  begin
    { free in creation order: integer, fpu, mm }
    rg[R_INTREGISTER].free;
    rg[R_FPUREGISTER].free;
    rg[R_MMREGISTER].free;

    inherited done_register_allocators;
  end;
  { Loads the constant a into reg. The following strategies are tried in turn:
      1. mov with a (is_shifter_const accepts a)
      2. mvn with not(a) (is_shifter_const accepts not(a))
      3. mov + orr of the two parts delivered by split_into_shifter_const
      4. mvn + bic of the two parts of not(a)
      5. pc relative ldr from a 32 bit constant appended to the local data
         of the current procedure
    Raises internalerror 2002090902 for sizes other than 8, 16 or 32 bit
    integers. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
  var
    rotation : byte;          { filled in by is_shifter_const, not used further }
    poollabel : tasmlabel;    { label put in front of the constant in the local data }
    poolref : treference;     { pc relative reference to that constant }
    part1, part2 : DWord;     { the two halves returned by split_into_shifter_const }
  begin
    if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
      internalerror(2002090902);

    if is_shifter_const(a,rotation) then
      list.concat(taicpu.op_reg_const(A_MOV,reg,a))
    else if is_shifter_const(not(a),rotation) then
      list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
    else if split_into_shifter_const(a,part1,part2) then
      begin
        { constant built by mov followed by orr }
        list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
        list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
      end
    else if split_into_shifter_const(not(a),part1,part2) then
      begin
        { constant built by mvn followed by bic }
        list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
        list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
      end
    else
      begin
        { last resort: place the value in the local data and load it from there }
        reference_reset(poolref,4,[]);

        current_asmdata.getjumplabel(poollabel);
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        { symboldata is set to the entry just appended, i.e. the label }
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
      end;
  end;
  { Loads a value of size fromsize from Ref into reg.

    If the source is at least as wide as the destination, only tosize bytes
    are loaded. A plain load is emitted through handle_load_store unless
      - the reference has an alignment of 1 or 2 which is smaller than the
        size to load, or
      - the cpu lacks CPUARM_HAS_ALL_MEM and a halfword has to be loaded;
    in those cases the value is assembled from narrower loads which are
    combined by orr with a shifted operand.
    Raises internalerror 200308297 for sizes other than 8/16/32 bit integers. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  var
    postfix : toppostfix;        { size/sign postfix for the plain ldr }
    partref : treference;        { reference walked over by the partial loads }
    partreg, auxreg : tregister; { temporaries for the partial values / the address }
    shift : tshifterop;          { lsl operand used when merging with orr }
    step : integer;              { offset direction of the walk: -1 big endian, +1 otherwise }
    limitedmem : boolean;        { cpu has no CPUARM_HAS_ALL_MEM capability }
  begin
    limitedmem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

    { never load more than the destination can take }
    if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
      FromSize:=ToSize;

    case FromSize of
      OS_8:
        postfix:=PF_B;
      OS_S8:
        postfix:=PF_SB;
      OS_16:
        postfix:=PF_H;
      OS_S16:
        postfix:=PF_SH;
      OS_32,
      OS_S32:
        postfix:=PF_None;
      else
        InternalError(200308297);
    end;

    { without CPUARM_HAS_ALL_MEM a signed byte is loaded like an unsigned one,
      the extension is done by a_load_reg_reg at the end of this method }
    if (fromsize=OS_S8) and limitedmem then
      postfix:=PF_B;

    if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
       (limitedmem and (postfix in [PF_SH,PF_H])) then
      begin
        if target_info.endian=endian_big then
          step:=-1
        else
          step:=1;

        case FromSize of
          OS_16,OS_S16:
            begin
              { only complicated references need an extra loadaddr; the same
                is needed when reg doubles as base or index, because
                sometimes the compiler reuses registers }
              if assigned(ref.symbol) or
                 (ref.index<>NR_NO) or
                 (ref.offset<-4095) or
                 (ref.offset>4094) or
                 (reg=ref.index) or
                 (reg=ref.base) then
                begin
                  auxreg:=getintregister(list,OS_INT);
                  a_loadaddr_ref_reg(list,ref,auxreg);
                  reference_reset_base(partref,auxreg,0,ref.temppos,ref.alignment,ref.volatility);
                end
              else
                partref:=ref;

              { the byte loaded first ends up unshifted in reg; on big endian
                it is the one at offset+1 }
              if target_info.endian=endian_big then
                inc(partref.offset,1);

              shifterop_reset(shift);
              shift.shiftmode:=SM_LSL;
              shift.shiftimm:=8;

              partreg:=getintregister(list,OS_INT);
              a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
              inc(partref.offset,step);
              { the second byte is shifted to bits 8 and up, so it is loaded
                with the signedness of the requested halfword }
              if FromSize=OS_16 then
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg)
              else
                a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg);
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
            end;
          OS_32,OS_S32:
            begin
              partreg:=getintregister(list,OS_INT);

              { only complicated references need an extra loadaddr; the same
                is needed when reg doubles as base or index, because
                sometimes the compiler reuses registers }
              if assigned(ref.symbol) or
                 (ref.index<>NR_NO) or
                 (ref.offset<-4095) or
                 (ref.offset>4092) or
                 (reg=ref.index) or
                 (reg=ref.base) then
                begin
                  auxreg:=getintregister(list,OS_INT);
                  a_loadaddr_ref_reg(list,ref,auxreg);
                  reference_reset_base(partref,auxreg,0,ref.temppos,ref.alignment,ref.volatility);
                end
              else
                partref:=ref;

              shifterop_reset(shift);
              shift.shiftmode:=SM_LSL;

              if ref.alignment=2 then
                begin
                  { two halfword loads: the first one stays unshifted in reg,
                    the second one is merged shifted left by 16 }
                  if target_info.endian=endian_big then
                    inc(partref.offset,2);
                  a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                  inc(partref.offset,step*2);
                  a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                  shift.shiftimm:=16;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
                end
              else
                begin
                  { four byte loads; loads and merges are interleaved and
                    partreg is used for both the second and the fourth byte }
                  auxreg:=getintregister(list,OS_INT);
                  if target_info.endian=endian_big then
                    inc(partref.offset,3);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                  inc(partref.offset,step);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                  inc(partref.offset,step);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,auxreg);
                  shift.shiftimm:=8;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
                  inc(partref.offset,step);
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                  shift.shiftimm:=16;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,auxreg,shift));
                  shift.shiftimm:=24;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shift));
                end;
            end
          else
            { byte sized values need no special treatment }
            handle_load_store(list,A_LDR,postfix,reg,ref);
        end;
      end
    else
      handle_load_store(list,A_LDR,postfix,reg,ref);

    { fix up the register contents after the load }
    if (fromsize=OS_S8) and limitedmem then
      a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
    else if (fromsize=OS_S8) and (tosize=OS_16) then
      a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  end;
  { Subtracts ioffset from the 'self' parameter of procdef, for every
    caller-side location of that parameter. If ioffset is no shifter constant
    it is loaded into R12 first.
    Raises internalerror 200305251 if procdef has no 'self' parameter symbol
    and internalerror 200309189 for a location that is neither a register nor
    a reference. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  var
    selfsym : tsym;
    selfref : treference;
    piece : Pcgparalocation;
    rotation : byte;   { filled in by is_shifter_const, not used further }
  begin
    { calculate the parameter info for the procdef }
    procdef.init_paraloc_info(callerside);

    selfsym:=tsym(procdef.parast.Find('self'));
    if not(assigned(selfsym) and
           (selfsym.typ=paravarsym)) then
      internalerror(200305251);

    { walk over all locations of the parameter }
    piece:=tparavarsym(selfsym).paraloc[callerside].location;
    while piece<>nil do
      begin
        case piece^.loc of
          LOC_REGISTER:
            begin
              if is_shifter_const(ioffset,rotation) then
                a_op_const_reg(list,OP_SUB,piece^.size,ioffset,piece^.register)
              else
                begin
                  a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                  a_op_reg_reg(list,OP_SUB,piece^.size,NR_R12,piece^.register);
                end;
            end;
          LOC_REFERENCE:
            begin
              { offset in the wrapper needs to be adjusted for the stored
                return address }
              reference_reset_base(selfref,piece^.reference.index,piece^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
              if is_shifter_const(ioffset,rotation) then
                a_op_const_ref(list,OP_SUB,piece^.size,ioffset,selfref)
              else
                begin
                  a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                  a_op_reg_ref(list,OP_SUB,piece^.size,NR_R12,selfref);
                end;
            end
          else
            internalerror(200309189);
        end;
        piece:=piece^.next;
      end;
  end;
  { Passes the constant a as parameter: it is loaded into the register of the
    parameter location or stored to its memory location. paraloc has to pass
    check_simple_location.
    Raises internalerror 2002081101 for any other kind of location. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
  var
    destref : treference;
  begin
    paraloc.check_simple_location;
    paramanager.allocparaloc(list,paraloc.location);

    case paraloc.location^.loc of
      LOC_REGISTER,
      LOC_CREGISTER:
        a_load_const_reg(list,size,a,paraloc.location^.register);
      LOC_REFERENCE:
        begin
          { build base+offset reference from the parameter location }
          reference_reset(destref,paraloc.alignment,[]);
          destref.base:=paraloc.location^.reference.index;
          destref.offset:=paraloc.location^.reference.offset;
          a_load_const_ref(list,size,a,destref);
        end;
      else
        internalerror(2002081101);
    end;
  end;
  { Passes the value found at r as parameter. The parameter may consist of
    several locations; after each of them the source reference is advanced by
    the size of that location.
    Raises internalerror 2005010710 if a memory location which is not OS_32
    has a successor, 2002072801 for fpu register locations whose size is
    neither OS_F32 nor OS_F64 and 2002081103 for unsupported location kinds. }
  procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
  var
    srcref, destref : treference;
    piece : pcgparalocation;
    remaining : aint;   { number of bytes which still have to be passed }
  begin
    srcref:=r;
    remaining:=paraloc.intsize;
    piece:=paraloc.location;

    while assigned(piece) do
      begin
        paramanager.allocparaloc(list,piece);

        case piece^.loc of
          LOC_REGISTER,
          LOC_CREGISTER:
            a_load_ref_reg(list,piece^.size,piece^.size,srcref,piece^.register);
          LOC_REFERENCE:
            begin
              reference_reset_base(destref,piece^.reference.index,piece^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
              { doubles in softemu mode have a strange order of registers and references }
              if piece^.size=OS_32 then
                g_concatcopy(list,srcref,destref,4)
              else
                begin
                  { everything that is left is copied at once, so this has to
                    be the last location }
                  g_concatcopy(list,srcref,destref,remaining);
                  if assigned(piece^.next) then
                    internalerror(2005010710);
                end;
            end;
          LOC_FPUREGISTER,
          LOC_CFPUREGISTER:
            case piece^.size of
              OS_F32,
              OS_F64:
                a_loadfpu_ref_reg(list,piece^.size,piece^.size,srcref,piece^.register);
              else
                internalerror(2002072801);
            end;
          LOC_VOID:
            begin
              // nothing has to be passed
            end;
          else
            internalerror(2002081103);
        end;

        { continue behind the part which was just handled }
        inc(srcref.offset,tcgsize2size[piece^.size]);
        dec(remaining,tcgsize2size[piece^.size]);
        piece:=piece^.next;
      end;
  end;
  { Passes the address of r as parameter. For a register location the address
    is computed directly into that register; for a memory location it is
    computed into a temporary register which is then stored. paraloc has to
    pass check_simple_location.
    Raises internalerror 2002080701 for any other kind of location. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
  var
    destref : treference;
    addrreg : tregister;
  begin
    paraloc.check_simple_location;
    paramanager.allocparaloc(list,paraloc.location);

    case paraloc.location^.loc of
      LOC_REGISTER,
      LOC_CREGISTER:
        a_loadaddr_ref_reg(list,r,paraloc.location^.register);
      LOC_REFERENCE:
        begin
          { build base+offset reference from the parameter location }
          reference_reset(destref,paraloc.alignment,[]);
          destref.base:=paraloc.location^.reference.index;
          destref.offset:=paraloc.location^.reference.offset;

          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,r,addrreg);
          a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,destref);
        end;
      else
        internalerror(2002080701);
    end;
  end;
  { Emits a call to the routine named s and records in the procedure info
    that the current procedure performs a call. With weak set, a weak
    reference to the symbol is used. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
  var
    callref : treference;
    callee : TAsmSymbol;
  begin
    if weak then
      callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
    else
      callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
    reference_reset_symbol(callref,callee,0,sizeof(pint),[]);

    { pic addressing is used only if the target uses a got for pic and the
      module is compiled as pic }
    if (tf_pic_uses_got in target_info.flags) and
       (cs_create_pic in current_settings.moduleswitches) then
      callref.refaddr:=addr_pic
    else
      callref.refaddr:=addr_full;

    { use always BL as newer binutils do not translate blx apparently
      generating BL is also what clang and gcc do by default }
    list.concat(taicpu.op_ref(A_BL,callref));

    {
      the compiler does not properly set this flag anymore in pass 1, and
      for now we only need it after pass 2 (I hope) (JM), so it is set here
      instead of being checked:
        if not(pi_do_call in current_procinfo.flags) then
          internalerror(2003060703);
    }
    include(current_procinfo.flags,pi_do_call);
  end;
  { Emits a call to the address held in reg and records in the procedure info
    that the current procedure performs a call. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
  begin
    { check not really correct: should only be used for non-Thumb cpus }
    if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
      list.concat(taicpu.op_reg(A_BLX, reg))
    else
      begin
        { no blx: copy pc to r14 by hand, then move the target into pc }
        list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
      end;

    {
      the compiler does not properly set this flag anymore in pass 1, and
      for now we only need it after pass 2 (I hope) (JM), so it is set here
      instead of being checked:
        if not(pi_do_call in current_procinfo.flags) then
          internalerror(2003060703);
    }
    include(current_procinfo.flags,pi_do_call);
  end;
  { Two operand form "reg := reg op a": delegates to the three operand form
    with reg as source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
  begin
    a_op_const_reg_reg(list,op,size,a,reg,reg);
  end;
  { Applies "op a" to the value at ref: load into a register, operate into a
    second register, store the result back. The store uses the reference
    returned by the load and not ref itself. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
  var
    valuereg, resultreg : tregister;
    loadedref : treference;
  begin
    valuereg:=getintregister(list,size);
    resultreg:=getintregister(list,size);

    loadedref:=a_internal_load_ref_reg(list,size,size,ref,valuereg);
    a_op_const_reg_reg(list,op,size,a,valuereg,resultreg);
    a_load_reg_ref(list,size,size,resultreg,loadedref);
  end;
  { Two operand register operation.
      OP_NEG : dst := 0 - src (rsb), then maybeadjustresult
      OP_NOT : dst := not src; for 8 and 16 bit sizes the operand is shifted
               to the top of the register first and the result shifted back
      other  : handed to the three operand form with dst as second source }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
  var
    shift : tshifterop;
  begin
    case op of
      OP_NEG:
        begin
          list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
          maybeadjustresult(list,OP_NEG,size,dst);
        end;
      OP_NOT:
        begin
          if size in [OS_8, OS_16, OS_S8, OS_S16] then
            begin
              { mvn dst,src,lsl #24 or #16 ... }
              shifterop_reset(shift);
              shift.shiftmode:=SM_LSL;
              if size in [OS_8, OS_S8] then
                shift.shiftimm:=24
              else
                shift.shiftimm:=16;
              list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));

              { ... followed by a shift right by the same amount, arithmetic
                for the signed sizes and logical for the unsigned ones.
                Using a shift here allows this to be folded into another instruction }
              if size in [OS_S8, OS_S16] then
                shift.shiftmode:=SM_ASR
              else
                shift.shiftmode:=SM_LSR;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
            end
          else
            list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        end;
      else
        a_op_reg_reg_reg(list,op,size,src,dst,dst);
    end;
  end;
  const
    { Lookup tables indexed by TOpCG; every table needs exactly one entry per
      TOpCG value, in declaration order. A_NONE marks operations without an
      opcode in the respective table. }

    { differs from op_reg_opcg2asmop only in the entries which hold
      A_ASR/A_LSL/A_LSR/A_ROR there: they are A_NONE here }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
      A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);

    { same as above, but with opcodes for the shift and rotate entries }
    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
      A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);

    { postfix per operation; currently PF_None for every operation }
    op_reg_postfix: array[TOpCG] of TOpPostfix = (
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None);
  { "dst := src op a": forwards to the checkoverflow variant with setflags
    set to false; the overflow location it fills in is discarded. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
  var
    unusedovloc : tlocation;
  begin
    a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,unusedovloc);
  end;
  661. procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  662. size: tcgsize; src1, src2, dst: tregister);
  663. var
  664. ovloc : tlocation;
  665. begin
  666. a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,ovloc);
  667. end;
  668. function opshift2shiftmode(op: TOpCg): tshiftmode;
  669. begin
  670. case op of
  671. OP_SHL: Result:=SM_LSL;
  672. OP_SHR: Result:=SM_LSR;
  673. OP_ROR: Result:=SM_ROR;
  674. OP_ROL: Result:=SM_ROR;
  675. OP_SAR: Result:=SM_ASR;
  676. else internalerror(2012070501);
  677. end
  678. end;
    { Tries to replace the multiplication dst := src * a by a short sequence of
      shift-and-add (or shift-and-subtract) instructions.

      list     - instruction list the replacement sequence is appended to
      a        - constant factor
      src, dst - source and destination register

      Returns true if a replacement sequence has been emitted, false if nothing
      was emitted and the caller has to generate a real multiplication. }
    function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
      var
        multiplier : dword;
        power : longint;
        shifterop : tshifterop;
        bitsset : byte;
        negative : boolean;
        first : boolean;
        b,
        cycles : byte;
        maxeffort : byte;
      begin
        result:=true;
        cycles:=0;
        negative:=a<0;
        shifterop.rs:=NR_NO;
        shifterop.shiftmode:=SM_LSL;
        { a negative factor costs one extra instruction: the final negation }
        if negative then
          inc(cycles);
        multiplier:=dword(abs(a));
        { number of set bits, not counting bit 0 (see "add up" below) }
        bitsset:=popcnt(multiplier and $fffffffe);

        { heuristics to estimate how many instructions are reasonable to replace the mul,
          this is currently based on XScale timings }
        { in the simplest case, we need a mov to load the constant and a mul to carry out the
          actual multiplication, this requires min. 1+4 cycles

          because the first shift imm. might cause a stall and because we need more instructions
          when replacing the mul we generate max. 3 instructions to replace this mul }
        maxeffort:=3;

        { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
          a ldr, so generating one more operation to replace this is beneficial }
        if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
          inc(maxeffort);

        { if the upper 5 bits are all set or clear, mul is one cycle faster }
        if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
          dec(maxeffort);

        { if the upper 17 bits are all set or clear, mul is another cycle faster }
        if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
          dec(maxeffort);

        { most simple cases }
        if a=1 then
          a_load_reg_reg(list,OS_32,OS_32,src,dst)
        else if a=0 then
          a_load_const_reg(list,OS_32,0,dst)
        else if a=-1 then
          a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
        { add up ?

          basically, one add is needed for each bit being set in the constant factor
          however, the least significant bit is for free, it can be hidden in the initial
          instruction
        }
        { NOTE(review): nextpowerof2 presumably returns the next power of two
          >= multiplier and stores its exponent in power - confirm }
        else if (bitsset+cycles<=maxeffort) and
          (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
          begin
            first:=true;
            { consume the multiplier from its highest set bit downwards }
            while multiplier<>0 do
              begin
                shifterop.shiftimm:=BsrDWord(multiplier);
                if odd(multiplier) then
                  begin
                    { bit 0 is folded into this instruction: dst := src + (src shl n) }
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
                    dec(multiplier);
                  end
                else
                  if first then
                    { dst := src shl n }
                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                  else
                    { dst := dst + (src shl n) }
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
                first:=false;
                { the bit just handled is done }
                dec(multiplier,1 shl shifterop.shiftimm);
              end;
            if negative then
              { dst := 0 - dst }
              list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
          end
        { subtract from the next greater power of two? }
        else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
          begin
            first:=true;
            while multiplier<>0 do
              begin
                if first then
                  begin
                    { from now on, multiplier holds what has to be subtracted
                      from src shl power }
                    multiplier:=(1 shl power)-multiplier;
                    shifterop.shiftimm:=power;
                  end
                else
                  shifterop.shiftimm:=BsrDWord(multiplier);

                if odd(multiplier) then
                  begin
                    { bit 0 is folded into this instruction: dst := (src shl n) - src }
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
                    dec(multiplier);
                  end
                else
                  if first then
                    { dst := src shl power }
                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
                  else
                    begin
                      { dst := dst - (src shl n) }
                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
                      dec(multiplier,1 shl shifterop.shiftimm);
                    end;
                first:=false;
              end;
            if negative then
              { dst := 0 - dst }
              list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
          end
        else
          result:=false;
      end;
    { Emits code for dst := src <op> a with a constant second operand.

      list     - instruction list the code is appended to
      op       - operation to carry out (OP_NEG/OP_NOT are rejected for
                 constants that fit a shifter operand)
      size     - operand size; passed to maybeadjustresult at the end
      a        - constant operand
      src, dst - source and destination register
      setflags - if true (or if cgsetflags is set), flag-setting instructions
                 are generated where possible
      ovloc    - set to LOC_VOID, or to LOC_FLAGS with the flag to test after
                 a flag-setting add/sub

      The routine tries a number of cheaper encodings before falling back to
      loading the constant into a temporary register. }
    procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
      var
        shift, lsb, width : byte;
        tmpreg : tregister;
        so : tshifterop;
        l1 : longint;
        imm1, imm2: DWord;
      begin
        { NOTE(review): optimize_op_const presumably simplifies op/a (e.g. to
          OP_NONE or OP_MOVE, which are handled right below) - confirm }
        optimize_op_const(size, op, a);
        case op of
          OP_NONE:
            begin
              { nothing to compute, just make sure the value ends up in dst }
              if src <> dst then
                a_load_reg_reg(list, size, size, src, dst);
              exit;
            end;
          OP_MOVE:
            begin
              a_load_const_reg(list, size, a, dst);
              exit;
            end;
        end;
        ovloc.loc:=LOC_VOID;
        { if the negated constant fits a shifter operand, swap add and sub and
          negate the constant; only done when no flags are requested }
        if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
          case op of
            OP_ADD:
              begin
                op:=OP_SUB;
                a:=aint(dword(-a));
              end;
            OP_SUB:
              begin
                op:=OP_ADD;
                a:=aint(dword(-a));
              end
          end;

        { constant can be encoded directly in the instruction }
        if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
          case op of
            OP_NEG,OP_NOT:
              internalerror(200308281);
            OP_SHL,
            OP_SHR,
            OP_ROL,
            OP_ROR,
            OP_SAR:
              begin
                if a>32 then
                  internalerror(200308294);
                { shifts are emitted as mov dst,src,<shift> #a }
                shifterop_reset(so);
                so.shiftmode:=opshift2shiftmode(op);
                { opshift2shiftmode returns SM_ROR for OP_ROL, so rotate by 32-a }
                if op = OP_ROL then
                  so.shiftimm:=32-a
                else
                  so.shiftimm:=a;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
              end;
            else
              {if (op in [OP_SUB, OP_ADD]) and
                 ((a < 0) or
                  (a > 4095)) then
                begin
                  tmpreg:=getintregister(list,size);
                  list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
                  list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
                   ));
                end
              else}
                begin
                  if cgsetflags or setflags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  { the postfix expression yields PF_S if flags are requested,
                    toppostfix(0) otherwise }
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
                end;
              { report which flag the caller has to test;
                only done for the unsigned sizes OS_8/OS_16/OS_32 }
              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                  end;
                end;
          end
        else
          begin
            { there could be added some more sophisticated optimizations }
            if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
              a_op_reg_reg(list,OP_NEG,size,src,dst)
            { we do this here instead in the peephole optimizer because
              it saves us a register }
            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
              a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
            { for example : b=a*5 -> b=a*4+a with add instruction and shl }
            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
              begin
                if l1>32 then{roozbeh does this ever happen?}
                  internalerror(200308296);
                shifterop_reset(so);
                so.shiftmode:=SM_LSL;
                so.shiftimm:=l1;
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
              end
            { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
              begin
                if l1>32 then{does this ever happen?}
                  internalerror(201205181);
                shifterop_reset(so);
                so.shiftmode:=SM_LSL;
                so.shiftimm:=l1;
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
              end
            else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
              begin
                { nothing to do on success }
              end
            { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
              broader range of shifterconstants.}
            else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
            { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
              into the following instruction}
            else if (op = OP_AND) and
                    is_continuous_mask(a, lsb, width) and
                    ((lsb = 0) or ((lsb + width) = 32)) then
              begin
                shifterop_reset(so);
                { masks $ffff and $ff have dedicated zero-extension instructions
                  from ARMv6 on }
                if (width = 16) and
                   (lsb = 0) and
                   (current_settings.cputype >= cpu_armv6) then
                  list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
                else if (width = 8) and
                   (lsb = 0) and
                   (current_settings.cputype >= cpu_armv6) then
                  list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
                else if lsb = 0 then
                  begin
                    { mask covers the low bits: shift the unwanted upper bits out
                      to the left, then shift back }
                    so.shiftmode:=SM_LSL;
                    so.shiftimm:=32-width;
                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                    so.shiftmode:=SM_LSR;
                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                  end
                else
                  begin
                    { mask covers the high bits: shift the unwanted lower bits out
                      to the right, then shift back }
                    so.shiftmode:=SM_LSR;
                    so.shiftimm:=lsb;
                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                    so.shiftmode:=SM_LSL;
                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
                  end;
              end
            { clear the unwanted bits with two BICs }
            else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
              begin
                list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
                list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
              end
            { apply the constant in two halves, only if no flags are requested }
            else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
                    not(cgsetflags or setflags) and
                    split_into_shifter_const(a, imm1, imm2) then
              begin
                list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
                list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
              end
            else
              begin
                { general case: load the constant into a register and use the
                  register variant }
                tmpreg:=getintregister(list,size);
                a_load_const_reg(list,size,a,tmpreg);
                a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
              end;
          end;
        maybeadjustresult(list,op,size,dst);
      end;
  961. procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
  962. var
  963. so : tshifterop;
  964. tmpreg,overflowreg : tregister;
  965. asmop : tasmop;
  966. begin
  967. ovloc.loc:=LOC_VOID;
  968. case op of
  969. OP_NEG,OP_NOT,
  970. OP_DIV,OP_IDIV:
  971. internalerror(200308283);
  972. OP_SHL,
  973. OP_SHR,
  974. OP_SAR,
  975. OP_ROR:
  976. begin
  977. if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
  978. internalerror(2008072801);
  979. shifterop_reset(so);
  980. so.rs:=src1;
  981. so.shiftmode:=opshift2shiftmode(op);
  982. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
  983. end;
  984. OP_ROL:
  985. begin
  986. if not(size in [OS_32,OS_S32]) then
  987. internalerror(2008072801);
  988. { simulate ROL by ror'ing 32-value }
  989. tmpreg:=getintregister(list,OS_32);
  990. list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
  991. shifterop_reset(so);
  992. so.rs:=tmpreg;
  993. so.shiftmode:=SM_ROR;
  994. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
  995. end;
  996. OP_IMUL,
  997. OP_MUL:
  998. begin
  999. if (cgsetflags or setflags) and
  1000. (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
  1001. begin
  1002. overflowreg:=getintregister(list,size);
  1003. if op=OP_IMUL then
  1004. asmop:=A_SMULL
  1005. else
  1006. asmop:=A_UMULL;
  1007. { the arm doesn't allow that rd and rm are the same }
  1008. if dst=src2 then
  1009. begin
  1010. if dst<>src1 then
  1011. list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
  1012. else
  1013. begin
  1014. tmpreg:=getintregister(list,size);
  1015. a_load_reg_reg(list,size,size,src2,dst);
  1016. list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
  1017. end;
  1018. end
  1019. else
  1020. list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
  1021. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1022. if op=OP_IMUL then
  1023. begin
  1024. shifterop_reset(so);
  1025. so.shiftmode:=SM_ASR;
  1026. so.shiftimm:=31;
  1027. list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
  1028. end
  1029. else
  1030. list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
  1031. ovloc.loc:=LOC_FLAGS;
  1032. ovloc.resflags:=F_NE;
  1033. end
  1034. else
  1035. begin
  1036. { the arm doesn't allow that rd and rm are the same }
  1037. if dst=src2 then
  1038. begin
  1039. if dst<>src1 then
  1040. list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
  1041. else
  1042. begin
  1043. tmpreg:=getintregister(list,size);
  1044. a_load_reg_reg(list,size,size,src2,dst);
  1045. list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
  1046. end;
  1047. end
  1048. else
  1049. list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
  1050. end;
  1051. end;
  1052. else
  1053. begin
  1054. if cgsetflags or setflags then
  1055. a_reg_alloc(list,NR_DEFAULTFLAGS);
  1056. list.concat(setoppostfix(
  1057. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  1058. end;
  1059. end;
  1060. maybeadjustresult(list,op,size,dst);
  1061. end;
  1062. procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
  1063. var
  1064. asmop: tasmop;
  1065. begin
  1066. if CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype] then
  1067. begin
  1068. list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
  1069. case size of
  1070. OS_32: asmop:=A_UMULL;
  1071. OS_S32: asmop:=A_SMULL;
  1072. else
  1073. InternalError(2014060802);
  1074. end;
  1075. { The caller might omit dstlo or dsthi, when he is not interested in it, we still
  1076. need valid registers everywhere. In case of dsthi = NR_NO we could fall back to
  1077. 32x32=32 bit multiplication}
  1078. if (dstlo = NR_NO) then
  1079. dstlo:=getintregister(list,size);
  1080. if (dsthi = NR_NO) then
  1081. dsthi:=getintregister(list,size);
  1082. list.concat(taicpu.op_reg_reg_reg_reg(asmop, dstlo, dsthi, src1,src2));
  1083. end
  1084. else if dsthi=NR_NO then
  1085. begin
  1086. if (dstlo = NR_NO) then
  1087. dstlo:=getintregister(list,size);
  1088. list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
  1089. end
  1090. else
  1091. begin
  1092. internalerror(2015083022);
  1093. end;
  1094. end;
    { Emits the load/store instruction op (with postfix oppostfix) that
      transfers reg from/to the memory location ref.

      ref is received by value and rewritten locally until it is a form the
      instruction can encode: references that cannot be encoded are passed
      to fixref, and base/index/offset are folded into temporary registers
      where required.

      Returns the reference that was finally used in the instruction, so
      callers can derive neighbouring addresses from it. }
    function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
      var
        tmpreg1,tmpreg2 : tregister;
      begin
        tmpreg1:=NR_NO;

        { Be sure to have a base register }
        if (ref.base=NR_NO) then
          begin
            if ref.shiftmode<>SM_None then
              internalerror(2014020701);
            ref.base:=ref.index;
            ref.index:=NR_NO;
          end;

        { absolute symbols can't be handled directly, we've to store the symbol reference
          in the text segment and access it pc relative

          For now, we assume that references where base or index equals to PC are already
          relative, all other references are assumed to be absolute and thus they need
          to be handled extra.

          A proper solution would be to change refoptions to a set and store the information
          if the symbol is absolute or relative there.
        }

        { each of the following conditions describes a reference the
          instruction cannot encode directly }
        if (assigned(ref.symbol) and
            not(is_pc(ref.base)) and
            not(is_pc(ref.index))
           ) or
           { [#xxx] isn't a valid address operand }
           ((ref.base=NR_NO) and (ref.index=NR_NO)) or
           { offset outside -4095..4095 }
           (ref.offset<-4095) or
           (ref.offset>4095) or
           { signed byte and halfword accesses: offset outside -255..255 }
           ((oppostfix in [PF_SB,PF_H,PF_SH]) and
            ((ref.offset<-255) or
             (ref.offset>255)
            )
           ) or
           { floating point accesses: offset outside -1020..1020 or not a
             multiple of 4 }
           (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
            ((ref.offset<-1020) or
             (ref.offset>1020) or
             ((abs(ref.offset) mod 4)<>0)
            )
           ) or
           { additional restrictions when generating thumb code }
           ((GenerateThumbCode) and
            (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
             ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
               ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
             ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
             ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
            )
           ) then
          begin
            fixref(list,ref);
          end;

        if GenerateThumbCode then
          begin
            { certain thumb load require base and index }
            if (oppostfix in [PF_SB,PF_SH]) and
              (ref.base<>NR_NO) and (ref.index=NR_NO) then
              begin
                { provide an index register holding 0 }
                tmpreg1:=getintregister(list,OS_ADDR);
                a_load_const_reg(list,OS_ADDR,0,tmpreg1);
                ref.index:=tmpreg1;
              end;

            { "hi" registers cannot be used as base or index }
            if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
              ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
              begin
                tmpreg1:=getintregister(list,OS_ADDR);
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
                ref.base:=tmpreg1;
              end;
            if getsupreg(ref.index) in [RS_R8..RS_R14] then
              begin
                tmpreg1:=getintregister(list,OS_ADDR);
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
                ref.index:=tmpreg1;
              end;
          end;

        { fold if there is base, index and offset, however, don't fold
          for vfp memory instructions because we later fold the index }
        if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
           (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
          begin
            if tmpreg1<>NR_NO then
              begin
                { NOTE(review): the sum is written to tmpreg2, but neither
                  ref.base nor ref.index is changed to tmpreg2 before
                  ref.offset is cleared below - confirm that the offset is
                  not lost on this path }
                tmpreg2:=getintregister(list,OS_ADDR);
                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg1,tmpreg2);
                tmpreg1:=tmpreg2;
              end
            else
              begin
                { new base := base + offset }
                tmpreg1:=getintregister(list,OS_ADDR);
                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg1);
                ref.base:=tmpreg1;
              end;
            ref.offset:=0;
          end;

        { floating point operations have only limited references
          we expect here, that a base is already set }
        if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
          begin
            if ref.shiftmode<>SM_none then
              internalerror(200309121);
            if tmpreg1<>NR_NO then
              begin
                if ref.base=tmpreg1 then
                  begin
                    { the base is already a temporary: fold the index into it }
                    if ref.signindex<0 then
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                    else
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                    ref.index:=NR_NO;
                  end
                else
                  begin
                    { otherwise the temporary has to be the index: combine it
                      with the base and use the result as new base }
                    if ref.index<>tmpreg1 then
                      internalerror(200403161);
                    if ref.signindex<0 then
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                    else
                      list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                    ref.base:=tmpreg1;
                    ref.index:=NR_NO;
                  end;
              end
            else
              begin
                { no temporary so far: new base := base + index
                  (ref.signindex is not evaluated on this path) }
                tmpreg1:=getintregister(list,OS_ADDR);
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
                ref.base:=tmpreg1;
                ref.index:=NR_NO;
              end;
          end;
        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
        Result := ref;
      end;
  1229. procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
  1230. var
  1231. oppostfix:toppostfix;
  1232. usedtmpref: treference;
  1233. tmpreg : tregister;
  1234. dir : integer;
  1235. begin
  1236. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  1237. FromSize := ToSize;
  1238. case ToSize of
  1239. { signed integer registers }
  1240. OS_8,
  1241. OS_S8:
  1242. oppostfix:=PF_B;
  1243. OS_16,
  1244. OS_S16:
  1245. oppostfix:=PF_H;
  1246. OS_32,
  1247. OS_S32,
  1248. { for vfp value stored in integer register }
  1249. OS_F32:
  1250. oppostfix:=PF_None;
  1251. else
  1252. InternalError(200308299);
  1253. end;
  1254. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
  1255. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  1256. (oppostfix =PF_H)) then
  1257. begin
  1258. if target_info.endian=endian_big then
  1259. dir:=-1
  1260. else
  1261. dir:=1;
  1262. case FromSize of
  1263. OS_16,OS_S16:
  1264. begin
  1265. tmpreg:=getintregister(list,OS_INT);
  1266. usedtmpref:=ref;
  1267. if target_info.endian=endian_big then
  1268. inc(usedtmpref.offset,1);
  1269. usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
  1270. inc(usedtmpref.offset,dir);
  1271. a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
  1272. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1273. end;
  1274. OS_32,OS_S32:
  1275. begin
  1276. tmpreg:=getintregister(list,OS_INT);
  1277. usedtmpref:=ref;
  1278. if ref.alignment=2 then
  1279. begin
  1280. if target_info.endian=endian_big then
  1281. inc(usedtmpref.offset,2);
  1282. usedtmpref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,usedtmpref);
  1283. a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,tmpreg);
  1284. inc(usedtmpref.offset,dir*2);
  1285. a_internal_load_reg_ref(list,OS_16,OS_16,tmpreg,usedtmpref);
  1286. end
  1287. else
  1288. begin
  1289. if target_info.endian=endian_big then
  1290. inc(usedtmpref.offset,3);
  1291. usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
  1292. a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
  1293. inc(usedtmpref.offset,dir);
  1294. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1295. a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
  1296. inc(usedtmpref.offset,dir);
  1297. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1298. a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
  1299. inc(usedtmpref.offset,dir);
  1300. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1301. end;
  1302. end
  1303. else
  1304. handle_load_store(list,A_STR,oppostfix,reg,ref);
  1305. end;
  1306. end
  1307. else
  1308. handle_load_store(list,A_STR,oppostfix,reg,ref);
  1309. end;
  1310. function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  1311. var
  1312. oppostfix:toppostfix;
  1313. href: treference;
  1314. tmpreg: TRegister;
  1315. begin
  1316. case ToSize of
  1317. { signed integer registers }
  1318. OS_8,
  1319. OS_S8:
  1320. oppostfix:=PF_B;
  1321. OS_16,
  1322. OS_S16:
  1323. oppostfix:=PF_H;
  1324. OS_32,
  1325. OS_S32:
  1326. oppostfix:=PF_None;
  1327. else
  1328. InternalError(2003082910);
  1329. end;
  1330. if (tosize in [OS_S16,OS_16]) and
  1331. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  1332. begin
  1333. result:=handle_load_store(list,A_STR,PF_B,reg,ref);
  1334. tmpreg:=getintregister(list,OS_INT);
  1335. a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
  1336. href:=result;
  1337. inc(href.offset);
  1338. handle_load_store(list,A_STR,PF_B,tmpreg,href);
  1339. end
  1340. else
  1341. result:=handle_load_store(list,A_STR,oppostfix,reg,ref);
  1342. end;
  1343. function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  1344. var
  1345. oppostfix:toppostfix;
  1346. so: tshifterop;
  1347. tmpreg: TRegister;
  1348. href: treference;
  1349. begin
  1350. case FromSize of
  1351. { signed integer registers }
  1352. OS_8:
  1353. oppostfix:=PF_B;
  1354. OS_S8:
  1355. oppostfix:=PF_SB;
  1356. OS_16:
  1357. oppostfix:=PF_H;
  1358. OS_S16:
  1359. oppostfix:=PF_SH;
  1360. OS_32,
  1361. OS_S32:
  1362. oppostfix:=PF_None;
  1363. else
  1364. InternalError(200308291);
  1365. end;
  1366. if (tosize=OS_S8) and
  1367. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  1368. begin
  1369. result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
  1370. a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
  1371. end
  1372. else if (tosize in [OS_S16,OS_16]) and
  1373. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  1374. begin
  1375. result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
  1376. tmpreg:=getintregister(list,OS_INT);
  1377. href:=result;
  1378. inc(href.offset);
  1379. handle_load_store(list,A_LDR,PF_B,tmpreg,href);
  1380. shifterop_reset(so);
  1381. so.shiftmode:=SM_LSL;
  1382. so.shiftimm:=8;
  1383. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  1384. end
  1385. else
  1386. result:=handle_load_store(list,A_LDR,oppostfix,reg,ref);
  1387. end;
    { Moves reg1 to reg2, converting the value from fromsize to tosize.

      If the sizes differ, the value is zero- or sign-extended (or truncated)
      according to the smaller of the two sizes. If no conversion code was
      emitted and the registers differ, a plain MOV is generated and
      registered with the register allocator as a move instruction. }
    procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
      var
        so : tshifterop;

      { Emits reg2 := reg <shiftmode> shiftimm.
        In thumb code this goes through a_op_const_reg_reg, otherwise a MOV
        with shifter operand is used. Note that the destination is always
        the outer reg2. }
      procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
        begin
          if GenerateThumbCode then
            begin
              case shiftmode of
                SM_ASR:
                  a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
                SM_LSR:
                  a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
                SM_LSL:
                  a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
                else
                  internalerror(2013090301);
              end;
            end
          else
            begin
              so.shiftmode:=shiftmode;
              so.shiftimm:=shiftimm;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
            end;
        end;

      var
        instr: taicpu;
        conv_done: boolean;
      begin
        { NOTE(review): tcgsize2size presumably yields the size in bytes, in
          which case the ">32" tests would never trigger for register sized
          values - confirm the intended limit }
        if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
          internalerror(2002090901);

        conv_done:=false;
        if tosize<>fromsize then
          begin
            shifterop_reset(so);
            conv_done:=true;
            { when narrowing (or keeping the width), the target size decides
              how the value is extended }
            if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
              fromsize:=tosize;
            if current_settings.cputype<cpu_armv6 then
              { before ARMv6: extend with AND or shift pairs }
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    do_shift(SM_LSL,24,reg1);
                    if tosize=OS_16 then
                      begin
                        { sign extend to 16 bit, then clear the upper 16 bit }
                        do_shift(SM_ASR,8,reg2);
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      do_shift(SM_ASR,24,reg2);
                  end;
                OS_16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_LSR,16,reg2);
                  end;
                OS_S16:
                  begin
                    do_shift(SM_LSL,16,reg1);
                    do_shift(SM_ASR,16,reg2)
                  end;
                else
                  { remaining sizes need no conversion, fall through to the plain move }
                  conv_done:=false;
              end
            else
              { ARMv6 and later: use the dedicated extension instructions }
              case fromsize of
                OS_8:
                  if GenerateThumbCode then
                    list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                  else
                    list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
                OS_S8:
                  begin
                    if tosize=OS_16 then
                      begin
                        so.shiftmode:=SM_ROR;
                        so.shiftimm:=16;
                        list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                        do_shift(SM_LSR,16,reg2);
                      end
                    else
                      list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                  end;
                OS_16:
                  list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
                OS_S16:
                  list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
                else
                  { remaining sizes need no conversion, fall through to the plain move }
                  conv_done:=false;
              end
          end;
        if not conv_done and (reg1<>reg2) then
          begin
            { same size, only a register mov required }
            instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
            list.Concat(instr);
            { Notify the register allocator that we have written a move instruction so
              it can try to eliminate it. }
            add_move_instruction(instr);
          end;
      end;
  1494. procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
  1495. var
  1496. href,href2 : treference;
  1497. hloc : pcgparalocation;
  1498. begin
  1499. href:=ref;
  1500. hloc:=paraloc.location;
  1501. while assigned(hloc) do
  1502. begin
  1503. case hloc^.loc of
  1504. LOC_FPUREGISTER,LOC_CFPUREGISTER:
  1505. begin
  1506. paramanager.allocparaloc(list,paraloc.location);
  1507. a_loadfpu_ref_reg(list,size,size,ref,hloc^.register);
  1508. end;
  1509. LOC_REGISTER :
  1510. case hloc^.size of
  1511. OS_32,
  1512. OS_F32:
  1513. begin
  1514. paramanager.allocparaloc(list,paraloc.location);
  1515. a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
  1516. end;
  1517. OS_64,
  1518. OS_F64:
  1519. cg64.a_load64_ref_cgpara(list,href,paraloc);
  1520. else
  1521. a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
  1522. end;
  1523. LOC_REFERENCE :
  1524. begin
  1525. reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
  1526. { concatcopy should choose the best way to copy the data }
  1527. g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
  1528. end;
  1529. else
  1530. internalerror(200408241);
  1531. end;
  1532. inc(href.offset,tcgsize2size[hloc^.size]);
  1533. hloc:=hloc^.next;
  1534. end;
  1535. end;
  1536. procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
  1537. begin
  1538. list.concat(setoppostfix(taicpu.op_reg_reg(A_MVF,reg2,reg1),cgsize2fpuoppostfix[tosize]));
  1539. end;
  1540. procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
  1541. var
  1542. oppostfix:toppostfix;
  1543. begin
  1544. case fromsize of
  1545. OS_32,
  1546. OS_F32:
  1547. oppostfix:=PF_S;
  1548. OS_64,
  1549. OS_F64:
  1550. oppostfix:=PF_D;
  1551. OS_F80:
  1552. oppostfix:=PF_E;
  1553. else
  1554. InternalError(200309021);
  1555. end;
  1556. handle_load_store(list,A_LDF,oppostfix,reg,ref);
  1557. if fromsize<>tosize then
  1558. a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
  1559. end;
  1560. procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  1561. var
  1562. oppostfix:toppostfix;
  1563. begin
  1564. case tosize of
  1565. OS_F32:
  1566. oppostfix:=PF_S;
  1567. OS_F64:
  1568. oppostfix:=PF_D;
  1569. OS_F80:
  1570. oppostfix:=PF_E;
  1571. else
  1572. InternalError(200309022);
  1573. end;
  1574. handle_load_store(list,A_STF,oppostfix,reg,ref);
  1575. end;
  1576. { comparison operations }
  { Compares reg with the constant a and emits a conditional jump to l for
    cmp_op. The flags register is allocated around the compare/jump pair.
    Three encodings are tried in order:
      1. CMP reg,#a   if a is encodable (shifter constant, or Thumb immediate
                      when generating Thumb code)
      2. CMN reg,#-a  if -a is a shifter constant (not for Thumb code)
      3. load a into a scratch register and CMP reg,scratch }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      if (not(GenerateThumbCode) and is_shifter_const(a,rot)) or
         ((GenerateThumbCode) and is_thumb_imm(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_CMP,reg,a));
        end
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual,
        hence those two constants are excluded from the CMN form }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_CMN,reg,-a));
        end
      else
        begin
          { constant not encodable as an immediate: go through a register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.
    reverse=false: RBIT + CLZ, and if the CLZ result equals 32 dst is
                   replaced by $ff (conditional MOV, preceded by an IT
                   instruction when generating Thumb-2 code).
    reverse=true : CLZ, then dst:=31-dst (RSB) and dst:=dst and 255.
    srcsize and dstsize are not consulted. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          { the compare/conditional move pair needs the flags }
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op.
    The flags register is allocated for the duration of the pair; size is
    not consulted. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { note the operand order: reg2 is the first CMP operand }
      cmpinstr:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the function symbol named s. Thumb code
    uses BL, everything else uses B; the instruction is flagged as a jump. }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jumpop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits an unconditional jump to label l. Thumb code uses BL, everything
    else uses B; the instruction is flagged as a jump. }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jumpop : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jumpop:=A_BL
      else
        jumpop:=A_B;
      jump:=taicpu.op_sym(jumpop,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a jump to l that is taken when the flags f hold.
    Non-Thumb code: a single conditional B to l.
    Thumb code: a conditional B with the inverted condition that skips over
    an unconditional jump (a_jmp_always) to l. }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;
      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materialises the flags f in reg using two conditional moves:
    MOV<cond> reg,#1 followed by MOV<inverse cond> reg,#0.
    size is not consulted. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      setone,setzero : taicpu;
    begin
      setone:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      list.concat(setone);
      setzero:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
      list.concat(setzero);
    end;
  { Emits the profiling hook. Only system_arm_linux is handled: R14 is
    pushed and '__gnu_mcount_nc' is called. Every other target raises
    internal error 2014091201. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Emits the procedure prologue into list.
      localsize    : requested size of the local area; rounded up to a
                     multiple of 4 before use
      nostackframe : when true no instructions are emitted, only the
                     bookkeeping in front of the big if-statement runs
    When a frame is built the steps are, in this order:
      1. collect the callee-saved FPA or VFP registers that are in use
      2. push the integer registers (one STM, or two STMs on Darwin targets)
         and set up the frame pointer
      3. lower the stack pointer by the (alignment-corrected) local size
      4. store the FPA/VFP registers collected in step 1 }
  1693. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1694. var
  1695. ref : treference;
  1696. shift : byte;
  1697. firstfloatreg,lastfloatreg,
  1698. r : byte;
  1699. mmregs,
  1700. regs, saveregs : tcpuregisterset;
  1701. registerarea,
  1702. r7offset,
  1703. stackmisalignment : pint;
  1704. imm1, imm2: DWord;
  1705. stack_parameters : Boolean;
  1706. begin
  1707. LocalSize:=align(LocalSize,4);
  1708. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1709. { call instruction does not put anything on the stack }
  1710. registerarea:=0;
  { High(TSuperRegister) means that no padding register was pushed; it is
    overwritten below if one of R0..R3 gets added to the STM for alignment }
  1711. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1712. lastfloatreg:=RS_NO;
  1713. if not(nostackframe) then
  1714. begin
  1715. firstfloatreg:=RS_NO;
  1716. mmregs:=[];
  1717. case current_settings.fputype of
  1718. fpu_fpa,
  1719. fpu_fpa10,
  1720. fpu_fpa11:
  1721. begin
  1722. { save floating point registers? }
  1723. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  { remember the lowest and highest used F register and reserve 12 bytes
    of register area for each used one }
  1724. for r:=RS_F0 to RS_F7 do
  1725. if r in regs then
  1726. begin
  1727. if firstfloatreg=RS_NO then
  1728. firstfloatreg:=r;
  1729. lastfloatreg:=r;
  1730. inc(registerarea,12);
  1731. end;
  1732. end;
  1733. fpu_vfpv2,
  1734. fpu_vfpv3,
  1735. fpu_vfpv4,
  1736. fpu_vfpv3_d16:
  1737. begin;
  1738. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1739. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1740. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1741. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1742. end;
  1743. end;
  1744. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1745. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1746. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1747. { save int registers }
  1748. reference_reset(ref,4,[]);
  1749. ref.index:=NR_STACK_POINTER_REG;
  1750. ref.addressmode:=AM_PREINDEXED;
  { regs = integer registers used by this procedure minus the volatile ones }
  1751. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1752. if not(target_info.system in systems_darwin) then
  1753. begin
  { NOTE(review): the stack pointer was already allocated a few lines up;
    this second a_reg_alloc looks redundant - confirm before removing }
  1754. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1755. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1756. begin
  { keep a copy of the entry SP in R12; it is pushed as part of the frame
    and used below to derive the frame pointer }
  1757. a_reg_alloc(list,NR_R12);
  1758. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1759. end;
  1760. { the (old) ARM APCS requires saving both the stack pointer (to
  1761. crawl the stack) and the PC (to identify the function this
  1762. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1763. and R15 -- still needs updating for EABI and Darwin, they don't
  1764. need that }
  1765. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1766. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1767. else
  1768. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1769. include(regs,RS_R14);
  1770. if regs<>[] then
  1771. begin
  { 4 bytes of register area per pushed integer register }
  1772. for r:=RS_R0 to RS_R15 do
  1773. if r in regs then
  1774. inc(registerarea,4);
  1775. { if the stack is not 8 byte aligned, try to add an extra register,
  1776. so we can avoid the extra sub/add ...,#4 later (KB) }
  1777. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1778. for r:=RS_R3 downto RS_R0 do
  1779. if not(r in regs) then
  1780. begin
  1781. regs:=regs+[r];
  1782. inc(registerarea,4);
  1783. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1784. break;
  1785. end;
  1786. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1787. end;
  1788. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1789. begin
  1790. { the framepointer now points to the saved R15, so the saved
  1791. framepointer is at R11-12 (for get_caller_frame) }
  1792. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1793. a_reg_dealloc(list,NR_R12);
  1794. end;
  1795. end
  1796. else
  1797. begin
  1798. { always save r14 if we use r7 as the framepointer, because
  1799. the parameter offsets are hardcoded in advance and always
  1800. assume that r14 sits on the stack right behind the saved r7
  1801. }
  1802. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1803. include(regs,RS_FRAME_POINTER_REG);
  1804. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1805. include(regs,RS_R14);
  1806. if regs<>[] then
  1807. begin
  1808. { on Darwin, you first have to save [r4-r7,lr], and then
  1809. [r8,r10,r11] and make r7 point to the previously saved
  1810. r7 so that you can perform a stack crawl based on it
  1811. ([r7] is previous stack frame, [r7+4] is return address
  1812. }
  1813. include(regs,RS_FRAME_POINTER_REG);
  1814. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset counts the bytes taken by pushed registers numbered below the
    frame pointer register, i.e. the distance from the new SP to the saved
    frame pointer slot }
  1815. r7offset:=0;
  1816. for r:=RS_R0 to RS_R15 do
  1817. if r in saveregs then
  1818. begin
  1819. inc(registerarea,4);
  1820. if r<RS_FRAME_POINTER_REG then
  1821. inc(r7offset,4);
  1822. end;
  1823. { save the registers }
  1824. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1825. { make r7 point to the saved r7 (regardless of whether this
  1826. frame uses the framepointer, for backtrace purposes) }
  1827. if r7offset<>0 then
  1828. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1829. else
  1830. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1831. { now save the rest (if any) }
  1832. saveregs:=regs-saveregs;
  1833. if saveregs<>[] then
  1834. begin
  1835. for r:=RS_R8 to RS_R11 do
  1836. if r in saveregs then
  1837. inc(registerarea,4);
  1838. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1839. end;
  1840. end;
  1841. end;
  { how far the pushed registers leave SP away from the local alignment }
  1842. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  { SP is adjusted when there are locals, or when SP is misaligned and the
    procedure makes calls or is an assembler procedure }
  1843. if (LocalSize<>0) or
  1844. ((stackmisalignment<>0) and
  1845. ((pi_do_call in current_procinfo.flags) or
  1846. (po_assembler in current_procinfo.procdef.procoptions))) then
  1847. begin
  { pad localsize so that registerarea+localsize is a multiple of the
    local alignment }
  1848. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  1849. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1850. begin
  { the frame size was fixed in advance: it must cover what is needed and
    then replaces the computed local size }
  1851. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1852. internalerror(2014030901)
  1853. else
  1854. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1855. end;
  { subtract localsize from SP: with one immediate, with two immediates, or
    via R12 if the value cannot be encoded either way }
  1856. if is_shifter_const(localsize,shift) then
  1857. begin
  { NOTE(review): R12 is only allocated above on the non-Darwin path with a
    separate frame pointer, where it is also released again; confirm that
    this additional dealloc is intended }
  1858. a_reg_dealloc(list,NR_R12);
  1859. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1860. end
  1861. else if split_into_shifter_const(localsize, imm1, imm2) then
  1862. begin
  1863. a_reg_dealloc(list,NR_R12);
  1864. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1865. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1866. end
  1867. else
  1868. begin
  1869. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1870. a_reg_alloc(list,NR_R12);
  1871. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1872. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1873. a_reg_dealloc(list,NR_R12);
  1874. end;
  1875. end;
  { store the FPA/VFP registers selected at the top, if any }
  1876. if (mmregs<>[]) or
  1877. (firstfloatreg<>RS_NO) then
  1878. begin
  1879. reference_reset(ref,4,[]);
  { the save area is addressed through R12 when
    tg.direction*floatregstart>=1023 or when a VFP unit is selected;
    otherwise framepointer+floatregstart is used directly }
  1880. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1881. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1882. begin
  1883. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1884. begin
  { R12 := framepointer - (-floatregstart) }
  1885. a_reg_alloc(list,NR_R12);
  1886. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1887. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1888. a_reg_dealloc(list,NR_R12);
  1889. end
  1890. else
  1891. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1892. ref.base:=NR_R12;
  1893. end
  1894. else
  1895. begin
  1896. ref.base:=current_procinfo.framepointer;
  1897. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1898. end;
  1899. case current_settings.fputype of
  1900. fpu_fpa,
  1901. fpu_fpa10,
  1902. fpu_fpa11:
  1903. begin
  { one SFM covering firstfloatreg..lastfloatreg }
  1904. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1905. lastfloatreg-firstfloatreg+1,ref));
  1906. end;
  1907. fpu_vfpv2,
  1908. fpu_vfpv3,
  1909. fpu_vfpv4,
  1910. fpu_vfpv3_d16:
  1911. begin
  { the VSTM reference carries its address register in index, not in base }
  1912. ref.index:=ref.base;
  1913. ref.base:=NR_NO;
  1914. { FSTMX is deprecated on ARMv6 and later }
  1915. {if (current_settings.cputype<cpu_armv6) then
  1916. postfix:=PF_IAX
  1917. else
  1918. postfix:=PF_IAD;}
  1919. if mmregs<>[] then
  1920. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1921. end;
  1922. end;
  1923. end;
  1924. end;
  1925. end;
  { Emits the procedure epilogue into list; it undoes what g_proc_entry set
    up: reloads the saved FPA/VFP registers, releases the local area and
    pops the integer registers, loading the return address into PC.
      parasize     : not referenced by this implementation
      nostackframe : when true only the return instruction is emitted
                     (MOV PC,R14, or BX R14 if the CPU has BX) }
  1926. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1927. var
  1928. ref : treference;
  1929. LocalSize : longint;
  1930. firstfloatreg,lastfloatreg,
  1931. r,
  1932. shift : byte;
  1933. mmregs,
  1934. saveregs,
  1935. regs : tcpuregisterset;
  1936. registerarea,
  1937. stackmisalignment: pint;
  1938. paddingreg: TSuperRegister;
  1939. imm1, imm2: DWord;
  1940. begin
  1941. if not(nostackframe) then
  1942. begin
  1943. registerarea:=0;
  1944. firstfloatreg:=RS_NO;
  1945. lastfloatreg:=RS_NO;
  1946. mmregs:=[];
  1947. saveregs:=[];
  1948. case current_settings.fputype of
  1949. fpu_fpa,
  1950. fpu_fpa10,
  1951. fpu_fpa11:
  1952. begin
  1953. { restore floating point registers? }
  1954. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1955. for r:=RS_F0 to RS_F7 do
  1956. if r in regs then
  1957. begin
  1958. if firstfloatreg=RS_NO then
  1959. firstfloatreg:=r;
  1960. lastfloatreg:=r;
  1961. { floating point register space is already included in
  1962. localsize below by calc_stackframe_size
  1963. inc(registerarea,12);
  1964. }
  1965. end;
  1966. end;
  1967. fpu_vfpv2,
  1968. fpu_vfpv3,
  1969. fpu_vfpv4,
  1970. fpu_vfpv3_d16:
  1971. begin;
  1972. { restore vfp registers? }
  1973. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1974. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1975. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1976. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1977. end;
  1978. end;
  { reload the FPA/VFP registers first; the save area is addressed exactly
    as in g_proc_entry (through R12, or framepointer+floatregstart) }
  1979. if (firstfloatreg<>RS_NO) or
  1980. (mmregs<>[]) then
  1981. begin
  1982. reference_reset(ref,4,[]);
  1983. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1984. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1985. begin
  1986. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1987. begin
  { R12 := framepointer - (-floatregstart) }
  1988. a_reg_alloc(list,NR_R12);
  1989. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1990. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1991. a_reg_dealloc(list,NR_R12);
  1992. end
  1993. else
  1994. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1995. ref.base:=NR_R12;
  1996. end
  1997. else
  1998. begin
  1999. ref.base:=current_procinfo.framepointer;
  2000. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  2001. end;
  2002. case current_settings.fputype of
  2003. fpu_fpa,
  2004. fpu_fpa10,
  2005. fpu_fpa11:
  2006. begin
  { one LFM covering firstfloatreg..lastfloatreg }
  2007. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2008. lastfloatreg-firstfloatreg+1,ref));
  2009. end;
  2010. fpu_vfpv2,
  2011. fpu_vfpv3,
  2012. fpu_vfpv4,
  2013. fpu_vfpv3_d16:
  2014. begin
  { the VLDM reference carries its address register in index, not in base }
  2015. ref.index:=ref.base;
  2016. ref.base:=NR_NO;
  2017. { FLDMX is deprecated on ARMv6 and later }
  2018. {if (current_settings.cputype<cpu_armv6) then
  2019. mmpostfix:=PF_IAX
  2020. else
  2021. mmpostfix:=PF_IAD;}
  2022. if mmregs<>[] then
  2023. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2024. end;
  2025. end;
  2026. end;
  { regs = integer registers used by this procedure minus the volatile ones }
  2027. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2028. if (pi_do_call in current_procinfo.flags) or
  2029. (regs<>[]) or
  2030. ((target_info.system in systems_darwin) and
  2031. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2032. begin
  { the slot written for R14 on entry is popped into R15, so the register
    pop doubles as the return }
  2033. exclude(regs,RS_R14);
  2034. include(regs,RS_R15);
  2035. if (target_info.system in systems_darwin) then
  2036. include(regs,RS_FRAME_POINTER_REG);
  2037. end;
  2038. if not(target_info.system in systems_darwin) then
  2039. begin
  2040. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2041. The saved PC came after that but is discarded, since we restore
  2042. the stack pointer }
  2043. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2044. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2045. end
  2046. else
  2047. begin
  2048. { restore R8-R11 already if necessary (they've been stored
  2049. before the others) }
  2050. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2051. if saveregs<>[] then
  2052. begin
  { only the reference is prepared here; the LDM for saveregs is emitted
    further down, after the stack pointer adjustment }
  2053. reference_reset(ref,4,[]);
  2054. ref.index:=NR_STACK_POINTER_REG;
  2055. ref.addressmode:=AM_PREINDEXED;
  2056. for r:=RS_R8 to RS_R11 do
  2057. if r in saveregs then
  2058. inc(registerarea,4);
  2059. regs:=regs-saveregs;
  2060. end;
  2061. end;
  { 4 bytes of register area per integer register that will be popped }
  2062. for r:=RS_R0 to RS_R15 do
  2063. if r in regs then
  2064. inc(registerarea,4);
  2065. { reapply the stack padding reg, in case there was one, see the complimentary
  2066. comment in g_proc_entry() (KB) }
  2067. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
  { only values below RS_R4 denote a real padding register; it must not
    already be part of regs }
  2068. if paddingreg < RS_R4 then
  2069. if paddingreg in regs then
  2070. internalerror(201306190)
  2071. else
  2072. begin
  2073. regs:=regs+[paddingreg];
  2074. inc(registerarea,4);
  2075. end;
  2076. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  { without a separate frame pointer (or on Darwin) SP has to be moved back
    explicitly before the registers can be popped from it }
  2077. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2078. (target_info.system in systems_darwin) then
  2079. begin
  2080. LocalSize:=current_procinfo.calc_stackframe_size;
  2081. if (LocalSize<>0) or
  2082. ((stackmisalignment<>0) and
  2083. ((pi_do_call in current_procinfo.flags) or
  2084. (po_assembler in current_procinfo.procdef.procoptions))) then
  2085. begin
  { same size computation as in g_proc_entry: either the pre-estimated
    frame size minus the register area, or the alignment-padded local size }
  2086. if pi_estimatestacksize in current_procinfo.flags then
  2087. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2088. else
  2089. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { add LocalSize to SP: with one immediate, with two immediates, or via
    R12 if the value cannot be encoded either way }
  2090. if is_shifter_const(LocalSize,shift) then
  2091. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2092. else if split_into_shifter_const(localsize, imm1, imm2) then
  2093. begin
  2094. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2095. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2096. end
  2097. else
  2098. begin
  2099. a_reg_alloc(list,NR_R12);
  2100. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2101. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2102. a_reg_dealloc(list,NR_R12);
  2103. end;
  2104. end;
  { Darwin: pop the second register group (subset of R8,R10,R11) using the
    reference prepared above }
  2105. if (target_info.system in systems_darwin) and
  2106. (saveregs<>[]) then
  2107. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  2108. if regs=[] then
  2109. begin
  { nothing to pop: plain return through R14 }
  2110. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2111. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2112. else
  2113. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2114. end
  2115. else
  2116. begin
  { pop the remaining registers from SP }
  2117. reference_reset(ref,4,[]);
  2118. ref.index:=NR_STACK_POINTER_REG;
  2119. ref.addressmode:=AM_PREINDEXED;
  2120. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2121. end;
  2122. end
  2123. else
  2124. begin
  2125. { restore int registers and return }
  { single LDM based on the frame pointer; on this path regs contains
    RS_R13 and RS_R15 (added above), so it also reloads SP and returns }
  2126. reference_reset(ref,4,[]);
  2127. ref.index:=NR_FRAME_POINTER_REG;
  2128. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2129. end;
  2130. end
  { no stack frame: just return through R14 }
  2131. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2132. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2133. else
  2134. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2135. end;
  { Emits the code that loads the address of the global offset table into
    current_procinfo.got. Nothing is emitted unless PIC code is being
    created, the current procedure needs the GOT and the target uses a GOT
    for PIC.
    The address is computed in R12: a constant-pool word holding
    '_GLOBAL_OFFSET_TABLE_' relative to a local label (offset -8) is loaded
    PC-relative and PC is added at that label. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      gotref : treference;
      poollabel,pclabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sreg : byte;
    begin
      if not((cs_create_pic in current_settings.moduleswitches) and
             (pi_needs_got in current_procinfo.flags) and
             (tf_pic_uses_got in target_info.flags)) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Before the GOT initialization code, allocate the registers used for
        procedure parameters to prevent usage of these registers for temp
        operations in later stages of code generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sreg:=RS_R0 to RS_R3 do
        if sreg in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));

      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);

      { PC-relative load of the constant-pool word }
      reference_reset(gotref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      gotref.symbol:=poollabel;
      gotref.base:=NR_PC;
      gotref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,gotref));

      { the pool word is relative to pclabel, which marks the ADD below }
      current_asmdata.getaddrlabel(pclabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,pclabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,pclabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sreg:=RS_R3 downto RS_R0 do
        if sreg in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.
    Only references with addressmode AM_OFFSET are accepted. A working copy
    of ref is normalised (a lone index becomes the base; symbols and offsets
    that are not shifter constants are resolved through fixref) and the
    address is then formed with ADD/SUB, a constant load, or a plain MOV. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      rot : byte;
      addr : treference;
      movinstr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);

      addr:=ref;

      { Be sure to have a base register }
      if addr.base=NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(2014020702);
          if addr.signindex<0 then
            internalerror(200312023);
          addr.base:=addr.index;
          addr.index:=NR_NO;
        end;

      { symbols, and offsets that neither directly nor negated fit an
        immediate, are moved into a register by fixref }
      if assigned(addr.symbol) or
         not(is_shifter_const(addr.offset,rot) or
             is_shifter_const(-addr.offset,rot)) then
        fixref(list,addr);

      { expect a base here if there is an index }
      if (addr.base=NR_NO) and (addr.index<>NR_NO) then
        internalerror(200312022);

      if addr.index<>NR_NO then
        begin
          { base +/- index, then the offset on top if there is one }
          if addr.shiftmode<>SM_None then
            internalerror(200312021);
          if addr.signindex<0 then
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,addr.base,addr.index,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,addr.base,addr.index,r);
          if addr.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,r,r);
        end
      else if addr.base=NR_NO then
        { neither base nor index: the address is just the offset }
        a_load_const_reg(list,OS_ADDR,addr.offset,r)
      else if addr.offset<>0 then
        a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,addr.base,r)
      else
        begin
          { plain register copy; registered as a move instruction }
          movinstr:=taicpu.op_reg_reg(A_MOV,r,addr.base);
          list.concat(movinstr);
          add_move_instruction(movinstr);
        end;
    end;
  { Rewrites ref so that it no longer carries a symbol or an offset.
    The symbol/offset part is put into a new constant-pool entry in
    current_procinfo.aktlocaldata (or, on arm-darwin for external-like
    symbols, obtained through g_indirect_sym_load), loaded into a temporary
    register, and that register is merged into ref as base or index.
    Instructions are appended to list. On return ref.offset=0 and
    ref.symbol=nil; a PC base/index has been removed from ref. }
  2230. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2231. var
  2232. tmpreg, tmpreg2 : tregister;
  2233. tmpref : treference;
  2234. l, piclabel : tasmlabel;
  2235. indirection_done : boolean;
  2236. begin
  2237. { absolute symbols can't be handled directly, we've to store the symbol reference
  2238. in the text segment and access it pc relative
  2239. For now, we assume that references where base or index equals to PC are already
  2240. relative, all other references are assumed to be absolute and thus they need
  2241. to be handled extra.
  2242. A proper solution would be to change refoptions to a set and store the information
  2243. if the symbol is absolute or relative there.
  2244. }
  2245. { create consts entry }
  { label l marks the new pool entry; the data item itself is appended by
    one of the branches below (the label is emitted even on the Darwin
    indirect-load path, which appends no data) }
  2246. reference_reset(tmpref,4,[]);
  2247. current_asmdata.getjumplabel(l);
  2248. cg.a_label(current_procinfo.aktlocaldata,l);
  2249. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2250. piclabel:=nil;
  2251. tmpreg:=NR_NO;
  2252. indirection_done:=false;
  2253. if assigned(ref.symbol) then
  2254. begin
  2255. if (target_info.system=system_arm_darwin) and
  2256. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2257. begin
  { Darwin: the address comes from an indirect symbol load; the offset is
    added to the loaded address afterwards }
  2258. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2259. if ref.offset<>0 then
  2260. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2261. indirection_done:=true;
  2262. end
  2263. else if (cs_create_pic in current_settings.moduleswitches) then
  2264. if (tf_pic_uses_got in target_info.flags) then
  { PIC with GOT: the pool entry is created with aitconst_got and does not
    include ref.offset }
  2265. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2266. else
  2267. begin
  2268. { ideally, we would want to generate
  2269. ldr r1, LPICConstPool
  2270. LPICLocal:
  2271. ldr/str r2,[pc,r1]
  2272. ...
  2273. LPICConstPool:
  2274. .long _globsym-(LPICLocal+8)
  2275. However, we cannot be sure that the ldr/str will follow
  2276. right after the call to fixref, so we have to load the
  2277. complete address already in a register.
  2278. }
  2279. current_asmdata.getaddrlabel(piclabel);
  2280. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2281. end
  2282. else
  { non-PIC: the pool entry is simply symbol+offset }
  2283. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2284. end
  2285. else
  { no symbol: the pool entry is the 32 bit offset value }
  2286. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2287. { load consts entry }
  2288. if not indirection_done then
  2289. begin
  { PC-relative LDR of the pool entry at label l into a fresh register }
  2290. tmpreg:=getintregister(list,OS_INT);
  2291. tmpref.symbol:=l;
  2292. tmpref.base:=NR_PC;
  2293. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2294. if (cs_create_pic in current_settings.moduleswitches) and
  2295. (tf_pic_uses_got in target_info.flags) and
  2296. assigned(ref.symbol) then
  2297. begin
  { GOT case: second load from [got+tmpreg], then add the offset that was
    left out of the pool entry }
  2298. reference_reset(tmpref,4,[]);
  2299. tmpref.base:=current_procinfo.got;
  2300. tmpref.index:=tmpreg;
  2301. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2302. if ref.offset<>0 then
  2303. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2304. end;
  2305. end;
  2306. if assigned(piclabel) then
  2307. begin
  { PIC without GOT: the loaded value is relative to piclabel, so PC is
    added right at that label; the sum goes into a new address register }
  2308. cg.a_label(list,piclabel);
  2309. tmpreg2:=getaddressregister(list);
  2310. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2311. tmpreg:=tmpreg2
  2312. end;
  2313. { This routine can be called with PC as base/index in case the offset
  2314. was too large to encode in a load/store. In that case, the entire
  2315. absolute expression has been re-encoded in a new constpool entry, and
  2316. we have to remove the use of PC from the original reference (the code
  2317. above made everything relative to the value loaded from the new
  2318. constpool entry) }
  2319. if is_pc(ref.base) then
  2320. ref.base:=NR_NO;
  2321. if is_pc(ref.index) then
  2322. ref.index:=NR_NO;
  { merge tmpreg into what is left of ref }
  2323. if (ref.base<>NR_NO) then
  2324. begin
  2325. if ref.index<>NR_NO then
  2326. begin
  { base and index both in use: fold the base into tmpreg and use the sum
    as the new base, the index stays as it is }
  2327. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2328. ref.base:=tmpreg;
  2329. end
  2330. else
  { NOTE(review): a PC base has normally been cleared just above, so the
    else part of this test looks unreachable unless is_pc and the
    comparison with NR_PC can disagree - confirm }
  2331. if ref.base<>NR_PC then
  2332. begin
  { only a base: tmpreg becomes a positive, unshifted index }
  2333. ref.index:=tmpreg;
  2334. ref.shiftimm:=0;
  2335. ref.signindex:=1;
  2336. ref.shiftmode:=SM_None;
  2337. end
  2338. else
  2339. ref.base:=tmpreg;
  2340. end
  2341. else
  { no base left: tmpreg becomes the base }
  2342. ref.base:=tmpreg;
  { symbol and offset are now contained in tmpreg }
  2343. ref.offset:=0;
  2344. ref.symbol:=nil;
  2345. end;
  { Copies len bytes from source to dest by emitting a call to 'FPC_MOVE'.
    The three parameter locations are taken from the system procedure
    'MOVE' (1: source address, 2: destination address, 3: length). The
    volatile integer and FPU registers are allocated around the call. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,lenpara : TCGPara;
      movedef : tprocdef;
    begin
      movedef:=search_system_proc('MOVE');

      srcpara.init;
      dstpara.init;
      lenpara.init;
      paramanager.getintparaloc(list,movedef,1,srcpara);
      paramanager.getintparaloc(list,movedef,2,dstpara);
      paramanager.getintparaloc(list,movedef,3,lenpara);

      { parameters are loaded from last to first and freed in the same order }
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);

      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list    : instruction list the generated code is appended to
    source  : reference to the first byte to read
    dest    : reference to the first byte to write
    len     : number of bytes to copy; nothing is emitted for len=0
    aligned : when false, only byte sized accesses are generated

    Strategy, in the order tested below:
      - aligned and len=4: one 32 bit load/store pair
      - aligned and len=2: one 16 bit load/store pair
      - aligned and len<=helpsize: up to maxtmpreg 32 bit loads into
        separate registers, followed by the matching stores and a tail
        copied in 4/2/1 byte steps
      - len<=4 (not aligned): byte by byte with incremented offsets
      - everything else: a counted copy loop (genloop/genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved ones; also +2 is used because of the registers required
        for referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;

    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Copy loop using post-indexed addressing on srcref/dstref.
      Copies count div size elements of size bytes in the loop and the
      remaining count mod size bytes afterwards.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { flag setting decrement; the conditional jump below tests its result }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { Thumb variant of genloop: the base registers of srcref/dstref are
      advanced with explicit ADD instructions instead of post-indexed
      addressing.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { each iteration moves size bytes, so advance the pointers by size }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the decrement has to set the flags, the jump below depends on them }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest length still copied without a loop: 52 with maxtmpreg=10 }
      helpsize:=12+maxtmpreg*4;
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          srcreg:=getintregister(list,OS_ADDR);

          { explicit pc relative addressing, could be
            e.g. a floating point constant }
          if source.base=NR_PC then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;

          { first all 32 bit loads, each into its own register ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          destreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,dest,destreg);
          reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
          { ... then the matching stores }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy what is left in steps of 4, 2 or 1 bytes }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference takes all of its properties from source }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches  then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest; forwards to
    g_concatcopy_internal with its aligned argument set to false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      treat_as_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,treat_as_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as aligned
    unless the alignment of either reference is 1 or 3. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      odd_alignment : boolean;
    begin
      odd_alignment:=(source.alignment in [1,3]) or
        (dest.alignment in [1,3]);
      g_concatcopy_internal(list,source,dest,len,not odd_alignment);
    end;
  { Emits an overflow check for l/def; forwards to g_overflowCheck_loc
    with an overflow location of kind LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      voidloc : tlocation;
    begin
      voidloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,voidloc);
    end;
  { Emits a conditional branch around a call to FPC_OVERFLOW.
    Nothing is generated unless cs_check_overflow is set in the current
    local switches.

    ovloc.loc=LOC_VOID  : the branch condition is derived from def and,
                          for pointer/unsigned/boolean types, from the
                          opcode of the last instruction in List
    ovloc.loc=LOC_FLAGS : the branch uses the inverse of ovloc.resflags
    any other location raises internalerror(200409281) }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      branch : TAiCpu;
      okflags : tresflags;
      carrybased : boolean;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_VOID:
          begin
            branch:=taicpu.op_sym(A_B,skiplabel);
            branch.is_jmp:=true;

            { pointers and the listed ordinal types are tested through
              the carry flag, everything else through the overflow flag }
            carrybased:=(def.typ=pointerdef) or
              ((def.typ=orddef) and
               (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                         pasbool8,pasbool16,pasbool32,pasbool64]));
            if not carrybased then
              branch.SetCondition(C_VC)
            else if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
              branch.SetCondition(C_CS)
            else
              branch.SetCondition(C_CC);

            list.concat(branch);
          end;
        LOC_FLAGS:
          begin
            { jump over the error call when the overflow flags are NOT set }
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally generates no code. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { nothing to emit here: this work is done in g_proc_entry }
    end;
  { Intentionally generates no code. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { nothing to emit here: this work is done in g_proc_exit }
    end;
  { Emits a jump to l that is taken when the comparison condition cond
    holds.

    When generating Thumb code, a branch with the inverted condition is
    emitted to a fresh local label, followed by an unconditional jump to
    l and the local label itself. Otherwise a single conditional branch
    to l is emitted. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
      thumb : boolean;
    begin
      thumb:=GenerateThumbCode;
      if thumb then
        begin
          { the optimizer has to fix this if jump range is sufficient short }
          current_asmdata.getjumplabel(skiplabel);
          branch:=Taicpu.Op_sym(A_B,skiplabel);
          branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
        end
      else
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
        end;
      branch.is_jmp:=true;
      list.concat(branch);
      if thumb then
        begin
          a_jmp_always(list,l);
          a_label(list,skiplabel);
        end;
    end;
  { Returns the opcode stored in the table below for the pair
    (fromsize,tosize): A_VMOV when both are the same of OS_F32/OS_F64,
    A_VCVT when they are the two different ones. Every other combination
    in the OS_F32..OS_F128 range raises internalerror(200312205). }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      optable : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=optable[fromsize,tosize];
      if result<>A_NONE then
        exit;
      internalerror(200312205);
    end;
  { Returns the instruction postfix stored in the table below for the
    pair (fromsize,tosize); PF_None for every combination that involves
    a size other than OS_F32/OS_F64. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      postfixtable : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      get_scalar_mm_prefix:=postfixtable[fromsize,tosize];
    end;
  { Emits the instruction that moves or converts reg1 into reg2.
    Opcode and postfix are looked up with (tosize,fromsize) through
    get_scalar_mm_op/get_scalar_mm_prefix. A non-nil shuffle that is not
    a scalar shuffle raises internalerror(2009112407). A resulting VMOV
    is registered through add_move_instruction. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
    begin
      if (shuffle<>nil) and not shufflescalar(shuffle) then
        internalerror(2009112407);
      mov:=setoppostfix(taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),get_scalar_mm_prefix(tosize,fromsize));
      list.concat(mov);
      if mov.opcode=A_VMOV then
        add_move_instruction(mov);
    end;
  { Loads the value at ref into the mm register reg.

    Integer source sizes (32/64 bit) are mapped to OS_F32/OS_F64 and must
    then equal tosize. When fromsize and tosize differ, the value is first
    loaded into a temporary mm register and converted with
    a_loadmm_reg_reg afterwards. References with alignment 1 or 2 are
    loaded through integer registers, all others with VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpr,
      loadreg : tregister;
      gprpair : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      if fromsize in [OS_32,OS_S32] then
        begin
          fromsize:=OS_F32;
          { since we are loading an integer, no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112801);
        end
      else if fromsize in [OS_64,OS_S64] then
        begin
          fromsize:=OS_F64;
          { since we are loading an integer, no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112901);
        end;

      { load directly into reg unless a conversion has to follow }
      if fromsize=tosize then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else
        case fromsize of
          OS_F32:
            begin
              gpr:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,ref,gpr);
              a_loadmm_intreg_reg(list,OS_32,OS_F32,gpr,loadreg,mms_movescalar);
            end;
          OS_F64:
            begin
              gprpair.reglo:=getintregister(list,OS_32);
              gprpair.reghi:=getintregister(list,OS_32);
              cg64.a_load64_ref_reg(list,ref,gprpair);
              cg64.a_loadmm_intreg64_reg(list,OS_F64,gprpair,loadreg);
            end;
          else
            internalerror(2009112412);
        end;

      if loadreg<>reg then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the mm register reg to ref.

    Integer destination sizes (32/64 bit) are mapped to OS_F32/OS_F64 and
    must then equal fromsize. When fromsize and tosize differ, reg is
    first converted into a temporary mm register with a_loadmm_reg_reg.
    References with alignment 1 or 2 are written through integer
    registers, all others with VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpr,
      storereg : tregister;
      gprpair : tregister64;
    begin
      if (shuffle<>nil) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      if tosize in [OS_32,OS_S32] then
        begin
          tosize:=OS_F32;
          { since we are storing as an integer, no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112801);
        end
      else if tosize in [OS_64,OS_S64] then
        begin
          tosize:=OS_F64;
          { since we are storing as an integer, no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112901);
        end;

      { store reg directly unless it has to be converted first }
      if fromsize=tosize then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else
        case tosize of
          OS_F32:
            begin
              gpr:=getintregister(list,OS_32);
              a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpr,shuffle);
              a_load_reg_ref(list,OS_32,OS_32,gpr,ref);
            end;
          OS_F64:
            begin
              gprpair.reglo:=getintregister(list,OS_32);
              gprpair.reghi:=getintregister(list,OS_32);
              cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gprpair);
              cg64.a_load64_reg_ref(list,gprpair,ref);
            end;
          else
            internalerror(2009112417);
        end;
    end;
  { Emits a VMOV from the integer register intreg to the mm register
    mmreg. Only OS_32/OS_S32 -> OS_F32 with a nil or scalar shuffle is
    accepted; anything else raises an internal error. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if tosize<>OS_F32 then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Emits a VMOV from the mm register mmreg to the integer register
    intreg. Only OS_F32 -> OS_32/OS_S32 with a nil or scalar shuffle is
    accepted; anything else raises an internal error. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if fromsize<>OS_F32 then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112420);
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Only supports OP_XOR of a register with itself, which is implemented
    by moving an integer zero into dst.

    Raises internalerror(2009112906) for any other op,
    internalerror(2009112907) when src<>dst, when the register size does
    not match size or when a shuffle is passed, and
    internalerror(2009112908) for sizes other than OS_F32/OS_F64. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      { the vfp doesn't support xor nor any other logical operation, but
        this routine is used to initialise global mm regvars. We can
        easily initialise an mm reg with 0 though. }
      if op<>OP_XOR then
        internalerror(2009112906);

      if (src<>dst) or
         (reg_cgsize(src)<>size) or
         assigned(shuffle) then
        internalerror(2009112907);
      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      case size of
        OS_F32:
          list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg));
        OS_F64:
          { the same zero register supplies both halves }
          list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg));
        else
          internalerror(2009112908);
      end;
    end;
  { After one of the operations MUL, SHL, ADD, SUB or NEG on an 8 or
    16 bit size, emits a load of dst into itself from OS_32 to size.
    Does nothing for other operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      widening_ops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      narrow_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in widening_ops) then
        exit;
      if size in narrow_sizes then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits an MLA instruction with the operands op1..op4. Any operand that
    is R15, or R13 while generating Thumb or Thumb-2 code, is first
    copied into a newly allocated integer register which is then used in
    its place. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a fresh copy when it is one of the registers
      described above }
    procedure substitute(var reg : TRegister);
      var
        copyreg : TRegister;
        mustcopy : boolean;
      begin
        mustcopy:=(getsupreg(reg)=RS_R15) or
          ((getsupreg(reg)=RS_R13) and (GenerateThumbCode or GenerateThumb2Code));
        if not mustcopy then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      substitute(op1);
      substitute(op2);
      substitute(op3);
      substitute(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { 64 bit operation with a register source and a register destination.
    OP_NEG is emitted as RSBS on the low halves followed by RSC on the
    high halves; OP_NOT as a 32 bit NOT on each half; every other
    operation is forwarded to a_op64_reg_reg_reg with regdst used as
    second source and as destination. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags are live between the two instructions }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit operation with a constant operand performed in place:
    forwards to a_op64_const_reg_reg with reg passed both as source and
    as destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,op,size,value,{source}reg,{destination}reg);
    end;
  { 64 bit operation regdst := regsrc op value; forwards to
    a_op64_const_reg_reg_checkoverflow with setflags=false and ignores
    the overflow location it returns. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unusedloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unusedloc);
    end;
  { 64 bit three register operation; forwards to
    a_op64_reg_reg_reg_checkoverflow with setflags=false and ignores the
    overflow location it returns. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unusedloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unusedloc);
    end;
  { Emits a VMOV of the integer register pair intreg (low, high) into the
    mm register mmreg. mmsize has to be OS_F64, otherwise
    internalerror(2009112405) is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if mmsize=OS_F64 then
        list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi))
      else
        internalerror(2009112405);
    end;
  { Emits a VMOV of the mm register mmreg into the integer register pair
    intreg (low, high). mmsize has to be OS_F64, otherwise
    internalerror(2009112406) is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if mmsize=OS_F64 then
        list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg))
      else
        internalerror(2009112406);
    end;
  { Emits regdst := regsrc op value for a 64 bit register pair.

    list     : instruction list the generated code is appended to
    op       : operation; OP_NEG and OP_NOT raise internalerror(2012022501)
    size     : operand size; only used to decide whether ovloc is filled
    value    : 64 bit constant operand, split with lo()/hi()
    setflags : request flag setting on the high word instruction
    ovloc    : set to LOC_VOID on entry; set to LOC_FLAGS (F_CS for
               OP_ADD, F_CC for OP_SUB) when flags were requested for an
               OP_ADD/OP_SUB with size=OS_64

    When flags are requested (setflags or cgsetflags) for OP_ADD/OP_SUB,
    NR_DEFAULTFLAGS is left allocated on return so that the flags can
    still be consumed. In all other cases the flags are only needed to
    pass the carry from the low to the high word and are released again
    before returning. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { both halves set the flags; the flags stay allocated }
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the arm has a weird opinion how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed by the ADC: release the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed by the SBC: release the flags }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Emits the 64 bit operation on register pairs: for OP_ADD
    regdst := regsrc1 + regsrc2, for OP_SUB regdst := regsrc2 - regsrc1,
    for OP_AND/OP_OR/OP_XOR the 32 bit operation on each half.

    OP_NEG and OP_NOT raise internalerror(2012022502), any other
    unsupported operation internalerror(2003083101).

    ovloc is set to LOC_VOID on entry. When flags are requested (setflags
    or cgsetflags) for OP_ADD/OP_SUB, the high word instruction sets the
    flags as well, NR_DEFAULTFLAGS stays allocated and, for size=OS_64,
    ovloc becomes LOC_FLAGS with F_CS (add) or F_CC (sub). Without a flag
    request the flags are deallocated after the high word instruction. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      keepflags : boolean;
      highinstr : taicpu;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      keepflags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);

      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
            cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
            highinstr:=taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi);
            if keepflags then
              highinstr:=setoppostfix(highinstr,PF_S);
            list.concat(highinstr);
            if not keepflags then
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
            highinstr:=taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi);
            if keepflags then
              highinstr:=setoppostfix(highinstr,PF_S);
            list.concat(highinstr);
            if not keepflags then
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;

      if keepflags and (size=OS_64) then
        begin
          { the arm has a weird opinion how flags for SUB/ADD are handled }
          ovloc.loc:=LOC_FLAGS;
          if op=OP_ADD then
            ovloc.resflags:=F_CS
          else
            ovloc.resflags:=F_CC;
        end;
    end;
  { Creates the Thumb integer register allocator after calling the
    inherited initialisation. R0..R7 are handed to the allocator, except
    that R7 is left out when a current procedure exists and uses R7 as
    its frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    var
      r7_is_framepointer : boolean;
    begin
      inherited init_register_allocators;
      r7_is_framepointer:=assigned(current_procinfo) and (current_procinfo.framepointer=NR_R7);
      if not r7_is_framepointer then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Frees the integer, fpu and mm register allocators, then calls the
    inherited clean-up. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      { released in the order int, fpu, mm }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits the Thumb procedure prologue.

    The prologue pushes the non-volatile integer registers used by the
    procedure together with R14 (and R7 when a frame pointer is in use),
    reserves the local stack area and finally copies the stack pointer into
    the frame pointer.

    list         - instruction list the prologue is appended to
    localsize    - size of the locals in bytes; it is rounded up to a
                   multiple of 4 before use
    nostackframe - if true, no prologue code is emitted at all }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      pushref : treference;
      supreg : byte;
      savedregs : tcpuregisterset;
      misalign : pint;
      pushedbytes : DWord;
      uses_stack_paras,
      has_fp : Boolean;
    begin
      uses_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      misalign:=0;
      if nostackframe then
        exit;

      has_fp:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if has_fp then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { save int registers }
      reference_reset(pushref,4,[]);
      pushref.index:=NR_STACK_POINTER_REG;
      pushref.addressmode:=AM_PREINDEXED;
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

      { the original code carries a disabled (//!!!!) sequence here that
        would copy the stack pointer into R12 when a frame pointer is used }

      { the (old) ARM APCS requires saving both the stack pointer (to
        crawl the stack) and the PC (to identify the function this
        stack frame belongs to) -- still needs updating for EABI and
        Darwin, they don't need that.
        Here R14 is always saved, and R7 in addition when a frame pointer
        is in use. }
      include(savedregs,RS_R14);
      if has_fp then
        include(savedregs,RS_R7);

      { safely estimate stack size: a frame that may not fit the immediate
        SUB forms below needs R4 as scratch register, so mark it as used
        and save it }
      if localsize+current_settings.alignment.localalignmax+4>508 then
        begin
          include(rg[R_INTREGISTER].used_in_proc,RS_R4);
          include(savedregs,RS_R4);
        end;

      { count the bytes occupied by the pushed registers and push them }
      pushedbytes:=0;
      if savedregs<>[] then
        begin
          for supreg:=RS_R0 to RS_R15 do
            if supreg in savedregs then
              inc(pushedbytes,4);
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,savedregs));
        end;

      misalign:=pushedbytes mod current_settings.alignment.localalignmax;
      if uses_stack_paras or (LocalSize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          if not uses_stack_paras then
            { pad the locals so that the stack pointer ends up aligned }
            localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign
          else if localsize<=tcpuprocinfo(current_procinfo).stackframesize then
            { stack parameters are accessed, so the previously estimated
              stack size must be used }
            localsize:=tcpuprocinfo(current_procinfo).stackframesize-pushedbytes
          else
            begin
              writeln(localsize);
              writeln(tcpuprocinfo(current_procinfo).stackframesize);
              internalerror(2013040601);
            end;

          if localsize<508 then
            { one immediate SUB is sufficient }
            list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if localsize<=1016 then
            begin
              { split the adjustment into two immediate SUBs }
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
            end
          else
            begin
              { large frame: load the negated size into R4 and add it to
                the stack pointer (the original code also carries a
                disabled //!!!! variant of this based on R12) }
              a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
              include(savedregs,RS_R4);
            end;
        end;

      { frame pointer := stack pointer }
      if has_fp then
        list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
    end;
  { Emits the Thumb procedure epilogue.

    With a stack frame, the local area is released again and the saved
    registers are popped; R15 is part of the popped set, so the POP also
    performs the return.  Without a stack frame only a return through R14
    is generated.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - if true, only the return instruction is emitted }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      LocalSize : longint;
      supreg : byte;
      restoreregs : tcpuregisterset;
      poppedbytes : DWord;
      misalign : pint;
      uses_stack_paras : Boolean;

    { return via the link register: BX if the CPU has it, MOV PC,R14 otherwise }
    procedure emit_lr_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      if nostackframe then
        begin
          emit_lr_return;
          exit;
        end;

      uses_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: the non-volatile ones used by the procedure,
        the PC, and the frame pointer if there is one }
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(restoreregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(restoreregs,getsupreg(current_procinfo.framepointer));

      poppedbytes:=0;
      for supreg:=RS_R0 to RS_R15 do
        if supreg in restoreregs then
          inc(poppedbytes,4);
      misalign:=poppedbytes mod current_settings.alignment.localalignmax;

      { determine the size of the local area, mirroring g_proc_entry }
      LocalSize:=current_procinfo.calc_stackframe_size;
      if uses_stack_paras then
        LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-poppedbytes
      else
        LocalSize:=align(LocalSize+misalign,current_settings.alignment.localalignmax)-misalign;

      { NOTE(review): when a frame pointer is in use on a non-Darwin target
        nothing at all is emitted below (no stack release, no POP) --
        confirm that this combination cannot occur for Thumb code }
      if not((current_procinfo.framepointer=NR_STACK_POINTER_REG) or
             (target_info.system in systems_darwin)) then
        exit;

      { release the local area; nothing has to be emitted for an empty one }
      if LocalSize<>0 then
        begin
          if LocalSize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if LocalSize<=1016 then
            begin
              { two immediate ADDs }
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize-508));
            end
          else
            begin
              { large frame: go through R3 as scratch register }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if restoreregs=[] then
        emit_lr_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,restoreregs));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg.

    If the reference is known to be less aligned (1 or 2 bytes) than the
    access size, the value is assembled from several narrower loads that
    are shifted and ORed together; otherwise a single load is emitted via
    handle_load_store.

    list     - instruction list the code is appended to
    fromsize - size of the value in memory; replaced by tosize when it is
               not smaller than tosize
    tosize   - size of the destination
    Ref      - memory operand
    reg      - destination register }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      step,part : integer;

    { makes partref address the operand: simple references are used as they
      are, complicated ones are first turned into a plain base register }
    procedure init_partref;
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { advances partref by delta bytes, loads the next part into partreg and
      merges it into reg at bit position shift }
    procedure merge_next_part(delta : integer;partsize : tcgsize;shift : longint);
      begin
        inc(partref.offset,delta);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        list.concat(taicpu.op_reg_const(A_LSL,partreg,shift));
        list.concat(taicpu.op_reg_reg(A_ORR,reg,partreg));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix matching the source size }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        { sufficiently aligned: a single load does the job }
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { the least significant part is loaded first; step is the
            direction in which the following parts are found in memory }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref;
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                { low byte, always zero extended }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { high byte, sign extended for a signed source }
                if FromSize=OS_16 then
                  merge_next_part(step,OS_8,8)
                else
                  merge_next_part(step,OS_S8,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_next_part(step*2,OS_16,16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for part:=1 to 3 do
                      merge_next_part(step,OS_8,8*part);
                  end;
              end;
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg.

    Constants accepted by is_thumb_imm are loaded with a MOV; all others
    are placed in the local data of the current procedure and fetched with
    a PC relative LDR.

    list - instruction list the code is appended to
    size - operand size; only 8, 16 and 32 bit sizes are accepted
    a    - constant to load
    reg  - destination register }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { emit "label: .long a" into the local data ... }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry just added to the local data }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));

      { ... and load it relative to the PC }
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef, at every
    location (register or memory) in which the caller passes it.

    Offsets accepted by is_thumb_imm are subtracted directly; other offsets
    are loaded from the local data into R4, which is saved and restored
    around its use with PUSH/POP.

    list    - instruction list the code is appended to
    procdef - procedure whose self parameter is adjusted
    ioffset - value subtracted from self }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      hsym : tsym;
      href : treference;
      paraloc : Pcgparalocation;

    { saves R4 on the stack and loads ioffset into it from a 4 byte aligned
      constant placed in the local data; the caller has to POP R4 again }
    procedure load_ioffset_into_r4;
      var
        poollabel : TAsmLabel;
        poolref : treference;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      hsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(hsym) and
             (hsym.typ=paravarsym)) then
        internalerror(200305251);

      { walk over all locations of the self parameter }
      paraloc:=tparavarsym(hsym).paraloc[callerside].location;
      while assigned(paraloc) do
        begin
          case paraloc^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,paraloc^.size,ioffset,paraloc^.register)
                else
                  begin
                    load_ioffset_into_r4;
                    a_op_reg_reg(list,OP_SUB,paraloc^.size,NR_R4,paraloc^.register);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(href,paraloc^.reference.index,
                  paraloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,paraloc^.size,ioffset,href)
                else
                  begin
                    load_ioffset_into_r4;
                    a_op_reg_ref(list,OP_SUB,paraloc^.size,NR_R4,href);
                    list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                  end;
              end
            else
              internalerror(200309189);
          end;
          paraloc:=paraloc^.next;
        end;
    end;
  { Thumb front end for the inherited handle_load_store.

    When the reference is rejected by one of the checks below (offset out
    of the checked range, stack pointer used with a byte/halfword access,
    signed byte/halfword load, ...) its address is first computed into a
    fresh register and the access is done through that register with
    offset 0.  Otherwise the reference is passed on unchanged.

    list      - instruction list the code is appended to
    op        - load/store opcode
    oppostfix - size/sign postfix of the opcode
    reg       - register that is loaded or stored
    ref       - memory operand
    Result    - whatever the inherited handle_load_store returns }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      href : treference;
      addrreg : TRegister;
      plain_word,
      byte_access,
      half_access,
      uses_sp,
      via_register : boolean;
    begin
      href:=ref;

      { classify the access }
      plain_word:=((op=A_LDR) or (op=A_STR)) and (oppostfix=PF_None);
      byte_access:=((op=A_LDR) and (oppostfix=PF_B)) or
                   ((op=A_LDRB) and (oppostfix=PF_None)) or
                   ((op=A_STR) and (oppostfix=PF_B)) or
                   ((op=A_STRB) and (oppostfix=PF_None));
      half_access:=((op=A_LDR) and (oppostfix=PF_H)) or
                   ((op=A_LDRH) and (oppostfix=PF_None)) or
                   ((op=A_STR) and (oppostfix=PF_H)) or
                   ((op=A_STRH) and (oppostfix=PF_None));
      uses_sp:=(ref.base=NR_STACK_POINTER_REG) or
               (ref.index=NR_STACK_POINTER_REG);

      via_register:=
        { LDR/STR limitations }
        (plain_word and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         (uses_sp or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (uses_sp or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { word load relative to the stack pointer }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { signed byte/halfword loads and any other load with a big offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if via_register then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,addrreg);
          reference_reset_base(href,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  { Emits "dst := dst op src" (or "dst := op src" for the unary operations
    OP_NEG and OP_NOT) using two-operand Thumb instructions.

    list - instruction list the code is appended to
    Op   - operation to perform; OP_DIV/OP_IDIV are not supported here and
           raise internalerror 200308284
    size - operand size; OP_ROL accepts 32 bit sizes only
           (internalerror 2008072801 otherwise)
    src  - source register
    dst  - destination register, also the first operand of binary
           operations

    The result is passed to maybeadjustresult afterwards. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing by (32-src): tmpreg:=32-src }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { the rotate count must be the computed complement in tmpreg;
              rotating by src itself would be a plain rotate right }
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            { the generic path marks the flags register as allocated
              before emitting the instruction }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := dst op a" for the constant a.

    Additions and subtractions whose (possibly negated) constant is
    accepted by is_thumb_imm use an immediate instruction; a few trivial
    multiplications, divisions and ANDs are simplified; shifts use the
    immediate shift form; everything else loads the constant into a
    temporary register and falls back to a_op_reg_reg.

    The sections enclosed in DUMMY conditionals are disabled code kept
    from the original.

    list - instruction list the code is appended to
    op   - operation to perform
    size - operand size
    a    - constant operand
    dst  - register operand and destination

    The result is passed to maybeadjustresult afterwards. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { if -a is encodable, turn add into sub and vice versa }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) and
         (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if (op in [OP_ADD,OP_SUB]) and is_thumb_imm(a) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
          { the original code has a placeholder here for reporting the
            overflow location (//!!! ovloc: F_CS for OP_ADD, F_CC for
            OP_SUB, when cgsetflags is set and size is OS_8/OS_16/OS_32);
            it performs no action }
        end
      { there could be added some more sophisticated optimizations }
      else if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
        a_load_reg_reg(list,size,size,dst,dst)
      else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
        a_load_const_reg(list,size,0,dst)
      else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
        a_op_reg_reg(list,OP_NEG,size,dst,dst)
      { we do this here instead in the peephole optimizer because
        it saves us a register }
{$ifdef DUMMY}
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
        a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
      { for example : b=a*5 -> b=a*4+a with add instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{roozbeh does this ever happen?}
            internalerror(200308296);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
        end
      { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
      else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
        begin
          if l1>32 then{does this ever happen?}
            internalerror(201205181);
          shifterop_reset(so);
          so.shiftmode:=SM_LSL;
          so.shiftimm:=l1;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
        end
      else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
        begin
          { nothing to do on success }
        end
{$endif DUMMY}
      { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
        Just using mov x, #0 might allow some easier optimizations down the line. }
      else if (op = OP_AND) and (dword(a)=0) then
        list.concat(taicpu.op_reg_const(A_MOV,dst,0))
      { x := x AND $FFFFFFFF leaves the register unchanged }
      else if (op = OP_AND) and (not(dword(a))=0) then
        begin
          { do nothing }
        end
      { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
        broader range of shifterconstants.}
{$ifdef DUMMY}
      else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
        list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
      else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
        end
      else if (op in [OP_ADD, OP_SUB, OP_OR]) and
              not(cgsetflags or setflags) and
              split_into_shifter_const(a, imm1, imm2) then
        begin
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
          list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
        end
{$endif DUMMY}
      else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
        { shift by an immediate amount }
        list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a))
      else
        begin
          { general case: materialise the constant and use the
            register/register form }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg(list,op,size,constreg,dst);
        end;

      maybeadjustresult(list,op,size,dst);
    end;
  { Emits "dst := src op a".

    An addition of a positive multiple of 4 (at most 1020) to R13 with a
    destination other than R13 is emitted as a single three operand ADD;
    every other case is delegated to the inherited implementation.

    list - instruction list the code is appended to
    op   - operation to perform
    size - operand size
    a    - constant operand
    src  - source register
    dst  - destination register }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      sp_plus_imm : boolean;
    begin
      sp_plus_imm:=(op=OP_ADD) and
                   (src=NR_R13) and (dst<>NR_R13) and
                   ((a mod 4)=0) and (a>0) and (a<=1020);
      if not sp_plus_imm then
        begin
          inherited a_op_const_reg_reg(list,op,size,a,src,dst);
          exit;
        end;
      list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Converts the condition described by f into 0 or 1 in reg.

    This is done with a conditional branch around two MOV instructions:
    reg receives 1 if the condition holds and 0 otherwise.

    list - instruction list the code is appended to
    size - not used by this implementation
    f    - flags/condition to materialise
    reg  - destination register }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      lbl_true,lbl_done : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(lbl_true);
      current_asmdata.getjumplabel(lbl_done);

      { jump to lbl_true if the condition holds }
      condjump:=setcondition(taicpu.op_sym(A_B,lbl_true),flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition false: reg:=0 and skip the true part }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,lbl_done));

      { condition true: reg:=1 }
      cg.a_label(list,lbl_true);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));

      { the flags are no longer needed from here on }
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,lbl_done);
    end;
  { Creates the integer, FPU and multimedia register allocators used for
    Thumb-2 code.  The integer register list depends on the target system,
    the multimedia register list on the selected FPU type. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_darwin then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      case current_settings.fputype of
        fpu_vfpv3,
        fpu_vfpv4:
          { D0..D31 are available; D8..D15 are listed last }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
              [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
               RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
              ],first_mm_imreg,[]);
        fpu_fpv4_s16,
        fpu_vfpv3_d16:
          { only D0..D15 }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
              [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
              ],first_mm_imreg,[]);
        else
          { NOTE(review): RS_R2, RS_R3 and RS_R4 are integer register
            names inside a multimedia register list -- possibly RS_S2..RS_S4
            were intended; confirm before changing }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
              [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
      end;
    end;
  { Releases the integer, FPU and multimedia register allocators created by
    init_register_allocators and then lets the inherited implementation
    finish the clean-up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { integer registers }
      rg[R_INTREGISTER].free;
      { FPU registers }
      rg[R_FPUREGISTER].free;
      { multimedia/vector registers }
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg using BLX and records in the flags
    of the current procedure that it performs a call.

    list - instruction list the code is appended to
    reg  - register holding the address to call }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      list.concat(taicpu.op_reg(A_BLX, reg));
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      { hence the flag is set here instead of being checked }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg.

    In order of preference: MOV if a is accepted by is_thumb32_imm, MVN if
    its complement is, MOVW if a fits into 16 bits, and otherwise a PC
    relative LDR from a constant placed in the local data of the current
    procedure.

    list - instruction list the code is appended to
    size - operand size; only 8, 16 and 32 bit sizes are accepted
    a    - constant to load
    reg  - destination register }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);

    { stores a in the local data and loads it relative to the PC }
    procedure load_from_local_data;
      var
        poollabel : tasmlabel;
        poolref : treference;
      begin
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        { remember the label entry just added to the local data }
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
      end;

    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        list.concat(taicpu.op_reg_const(A_MOV,reg,a))
      else if is_thumb32_imm(not(a)) then
        list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
      else if (a and $FFFF)=a then
        list.concat(taicpu.op_reg_const(A_MOVW,reg,a))
      else
        load_from_local_data;
    end;
  { Loads a value of size fromsize from memory reference Ref into reg.

    If the reference is known to be less aligned (1 or 2 bytes) than the
    access size, the value is assembled from several narrower loads that
    are merged with ORR and a shifted operand; otherwise a single load is
    emitted via handle_load_store.

    list     - instruction list the code is appended to
    fromsize - size of the value in memory; replaced by tosize when it is
               not smaller than tosize
    tosize   - size of the destination
    Ref      - memory operand
    reg      - destination register }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      shiftop : tshifterop;
      step,part : integer;

    { makes partref address the operand: simple references with an offset
      in -255..maxoffset are used as they are, complicated ones are first
      turned into a plain base register }
    procedure init_partref(maxoffset : longint);
      begin
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-255) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    { advances partref by delta bytes, loads the next part into partreg and
      ORs it into reg, shifted left by shift bits }
    procedure merge_next_part(delta : integer;partsize : tcgsize;shift : byte);
      begin
        inc(partref.offset,delta);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
        shiftop.shiftimm:=shift;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix matching the source size }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308299);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        { sufficiently aligned: a single load does the job }
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { the least significant part is loaded first; step is the
            direction in which the following parts are found in memory }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          { all parts are merged with a logical shift left }
          shifterop_reset(shiftop);
          shiftop.shiftmode:=SM_LSL;

          case FromSize of
            OS_16,OS_S16:
              begin
                init_partref(4094);
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                { low byte, always zero extended }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { high byte, sign extended for a signed source }
                if FromSize=OS_16 then
                  merge_next_part(step,OS_8,8)
                else
                  merge_next_part(step,OS_S8,8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                init_partref(4092);
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    merge_next_part(step*2,OS_16,16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    for part:=1 to 3 do
                      merge_next_part(step,OS_8,8*part);
                  end;
              end;
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Emits "dst := dst op src".

    OP_NOT is handled here: the complement is computed with MVN and, for 8
    and 16 bit sizes, extended again from that size (zero extension for
    unsigned, sign extension for signed sizes).  All other operations are
    delegated to the inherited implementation.

    list - instruction list the code is appended to
    Op   - operation to perform
    size - operand size
    src  - source register
    dst  - destination register }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { bring the result back into the range of sub-word sizes }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
      end;
    end;
  { Emits Thumb-2 code for "dst := src <op> a" where a is a constant.
    list     : instruction list the generated code is appended to
    op, size : operation and operand size
    a        : constant operand
    src, dst : source and destination registers
    setflags : request a flag-setting form (combined with the cgsetflags field)
    ovloc    : reset to LOC_VOID on entry; the immediate-operand path sets it to
               LOC_FLAGS when flags were requested and size is OS_8/OS_16/OS_32,
               and the generic fallback hands it to a_op_reg_reg_reg_checkoverflow.
    maybeadjustresult is called on dst once code has been emitted. }
  4063. procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  4064. var
  4065. shift, width : byte;
  4066. tmpreg : tregister;
  4067. so : tshifterop;
  4068. l1 : longint;
  4069. begin
  4070. ovloc.loc:=LOC_VOID;
  { If the negated constant passes is_shifter_const, turn ADD into SUB (and
    vice versa) with the negated value. When built with range checking the
    most negative 32 bit value is excluded before negating.
    NOTE(review): is_shifter_const is not visible here; presumably it tests
    whether the value can be encoded as an immediate operand - confirm. }
  4071. if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
  4072. case op of
  4073. OP_ADD:
  4074. begin
  4075. op:=OP_SUB;
  4076. a:=aint(dword(-a));
  4077. end;
  4078. OP_SUB:
  4079. begin
  4080. op:=OP_ADD;
  4081. a:=aint(dword(-a));
  4082. end
  4083. end;
  { First main path: the constant passes is_shifter_const and the operation is
    not a multiplication. }
  4084. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  4085. case op of
  { these operations are never expected with a constant operand here }
  4086. OP_NEG,OP_NOT,
  4087. OP_DIV,OP_IDIV:
  4088. internalerror(200308285);
  { Shifts and rotates are emitted as MOV with a shifter operand; a count of
    zero degenerates into a plain register move and counts above 32 are
    rejected with an internal error. }
  4089. OP_SHL:
  4090. begin
  4091. if a>32 then
  4092. internalerror(2014020703);
  4093. if a<>0 then
  4094. begin
  4095. shifterop_reset(so);
  4096. so.shiftmode:=SM_LSL;
  4097. so.shiftimm:=a;
  4098. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4099. end
  4100. else
  4101. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4102. end;
  { rotate left by a is emitted as rotate right by 32-a }
  4103. OP_ROL:
  4104. begin
  4105. if a>32 then
  4106. internalerror(2014020704);
  4107. if a<>0 then
  4108. begin
  4109. shifterop_reset(so);
  4110. so.shiftmode:=SM_ROR;
  4111. so.shiftimm:=32-a;
  4112. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4113. end
  4114. else
  4115. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4116. end;
  4117. OP_ROR:
  4118. begin
  4119. if a>32 then
  4120. internalerror(2014020705);
  4121. if a<>0 then
  4122. begin
  4123. shifterop_reset(so);
  4124. so.shiftmode:=SM_ROR;
  4125. so.shiftimm:=a;
  4126. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4127. end
  4128. else
  4129. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4130. end;
  4131. OP_SHR:
  4132. begin
  4133. if a>32 then
  4134. internalerror(200308292);
  4135. shifterop_reset(so);
  4136. if a<>0 then
  4137. begin
  4138. so.shiftmode:=SM_LSR;
  4139. so.shiftimm:=a;
  4140. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4141. end
  4142. else
  4143. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4144. end;
  4145. OP_SAR:
  4146. begin
  4147. if a>32 then
  4148. internalerror(200308295);
  4149. if a<>0 then
  4150. begin
  4151. shifterop_reset(so);
  4152. so.shiftmode:=SM_ASR;
  4153. so.shiftimm:=a;
  4154. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  4155. end
  4156. else
  4157. list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
  4158. end;
  { All remaining operations. ADD/SUB with a constant outside 0..4095 load the
    constant into a temporary register and use the three-register form; every
    other case uses the register/register/immediate form. The S postfix is
    applied when cgsetflags or setflags is set, in which case the flags
    register is marked as allocated first. }
  4159. else
  4160. if (op in [OP_SUB, OP_ADD]) and
  4161. ((a < 0) or
  4162. (a > 4095)) then
  4163. begin
  4164. tmpreg:=getintregister(list,size);
  4165. a_load_const_reg(list, size, a, tmpreg);
  4166. if cgsetflags or setflags then
  4167. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4168. list.concat(setoppostfix(
  4169. taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4170. end
  4171. else
  4172. begin
  4173. if cgsetflags or setflags then
  4174. a_reg_alloc(list,NR_DEFAULTFLAGS);
  4175. list.concat(setoppostfix(
  4176. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  4177. end;
  { report where the overflow condition can be found: F_CS after an addition,
    F_CC after a subtraction; resflags is left untouched for other operations }
  4178. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  4179. begin
  4180. ovloc.loc:=LOC_FLAGS;
  4181. case op of
  4182. OP_ADD:
  4183. ovloc.resflags:=F_CS;
  4184. OP_SUB:
  4185. ovloc.resflags:=F_CC;
  4186. end;
  4187. end;
  4188. end
  { Second main path: multiplications and constants that did not pass
    is_shifter_const. A chain of special cases is tried in order before
    falling back to loading the constant into a register. }
  4189. else
  4190. begin
  4191. { there could be added some more sophisticated optimizations }
  4192. if (op in [OP_MUL,OP_IMUL]) and (a=1) then
  4193. a_load_reg_reg(list,size,size,src,dst)
  4194. else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
  4195. a_load_const_reg(list,size,0,dst)
  4196. else if (op in [OP_IMUL]) and (a=-1) then
  4197. a_op_reg_reg(list,OP_NEG,size,src,dst)
  4198. { we do this here instead in the peephole optimizer because
  4199. it saves us a register }
  4200. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  4201. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  4202. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  4203. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  4204. begin
  4205. if l1>32 then{roozbeh does this ever happen?}
  4206. internalerror(200308296);
  4207. shifterop_reset(so);
  4208. so.shiftmode:=SM_LSL;
  4209. so.shiftimm:=l1;
  4210. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  4211. end
  4212. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  4213. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  4214. begin
  4215. if l1>32 then{does this ever happen?}
  4216. internalerror(201205181);
  4217. shifterop_reset(so);
  4218. so.shiftmode:=SM_LSL;
  4219. so.shiftimm:=l1;
  4220. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  4221. end
  4222. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  4223. begin
  4224. { nothing to do on success }
  4225. end
  4226. { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
  4227. Just using mov x, #0 might allow some easier optimizations down the line. }
  4228. else if (op = OP_AND) and (dword(a)=0) then
  4229. list.concat(taicpu.op_reg_const(A_MOV,dst,0))
  4230. { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
  4231. else if (op = OP_AND) and (not(dword(a))=0) then
  4232. list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
  4233. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  4234. broader range of shifterconstants.}
  4235. {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  4236. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
  { AND with a mask accepted by is_thumb32_imm is emitted directly }
  4237. else if (op = OP_AND) and is_thumb32_imm(a) then
  4238. list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
  { AND with $FFFF is emitted as UXTH }
  4239. else if (op = OP_AND) and (a = $FFFF) then
  4240. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  { if the inverted mask is accepted by is_thumb32_imm, use BIC with it }
  4241. else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
  4242. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  { if is_continuous_mask accepts the inverted mask, copy src to dst and emit
    BFC with the shift/width it returned }
  4243. else if (op = OP_AND) and is_continuous_mask(not(a), shift, width) then
  4244. begin
  4245. a_load_reg_reg(list,size,size,src,dst);
  4246. list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
  4247. end
  { generic fallback: materialise the constant in a register and use the
    register/register/register variant }
  4248. else
  4249. begin
  4250. tmpreg:=getintregister(list,size);
  4251. a_load_const_reg(list,size,a,tmpreg);
  4252. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  4253. end;
  4254. end;
  4255. maybeadjustresult(list,op,size,dst);
  4256. end;
  { Lookup table from a generic TOpCG operation to the Thumb-2 opcode that
    tthumb2cgarm.a_op_reg_reg_reg_checkoverflow emits for its default
    three-register form. A_NONE marks operations without an entry.
    NOTE(review): the entry order has to match the declaration order of TOpCG,
    which is not visible here - verify before editing. }
  4257. const
  4258. op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
  4259. (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
  4260. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
  { Emits Thumb-2 code for "dst := src2 <op> src1" on registers.
    list       : instruction list the generated code is appended to
    op, size   : operation and operand size; OP_NEG/OP_NOT are rejected with
                 an internal error, rotates require a 32 bit size
    src1, src2 : source registers (src1 is the right-hand operand / shift count)
    dst        : destination register
    setflags   : request flag-setting code (combined with the cgsetflags field)
    ovloc      : reset to LOC_VOID on entry; for flag-setting multiplications
                 it is set to LOC_FLAGS/F_NE, i.e. "not equal" signals that
                 the product did not fit
    maybeadjustresult is called on dst once code has been emitted. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value; the rotate count is computed
              in a scratch register so that the caller's src1 is not modified }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: the high word lands in overflowreg and is
                  inspected afterwards to detect overflow }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: work
                          on a copy of the operand held in a scratch register }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension of
                      the low word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: work
                          on a copy of the operand held in a scratch register }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { every other operation maps directly onto one three-register
              instruction taken from op_reg_reg_opcg2asmopThumb2 }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the condition described by f as 0/1 in reg.
    Emits an ITE instruction for the condition followed by two conditional
    moves: reg:=1 under the condition and reg:=0 under its inverse. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      ite_instr,
      set_one,
      set_zero : taicpu;
    begin
      { build the three instructions first ... }
      ite_instr:=taicpu.op_cond(A_ITE, flags_to_cond(f));
      set_one:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      set_zero:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
      { ... then append them in IT-block order: ITE, "then" move, "else" move }
      list.concat(ite_instr);
      list.concat(set_one);
      list.concat(set_zero);
    end;
  { Emits the procedure prologue for Thumb-2.
    list         : instruction list the prologue is appended to
    localsize    : size of the local area; rounded up to a multiple of 4 and,
                   when the stack pointer is adjusted, padded so that the frame
                   honours current_settings.alignment.localalignmax
    nostackframe : when true no prologue code is emitted at all
    Steps, in order: determine the range of floating point registers to save,
    optionally copy SP to R12, push the integer registers, set up the frame
    pointer, lower SP for the locals and finally store the floating point
    registers. }
  4376. procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  4377. var
  4378. ref : treference;
  4379. shift : byte;
  4380. firstfloatreg,lastfloatreg,
  4381. r : byte;
  4382. regs : tcpuregisterset;
  4383. stackmisalignment: pint;
  4384. begin
  4385. LocalSize:=align(LocalSize,4);
  4386. { call instruction does not put anything on the stack }
  4387. stackmisalignment:=0;
  4388. if not(nostackframe) then
  4389. begin
  4390. firstfloatreg:=RS_NO;
  4391. lastfloatreg:=RS_NO;
  4392. { save floating point registers? }
  { every used, non-volatile FPU register widens the first..last range and
    adds 12 bytes to the running stack misalignment }
  4393. for r:=RS_F0 to RS_F7 do
  4394. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4395. begin
  4396. if firstfloatreg=RS_NO then
  4397. firstfloatreg:=r;
  4398. lastfloatreg:=r;
  4399. inc(stackmisalignment,12);
  4400. end;
  4401. a_reg_alloc(list,NR_STACK_POINTER_REG);
  { with a separate frame pointer, keep the entry value of SP in R12 so the
    frame pointer can be derived from it after the registers were pushed }
  4402. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4403. begin
  4404. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  4405. a_reg_alloc(list,NR_R12);
  4406. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  4407. end;
  4408. { save int registers }
  4409. reference_reset(ref,4,[]);
  4410. ref.index:=NR_STACK_POINTER_REG;
  4411. ref.addressmode:=AM_PREINDEXED;
  { registers to push: the used non-volatile ones, plus the frame pointer and
    R14 when a frame pointer is in use, or plus R14 alone when anything is
    pushed or the procedure performs calls }
  4412. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4413. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4414. regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
  4415. else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  4416. include(regs,RS_R14);
  4417. if regs<>[] then
  4418. begin
  { each pushed register moves SP by 4 bytes }
  4419. for r:=RS_R0 to RS_R15 do
  4420. if (r in regs) then
  4421. inc(stackmisalignment,4);
  4422. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4423. end;
  4424. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  4425. begin
  4426. { the framepointer now points to the saved R15, so the saved
  4427. framepointer is at R11-12 (for get_caller_frame) }
  4428. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  4429. a_reg_dealloc(list,NR_R12);
  4430. end;
  { only the remainder relative to the maximum local alignment matters }
  4431. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  { SP is lowered when there are locals, or when the stack is misaligned and
    the procedure either calls other code or is an assembler routine }
  4432. if (LocalSize<>0) or
  4433. ((stackmisalignment<>0) and
  4434. ((pi_do_call in current_procinfo.flags) or
  4435. (po_assembler in current_procinfo.procdef.procoptions))) then
  4436. begin
  4437. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { sizes rejected by is_shifter_const are loaded into R12 first and
    subtracted from SP as a register, otherwise the immediate form is used }
  4438. if not(is_shifter_const(localsize,shift)) then
  4439. begin
  4440. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  4441. a_reg_alloc(list,NR_R12);
  4442. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4443. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  4444. a_reg_dealloc(list,NR_R12);
  4445. end
  4446. else
  4447. begin
  4448. a_reg_dealloc(list,NR_R12);
  4449. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  4450. end;
  4451. end;
  { store the floating point registers found above with a single SFM at
    floatregstart relative to the frame pointer; when
    tg.direction*floatregstart is 1023 or more the address is computed into
    R12 first instead of being used as an offset }
  4452. if firstfloatreg<>RS_NO then
  4453. begin
  4454. reference_reset(ref,4,[]);
  4455. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4456. begin
  4457. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4458. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4459. ref.base:=NR_R12;
  4460. end
  4461. else
  4462. begin
  4463. ref.base:=current_procinfo.framepointer;
  4464. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4465. end;
  4466. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4467. lastfloatreg-firstfloatreg+1,ref));
  4468. end;
  4469. end;
  4470. end;
  { Emits the procedure epilogue for Thumb-2.
    list         : instruction list the epilogue is appended to
    parasize     : not used by this implementation
    nostackframe : when true only "BX R14" is emitted
    Otherwise the floating point registers are reloaded, SP is raised by the
    (alignment padded) frame size obtained from calc_stackframe_size and the
    saved integer registers are popped; if there is nothing to pop the
    routine returns with "BX R14". }
  4471. procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  4472. var
  4473. ref : treference;
  4474. firstfloatreg,lastfloatreg,
  4475. r : byte;
  4476. shift : byte;
  4477. regs : tcpuregisterset;
  4478. LocalSize : longint;
  4479. stackmisalignment: pint;
  4480. begin
  4481. if not(nostackframe) then
  4482. begin
  4483. stackmisalignment:=0;
  4484. { restore floating point register }
  4485. firstfloatreg:=RS_NO;
  4486. lastfloatreg:=RS_NO;
  4487. { determine the range of used, non-volatile floating point registers to restore }
  4488. for r:=RS_F0 to RS_F7 do
  4489. if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
  4490. begin
  4491. if firstfloatreg=RS_NO then
  4492. firstfloatreg:=r;
  4493. lastfloatreg:=r;
  4494. { floating point register space is already included in
  4495. localsize below by calc_stackframe_size
  4496. inc(stackmisalignment,12);
  4497. }
  4498. end;
  { reload the registers with a single LFM from floatregstart relative to the
    frame pointer; when tg.direction*floatregstart is 1023 or more the address
    is computed into R12 first instead of being used as an offset }
  4499. if firstfloatreg<>RS_NO then
  4500. begin
  4501. reference_reset(ref,4,[]);
  4502. if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
  4503. begin
  4504. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  4505. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  4506. ref.base:=NR_R12;
  4507. end
  4508. else
  4509. begin
  4510. ref.base:=current_procinfo.framepointer;
  4511. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  4512. end;
  4513. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  4514. lastfloatreg-firstfloatreg+1,ref));
  4515. end;
  { registers to pop: the used non-volatile ones; R15 replaces R14 in the set
    when the procedure performs calls or anything was saved, and the frame
    pointer plus R15 are added when a frame pointer is in use }
  4516. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  4517. if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
  4518. begin
  4519. exclude(regs,RS_R14);
  4520. include(regs,RS_R15);
  4521. end;
  4522. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  4523. regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
  { 4 bytes per popped register, reduced modulo the maximum local alignment }
  4524. for r:=RS_R0 to RS_R15 do
  4525. if (r in regs) then
  4526. inc(stackmisalignment,4);
  4527. stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
  4528. LocalSize:=current_procinfo.calc_stackframe_size;
  { SP is raised under the same condition used for lowering it: there are
    locals, or the stack is misaligned and the procedure calls other code or
    is an assembler routine }
  4529. if (LocalSize<>0) or
  4530. ((stackmisalignment<>0) and
  4531. ((pi_do_call in current_procinfo.flags) or
  4532. (po_assembler in current_procinfo.procdef.procoptions))) then
  4533. begin
  4534. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { sizes rejected by is_shifter_const go through R12, others are added as
    an immediate }
  4535. if not(is_shifter_const(LocalSize,shift)) then
  4536. begin
  4537. a_reg_alloc(list,NR_R12);
  4538. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  4539. list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
  4540. a_reg_dealloc(list,NR_R12);
  4541. end
  4542. else
  4543. begin
  4544. a_reg_dealloc(list,NR_R12);
  4545. list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
  4546. end;
  4547. end;
  { nothing saved: plain return through R14; otherwise pop the register set
    (which contains R15 whenever it is non-empty) with LDMFD }
  4548. if regs=[] then
  4549. list.concat(taicpu.op_reg(A_BX,NR_R14))
  4550. else
  4551. begin
  4552. reference_reset(ref,4,[]);
  4553. ref.index:=NR_STACK_POINTER_REG;
  4554. ref.addressmode:=AM_PREINDEXED;
  4555. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  4556. end;
  4557. end
  4558. else
  4559. list.concat(taicpu.op_reg(A_BX,NR_R14));
  4560. end;
  { Emits the load/store instruction op (with postfix oppostfix) transferring
    reg from/to the memory location ref, after rewriting ref into a form the
    instruction can take.
    list      : instruction list the generated code is appended to
    op        : load/store opcode to emit
    oppostfix : postfix applied to the emitted instruction
    reg       : register that is loaded or stored
    ref       : reference to access; passed by value, so the caller's copy is
                not modified
    Returns the reference that was finally used in the instruction. Symbols
    and out-of-range offsets are moved into a temporary register; for
    symbols a constant is additionally appended to
    current_procinfo.aktlocaldata. }
  4561. function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
  4562. var
  4563. tmpreg : tregister;
  4564. tmpref : treference;
  4565. l : tasmlabel;
  4566. begin
  4567. tmpreg:=NR_NO;
  4568. { Be sure to have a base register }
  4569. if (ref.base=NR_NO) then
  4570. begin
  { an index without a base may only be promoted to the base when it is
    not shifted }
  4571. if ref.shiftmode<>SM_None then
  4572. internalerror(2014020706);
  4573. ref.base:=ref.index;
  4574. ref.index:=NR_NO;
  4575. end;
  4576. { absolute symbols can't be handled directly, we've to store the symbol reference
  4577. in the text segment and access it pc relative
  4578. For now, we assume that references where base or index equals to PC are already
  4579. relative, all other references are assumed to be absolute and thus they need
  4580. to be handled extra.
  4581. A proper solution would be to change refoptions to a set and store the information
  4582. if the symbol is absolute or relative there.
  4583. }
  { The reference is rewritten when any of the following holds: it carries a
    symbol that is not PC relative, it has neither base nor index, its offset
    lies outside -255..4095 (outside -255..255 for the SB/H/SH postfixes), or
    - for the floating point/vector opcodes - the offset lies outside
    -1020..1020, is not a multiple of 4, or a symbol is present. }
  4584. if (assigned(ref.symbol) and
  4585. not(is_pc(ref.base)) and
  4586. not(is_pc(ref.index))
  4587. ) or
  4588. { [#xxx] isn't a valid address operand }
  4589. ((ref.base=NR_NO) and (ref.index=NR_NO)) or
  4590. //(ref.offset<-4095) or
  4591. (ref.offset<-255) or
  4592. (ref.offset>4095) or
  4593. ((oppostfix in [PF_SB,PF_H,PF_SH]) and
  4594. ((ref.offset<-255) or
  4595. (ref.offset>255)
  4596. )
  4597. ) or
  4598. (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
  4599. ((ref.offset<-1020) or
  4600. (ref.offset>1020) or
  4601. ((abs(ref.offset) mod 4)<>0) or
  4602. { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
  4603. assigned(ref.symbol)
  4604. )
  4605. ) then
  4606. begin
  4607. reference_reset(tmpref,4,[]);
  4608. { load symbol }
  4609. tmpreg:=getintregister(list,OS_INT);
  4610. if assigned(ref.symbol) then
  4611. begin
  { place "symbol+offset" behind a fresh label in the procedure's local data
    and load that word PC relative into tmpreg }
  4612. current_asmdata.getjumplabel(l);
  4613. cg.a_label(current_procinfo.aktlocaldata,l);
  4614. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  4615. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
  4616. { load consts entry }
  4617. tmpref.symbol:=l;
  4618. tmpref.base:=NR_R15;
  4619. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  4620. { in case of LDF/STF, we got rid of the NR_R15 }
  4621. if is_pc(ref.base) then
  4622. ref.base:=NR_NO;
  4623. if is_pc(ref.index) then
  4624. ref.index:=NR_NO;
  4625. end
  4626. else
  { no symbol: only the offset has to be materialised }
  4627. a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
  { merge tmpreg into the reference: added to the base when an index is
    already present, used as the index when only a base exists, or used as
    the base when the reference had no register at all }
  4628. if (ref.base<>NR_NO) then
  4629. begin
  4630. if ref.index<>NR_NO then
  4631. begin
  4632. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4633. ref.base:=tmpreg;
  4634. end
  4635. else
  4636. begin
  4637. ref.index:=tmpreg;
  4638. ref.shiftimm:=0;
  4639. ref.signindex:=1;
  4640. ref.shiftmode:=SM_None;
  4641. end;
  4642. end
  4643. else
  4644. ref.base:=tmpreg;
  { offset and symbol are now contained in tmpreg }
  4645. ref.offset:=0;
  4646. ref.symbol:=nil;
  4647. end;
  { base, index and a non-zero offset cannot be combined: fold the offset
    into tmpreg if one exists, otherwise into a fresh copy of the base }
  4648. if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
  4649. begin
  4650. if tmpreg<>NR_NO then
  4651. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
  4652. else
  4653. begin
  4654. tmpreg:=getintregister(list,OS_ADDR);
  4655. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
  4656. ref.base:=tmpreg;
  4657. end;
  4658. ref.offset:=0;
  4659. end;
  4660. { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
  4661. if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
  4662. begin
  4663. tmpreg:=getintregister(list,OS_ADDR);
  4664. list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
  4665. ref.base := tmpreg;
  4666. end;
  4667. { floating point operations have only limited references
  4668. we expect here, that a base is already set }
  4669. if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
  4670. begin
  { the index register is folded into the base (added, or subtracted when
    signindex is negative) so that only a base register remains }
  4671. if ref.shiftmode<>SM_none then
  4672. internalerror(200309121);
  4673. if tmpreg<>NR_NO then
  4674. begin
  4675. if ref.base=tmpreg then
  4676. begin
  4677. if ref.signindex<0 then
  4678. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
  4679. else
  4680. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
  4681. ref.index:=NR_NO;
  4682. end
  4683. else
  4684. begin
  { tmpreg exists but is not the base, so it has to be the index }
  4685. if ref.index<>tmpreg then
  4686. internalerror(200403161);
  4687. if ref.signindex<0 then
  4688. list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
  4689. else
  4690. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  4691. ref.base:=tmpreg;
  4692. ref.index:=NR_NO;
  4693. end;
  4694. end
  4695. else
  4696. begin
  { NOTE(review): this branch always adds the index and does not look at
    ref.signindex, unlike the two branches above - confirm this is intended }
  4697. tmpreg:=getintregister(list,OS_ADDR);
  4698. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
  4699. ref.base:=tmpreg;
  4700. ref.index:=NR_NO;
  4701. end;
  4702. end;
  { finally emit the access itself and hand back the reference used }
  4703. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  4704. Result := ref;
  4705. end;
  { Register to register move between multimedia/VFP registers.
    Only the single precision case (OS_F32 to OS_F32) generates code: a
    VMOV.F32 reg2,reg1 that is also registered as a move instruction.
    The OS_F64 to OS_F64 move and the OS_F32 to OS_F64 conversion are
    currently disabled, so they - like every other size combination -
    emit nothing. shuffle is not used. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
    end;
  { Loads the multimedia register reg from the memory location ref by
    emitting VLDR through handle_load_store. fromsize, tosize and shuffle
    are not used, and the reference returned by handle_load_store is
    discarded. }
  4730. procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  4731. begin
  4732. handle_load_store(list,A_VLDR,PF_None,reg,ref);
  4733. end;
  { Stores the multimedia register reg to the memory location ref by
    emitting VSTR through handle_load_store. fromsize, tosize and shuffle
    are not used, and the reference returned by handle_load_store is
    discarded. }
  4734. procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  4735. begin
  4736. handle_load_store(list,A_VSTR,PF_None,reg,ref);
  4737. end;
  { Transfers an integer register into a multimedia register with VMOV.
    Only a destination size of OS_F32 is supported; anything else raises
    internal error 2012100813. fromsize and shuffle are not inspected
    (the shuffle=nil check is disabled). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers a multimedia register into an integer register with VMOV.
    Only a source size of OS_F32 is supported; anything else raises
    internal error 2012100814. tosize and shuffle are not inspected
    (the shuffle=nil check is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { 64 bit register/register operation for Thumb-2.
    Only OP_NEG is implemented here; all other operations are delegated to
    the inherited implementation. The negation is computed as 0-regsrc:
    a flag-setting RSB on the low words followed by an SBC of the high word
    from a zeroed scratch register. The flags register is marked allocated
    for the duration of the sequence. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op<>OP_NEG then
        begin
          inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
          exit;
        end;

      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      { low word: regdst.reglo := 0 - regsrc.reglo, setting the flags }
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
      { high word: regdst.reghi := 0 - regsrc.reghi with the carry from above }
      zeroreg:=cg.getintregister(list,OS_32);
      list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
      list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { 64 bit register/register operation "regdst := regdst <op> regsrc" for
    Thumb (two-operand instruction forms).
    list   : instruction list the generated code is appended to
    op     : OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any other
             operation raises internal error 2003083101
    size   : not inspected; the halves are always processed as 32 bit words
    regsrc : source register pair
    regdst : destination register pair
    Every sequence that depends on the carry marks the flags register as
    allocated for exactly the duration of the sequence. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { computed as 0 - regsrc.
              NOTE(review): regdst is zeroed before regsrc is read, so this
              yields 0 if both pairs name the same registers - confirm that
              callers never pass aliased pairs }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { the carry has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { the borrow has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { 64 bit operation "reg := reg <op> value" with a constant operand for Thumb.
    list  : instruction list the generated code is appended to
    op    : OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any other operation
            raises internal error 2003083101
    size  : not inspected; the halves are always processed as 32 bit words
    value : 64 bit constant, split with lo()/hi() and converted with aint()
    reg   : register pair that is both source and destination
    For ADD/SUB a low word in 0..255 is used as an immediate, otherwise it is
    loaded into a scratch register; the high word always goes through a
    scratch register and is combined with ADC/SBC. The flags register is
    marked allocated from the low-word operation until the carry has been
    consumed. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_ADD,reg.reglo,aint(lo(value))));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_ADD,reg.reglo,tmpreg));
              end;
            { NOTE(review): the high word is loaded between the low-word
              operation and ADC, so this relies on a_load_const_reg leaving
              the carry untouched - confirm }
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(taicpu.op_reg_reg(A_ADC,reg.reghi,tmpreg));
            { the carry has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            if (aint(lo(value))>=0) and (aint(lo(value))<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(A_SUB,reg.reglo,aint(lo(value))))
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(A_SUB,reg.reglo,tmpreg));
              end;
            { NOTE(review): same carry assumption as in the OP_ADD case.
              The high word is converted with aint() exactly like everywhere
              else in this routine so that values with the top bit set are
              passed as the same 32 bit pattern }
            tmpreg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg));
            { the borrow has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Instantiates the global code generator objects (cg, cg64) for the
    instruction set being targeted and selects the matching assembler
    optimizer class. Thumb-2 takes precedence over Thumb; plain ARM is the
    default. For Thumb no optimizer class is assigned. }
  procedure create_codegen;
    begin
      if not(GenerateThumb2Code or GenerateThumbCode) then
        begin
          { classic ARM }
          cg:=tarmcgarm.create;
          cg64:=tarmcg64farm.create;
          casmoptimizer:=TCpuAsmOptimizer;
        end
      else if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end
      else
        begin
          { Thumb: casmoptimizer is deliberately left as it is }
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          // casmoptimizer:=TCpuThumbAsmOptimizer;
        end;
    end;
  4880. end.