cgcpu.pas 213 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { Common ancestor (derived from tcg) of the ARM code generator classes declared
    in this unit: tcgarm and tthumbcgarm descend from it directly. Only the
    declarations are shown here; the method bodies live in the implementation
    section. }
  28. { tbasecgarm is shared between all arm architectures }
  29. tbasecgarm = class(tcg)
  30. { true, if the next arithmetic operation should modify the flags }
  31. cgsetflags : boolean;
      { parameter passing and calls }
  32. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  33. procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
  34. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
  35. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  36. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  37. { move instructions }
  38. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  39. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
      { non-virtual load/store helpers that hand a treference back to the caller;
        tcgarm.a_op_const_ref stores its result through the reference returned by
        a_internal_load_ref_reg }
  40. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  41. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  42. { fpu move instructions }
  43. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  44. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  45. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  46. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  47. { comparison operations }
  48. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  49. l : tasmlabel);override;
  50. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  51. procedure a_jmp_name(list : TAsmList;const s : string); override;
  52. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  53. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  54. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
      { procedure prologue/epilogue and related code }
  55. procedure g_profilecode(list : TAsmList); override;
  56. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  57. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  58. procedure g_maybe_got_init(list : TAsmList); override;
  59. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
      { block copies; g_concatcopy_move and g_concatcopy_internal are not
        overrides but helpers introduced by this class }
  60. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  61. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
  62. procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  63. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  64. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  65. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  66. procedure g_save_registers(list : TAsmList);override;
  67. procedure g_restore_registers(list : TAsmList);override;
      { helpers introduced by this class (not overrides) }
  68. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  69. procedure fixref(list : TAsmList;var ref : treference);
      { virtual so that descendants can replace the load/store emission;
        tthumbcgarm and tthumb2cgarm both override it }
  70. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
      { multimedia (mm) register moves and operations }
  71. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  72. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  73. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  74. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  75. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  76. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  77. { Transform unsupported methods into Internal errors }
  78. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
  79. { try to generate optimized 32 Bit multiplication, returns true if successful generated }
  80. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  81. { clear out potential overflow bits from 8 or 16 bit operations }
  82. { the upper 24/16 bits of a register after an operation }
  83. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  84. { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
  85. procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
  86. end;
  { Code generator layer on top of tbasecgarm that adds the integer arithmetic
    (a_op_*), constant/memory load and self-adjustment overrides. Both tarmcgarm
    and tthumb2cgarm descend from it. }
  87. { tcgarm is shared between normal arm and thumb-2 }
  88. tcgarm = class(tbasecgarm)
  89. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  90. procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
  91. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  92. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  93. size: tcgsize; a: tcgint; src, dst: tregister); override;
  94. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  95. size: tcgsize; src1, src2, dst: tregister); override;
      { the *_checkoverflow variants carry the actual implementation; the plain
        three-operand methods above call them with setflags=false }
  96. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  97. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  98. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  99. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  100. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  101. {Multiply two 32-bit registers into lo and hi 32-bit registers}
  102. procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
  103. end;
  { Concrete code generator for plain ARM code: it only adds creation and
    destruction of the register allocators on top of tcgarm. }
  104. { normal arm cg }
  105. tarmcgarm = class(tcgarm)
  106. procedure init_register_allocators;override;
  107. procedure done_register_allocators;override;
  108. end;
  { Empty common ancestor (derived from tcg64f32) of the 64 bit code generator
    classes declared below; it adds no members of its own. }
  109. { 64 bit cg for all arm flavours }
  110. tbasecg64farm = class(tcg64f32)
  111. end;
  { 64 bit operations on register pairs (tregister64) plus moves between a
    64 bit integer register pair and an mm register. tarmcg64farm and
    tthumb2cg64farm descend from this class. }
  112. { tcg64farm is shared between normal arm and thumb-2 }
  113. tcg64farm = class(tbasecg64farm)
  114. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  115. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  116. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  117. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
  118. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  119. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  120. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  121. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  122. end;
  { 64 bit code generator class for plain ARM (judging by the name); it inherits
    everything from tcg64farm and adds nothing. }
  123. tarmcg64farm = class(tcg64farm)
  124. end;
  { Code generator class that descends directly from tbasecgarm (not from tcgarm)
    and therefore brings its own a_op_*, a_load_* and g_adjust_self_value
    overrides. Judging by the name it targets the 16 bit Thumb instruction set -
    NOTE(review): confirm against the implementation. }
  125. tthumbcgarm = class(tbasecgarm)
  126. procedure init_register_allocators;override;
  127. procedure done_register_allocators;override;
  128. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  129. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  130. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,dst: TRegister);override;
  131. procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);override;
  132. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
  133. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  134. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister);override;
  135. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  136. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
      { replaces the virtual load/store emitter declared in tbasecgarm }
  137. function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  138. end;
  { 64 bit counterpart of tthumbcgarm (judging by the name); it descends from
    tbasecg64farm and overrides only the two-operand 64 bit operations. }
  139. tthumbcg64farm = class(tbasecg64farm)
  140. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  141. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  142. end;
  { Code generator class built on tcgarm that overrides register allocator
    setup, calls through a register, loads, arithmetic, flag materialisation,
    procedure entry/exit, load/store emission and the mm register moves.
    Judging by the name it targets Thumb-2 - NOTE(review): confirm against the
    implementation. }
  143. tthumb2cgarm = class(tcgarm)
  144. procedure init_register_allocators;override;
  145. procedure done_register_allocators;override;
  146. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  147. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  148. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  149. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  150. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  151. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  152. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  153. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  154. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
      { replaces the virtual load/store emitter declared in tbasecgarm }
  155. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
  156. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  157. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  158. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  159. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  160. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  161. end;
  { 64 bit counterpart of tthumb2cgarm (judging by the name); it reuses
    tcg64farm and overrides only a_op64_reg_reg. }
  162. tthumb2cg64farm = class(tcg64farm)
  163. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  164. end;
  165. const
      { Translation table from a generic comparison (topcmp) to an ARM condition
        code (TAsmCond). The entries are positional, so their order has to follow
        the declaration order of topcmp (declared outside this unit); the first
        entry is C_NONE. }
  166. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  167. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
      { 4096 bytes; presumably the stack page size assumed on Windows targets -
        NOTE(review): the users of this constant are not visible here, confirm }
  168. winstackpagesize = 4096;
      { returns the FPU instruction postfix (PF_S/PF_D/PF_E) matching a float type definition }
  169. function get_fpu_postfix(def : tdef) : toppostfix;
      { exported entry point; its implementation is outside the visible part of
        the file - presumably it instantiates the code generator objects, confirm }
  170. procedure create_codegen;
  171. implementation
  172. uses
  173. globals,verbose,systems,cutils,
  174. aopt,aoptcpu,
  175. fmodule,
  176. symconst,symsym,symtable,
  177. tgobj,
  178. procinfo,cpupi,
  179. paramgr;
  { Returns the FPU instruction postfix that belongs to a floating point type
    definition:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Triggers internalerror 200401271 if def is not a floatdef and
    internalerror 200401272 for any other float type. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        { only floating point definitions have a postfix }
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the register allocators for the integer, FPU and MM register
    files. The order of the registers inside each list is passed on to the
    allocator unchanged. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { integer registers: R14 is currently always saved, so it can be handed out }
      if target_info.system=system_arm_darwin then
        { Darwin: r7 is not available, it always serves as frame pointer (for
          backtrace support, same as gcc/clang), which leaves R11 usable.
          r9 is volatile. }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of the current procedure: include it }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { no procinfo, or R11 is the frame pointer: leave R11 out }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      { FPU registers F0..F7 }
      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so only half of the single
        precision registers can be used for now (as sub registers of the
        double precision ones). }
      if current_settings.fputype<>fpu_vfpv3 then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[])
      else
        { with fpu_vfpv3 D16..D31 are available as well; they are listed
          ahead of D8..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Releases the three register allocators set up by init_register_allocators
    (integer, FPU, MM - in that order) and then lets the ancestor finish. }
  procedure tarmcgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;
      inherited done_register_allocators;
    end;
  { Loads the constant a into reg, trying the encodings in this order:
      1. MOV with a shifter constant,
      2. MVN with the inverted constant,
      3. MOV + ORR of two shifter constants,
      4. MVN + BIC of two shifter constants of the inverted value,
      5. LDR (PC relative) from a 32 bit constant appended to
         current_procinfo.aktlocaldata.
    Only the 8, 16 and 32 bit integer sizes are accepted; anything else
    triggers internalerror 2002090902. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;            { filled in by is_shifter_const, not used otherwise }
      part1, part2 : DWord;  { the two halves produced by split_into_shifter_const }
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      { single instruction: mov }
      if is_shifter_const(a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { single instruction: mvn of the complement }
      if is_shifter_const(not(a),rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: place the value in the local data of the procedure and
        load it relative to PC }
      reference_reset(poolref,4);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { must be taken right after the label was added, before the constant
        itself is appended }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg.
    - If the source is at least as wide as the destination, only tosize bytes
      are read (fromsize is replaced by tosize).
    - References whose alignment (1 or 2) is smaller than the access size, and
      halfword accesses on CPUs without CPUARM_HAS_ALL_MEM, are not loaded with
      a single instruction: the value is assembled from byte/halfword loads
      that are combined with ORR and LSL shifts.
    - Everything else goes through handle_load_store with LDR and the postfix
      selected from the size.
    Sizes other than 8/16/32 bit integers trigger internalerror 200308297. }
  280. procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  281. var
  282. oppostfix:toppostfix;
  283. usedtmpref: treference;
  284. tmpreg,tmpreg2 : tregister;
  285. so : tshifterop;
  286. dir : integer;
  287. begin
      { never read more bytes than the destination can hold }
  288. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  289. FromSize := ToSize;
  290. case FromSize of
  291. { map the (signed or unsigned) integer size to the load postfix }
  292. OS_8:
  293. oppostfix:=PF_B;
  294. OS_S8:
  295. oppostfix:=PF_SB;
  296. OS_16:
  297. oppostfix:=PF_H;
  298. OS_S16:
  299. oppostfix:=PF_SH;
  300. OS_32,
  301. OS_S32:
  302. oppostfix:=PF_None;
  303. else
  304. InternalError(200308297);
  305. end;
      { without CPUARM_HAS_ALL_MEM a signed byte is loaded as an unsigned byte
        here and sign extended by the a_load_reg_reg call at the end }
  306. if (fromsize=OS_S8) and
  307. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  308. oppostfix:=PF_B;
      { piecewise load: under-aligned reference, or halfword access on a CPU
        without CPUARM_HAS_ALL_MEM }
  309. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
  310. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  311. (oppostfix in [PF_SH,PF_H])) then
  312. begin
      { dir is the offset step from the least significant part towards the
        most significant part: backwards on big endian, forwards otherwise }
  313. if target_info.endian=endian_big then
  314. dir:=-1
  315. else
  316. dir:=1;
  317. case FromSize of
  318. OS_16,OS_S16:
  319. begin
  320. { only complicated references need an extra loadaddr }
      { NOTE(review): the upper bound 4094 presumably keeps offset+1 inside
        the immediate offset range of the byte loads - confirm }
  321. if assigned(ref.symbol) or
  322. (ref.index<>NR_NO) or
  323. (ref.offset<-4095) or
  324. (ref.offset>4094) or
  325. { sometimes the compiler reused registers }
  326. (reg=ref.index) or
  327. (reg=ref.base) then
  328. begin
  329. tmpreg2:=getintregister(list,OS_INT);
  330. a_loadaddr_ref_reg(list,ref,tmpreg2);
  331. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  332. end
  333. else
  334. usedtmpref:=ref;
      { on big endian the least significant byte is the second one }
  335. if target_info.endian=endian_big then
  336. inc(usedtmpref.offset,1);
  337. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  338. tmpreg:=getintregister(list,OS_INT);
      { low byte goes straight into reg ... }
  339. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  340. inc(usedtmpref.offset,dir);
      { ... the high byte is loaded unsigned for OS_16 and signed for OS_S16,
        then merged in shifted left by 8 }
  341. if FromSize=OS_16 then
  342. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  343. else
  344. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  345. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  346. end;
  347. OS_32,OS_S32:
  348. begin
  349. tmpreg:=getintregister(list,OS_INT);
  350. { only complicated references need an extra loadaddr }
      { NOTE(review): the upper bound 4092 presumably keeps offset+3 inside
        the immediate offset range - confirm }
  351. if assigned(ref.symbol) or
  352. (ref.index<>NR_NO) or
  353. (ref.offset<-4095) or
  354. (ref.offset>4092) or
  355. { sometimes the compiler reused registers }
  356. (reg=ref.index) or
  357. (reg=ref.base) then
  358. begin
  359. tmpreg2:=getintregister(list,OS_INT);
  360. a_loadaddr_ref_reg(list,ref,tmpreg2);
  361. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  362. end
  363. else
  364. usedtmpref:=ref;
  365. shifterop_reset(so);so.shiftmode:=SM_LSL;
      { halfword aligned: two 16 bit loads, the second one merged at bit 16 }
  366. if ref.alignment=2 then
  367. begin
  368. if target_info.endian=endian_big then
  369. inc(usedtmpref.offset,2);
  370. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  371. inc(usedtmpref.offset,dir*2);
  372. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  373. so.shiftimm:=16;
  374. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  375. end
  376. else
      { otherwise: four byte loads merged at bit 8, 16 and 24; tmpreg is
        reused for the second and the fourth byte, tmpreg2 holds the third }
  377. begin
  378. tmpreg2:=getintregister(list,OS_INT);
  379. if target_info.endian=endian_big then
  380. inc(usedtmpref.offset,3);
  381. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  382. inc(usedtmpref.offset,dir);
  383. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  384. inc(usedtmpref.offset,dir);
  385. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  386. so.shiftimm:=8;
  387. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  388. inc(usedtmpref.offset,dir);
  389. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  390. so.shiftimm:=16;
  391. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  392. so.shiftimm:=24;
  393. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  394. end;
  395. end
  396. else
      { byte sized accesses need no splitting }
  397. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  398. end;
  399. end
  400. else
  401. handle_load_store(list,A_LDR,oppostfix,reg,ref);
      { fix up the value after the load: sign extend the byte that had to be
        loaded unsigned, or re-extend an S8 value that targets a 16 bit
        destination }
  402. if (fromsize=OS_S8) and
  403. (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) then
  404. a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
  405. else if (fromsize=OS_S8) and (tosize = OS_16) then
  406. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  407. end;
  { Subtracts ioffset from the 'self' parameter of procdef, at every caller
    side location of that parameter (register or stack reference).
    If ioffset is not a shifter constant it is first loaded into R12, which
    serves as scratch register here.
    Triggers internalerror 200305251 when no 'self' parameter symbol exists
    and internalerror 200309189 for any other kind of parameter location. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
      rot : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(200305251);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while curloc<>nil do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if not is_shifter_const(ioffset,rot) then
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end
                else
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register);
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),sizeof(pint));
                if not is_shifter_const(ioffset,rot) then
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,selfref);
                  end
                else
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref);
              end
            else
              internalerror(200309189);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as a parameter: it is loaded into the parameter
    register or stored to the parameter's stack slot, depending on where the
    (single) location of paraloc lives.
    Any other location kind triggers internalerror 2002081101. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      dest : pcgparalocation;
      slot : treference;
    begin
      paraloc.check_simple_location;
      dest:=paraloc.location;
      paramanager.allocparaloc(list,dest);
      case dest^.loc of
        LOC_REFERENCE:
          begin
            { build the reference of the stack slot from the location data }
            reference_reset(slot,paraloc.alignment);
            slot.base:=dest^.reference.index;
            slot.offset:=dest^.reference.offset;
            a_load_const_ref(list,size,a,slot);
          end;
        LOC_REGISTER,LOC_CREGISTER:
          a_load_const_reg(list,size,a,dest^.register);
        else
          internalerror(2002081101);
      end;
    end;
  { Passes the value stored at reference r as a parameter that may be split
    over several locations. The location list of paraloc is walked in order;
    after every location the source offset is advanced and the remaining size
    reduced by the size of that location.
    The size parameter is not referenced, the sizes of the individual
    locations are used instead.
    Internal errors: 2005010710 (a non 32 bit stack location is followed by
    another location), 2002072801 (FPU register location that is neither
    OS_F32 nor OS_F64), 2002081103 (unsupported location kind). }
  procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
    var
      srcref, dstref : treference;
      cur : pcgparalocation;
      remaining : aint;
    begin
      srcref:=r;
      remaining:=paraloc.intsize;
      cur:=paraloc.location;
      while assigned(cur) do
        begin
          paramanager.allocparaloc(list,cur);
          case cur^.loc of
            LOC_VOID:
              begin
                { nothing to do }
              end;
            LOC_REGISTER,LOC_CREGISTER:
              a_load_ref_reg(list,cur^.size,cur^.size,srcref,cur^.register);
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                if cur^.size in [OS_F32,OS_F64] then
                  a_loadfpu_ref_reg(list,cur^.size,cur^.size,srcref,cur^.register)
                else
                  internalerror(2002072801);
              end;
            LOC_REFERENCE:
              begin
                reference_reset_base(dstref,cur^.reference.index,cur^.reference.offset,paraloc.alignment);
                { doubles in softemu mode have a strange order of registers and references }
                if cur^.size<>OS_32 then
                  begin
                    { everything that is left goes to the stack in one copy,
                      so this has to be the last location }
                    g_concatcopy(list,srcref,dstref,remaining);
                    if assigned(cur^.next) then
                      internalerror(2005010710);
                  end
                else
                  g_concatcopy(list,srcref,dstref,4);
              end;
            else
              internalerror(2002081103);
          end;
          { move on to the part of the source that belongs to the next location }
          inc(srcref.offset,tcgsize2size[cur^.size]);
          dec(remaining,tcgsize2size[cur^.size]);
          cur:=cur^.next;
        end;
    end;
  { Passes the address of reference r as a parameter. For a register location
    the address is computed directly into the parameter register; for a stack
    location it is computed into a temporary register and stored to the slot.
    Any other location kind triggers internalerror 2002080701. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      dest : pcgparalocation;
      slot : treference;
      addrreg : tregister;
    begin
      paraloc.check_simple_location;
      dest:=paraloc.location;
      paramanager.allocparaloc(list,dest);
      case dest^.loc of
        LOC_REFERENCE:
          begin
            { stack slot described by the parameter location }
            reference_reset(slot,paraloc.alignment);
            slot.base:=dest^.reference.index;
            slot.offset:=dest^.reference.offset;
            { the address has to pass through a register first }
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,slot);
          end;
        LOC_REGISTER,LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,dest^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call to the routine with assembler name s. With weak=true the
    symbol is referenced weakly. The reference is marked addr_pic when the
    target uses a GOT for PIC and PIC code is being generated, addr_full
    otherwise. Also records pi_do_call in the flags of the current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target : TAsmSymbol;
      callref : treference;
    begin
      if weak then
        target:=current_asmdata.WeakRefAsmSymbol(s)
      else
        target:=current_asmdata.RefAsmSymbol(s);

      reference_reset_symbol(callref,target,0,sizeof(pint));
      if (tf_pic_uses_got in target_info.flags) and
         (cs_create_pic in current_settings.moduleswitches) then
        callref.refaddr:=addr_pic
      else
        callref.refaddr:=addr_full;

      { Always use BL: newer binutils apparently do not translate blx, and
        generating BL is also what clang and gcc do by default.
        A BLX variant (for cpus with CPUARM_HAS_BLX_LABEL, except on WinCE
        whose GNU AS does not support BLX imm) used to be selected here; that
        check was not really correct anyway since it should only be used for
        non-Thumb cpus. }
      list.concat(taicpu.op_ref(A_BL,callref));

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); hence it is set
        here instead of being checked with internalerror(2003060703) }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg: BLX if the selected cpu has
    CPUARM_HAS_BLX, otherwise the pair "mov r14,pc / mov pc,reg".
    Also records pi_do_call in the flags of the current procedure. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          { set up the return address by hand, then jump }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      { the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM); hence it is set
        here instead of being checked with internalerror(2003060703) }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Two-operand form "reg := reg op a": forwards to the three-operand
    a_op_const_reg_reg with reg as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,Op,size,a,reg,reg);
    end;
  { Applies "mem := mem op a" to the memory location ref: the value is loaded
    into a temporary register, the operation is carried out into a second
    temporary, and the result is written back through the reference returned
    by the load. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valreg, resreg : tregister;
      storeref : treference;
    begin
      valreg:=getintregister(list,size);
      resreg:=getintregister(list,size);
      { keep the reference handed back by the load for the store below }
      storeref:=a_internal_load_ref_reg(list,size,size,ref,valreg);
      a_op_const_reg_reg(list,Op,size,a,valreg,resreg);
      a_load_reg_ref(list,size,size,resreg,storeref);
    end;
  { Two-operand register operation.
      OP_NEG: dst := 0 - src (RSB), followed by maybeadjustresult.
      OP_NOT: dst := not src; for 8 and 16 bit sizes the value is inverted
              while shifted to the top of the register and then shifted back
              down (arithmetic shift for signed, logical shift for unsigned
              sizes).
      other : forwarded to a_op_reg_reg_reg as dst := dst op src. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shift : tshifterop;
    begin
      case Op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                shifterop_reset(shift);
                { move the value to the top of the register while inverting }
                shift.shiftmode:=SM_LSL;
                if size in [OS_16, OS_S16] then
                  shift.shiftimm:=16
                else
                  shift.shiftimm:=24;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_8, OS_16] then
                  shift.shiftmode:=SM_LSR
                else
                  shift.shiftmode:=SM_ASR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
              end;
          end;
        else
          a_op_reg_reg_reg(list,Op,size,src,dst,dst);
      end;
    end;
  647. const
      { The three tables below are indexed by TOpCG and filled positionally, so
        the entries have to follow the declaration order of TOpCG (declared
        outside this unit). A_NONE marks operations without an opcode in the
        respective table. }
      { opcode table without entries for the shift/rotate operations }
  648. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  649. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  650. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
      { same as above, but additionally provides A_ASR, A_LSL, A_LSR and A_ROR }
  651. op_reg_opcg2asmop: array[TOpCG] of tasmop =
  652. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  653. A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
      { instruction postfix per operation; currently PF_None for every entry }
  654. op_reg_postfix: array[TOpCG] of TOpPostfix =
  655. (PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
  656. PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None);
  { "dst := src op a" without overflow tracking: forwards to
    a_op_const_reg_reg_checkoverflow with setflags=false. The overflow
    location it requires is a throw-away local. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,unusedovloc);
    end;
  { Three-operand register operation without overflow tracking.
    Forwards to a_op_reg_reg_reg_checkoverflow with setflags=false; the
    overflow location reported by that routine is discarded. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      unusedovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,unusedovloc);
    end;
  { Maps a generic shift/rotate operation to the ARM shifter mode that
    implements it. OP_ROL has no mode of its own and is mapped to SM_ROR as
    well. Any other operation raises internalerror 2012070501. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SAR:
          Result:=SM_ASR;
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace a 32 bit multiplication dst := src*a by a short sequence of
    shifted MOV/ADD/SUB/RSB instructions.

    list     - instruction list the sequence is appended to
    a        - constant factor
    src, dst - source and destination register

    Returns true if code was emitted, false if no sequence within the effort
    budget (maxeffort) was found; in that case nothing has been added to list
    and the caller has to emit the multiplication itself.

    NOTE(review): both multi-instruction sequences keep reading src after dst
    has been written; confirm that callers never pass src = dst here. }
  682. function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  683. var
  684. multiplier : dword;
  685. power : longint;
  686. shifterop : tshifterop;
  687. bitsset : byte;
  688. negative : boolean;
  689. first : boolean;
  690. b,
  691. cycles : byte;
  692. maxeffort : byte;
  693. begin
  694. result:=true;
  695. cycles:=0;
  696. negative:=a<0;
  697. shifterop.rs:=NR_NO;
  698. shifterop.shiftmode:=SM_LSL;
  { a negative factor costs one extra instruction: the final RSB dst,dst,#0 }
  699. if negative then
  700. inc(cycles);
  701. multiplier:=dword(abs(a));
  { bit 0 is masked out of the count, see the "add up" comment below }
  702. bitsset:=popcnt(multiplier and $fffffffe);
  703. { heuristics to estimate how much instructions are reasonable to replace the mul,
  704. this is currently based on XScale timings }
  705. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  706. actual multiplication, this requires min. 1+4 cycles
  707. because the first shift imm. might cause a stall and because we need more instructions
  708. when replacing the mul we generate max. 3 instructions to replace this mul }
  709. maxeffort:=3;
  710. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  711. a ldr, so generating one more operation to replace this is beneficial }
  712. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  713. inc(maxeffort);
  714. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  715. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  716. dec(maxeffort);
  717. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  718. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  719. dec(maxeffort);
  720. { most simple cases }
  721. if a=1 then
  722. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  723. else if a=0 then
  724. a_load_const_reg(list,OS_32,0,dst)
  725. else if a=-1 then
  726. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  727. { add up ?
  728. basically, one add is needed for each bit being set in the constant factor
  729. however, the least significant bit is for free, it can be hidden in the initial
  730. instruction
  731. }
  732. else if (bitsset+cycles<=maxeffort) and
  733. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  734. begin
  735. first:=true;
  { consume the set bits of multiplier from the most significant one downwards }
  736. while multiplier<>0 do
  737. begin
  738. shifterop.shiftimm:=BsrDWord(multiplier);
  739. if odd(multiplier) then
  740. begin
  { dst := src + (src shl shiftimm): handles bit 0 together with the top bit }
  741. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  742. dec(multiplier);
  743. end
  744. else
  745. if first then
  746. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  747. else
  748. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  749. first:=false;
  { clear the bit that has just been handled }
  750. dec(multiplier,1 shl shifterop.shiftimm);
  751. end;
  752. if negative then
  753. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  754. end
  755. { subtract from the next greater power of two? }
  756. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  757. begin
  758. first:=true;
  759. while multiplier<>0 do
  760. begin
  761. if first then
  762. begin
  { from here on multiplier holds the amount that has to be subtracted
    from src shl power }
  763. multiplier:=(1 shl power)-multiplier;
  764. shifterop.shiftimm:=power;
  765. end
  766. else
  767. shifterop.shiftimm:=BsrDWord(multiplier);
  768. if odd(multiplier) then
  769. begin
  { dst := (src shl shiftimm) - src }
  770. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  771. dec(multiplier);
  772. end
  773. else
  774. if first then
  775. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  776. else
  777. begin
  778. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  779. dec(multiplier,1 shl shifterop.shiftimm);
  780. end;
  781. first:=false;
  782. end;
  783. if negative then
  784. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  785. end
  786. else
  787. result:=false;
  788. end;
  { Emits dst := src <op> a for a constant operand a and picks the cheapest
    encoding found for it.

    list     - instruction list the code is appended to
    op, size - operation and operand size
    a        - constant operand
    src, dst - source and destination register
    setflags - request the flag setting form of the instruction (the cgsetflags
               field has the same effect)
    ovloc    - receives LOC_FLAGS plus the overflow condition when a flag
               setting ADD/SUB with an immediate was emitted for an unsigned
               8/16/32 bit size; it is LOC_VOID otherwise, unless the general
               fallback at the end delegates to a_op_reg_reg_reg_checkoverflow,
               which then sets it

    Raises internalerror for OP_NEG/OP_NOT with an encodable constant and for
    shift counts above 32. }
  789. procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
  790. var
  791. shift, lsb, width : byte;
  792. tmpreg : tregister;
  793. so : tshifterop;
  794. l1 : longint;
  795. imm1, imm2: DWord;
  796. begin
  { NOTE(review): optimize_op_const is defined elsewhere; it presumably rewrites
    op/a for trivial constants, which is what produces the OP_NONE and OP_MOVE
    cases handled right below - confirm }
  797. optimize_op_const(size, op, a);
  798. case op of
  799. OP_NONE:
  800. begin
  801. if src <> dst then
  802. a_load_reg_reg(list, size, size, src, dst);
  803. exit;
  804. end;
  805. OP_MOVE:
  806. begin
  807. a_load_const_reg(list, size, a, dst);
  808. exit;
  809. end;
  810. end;
  811. ovloc.loc:=LOC_VOID;
  { if -a can be encoded as an immediate, swap ADD and SUB and negate the
    constant; this is only done when no flags are requested }
  812. if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
  813. case op of
  814. OP_ADD:
  815. begin
  816. op:=OP_SUB;
  817. a:=aint(dword(-a));
  818. end;
  819. OP_SUB:
  820. begin
  821. op:=OP_ADD;
  822. a:=aint(dword(-a));
  823. end
  824. end;
  { constant fits into an immediate operand: a single instruction is enough }
  825. if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
  826. case op of
  827. OP_NEG,OP_NOT:
  828. internalerror(200308281);
  829. OP_SHL,
  830. OP_SHR,
  831. OP_ROL,
  832. OP_ROR,
  833. OP_SAR:
  834. begin
  835. if a>32 then
  836. internalerror(200308294);
  837. shifterop_reset(so);
  838. so.shiftmode:=opshift2shiftmode(op);
  { rotate left by a is emitted as rotate right by 32-a }
  839. if op = OP_ROL then
  840. so.shiftimm:=32-a
  841. else
  842. so.shiftimm:=a;
  843. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  844. end;
  845. else
  846. {if (op in [OP_SUB, OP_ADD]) and
  847. ((a < 0) or
  848. (a > 4095)) then
  849. begin
  850. tmpreg:=getintregister(list,size);
  851. list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
  852. list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
  853. list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
  854. ));
  855. end
  856. else}
  857. begin
  858. if cgsetflags or setflags then
  859. a_reg_alloc(list,NR_DEFAULTFLAGS);
  { the postfix expression yields PF_S when flags are requested and the
    toppostfix value with ordinal 0 otherwise }
  860. list.concat(setoppostfix(
  861. taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
  862. end;
  { report where the overflow of an unsigned add/sub can be found; resflags
    is left untouched for any other operation }
  863. if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
  864. begin
  865. ovloc.loc:=LOC_FLAGS;
  866. case op of
  867. OP_ADD:
  868. ovloc.resflags:=F_CS;
  869. OP_SUB:
  870. ovloc.resflags:=F_CC;
  871. end;
  872. end;
  873. end
  874. else
  875. begin
  876. { there could be added some more sophisticated optimizations }
  877. if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
  878. a_op_reg_reg(list,OP_NEG,size,src,dst)
  879. { we do this here instead in the peephole optimizer because
  880. it saves us a register }
  881. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
  882. a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
  883. { for example : b=a*5 -> b=a*4+a with add instruction and shl }
  884. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
  885. begin
  886. if l1>32 then{roozbeh does this ever happen?}
  887. internalerror(200308296);
  888. shifterop_reset(so);
  889. so.shiftmode:=SM_LSL;
  890. so.shiftimm:=l1;
  891. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
  892. end
  893. { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
  894. else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
  895. begin
  896. if l1>32 then{does this ever happen?}
  897. internalerror(201205181);
  898. shifterop_reset(so);
  899. so.shiftmode:=SM_LSL;
  900. so.shiftimm:=l1;
  901. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
  902. end
  903. else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
  904. begin
  905. { nothing to do on success }
  906. end
  907. { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
  908. broader range of shifterconstants.}
  909. else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
  910. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
  911. { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
  912. into the following instruction}
  913. else if (op = OP_AND) and
  914. is_continuous_mask(a, lsb, width) and
  915. ((lsb = 0) or ((lsb + width) = 32)) then
  916. begin
  917. shifterop_reset(so);
  918. if (width = 16) and
  919. (lsb = 0) and
  920. (current_settings.cputype >= cpu_armv6) then
  921. list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
  922. else if (width = 8) and
  923. (lsb = 0) and
  924. (current_settings.cputype >= cpu_armv6) then
  925. list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
  926. else if lsb = 0 then
  927. begin
  { mask starts at bit 0: shift the unwanted upper bits out to the left and
    bring the rest back with a logical shift right }
  928. so.shiftmode:=SM_LSL;
  929. so.shiftimm:=32-width;
  930. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  931. so.shiftmode:=SM_LSR;
  932. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  933. end
  934. else
  935. begin
  { mask ends at bit 31: shift the unwanted lower bits out to the right and
    shift the rest back to its position }
  936. so.shiftmode:=SM_LSR;
  937. so.shiftimm:=lsb;
  938. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
  939. so.shiftmode:=SM_LSL;
  940. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
  941. end;
  942. end
  { inverted mask can be expressed with two immediates: two BICs }
  943. else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
  944. begin
  945. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
  946. list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
  947. end
  { constant can be expressed with two immediates: apply the operation twice }
  948. else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
  949. not(cgsetflags or setflags) and
  950. split_into_shifter_const(a, imm1, imm2) then
  951. begin
  952. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
  953. list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
  954. end
  955. else
  956. begin
  { general fallback: load the constant into a temp register and use the
    register/register variant }
  957. tmpreg:=getintregister(list,size);
  958. a_load_const_reg(list,size,a,tmpreg);
  959. a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
  960. end;
  961. end;
  962. maybeadjustresult(list,op,size,dst);
  963. end;
  { Emits dst := src2 <op> src1 for two register operands.

    list       - instruction list the code is appended to
    op, size   - operation and operand size
    src1, src2 - operands; for shifts and rotates src1 is the count and src2
                 the value being shifted
    dst        - destination register
    setflags   - request flag setting/overflow detection (the cgsetflags field
                 has the same effect)
    ovloc      - set to LOC_VOID on entry; only the overflow checking multiply
                 changes it (LOC_FLAGS with F_NE)

    Raises internalerror for OP_NEG, OP_NOT, OP_DIV and OP_IDIV, and for
    rotates on sizes other than OS_32/OS_S32. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { MOV dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { long multiply: dst receives the low word, overflowreg the
                  high word which is inspected below }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: copy
                          the operand into a temp so rm differs from rd.
                          The copy has to go into tmpreg; copying into dst
                          would be a no-op and leave tmpreg undefined. }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: no overflow if the high word equals the sign
                      extension of the low word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: no overflow if the high word is zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { same situation as above: the copy must target tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            { the postfix expression yields PF_S when flags are requested and
              the toppostfix value with ordinal 0 otherwise }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result in the register pair
    dstlo/dsthi.

    With CPUARM_HAS_UMULL a long multiply (UMULL for OS_32, SMULL for OS_S32)
    is emitted; any other size raises internalerror 2014060802. Either result
    register may be passed as NR_NO, a scratch register is allocated for it.
    Without long multiply support only the low word can be produced, so dsthi
    has to be NR_NO; otherwise internalerror 2015083022 is raised. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      longmulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            internalerror(2015083022)
          else
            begin
              { plain 32x32=32 bit multiplication is sufficient }
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
          exit;
        end;

      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
      case size of
        OS_S32:
          longmulop:=A_SMULL;
        OS_32:
          longmulop:=A_UMULL;
        else
          InternalError(2014060802);
      end;
      { The caller might omit dstlo or dsthi, when he is not interested in it, we still
        need valid registers everywhere. In case of dsthi = NR_NO we could fall back to
        32x32=32 bit multiplication }
      if (dstlo = NR_NO) then
        dstlo:=getintregister(list,size);
      if (dsthi = NR_NO) then
        dsthi:=getintregister(list,size);
      list.concat(taicpu.op_reg_reg_reg_reg(longmulop, dstlo, dsthi, src1,src2));
    end;
  { Emits the load or store instruction op (with postfix oppostfix) for
    register reg and memory reference ref, first legalising the reference
    for the chosen instruction.

    list      - instruction list the code is appended to
    op        - load/store opcode
    oppostfix - postfix of the opcode
    reg       - register being loaded or stored
    ref       - memory reference; passed by value and rewritten locally

    Returns the reference that was finally used in the emitted instruction, so
    callers can derive neighbouring addresses from it. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg1,tmpreg2 : tregister;
    begin
      tmpreg1:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020701);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) or
         ((GenerateThumbCode) and
          (((oppostfix in [PF_SB,PF_SH]) and (ref.offset<>0)) or
           ((oppostfix=PF_None) and ((ref.offset<0) or ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
             ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or ((ref.offset mod 4)<>0))) or
           ((oppostfix=PF_H) and ((ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0)))) or
           ((oppostfix=PF_B) and ((ref.offset<0) or (ref.offset>31) or ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0))))
          )
         ) then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,tmpreg1);
              ref.index:=tmpreg1;
            end;

          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg1);
              ref.base:=tmpreg1;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              tmpreg1:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,tmpreg1);
              ref.index:=tmpreg1;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not((op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { The offset is always added to the base and the reference is
            switched to the register holding the sum. Formerly, when a thumb
            fix-up had already set tmpreg1, the sum was computed into a fresh
            register that the reference never used, so the offset got lost
            once ref.offset was cleared. Adding to the base (instead of to
            whichever register tmpreg1 happened to be) also stays correct
            when the index is shifted or subtracted. }
          tmpreg2:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg2);
          ref.base:=tmpreg2;
          tmpreg1:=tmpreg2;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg1<>NR_NO then
            begin
              if ref.base=tmpreg1 then
                begin
                  { the base is already a scratch register: fold in place }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,tmpreg1,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,tmpreg1,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { the index is the scratch register: it becomes the new base }
                  if ref.index<>tmpreg1 then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,tmpreg1))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,tmpreg1));
                  ref.base:=tmpreg1;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { no scratch register yet; honour the sign of the index exactly
                like the two branches above do }
              tmpreg1:=getintregister(list,OS_ADDR);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg1,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg1,ref.base,ref.index));
              ref.base:=tmpreg1;
              ref.index:=NR_NO;
            end;
        end;
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register reg to the memory reference ref.

    list     - instruction list the code is appended to
    fromsize - size of the value in reg; replaced by tosize if it is not smaller
    tosize   - size of the memory operand; selects the store postfix
    reg      - register to store
    ref      - destination reference

    The store is split into several smaller stores when the reference is only
    1 or 2 byte aligned and that alignment is smaller than tosize, or when a
    halfword store is requested on a cpu without CPUARM_HAS_ALL_MEM. The pieces
    are written starting with the least significant one; dir and the initial
    offset adjustment place them according to the target endianness.
    Raises internalerror 200308299 for an unsupported tosize. }
  1232. procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
  1233. var
  1234. oppostfix:toppostfix;
  1235. usedtmpref: treference;
  1236. tmpreg : tregister;
  1237. dir : integer;
  1238. begin
  1239. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  1240. FromSize := ToSize;
  1241. case ToSize of
  1242. { signed integer registers }
  1243. OS_8,
  1244. OS_S8:
  1245. oppostfix:=PF_B;
  1246. OS_16,
  1247. OS_S16:
  1248. oppostfix:=PF_H;
  1249. OS_32,
  1250. OS_S32,
  1251. { for vfp value stored in integer register }
  1252. OS_F32:
  1253. oppostfix:=PF_None;
  1254. else
  1255. InternalError(200308299);
  1256. end;
  1257. if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
  1258. ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
  1259. (oppostfix =PF_H)) then
  1260. begin
  { direction in which the offset moves from the least significant piece to
    the next more significant one }
  1261. if target_info.endian=endian_big then
  1262. dir:=-1
  1263. else
  1264. dir:=1;
  1265. case FromSize of
  1266. OS_16,OS_S16:
  1267. begin
  { two byte stores: low byte first, then reg shr 8 }
  1268. tmpreg:=getintregister(list,OS_INT);
  1269. usedtmpref:=ref;
  1270. if target_info.endian=endian_big then
  1271. inc(usedtmpref.offset,1);
  1272. usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
  1273. inc(usedtmpref.offset,dir);
  1274. a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
  1275. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1276. end;
  1277. OS_32,OS_S32:
  1278. begin
  1279. tmpreg:=getintregister(list,OS_INT);
  1280. usedtmpref:=ref;
  1281. if ref.alignment=2 then
  1282. begin
  { halfword aligned: two halfword stores, low half first }
  1283. if target_info.endian=endian_big then
  1284. inc(usedtmpref.offset,2);
  1285. usedtmpref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,usedtmpref);
  1286. a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,tmpreg);
  1287. inc(usedtmpref.offset,dir*2);
  1288. a_internal_load_reg_ref(list,OS_16,OS_16,tmpreg,usedtmpref);
  1289. end
  1290. else
  1291. begin
  { byte aligned: four byte stores, tmpreg is shifted right by 8 between them }
  1292. if target_info.endian=endian_big then
  1293. inc(usedtmpref.offset,3);
  1294. usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
  1295. a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,tmpreg);
  1296. inc(usedtmpref.offset,dir);
  1297. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1298. a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
  1299. inc(usedtmpref.offset,dir);
  1300. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1301. a_op_const_reg(list,OP_SHR,OS_INT,8,tmpreg);
  1302. inc(usedtmpref.offset,dir);
  1303. a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
  1304. end;
  1305. end
  1306. else
  { any other source size is stored with a single instruction }
  1307. handle_load_store(list,A_STR,oppostfix,reg,ref);
  1308. end;
  1309. end
  1310. else
  1311. handle_load_store(list,A_STR,oppostfix,reg,ref);
  1312. end;
  { Stores reg to ref with a store of width tosize and returns the reference
    used by the first emitted store instruction.

    On cpus without CPUARM_HAS_ALL_MEM a 16 bit store is emitted as two byte
    stores: reg to the reference and reg shr 8 to the following byte.
    fromsize is not evaluated. Raises internalerror 2003082910 for an
    unsupported tosize. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepostfix : toppostfix;
      hibyteref : treference;
      hibytereg : TRegister;
      splithalfword : boolean;
    begin
      case ToSize of
        OS_8,OS_S8:
          storepostfix:=PF_B;
        OS_16,OS_S16:
          storepostfix:=PF_H;
        OS_32,OS_S32:
          storepostfix:=PF_None;
        else
          InternalError(2003082910);
      end;

      splithalfword:=(tosize in [OS_S16,OS_16]) and
        (not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]));

      if not splithalfword then
        begin
          result:=handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;

      { low byte first ... }
      result:=handle_load_store(list,A_STR,PF_B,reg,ref);
      { ... then bits 8 and up go to the next byte }
      hibytereg:=getintregister(list,OS_INT);
      a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibytereg);
      hibyteref:=result;
      inc(hibyteref.offset);
      handle_load_store(list,A_STR,PF_B,hibytereg,hibyteref);
    end;
  { Loads reg from ref and returns the reference used by the first emitted
    load instruction. The load postfix is derived from fromsize.

    On cpus without CPUARM_HAS_ALL_MEM two cases are emulated, selected by
    tosize:
      OS_S8        - unsigned byte load followed by a register sign extension
      OS_16/OS_S16 - two byte loads combined with ORR reg,reg,tmp lsl 8
    Raises internalerror 200308291 for an unsupported fromsize. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpostfix : toppostfix;
      hibyteshift : tshifterop;
      hibytereg : TRegister;
      hibyteref : treference;
      limitedmem : boolean;
    begin
      case FromSize of
        OS_8:
          loadpostfix:=PF_B;
        OS_S8:
          loadpostfix:=PF_SB;
        OS_16:
          loadpostfix:=PF_H;
        OS_S16:
          loadpostfix:=PF_SH;
        OS_32,OS_S32:
          loadpostfix:=PF_None;
        else
          InternalError(200308291);
      end;

      limitedmem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);

      if limitedmem and (tosize=OS_S8) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
          exit;
        end;

      if limitedmem and (tosize in [OS_S16,OS_16]) then
        begin
          { low byte into reg, following byte into a temp, then merge }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          hibytereg:=getintregister(list,OS_INT);
          hibyteref:=result;
          inc(hibyteref.offset);
          handle_load_store(list,A_LDR,PF_B,hibytereg,hibyteref);
          shifterop_reset(hibyteshift);
          hibyteshift.shiftmode:=SM_LSL;
          hibyteshift.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibytereg,hibyteshift));
          exit;
        end;

      result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref);
    end;
  { Copies reg1 to reg2, converting from fromsize to tosize.

    list     - instruction list the code is appended to
    fromsize - size of the value in reg1
    tosize   - size wanted in reg2
    reg1     - source register
    reg2     - destination register

    When the sizes differ, the value is zero or sign extended according to the
    effective source size (tosize, if tosize is not larger than fromsize).
    Before armv6 this is done with shifts and AND, from armv6 on with the
    UXT*/SXT* instructions. If no conversion instruction was emitted and the
    registers differ, a plain MOV is generated and reported to the register
    allocator through add_move_instruction.
    Raises internalerror 2002090901 for OS_NO or oversized operands. }
  1391. procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
  1392. var
  1393. so : tshifterop;
  { Emits reg2 := reg <shiftmode> shiftimm. Thumb code goes through
    a_op_const_reg_reg, otherwise a MOV with the shared shifter operand so of
    the enclosing procedure is used. }
  1394. procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
  1395. begin
  1396. if GenerateThumbCode then
  1397. begin
  1398. case shiftmode of
  1399. SM_ASR:
  1400. a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
  1401. SM_LSR:
  1402. a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
  1403. SM_LSL:
  1404. a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
  1405. else
  1406. internalerror(2013090301);
  1407. end;
  1408. end
  1409. else
  1410. begin
  1411. so.shiftmode:=shiftmode;
  1412. so.shiftimm:=shiftimm;
  1413. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
  1414. end;
  1415. end;
  1416. var
  1417. instr: taicpu;
  1418. conv_done: boolean;
  1419. begin
  { NOTE(review): tcgsize2size is used as a byte count elsewhere in this unit,
    so this rejects operands larger than 32 bytes, not 32 bits - confirm that
    this is the intended limit }
  1420. if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
  1421. internalerror(2002090901);
  1422. conv_done:=false;
  1423. if tosize<>fromsize then
  1424. begin
  1425. shifterop_reset(so);
  { assume a conversion gets emitted; the else branches of the case statements
    below reset the flag for sizes that need none }
  1426. conv_done:=true;
  1427. if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
  1428. fromsize:=tosize;
  1429. if current_settings.cputype<cpu_armv6 then
  1430. case fromsize of
  1431. OS_8:
  1432. if GenerateThumbCode then
  1433. a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
  1434. else
  1435. list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
  1436. OS_S8:
  1437. begin
  1438. do_shift(SM_LSL,24,reg1);
  1439. if tosize=OS_16 then
  1440. begin
  { sign extend the byte, but leave the upper 16 bits of the result cleared }
  1441. do_shift(SM_ASR,8,reg2);
  1442. do_shift(SM_LSR,16,reg2);
  1443. end
  1444. else
  1445. do_shift(SM_ASR,24,reg2);
  1446. end;
  1447. OS_16:
  1448. begin
  1449. do_shift(SM_LSL,16,reg1);
  1450. do_shift(SM_LSR,16,reg2);
  1451. end;
  1452. OS_S16:
  1453. begin
  1454. do_shift(SM_LSL,16,reg1);
  1455. do_shift(SM_ASR,16,reg2)
  1456. end;
  1457. else
  1458. conv_done:=false;
  1459. end
  1460. else
  1461. case fromsize of
  1462. OS_8:
  1463. if GenerateThumbCode then
  1464. list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
  1465. else
  1466. list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
  1467. OS_S8:
  1468. begin
  1469. if tosize=OS_16 then
  1470. begin
  1471. so.shiftmode:=SM_ROR;
  1472. so.shiftimm:=16;
  1473. list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
  1474. do_shift(SM_LSR,16,reg2);
  1475. end
  1476. else
  1477. list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
  1478. end;
  1479. OS_16:
  1480. list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
  1481. OS_S16:
  1482. list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
  1483. else
  1484. conv_done:=false;
  1485. end
  1486. end;
  1487. if not conv_done and (reg1<>reg2) then
  1488. begin
  1489. { same size, only a register mov required }
  1490. instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
  1491. list.Concat(instr);
  1492. { Notify the register allocator that we have written a move instruction so
  1493. it can try to eliminate it. }
  1494. add_move_instruction(instr);
  1495. end;
  1496. end;
  { Passes the floating point value stored at ref as the parameter described
    by paraloc.

    list    - instruction list the code is appended to
    size    - size of the floating point value
    ref     - memory location of the value
    paraloc - parameter description; every location in its chain is filled

    href walks through the source value: after each location it is advanced by
    the size of that location. Raises internalerror 200408241 for a location
    kind that is not handled here. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      href,href2 : treference;
      hloc : pcgparalocation;
    begin
      href:=ref;
      hloc:=paraloc.location;
      while assigned(hloc) do
        begin
          case hloc^.loc of
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { allocate the location being filled in this iteration (not
                  always the first one of the chain) and read from the
                  current position within the source value }
                paramanager.allocparaloc(list,hloc);
                a_loadfpu_ref_reg(list,size,size,href,hloc^.register);
              end;
            LOC_REGISTER :
              case hloc^.size of
                OS_32,
                OS_F32:
                  begin
                    { see above: allocate the current location }
                    paramanager.allocparaloc(list,hloc);
                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
                  end;
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,href,paraloc);
                else
                  a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
              end;
            LOC_REFERENCE :
              begin
                reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,paraloc.alignment);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
              end;
            else
              internalerror(200408241);
          end;
          inc(href.offset,tcgsize2size[hloc^.size]);
          hloc:=hloc^.next;
        end;
    end;
  { Copies fpu register reg1 to reg2 with an MVF instruction whose precision
    postfix is looked up from tosize; fromsize is not evaluated. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      moveinstr : taicpu;
    begin
      moveinstr:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(moveinstr,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads fpu register reg from ref with an LDF instruction whose precision
    postfix is chosen from fromsize (32 bit: S, 64 bit: D, OS_F80: E).
    If tosize differs from fromsize, a register to register move with the
    target precision follows. Raises internalerror 200309021 for any other
    fromsize. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      case fromsize of
        OS_32,OS_F32:
          precision:=PF_S;
        OS_64,OS_F64:
          precision:=PF_D;
        OS_F80:
          precision:=PF_E;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores fpu register reg to ref with an STF instruction whose precision
    postfix is chosen from tosize (OS_F32: S, OS_F64: D, OS_F80: E);
    fromsize is not evaluated. Raises internalerror 200309022 for any other
    tosize. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      case tosize of
        OS_F80:
          precision:=PF_E;
        OS_F64:
          precision:=PF_D;
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309022);
      end;
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  1579. { comparison operations }
  { Compares reg against the constant a and branches to l when cmp_op holds.
    The flags register is marked as allocated for the whole compare/branch
    sequence. Three encodings are tried in order:
      1. CMP reg,#a   when a is directly encodable for the current mode
      2. CMN reg,#-a  (ARM mode only) when the negated value is encodable
      3. load a into a temporary register and CMP reg,tmp }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      encodable : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { can the constant be used as an immediate of CMP? }
      if GenerateThumbCode then
        encodable:=is_thumb_imm(a)
      else
        encodable:=is_shifter_const(a,rot);
      if encodable then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          { fall back to materialising the constant in a register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.
    Forward scan (reverse=false): RBIT + CLZ, then the result is replaced
    by $ff when CLZ returned 32.
    Reverse scan (reverse=true): CLZ, then dst:=(31-dst) and 255.
    srcsize and dstsize are not consulted. }
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { Thumb-2 gets an IT instruction in front of the conditional MOV }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits CMP reg2,reg1 followed by a conditional jump to l for cmp_op.
    The flags register is marked as allocated around the pair. size is
    not consulted. }
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional jump to the assembler symbol named s.
    Thumb code uses BL, all other modes use B; the instruction is flagged
    as a jump (is_jmp). }
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      jmpop : tasmop;
      jmpinstr : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmpinstr:=taicpu.op_sym(jmpop,current_asmdata.RefAsmSymbol(s));
      jmpinstr.is_jmp:=true;
      list.concat(jmpinstr);
    end;
  { Emits an unconditional jump to label l. Thumb code uses BL, all other
    modes use B; the instruction is flagged as a jump (is_jmp). }
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      jmpop : tasmop;
      jmpinstr : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        jmpop:=A_BL
      else
        jmpop:=A_B;
      jmpinstr:=taicpu.op_sym(jmpop,l);
      jmpinstr.is_jmp:=true;
      list.concat(jmpinstr);
    end;
  { Jumps to l when the condition described by f is true.
    Outside Thumb mode this is a single conditional B. In Thumb mode a
    conditional B on the inverted flags skips over an unconditional jump
    to l (emitted via a_jmp_always). }
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
      negated : TResFlags;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;
      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=setcondition(taicpu.op_sym(A_B,skiplabel),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Materialises the flags f as 0/1 in reg using two conditional MOVs:
    MOV<cond> reg,#1 followed by MOV<inverse cond> reg,#0. size is not
    consulted. }
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truecond : tasmcond;
    begin
      truecond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),truecond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(truecond)));
    end;
  { Emits the profiling hook: on arm-linux R14 is pushed and
    __gnu_mcount_nc is called. Every other target triggers internal error
    2014091201. }
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  { Emits the procedure prologue into list.
    Unless nostackframe is set it
      - collects the non-volatile FPU (FPA) or MM (VFP) registers used by the procedure,
      - pushes the non-volatile integer registers with STMFD (two pushes on Darwin),
      - sets up the frame pointer when one is used,
      - subtracts the padded local size from the stack pointer, and
      - stores the floating point registers (SFM for FPA, VSTM for VFP).
    localsize is first rounded up to a multiple of 4.
    When nostackframe is true no instructions are emitted; only the
    stackpaddingreg of the procinfo is reset. }
  1696. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1697. var
  1698. ref : treference;
  1699. shift : byte;
  1700. firstfloatreg,lastfloatreg,
  1701. r : byte;
  1702. mmregs,
  1703. regs, saveregs : tcpuregisterset;
  1704. registerarea,
  1705. r7offset,
  1706. stackmisalignment : pint;
  1707. imm1, imm2: DWord;
  1708. stack_parameters : Boolean;
  1709. begin
  1710. LocalSize:=align(LocalSize,4);
  1711. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1712. { call instruction does not put anything on the stack }
  1713. registerarea:=0;
  { High(TSuperRegister) means "no padding register"; g_proc_exit only
    re-adds the padding register when its value is below RS_R4 }
  1714. tarmprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1715. lastfloatreg:=RS_NO;
  1716. if not(nostackframe) then
  1717. begin
  1718. firstfloatreg:=RS_NO;
  1719. mmregs:=[];
  1720. case current_settings.fputype of
  1721. fpu_fpa,
  1722. fpu_fpa10,
  1723. fpu_fpa11:
  1724. begin
  1725. { save floating point registers? }
  1726. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  { remember the first and last used register and count 12 bytes of
    register area for each used one }
  1727. for r:=RS_F0 to RS_F7 do
  1728. if r in regs then
  1729. begin
  1730. if firstfloatreg=RS_NO then
  1731. firstfloatreg:=r;
  1732. lastfloatreg:=r;
  1733. inc(registerarea,12);
  1734. end;
  1735. end;
  1736. fpu_vfpv2,
  1737. fpu_vfpv3,
  1738. fpu_vfpv3_d16:
  1739. begin;
  1740. mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
  1741. end;
  1742. end;
  1743. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1744. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1745. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1746. { save int registers }
  { ref describes a pre-indexed access through the stack pointer; it is
    used as the address operand of the STM instructions below }
  1747. reference_reset(ref,4);
  1748. ref.index:=NR_STACK_POINTER_REG;
  1749. ref.addressmode:=AM_PREINDEXED;
  1750. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1751. if not(target_info.system in systems_darwin) then
  1752. begin
  1753. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1754. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1755. begin
  { R12 receives a copy of the stack pointer as it was on entry }
  1756. a_reg_alloc(list,NR_R12);
  1757. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1758. end;
  1759. { the (old) ARM APCS requires saving both the stack pointer (to
  1760. crawl the stack) and the PC (to identify the function this
  1761. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1762. and R15 -- still needs updating for EABI and Darwin, they don't
  1763. need that }
  1764. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1765. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1766. else
  1767. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1768. include(regs,RS_R14);
  1769. if regs<>[] then
  1770. begin
  1771. for r:=RS_R0 to RS_R15 do
  1772. if r in regs then
  1773. inc(registerarea,4);
  1774. { if the stack is not 8 byte aligned, try to add an extra register,
  1775. so we can avoid the extra sub/add ...,#4 later (KB) }
  { the first free register found when going from R3 down to R0 is pushed
    as padding and recorded in stackpaddingreg }
  1776. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1777. for r:=RS_R3 downto RS_R0 do
  1778. if not(r in regs) then
  1779. begin
  1780. regs:=regs+[r];
  1781. inc(registerarea,4);
  1782. tarmprocinfo(current_procinfo).stackpaddingreg:=r;
  1783. break;
  1784. end;
  1785. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1786. end;
  1787. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1788. begin
  1789. { the framepointer now points to the saved R15, so the saved
  1790. framepointer is at R11-12 (for get_caller_frame) }
  1791. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1792. a_reg_dealloc(list,NR_R12);
  1793. end;
  1794. end
  1795. else
  1796. begin
  1797. { always save r14 if we use r7 as the framepointer, because
  1798. the parameter offsets are hardcoded in advance and always
  1799. assume that r14 sits on the stack right behind the saved r7
  1800. }
  1801. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1802. include(regs,RS_FRAME_POINTER_REG);
  1803. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1804. include(regs,RS_R14);
  1805. if regs<>[] then
  1806. begin
  1807. { on Darwin, you first have to save [r4-r7,lr], and then
  1808. [r8,r10,r11] and make r7 point to the previously saved
  1809. r7 so that you can perform a stack crawl based on it
  1810. ([r7] is previous stack frame, [r7+4] is return address
  1811. }
  1812. include(regs,RS_FRAME_POINTER_REG);
  1813. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
  { r7offset accumulates 4 bytes for every register of the first push
    that is numbered below the frame pointer register }
  1814. r7offset:=0;
  1815. for r:=RS_R0 to RS_R15 do
  1816. if r in saveregs then
  1817. begin
  1818. inc(registerarea,4);
  1819. if r<RS_FRAME_POINTER_REG then
  1820. inc(r7offset,4);
  1821. end;
  1822. { save the registers }
  1823. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1824. { make r7 point to the saved r7 (regardless of whether this
  1825. frame uses the framepointer, for backtrace purposes) }
  1826. if r7offset<>0 then
  1827. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1828. else
  1829. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1830. { now save the rest (if any) }
  1831. saveregs:=regs-saveregs;
  1832. if saveregs<>[] then
  1833. begin
  1834. for r:=RS_R8 to RS_R11 do
  1835. if r in saveregs then
  1836. inc(registerarea,4);
  1837. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1838. end;
  1839. end;
  1840. end;
  { number of bytes by which the register area misses a multiple of
    localalignmax }
  1841. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1842. if (LocalSize<>0) or
  1843. ((stackmisalignment<>0) and
  1844. ((pi_do_call in current_procinfo.flags) or
  1845. (po_assembler in current_procinfo.procdef.procoptions))) then
  1846. begin
  { pad localsize so that register area plus locals is a multiple of
    localalignmax }
  1847. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  1848. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1849. begin
  { the estimated frame size must cover the computed one; it then
    replaces the computed size }
  1850. if localsize>tarmprocinfo(current_procinfo).stackframesize then
  1851. internalerror(2014030901)
  1852. else
  1853. localsize:=tarmprocinfo(current_procinfo).stackframesize-registerarea;
  1854. end;
  { adjust SP with one immediate SUB, two immediate SUBs, or a SUB with
    the size loaded into R12, whichever encoding is possible first }
  1855. if is_shifter_const(localsize,shift) then
  1856. begin
  1857. a_reg_dealloc(list,NR_R12);
  1858. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1859. end
  1860. else if split_into_shifter_const(localsize, imm1, imm2) then
  1861. begin
  1862. a_reg_dealloc(list,NR_R12);
  1863. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1864. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1865. end
  1866. else
  1867. begin
  1868. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1869. a_reg_alloc(list,NR_R12);
  1870. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1871. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1872. a_reg_dealloc(list,NR_R12);
  1873. end;
  1874. end;
  { store the floating point registers at floatregstart relative to the
    frame pointer }
  1875. if (mmregs<>[]) or
  1876. (firstfloatreg<>RS_NO) then
  1877. begin
  1878. reference_reset(ref,4);
  { for VFP, or when the scaled floatregstart is 1023 or more, the
    address is computed into R12 first; otherwise base+offset is used }
  1879. if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
  1880. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
  1881. begin
  1882. if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
  1883. begin
  1884. a_reg_alloc(list,NR_R12);
  1885. a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
  1886. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1887. a_reg_dealloc(list,NR_R12);
  1888. end
  1889. else
  1890. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tarmprocinfo(current_procinfo).floatregstart));
  1891. ref.base:=NR_R12;
  1892. end
  1893. else
  1894. begin
  1895. ref.base:=current_procinfo.framepointer;
  1896. ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
  1897. end;
  1898. case current_settings.fputype of
  1899. fpu_fpa,
  1900. fpu_fpa10,
  1901. fpu_fpa11:
  1902. begin
  { one SFM stores the whole range firstfloatreg..lastfloatreg }
  1903. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1904. lastfloatreg-firstfloatreg+1,ref));
  1905. end;
  1906. fpu_vfpv2,
  1907. fpu_vfpv3,
  1908. fpu_vfpv3_d16:
  1909. begin
  { VSTM gets the address register in ref.index instead of ref.base }
  1910. ref.index:=ref.base;
  1911. ref.base:=NR_NO;
  1912. { FSTMX is deprecated on ARMv6 and later }
  1913. {if (current_settings.cputype<cpu_armv6) then
  1914. postfix:=PF_IAX
  1915. else
  1916. postfix:=PF_IAD;}
  1917. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1918. end;
  1919. end;
  1920. end;
  1921. end;
  1922. end;
  { Emits the procedure epilogue into list.
    Without a stack frame only the return is emitted (BX R14 when the CPU
    has BX, otherwise MOV PC,R14). Otherwise the saved FPU/MM registers are
    reloaded (LFM for FPA, VLDM for VFP), the stack pointer is moved back
    over the locals where needed, and the integer registers are restored
    with LDM.
    parasize is not referenced by this implementation. }
  1923. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  1924. var
  1925. ref : treference;
  1926. LocalSize : longint;
  1927. firstfloatreg,lastfloatreg,
  1928. r,
  1929. shift : byte;
  1930. mmregs,
  1931. saveregs,
  1932. regs : tcpuregisterset;
  1933. registerarea,
  1934. stackmisalignment: pint;
  1935. paddingreg: TSuperRegister;
  1936. imm1, imm2: DWord;
  1937. begin
  1938. if not(nostackframe) then
  1939. begin
  1940. registerarea:=0;
  1941. firstfloatreg:=RS_NO;
  1942. lastfloatreg:=RS_NO;
  1943. mmregs:=[];
  1944. saveregs:=[];
  1945. case current_settings.fputype of
  1946. fpu_fpa,
  1947. fpu_fpa10,
  1948. fpu_fpa11:
  1949. begin
  1950. { restore floating point registers? }
  1951. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1952. for r:=RS_F0 to RS_F7 do
  1953. if r in regs then
  1954. begin
  1955. if firstfloatreg=RS_NO then
  1956. firstfloatreg:=r;
  1957. lastfloatreg:=r;
  1958. { floating point register space is already included in
  1959. localsize below by calc_stackframe_size
  1960. inc(registerarea,12);
  1961. }
  1962. end;
  1963. end;
  1964. fpu_vfpv2,
  1965. fpu_vfpv3,
  1966. fpu_vfpv3_d16:
  1967. begin;
  1968. { restore vfp registers? }
  1969. mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
  1970. end;
  1971. end;
  { reload the floating point registers from floatregstart relative to
    the frame pointer; the address computation mirrors g_proc_entry }
  1972. if (firstfloatreg<>RS_NO) or
  1973. (mmregs<>[]) then
  1974. begin
  1975. reference_reset(ref,4);
  1976. if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
  1977. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
  1978. begin
  1979. if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
  1980. begin
  1981. a_reg_alloc(list,NR_R12);
  1982. a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
  1983. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1984. a_reg_dealloc(list,NR_R12);
  1985. end
  1986. else
  1987. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tarmprocinfo(current_procinfo).floatregstart));
  1988. ref.base:=NR_R12;
  1989. end
  1990. else
  1991. begin
  1992. ref.base:=current_procinfo.framepointer;
  1993. ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
  1994. end;
  1995. case current_settings.fputype of
  1996. fpu_fpa,
  1997. fpu_fpa10,
  1998. fpu_fpa11:
  1999. begin
  2000. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  2001. lastfloatreg-firstfloatreg+1,ref));
  2002. end;
  2003. fpu_vfpv2,
  2004. fpu_vfpv3,
  2005. fpu_vfpv3_d16:
  2006. begin
  2007. ref.index:=ref.base;
  2008. ref.base:=NR_NO;
  2009. { FLDMX is deprecated on ARMv6 and later }
  2010. {if (current_settings.cputype<cpu_armv6) then
  2011. mmpostfix:=PF_IAX
  2012. else
  2013. mmpostfix:=PF_IAD;}
  2014. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2015. end;
  2016. end;
  2017. end;
  2018. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  2019. if (pi_do_call in current_procinfo.flags) or
  2020. (regs<>[]) or
  2021. ((target_info.system in systems_darwin) and
  2022. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2023. begin
  { R15 takes the place of R14 in the restore set, so the LDM below also
    performs the return }
  2024. exclude(regs,RS_R14);
  2025. include(regs,RS_R15);
  2026. if (target_info.system in systems_darwin) then
  2027. include(regs,RS_FRAME_POINTER_REG);
  2028. end;
  2029. if not(target_info.system in systems_darwin) then
  2030. begin
  2031. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2032. The saved PC came after that but is discarded, since we restore
  2033. the stack pointer }
  2034. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2035. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2036. end
  2037. else
  2038. begin
  2039. { restore R8-R11 already if necessary (they've been stored
  2040. before the others) }
  { only the reference and the register set are prepared here; the LDM
    for saveregs is emitted further down, after the stack pointer has
    been adjusted }
  2041. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2042. if saveregs<>[] then
  2043. begin
  2044. reference_reset(ref,4);
  2045. ref.index:=NR_STACK_POINTER_REG;
  2046. ref.addressmode:=AM_PREINDEXED;
  2047. for r:=RS_R8 to RS_R11 do
  2048. if r in saveregs then
  2049. inc(registerarea,4);
  2050. regs:=regs-saveregs;
  2051. end;
  2052. end;
  2053. for r:=RS_R0 to RS_R15 do
  2054. if r in regs then
  2055. inc(registerarea,4);
  2056. { reapply the stack padding reg, in case there was one, see the complementary
  2057. comment in g_proc_entry() (KB) }
  { a value below RS_R4 means g_proc_entry recorded a padding register;
    the else branch below belongs to the inner if }
  2058. paddingreg:=tarmprocinfo(current_procinfo).stackpaddingreg;
  2059. if paddingreg < RS_R4 then
  2060. if paddingreg in regs then
  2061. internalerror(201306190)
  2062. else
  2063. begin
  2064. regs:=regs+[paddingreg];
  2065. inc(registerarea,4);
  2066. end;
  2067. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  2068. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2069. (target_info.system in systems_darwin) then
  2070. begin
  { no frame pointer based restore possible here: move SP back over the
    locals first, then pop the registers }
  2071. LocalSize:=current_procinfo.calc_stackframe_size;
  2072. if (LocalSize<>0) or
  2073. ((stackmisalignment<>0) and
  2074. ((pi_do_call in current_procinfo.flags) or
  2075. (po_assembler in current_procinfo.procdef.procoptions))) then
  2076. begin
  2077. if pi_estimatestacksize in current_procinfo.flags then
  2078. LocalSize:=tarmprocinfo(current_procinfo).stackframesize-registerarea
  2079. else
  2080. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  { add the size back with one immediate ADD, two immediate ADDs, or an
    ADD with the size loaded into R12 }
  2081. if is_shifter_const(LocalSize,shift) then
  2082. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2083. else if split_into_shifter_const(localsize, imm1, imm2) then
  2084. begin
  2085. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2086. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2087. end
  2088. else
  2089. begin
  2090. a_reg_alloc(list,NR_R12);
  2091. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2092. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2093. a_reg_dealloc(list,NR_R12);
  2094. end;
  2095. end;
  { Darwin: pop the separately pushed high registers using the reference
    prepared above }
  2096. if (target_info.system in systems_darwin) and
  2097. (saveregs<>[]) then
  2098. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  2099. if regs=[] then
  2100. begin
  { nothing to pop: plain return through R14 }
  2101. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2102. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2103. else
  2104. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2105. end
  2106. else
  2107. begin
  2108. reference_reset(ref,4);
  2109. ref.index:=NR_STACK_POINTER_REG;
  2110. ref.addressmode:=AM_PREINDEXED;
  2111. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2112. end;
  2113. end
  2114. else
  2115. begin
  2116. { restore int registers and return }
  { NOTE(review): ref.addressmode is left at whatever reference_reset
    sets, presumably meaning no writeback of the frame pointer -- confirm }
  2117. reference_reset(ref,4);
  2118. ref.index:=NR_FRAME_POINTER_REG;
  2119. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2120. end;
  2121. end
  2122. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2123. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2124. else
  2125. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2126. end;
  { Emits the code that initialises the GOT register of the current
    procedure. Nothing is emitted unless PIC code is being generated, the
    procedure needs the GOT and the target uses a GOT for PIC.
    The GOT address is computed in R12 from a PC-relative literal and then
    moved into current_procinfo.got. }
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    var
      gotref : treference;
      lbl : TAsmLabel;
      usedregs : tcpuregisterset;
      sreg : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;

      { Procedure parameters are not initialized at this stage.
        Mark the parameter registers R0..R3 that the procedure uses as
        allocated before the GOT initialization code, so that later code
        generation stages do not pick them for temporary operations. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sreg:=RS_R0 to RS_R3 do
        if sreg in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));

      { R12 is used directly as scratch register for the GOT calculation.
        Otherwise the init code can be distorted in later stages of code
        generation. }
      a_reg_alloc(list,NR_R12);

      { literal pool entry holding the GOT offset, loaded PC-relative }
      reference_reset(gotref,4);
      current_asmdata.getglobaldatalabel(lbl);
      cg.a_label(current_procinfo.aktlocaldata,lbl);
      gotref.symbol:=lbl;
      gotref.base:=NR_PC;
      gotref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,gotref));

      { the pool entry is relative to the label placed right before the ADD }
      current_asmdata.getaddrlabel(lbl);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,lbl,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_'),-8));
      cg.a_label(list,lbl);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));

      { release the registers again, in reverse order of allocation }
      a_reg_dealloc(list,NR_R12);
      for sreg:=RS_R3 downto RS_R0 do
        if sreg in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sreg,R_SUBWHOLE));
    end;
  { Loads the effective address described by ref into register r.
    Only AM_OFFSET references are accepted (internal error 200309071
    otherwise). References that carry a symbol, or an offset that is not
    encodable as shifter constant in either sign, are first simplified by
    fixref. The address is then formed with ADD/SUB of base and index plus
    the offset, a constant load, or a plain register move. }
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      rot : byte;
      href : treference;
      movinstr : taicpu;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      href:=ref;

      { Be sure to have a base register: promote a lone index to base }
      if href.base=NR_NO then
        begin
          if href.shiftmode<>SM_None then
            internalerror(2014020702);
          if href.signindex<0 then
            internalerror(200312023);
          href.base:=href.index;
          href.index:=NR_NO;
        end;

      if assigned(href.symbol) or
         (not(is_shifter_const(href.offset,rot)) and
          not(is_shifter_const(-href.offset,rot))) then
        fixref(list,href);

      { expect a base here if there is an index }
      if (href.base=NR_NO) and (href.index<>NR_NO) then
        internalerror(200312022);

      if href.index<>NR_NO then
        begin
          { base +/- index, followed by the offset if there is one }
          if href.shiftmode<>SM_None then
            internalerror(200312021);
          if href.signindex<0 then
            a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,href.base,href.index,r)
          else
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.base,href.index,r);
          if href.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,r,r);
        end
      else if href.base=NR_NO then
        { neither base nor index: the address is just the offset }
        a_load_const_reg(list,OS_ADDR,href.offset,r)
      else if href.offset<>0 then
        a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
      else
        begin
          { plain copy of the base; registered as move instruction }
          movinstr:=taicpu.op_reg_reg(A_MOV,r,href.base);
          list.concat(movinstr);
          add_move_instruction(movinstr);
        end;
    end;
  { Rewrites ref so that it no longer carries a symbol or an offset.
    The symbol and/or offset is placed in a constant pool entry in
    current_procinfo.aktlocaldata (or, for external symbols on arm-darwin,
    obtained through g_indirect_sym_load) and loaded into a temporary
    register, which is then merged into ref as base or index.
    On exit ref.offset is 0 and ref.symbol is nil; a PC base/index in the
    incoming reference is removed. }
  2221. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
  2222. var
  2223. tmpreg, tmpreg2 : tregister;
  2224. tmpref : treference;
  2225. l, piclabel : tasmlabel;
  2226. indirection_done : boolean;
  2227. begin
  2228. { absolute symbols can't be handled directly, we've to store the symbol reference
  2229. in the text segment and access it pc relative
  2230. For now, we assume that references where base or index equals to PC are already
  2231. relative, all other references are assumed to be absolute and thus they need
  2232. to be handled extra.
  2233. A proper solution would be to change refoptions to a set and store the information
  2234. if the symbol is absolute or relative there.
  2235. }
  2236. { create consts entry }
  { the label l is added to aktlocaldata unconditionally, also when the
    Darwin indirection path below adds no data after it }
  2237. reference_reset(tmpref,4);
  2238. current_asmdata.getjumplabel(l);
  2239. cg.a_label(current_procinfo.aktlocaldata,l);
  2240. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2241. piclabel:=nil;
  2242. tmpreg:=NR_NO;
  2243. indirection_done:=false;
  2244. if assigned(ref.symbol) then
  2245. begin
  2246. if (target_info.system=system_arm_darwin) and
  2247. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2248. begin
  { the address comes from g_indirect_sym_load; the offset is added to
    the returned register and no pool load is emitted afterwards }
  2249. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2250. if ref.offset<>0 then
  2251. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2252. indirection_done:=true;
  2253. end
  2254. else if (cs_create_pic in current_settings.moduleswitches) then
  2255. if (tf_pic_uses_got in target_info.flags) then
  { GOT based PIC: the pool entry is a GOT-type constant for the symbol }
  2256. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2257. else
  2258. begin
  2259. { ideally, we would want to generate
  2260. ldr r1, LPICConstPool
  2261. LPICLocal:
  2262. ldr/str r2,[pc,r1]
  2263. ...
  2264. LPICConstPool:
  2265. .long _globsym-(LPICLocal+8)
  2266. However, we cannot be sure that the ldr/str will follow
  2267. right after the call to fixref, so we have to load the
  2268. complete address already in a register.
  2269. }
  2270. current_asmdata.getaddrlabel(piclabel);
  2271. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2272. end
  2273. else
  { non-PIC: the pool entry is the symbol plus offset }
  2274. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2275. end
  2276. else
  { no symbol: the pool entry is just the 32 bit offset }
  2277. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2278. { load consts entry }
  2279. if not indirection_done then
  2280. begin
  2281. tmpreg:=getintregister(list,OS_INT);
  2282. tmpref.symbol:=l;
  2283. tmpref.base:=NR_PC;
  2284. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2285. if (cs_create_pic in current_settings.moduleswitches) and
  2286. (tf_pic_uses_got in target_info.flags) and
  2287. assigned(ref.symbol) then
  2288. begin
  { second load through the GOT register, indexed by the value just
    loaded from the pool; the offset is added afterwards }
  2289. reference_reset(tmpref,4);
  2290. tmpref.base:=current_procinfo.got;
  2291. tmpref.index:=tmpreg;
  2292. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2293. if ref.offset<>0 then
  2294. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2295. end;
  2296. end;
  2297. if assigned(piclabel) then
  2298. begin
  { PIC without GOT: place piclabel here and add PC to the loaded value,
    matching the piclabel-relative pool entry created above }
  2299. cg.a_label(list,piclabel);
  2300. tmpreg2:=getaddressregister(list);
  2301. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2302. tmpreg:=tmpreg2
  2303. end;
  2304. { This routine can be called with PC as base/index in case the offset
  2305. was too large to encode in a load/store. In that case, the entire
  2306. absolute expression has been re-encoded in a new constpool entry, and
  2307. we have to remove the use of PC from the original reference (the code
  2308. above made everything relative to the value loaded from the new
  2309. constpool entry) }
  2310. if is_pc(ref.base) then
  2311. ref.base:=NR_NO;
  2312. if is_pc(ref.index) then
  2313. ref.index:=NR_NO;
  { merge tmpreg into the reference: with base and index present the
    base is folded into tmpreg by an ADD; with only a base tmpreg becomes
    the index; without a base tmpreg becomes the base }
  2314. if (ref.base<>NR_NO) then
  2315. begin
  2316. if ref.index<>NR_NO then
  2317. begin
  2318. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2319. ref.base:=tmpreg;
  2320. end
  2321. else
  2322. if ref.base<>NR_PC then
  2323. begin
  2324. ref.index:=tmpreg;
  2325. ref.shiftimm:=0;
  2326. ref.signindex:=1;
  2327. ref.shiftmode:=SM_None;
  2328. end
  2329. else
  2330. ref.base:=tmpreg;
  2331. end
  2332. else
  2333. ref.base:=tmpreg;
  2334. ref.offset:=0;
  2335. ref.symbol:=nil;
  2336. end;
  { Copies len bytes from source to dest by calling FPC_MOVE.
    The three parameter locations are taken from the system procedure MOVE
    (1 = source address, 2 = destination address, 3 = length). They are
    loaded in the order 3, 2, 1 and released again before the call. The
    volatile integer and FPU registers are marked as allocated around the
    call. }
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      srcpara,dstpara,lenpara : TCGPara;
      movedef : tprocdef;
    begin
      movedef:=search_system_proc('MOVE');
      srcpara.init;
      dstpara.init;
      lenpara.init;
      paramanager.getintparaloc(list,movedef,1,srcpara);
      paramanager.getintparaloc(list,movedef,2,dstpara);
      paramanager.getintparaloc(list,movedef,3,lenpara);

      { load the arguments, last parameter first }
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);

      { the callee may clobber every volatile register }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list    : instruction list the generated code is appended to
    source  : reference to the first byte to read
    dest    : reference to the first byte to write
    len     : number of bytes to copy; nothing is emitted when it is 0
    aligned : when false only byte sized accesses are generated

    Strategy, in the order it is tested below:
      - aligned and len is 4 or 2   : one load/store pair
      - aligned and len<=helpsize   : up to maxtmpreg word loads into
                                      temporaries, the matching word stores,
                                      then a 4/2/1 byte tail
      - not aligned and len<=4      : unrolled byte copies
      - everything else             : counted byte loop (genloop or
                                      genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { roozbeh: can be reduced to 8 or lower if it might conflict with
        reserved registers; also +2 is used because of the registers
        required for referencing }
      maxtmpreg_arm = 10;
      maxtmpreg_thumb = 5;
    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      { sized for the larger (ARM) limit; Thumb only uses the first
        maxtmpreg_thumb entries }
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Emits a counted copy loop using post-indexed addressing, followed by
      the copy of the (count mod size) left-over bytes.
      Expects srcref/dstref to be based on srcreg/destreg and countreg to
      be allocated by the caller.
      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        { every access advances the base register by size }
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { the flag setting decrement is placed between load and store }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { left-over bytes: the post-increment defaults to one byte }
        srcref.offset:=1;
        dstref.offset:=1;
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { Thumb variant of genloop: the base registers are advanced with
      explicit ADD instructions instead of post-indexed addressing.
      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        { advance by the element size; the previous fixed step of 1 was
          only correct for size=1 }
        refincofs(srcref,size);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref,size);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { NOTE(review): no PF_S postfix here, presumably because the Thumb
          SUB always updates the flags - confirm }
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest length that is still copied unrolled }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      { when optimizing for size, switch to the loop much earlier }
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          srcreg:=getintregister(list,OS_ADDR);

          { explicit pc relative addressing, could be
            e.g. a floating point constant }
          if source.base=NR_PC then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.alignment);
            end;

          { load as many whole words as there are temporaries available }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;

          destreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,dest,destreg);
          reference_reset_base(dstref,destreg,0,dest.alignment);
          { store the loaded words in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;

          { copy what is left in chunks of 4, then 2, then 1 byte }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              { the returned references are reused for the following bytes }
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.alignment);

              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.alignment);

              countreg:=getintregister(list,OS_32);

              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    const
      assume_aligned = false;
    begin
      g_concatcopy_internal(list,source,dest,len,assume_aligned);
    end;
  { Copies len bytes from source to dest. The copy is treated as aligned
    unless either reference has an alignment of 1 or 3. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      bytewise : boolean;
    begin
      bytewise:=(source.alignment in [1,3]) or
        (dest.alignment in [1,3]);
      g_concatcopy_internal(list,source,dest,len,not bytewise);
    end;
  { Overflow check without an explicit overflow location: calls
    g_overflowCheck_loc with a location whose loc field is LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      no_ovloc : tlocation;
    begin
      { only the loc field is initialised, exactly as much as the callee
        inspects for LOC_VOID }
      no_ovloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,no_ovloc);
    end;
  { Emits an overflow check: a conditional branch that skips a call to
    FPC_OVERFLOW when no overflow occurred.

    List  : instruction list the check is appended to
    Loc   : not used by this implementation
    def   : type of the checked result; selects between the signed test
            (V flag) and the unsigned test (C flag) when ovloc is LOC_VOID
    ovloc : LOC_VOID  - the flags of the last emitted instruction are tested
            LOC_FLAGS - ovloc.resflags is the condition signalling overflow
            any other location raises internalerror 200409281

    Nothing is emitted unless cs_check_overflow is active. }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      hl : tasmlabel;
      ai:TAiCpu;
      hflags : tresflags;
      lastop : tasmop;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(hl);
      case ovloc.loc of
        LOC_VOID:
          begin
            ai:=taicpu.op_sym(A_B,hl);
            ai.is_jmp:=true;

            if not((def.typ=pointerdef) or
                  ((def.typ=orddef) and
                   (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                             pasbool8,pasbool16,pasbool32,pasbool64]))) then
              { signed: no overflow when V is clear }
              ai.SetCondition(C_VC)
            else
              begin
                { unsigned: the meaning of the carry flag depends on whether
                  the last instruction was a subtraction. Only look at the
                  opcode when the list really ends with an instruction, the
                  unchecked cast would otherwise read an arbitrary field or
                  dereference nil on an empty list }
                lastop:=A_NONE;
                if assigned(List.Last) and
                   (tai(List.Last).typ=ait_instruction) then
                  lastop:=TAiCpu(List.Last).opcode;
                if lastop in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  ai.SetCondition(C_CS)
                else
                  ai.SetCondition(C_CC);
              end;

            list.concat(ai);
          end;
        LOC_FLAGS:
          begin
            { jump over the error call when the overflow condition is false }
            hflags:=ovloc.resflags;
            inverse_flags(hflags);
            cg.a_jmp_flags(list,hflags,hl);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(200409281);
      end;

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,hl);
    end;
  { Intentionally empty. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { nothing to emit here: this work is done in g_proc_entry }
    end;
  { Intentionally empty. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { nothing to emit here: this work is done in g_proc_exit }
    end;
  { Emits a jump to label l that is taken when comparison cond holds.

    Outside Thumb code this is a single conditional branch. For Thumb code
    a branch with the inverted condition jumps over an unconditional jump
    to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
        end
      else
        begin
          { the optimizer has to fix this if the jump range is sufficiently short }
          current_asmdata.getjumplabel(skiplabel);
          branch:=Taicpu.Op_sym(A_B,skiplabel);
          branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
          branch.is_jmp:=true;
          list.concat(branch);
          a_jmp_always(list,l);
          a_label(list,skiplabel);
        end;
    end;
  { Returns the opcode used to move/convert a scalar float between the two
    given float sizes: A_VMOV for equal F32/F64 sizes, A_VCVT between F32
    and F64. Every other combination raises internalerror 200312205. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      { rows are indexed by fromsize, columns by tosize }
      opcode_for : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    var
      op : tasmop;
    begin
      op:=opcode_for[fromsize,tosize];
      result:=op;
      if op=A_NONE then
        internalerror(200312205);
    end;
  { Returns the instruction postfix belonging to the opcode delivered by
    get_scalar_mm_op for the same pair of sizes; PF_None for unsupported
    combinations (no error is raised here). }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      { rows are indexed by fromsize, columns by tosize }
      postfix_for : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      get_scalar_mm_prefix:=postfix_for[fromsize,tosize];
    end;
  { Emits a scalar move or conversion from mm register reg1 (fromsize) to
    mm register reg2 (tosize). Only nil or scalar shuffles are supported,
    anything else raises internalerror 2009112407. A plain VMOV is also
    registered through add_move_instruction. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
      movop: tasmop;
      movpostfix: TOpPostfix;
    begin
      if (shuffle=nil) or shufflescalar(shuffle) then
        begin
          { both helpers are called with (tosize,fromsize), i.e. the
            destination size selects the table row }
          movop:=get_scalar_mm_op(tosize,fromsize);
          movpostfix:=get_scalar_mm_prefix(tosize,fromsize);
          mov:=setoppostfix(taicpu.op_reg_reg(movop,reg2,reg1),movpostfix);
        end
      else
        internalerror(2009112407);
      list.concat(mov);
      if mov.opcode=A_VMOV then
        add_move_instruction(mov);
    end;
  { Loads a scalar from memory reference ref into mm register reg.

    fromsize : size of the value in memory; the integer sizes OS_32/OS_S32
               and OS_64/OS_S64 are treated as OS_F32/OS_F64 and must then
               equal tosize
    tosize   : size wanted in reg; if it differs from fromsize the value is
               loaded into a temporary mm register and converted afterwards
    shuffle  : must be nil or a scalar shuffle

    References with alignment 1 or 2 are loaded through integer registers,
    all others with VLDR. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpreg,
      loadreg : tregister;
      gppair : tregister64;
    begin
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112413);

      if fromsize in [OS_32,OS_S32] then
        begin
          fromsize:=OS_F32;
          { raw integer data is loaded, so no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112801);
        end
      else if fromsize in [OS_64,OS_S64] then
        begin
          fromsize:=OS_F64;
          { raw integer data is loaded, so no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112901);
        end;

      { load straight into reg unless a conversion has to follow }
      if fromsize=tosize then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        begin
          handle_load_store(list,A_VLDR,PF_None,loadreg,ref);
        end
      else
        begin
          case fromsize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_load_ref_reg(list,OS_32,OS_32,ref,gpreg);
                a_loadmm_intreg_reg(list,OS_32,OS_F32,gpreg,loadreg,mms_movescalar);
              end;
            OS_F64:
              begin
                gppair.reglo:=getintregister(list,OS_32);
                gppair.reghi:=getintregister(list,OS_32);
                cg64.a_load64_ref_reg(list,ref,gppair);
                cg64.a_loadmm_intreg64_reg(list,OS_F64,gppair,loadreg);
              end;
            else
              internalerror(2009112412);
          end;
        end;

      if loadreg<>reg then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar in mm register reg to memory reference ref.

    fromsize : size of the value in reg
    tosize   : size to store; the integer sizes OS_32/OS_S32 and
               OS_64/OS_S64 are treated as OS_F32/OS_F64 and must then
               equal fromsize. If the sizes differ the value is first
               converted into a temporary mm register
    shuffle  : must be nil or a scalar shuffle

    References with alignment 1 or 2 are written through integer registers,
    all others with VSTR. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpreg,
      storereg : tregister;
      gppair : tregister64;
    begin
      if (shuffle<>nil) and
         not shufflescalar(shuffle) then
        internalerror(2009112416);

      if tosize in [OS_32,OS_S32] then
        begin
          tosize:=OS_F32;
          { raw integer data is stored, so no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112801);
        end
      else if tosize in [OS_64,OS_S64] then
        begin
          tosize:=OS_F64;
          { raw integer data is stored, so no conversion may be required }
          if tosize<>fromsize then
            internalerror(2009112901);
        end;

      if fromsize=tosize then
        storereg:=reg
      else
        begin
          { convert first, then store the converted temporary }
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        begin
          handle_load_store(list,A_VSTR,PF_None,storereg,ref);
        end
      else
        begin
          case tosize of
            OS_F32:
              begin
                gpreg:=getintregister(list,OS_32);
                a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpreg,shuffle);
                a_load_reg_ref(list,OS_32,OS_32,gpreg,ref);
              end;
            OS_F64:
              begin
                gppair.reglo:=getintregister(list,OS_32);
                gppair.reghi:=getintregister(list,OS_32);
                cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gppair);
                cg64.a_load64_reg_ref(list,gppair,ref);
              end;
            else
              internalerror(2009112417);
          end;
        end;
    end;
  { Transfers the raw 32 bit contents of integer register intreg into mm
    register mmreg with VMOV. No conversion is performed: tosize must be
    OS_F32, fromsize OS_32 or OS_S32, and shuffle nil or scalar; each
    violation raises its own internalerror. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      vectorshuffle : boolean;
    begin
      { the checks are kept in this order so the same error wins when
        several conditions are violated }
      if tosize<>OS_F32 then
        internalerror(2009112419);
      if not(fromsize in [OS_32,OS_S32]) then
        internalerror(2009112420);
      vectorshuffle:=(shuffle<>nil) and
        not shufflescalar(shuffle);
      if vectorshuffle then
        internalerror(2009112516);
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of mm register mmreg into integer
    register intreg with VMOV. No conversion is performed: fromsize must be
    OS_F32, tosize OS_32 or OS_S32, and shuffle nil or scalar; each
    violation raises an internalerror. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      vectorshuffle : boolean;
    begin
      { the checks are kept in this order so the same error wins when
        several conditions are violated }
      if fromsize<>OS_F32 then
        internalerror(2009112430);
      if not(tosize in [OS_32,OS_S32]) then
        internalerror(2009112420);
      vectorshuffle:=(shuffle<>nil) and
        not shufflescalar(shuffle);
      if vectorshuffle then
        internalerror(2009112514);
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Generic mm register operation. Only OP_XOR of a register with itself
    is implemented, as a way of zeroing an mm register:
    the vfp doesn't support xor nor any other logical operation, but this
    routine is used to initialise global mm regvars, and an mm reg can
    easily be initialised with 0.

    Requires src=dst, reg_cgsize(src)=size and shuffle=nil
    (internalerror 2009112907 otherwise); size must be OS_F32 or OS_F64
    (internalerror 2009112908); any other op raises 2009112906. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op=OP_XOR then
        begin
          if (src<>dst) or
             (reg_cgsize(src)<>size) or
             (shuffle<>nil) then
            internalerror(2009112907);
          { materialise 0 in an integer register and move it over }
          zeroreg:=getintregister(list,OS_32);
          a_load_const_reg(list,OS_32,0,zeroreg);
          if size=OS_F32 then
            list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
          else if size=OS_F64 then
            { the same zero register supplies both halves }
            list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
          else
            internalerror(2009112908);
        end
      else
        internalerror(2009112906);
    end;
  { After an operation that can leave bits outside an 8 or 16 bit result
    (MUL, SHL, ADD, SUB, NEG), re-loads dst into itself from OS_32 to size.
    Other operations and sizes are left untouched. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      widening_ops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      narrow_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in widening_ops) then
        exit;
      if not(size in narrow_sizes) then
        exit;
      a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits "MLA op1,op2,op3,op4" while keeping R15 (and, for Thumb and
    Thumb-2 code, R13) out of the instruction's operands.

    list        : instruction list the code is appended to
    op1         : destination register
    op2,op3,op4 : source registers

    A restricted source register is copied into a fresh integer register
    first. A restricted destination is replaced by a fresh register and the
    result is moved into op1 afterwards; previously the destination was
    handled like a source, so the result stayed in the temporary and never
    reached op1. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { true if reg may not be used directly as an MLA operand }
    function is_restricted(reg : TRegister) : boolean;
      begin
        result:=((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
          (getsupreg(reg)=RS_R15);
      end;

    { replaces a restricted source register by a temporary holding a copy }
    procedure checkreg(var reg : TRegister);
      var
        tmpreg : TRegister;
      begin
        if is_restricted(reg) then
          begin
            tmpreg:=getintregister(list,OS_INT);
            a_load_reg_reg(list,OS_INT,OS_INT,reg,tmpreg);
            reg:=tmpreg;
          end;
      end;

    var
      dstreg : TRegister;
    begin
      { the old value of the destination is irrelevant, so no copy is made }
      dstreg:=op1;
      if is_restricted(dstreg) then
        dstreg:=getintregister(list,OS_INT);
      checkreg(op2);
      checkreg(op3);
      checkreg(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,dstreg,op2,op3,op4));
      { deliver the result to the register the caller asked for }
      if dstreg<>op1 then
        a_load_reg_reg(list,OS_INT,OS_INT,dstreg,op1);
    end;
  { 64 bit operation with two register pair operands.

    OP_NEG : regdst := 0 - regsrc, emitted as RSBS on the low words followed
             by RSC on the high words
    OP_NOT : both halves are complemented independently
    others : forwarded to a_op64_reg_reg_reg with regdst as second source
             and destination }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { In-place 64 bit operation with a constant: reg := reg op value.
    Forwards to a_op64_const_reg_reg with reg as both source and
    destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,op,size,value,
        reg,   { source }
        reg);  { destination }
    end;
  { 64 bit operation with a constant, without requesting overflow flags:
    calls a_op64_const_reg_reg_checkoverflow with setflags=false and drops
    the overflow location it reports. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { Three operand 64 bit operation, without requesting overflow flags:
    calls a_op64_reg_reg_reg_checkoverflow with setflags=false and drops the
    overflow location it reports. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into mm
    register mmreg with a single VMOV (operands: mmreg, reglo, reghi).
    No conversion is performed; mmsize must be OS_F64, otherwise
    internalerror 2009112405 is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    var
      transfer : taicpu;
    begin
      if mmsize<>OS_F64 then
        internalerror(2009112405);
      transfer:=taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi);
      list.concat(transfer);
    end;
  { Transfers the raw contents of mm register mmreg into the integer
    register pair intreg with a single VMOV (operands: reglo, reghi, mmreg).
    No conversion is performed; mmsize must be OS_F64, otherwise
    internalerror 2009112406 is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    var
      transfer : taicpu;
    begin
      if mmsize<>OS_F64 then
        internalerror(2009112406);
      transfer:=taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg);
      list.concat(transfer);
    end;
  { 64 bit operation with a constant: regdst := regsrc op value.

    list     : instruction list the code is appended to
    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT raise
               internalerror 2012022501, anything else 2003083101
    size     : only tested against OS_64 to decide whether ovloc is filled
    value    : the constant; its low and high 32 bit halves are applied to
               the low and high registers
    setflags : request that the high word operation also updates the flags
               (the cgsetflags field of the code generator has the same
               effect)
    ovloc    : reset to LOC_VOID on entry. When flags were requested for
               ADD/SUB and size=OS_64 it becomes LOC_FLAGS with F_CS (ADD)
               or F_CC (SUB)

    In the flag producing path the flags stay allocated on return.
    In the plain ADD/SUB path the flags are only needed to carry
    from the low to the high word and are released again, matching
    a_op64_reg_reg_reg_checkoverflow; this release was missing before. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          case op of
            OP_ADD:
              begin
                { low word: immediate form if the constant is encodable,
                  otherwise via a temporary register }
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                { high word: add with carry, again setting the flags }
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the ARM has a weird opinion on how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { bitwise operations work on both halves independently }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed, the flags are free again }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the borrow has been consumed, the flags are free again }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Three operand 64 bit operation on register pairs.

    op       : OP_ADD  regdst := regsrc1 + regsrc2
               OP_SUB  regdst := regsrc2 - regsrc1
               OP_AND, OP_OR, OP_XOR  applied to both halves separately
               OP_NEG/OP_NOT raise internalerror 2012022502, anything else
               2003083101
    setflags : request that the high word ADD/SUB also updates the flags
               (the cgsetflags field of the code generator has the same
               effect); the flags then stay allocated on return
    ovloc    : reset to LOC_VOID on entry; when flags were requested and
               size=OS_64 it becomes LOC_FLAGS with F_CS (ADD) or F_CC (SUB) }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag producing variant: both halves use the S postfix }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          if op=OP_ADD then
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
            end
          else
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
            end;
          if size=OS_64 then
            begin
              { the ARM has a weird opinion on how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              if op=OP_ADD then
                ovloc.resflags:=F_CS
              else
                ovloc.resflags:=F_CC;
            end;
        end
      else if op in [OP_AND,OP_OR,OP_XOR] then
        begin
          cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
          cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
        end
      else if op=OP_ADD then
        begin
          { flags only carry from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
          list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_SUB then
        begin
          { flags only carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        internalerror(2003083101);
    end;
  { Sets up the register allocators for Thumb code: after the inherited
    setup, the integer allocator is created over R0..R7, or over R0..R6
    when the current procedure uses R7 as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    var
      r7_is_framepointer : boolean;
    begin
      inherited init_register_allocators;
      r7_is_framepointer:=assigned(current_procinfo) and
        (current_procinfo.framepointer=NR_R7);
      if not r7_is_framepointer then
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 is reserved, keep it out of the allocatable set }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the integer, FPU and MM register allocators, then runs the
    inherited cleanup. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      { free order: int, fpu, mm }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits the Thumb prologue of the current procedure into list.

    list         : instruction list receiving the prologue
    localsize    : requested size of the local area; rounded up to 4 here
    nostackframe : when true no prologue instructions are emitted

    Sequence generated when a stack frame is wanted:
      1. PUSH of the integer registers used in the procedure minus the
         volatile ones, plus R14 (and R7 when a separate frame pointer is
         used); R4 is added when the frame may need a scratch register.
      2. SP adjustment: one SUB up to 508 bytes, two SUBs up to 1016 bytes,
         otherwise the negated size is loaded into R4 and added to SP.
      3. ADD framepointer,SP,#0 when the frame pointer is not SP itself.

    Raises internalerror(2013040601) when stack parameters are accessed and
    localsize exceeds the previously estimated stack frame size. }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);
          { save int registers }
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              //!!!! a_reg_alloc(list,NR_R12);
              //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
            end;
          { the (old) ARM APCS requires saving both the stack pointer (to
            crawl the stack) and the PC (to identify the function this
            stack frame belongs to) -> also save R12 (= copy of R13 on entry)
            and R15 -- still needs updating for EABI and Darwin, they don't
            need that }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_R7,RS_R14]
          else
            // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            include(regs,RS_R14);
          { safely estimate stack size: R4 is the scratch register of the
            large-frame path below, so it must be part of the pushed set }
          if localsize+current_settings.alignment.localalignmax+4>508 then
            begin
              include(rg[R_INTREGISTER].used_in_proc,RS_R4);
              include(regs,RS_R4);
            end;
          { number of bytes occupied by the pushed registers }
          registerarea:=0;
          if regs<>[] then
            begin
              for r:=RS_R0 to RS_R15 do
                if r in regs then
                  inc(registerarea,4);
              list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
            end;
          stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
          if stack_parameters or (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { do we access stack parameters?
                if yes, the previously estimated stacksize must be used }
              if stack_parameters then
                begin
                  if localsize>tarmprocinfo(current_procinfo).stackframesize then
                    begin
                      { diagnostic output preceding the internal error }
                      writeln(localsize);
                      writeln(tarmprocinfo(current_procinfo).stackframesize);
                      internalerror(2013040601);
                    end
                  else
                    localsize:=tarmprocinfo(current_procinfo).stackframesize-registerarea;
                end
              else
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              { 508 is emitted as a single SUB immediate by the two-step
                path below, so it is accepted here as well; this matches
                the <=508 test in g_proc_exit and avoids a trailing
                SUB SP,SP,#0 }
              if localsize<=508 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if localsize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
                end
              else
                begin
                  { R4 has already been pushed (see the estimate above),
                    so it can hold the negated frame size }
                  a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
                  //!!!! if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                  //!!!! a_reg_alloc(list,NR_R12);
                  //!!!! a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  //!!!! list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  //!!!! a_reg_dealloc(list,NR_R12);
                end;
            end;
          { establish the frame pointer as a copy of the adjusted SP }
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
            end;
        end;
    end;
  { Emits the Thumb epilogue of the current procedure into list.

    list         : instruction list receiving the epilogue
    parasize     : not used by this implementation
    nostackframe : when true only a plain return through R14 is emitted

    With a stack frame, the local area is released by adding its size back
    to SP and the saved registers are popped; R15 is always part of the
    popped set. The SP adjustment and the POP are only generated when the
    frame pointer is SP or the target is a Darwin system. }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);

    { return through the link register: BX R14 when the selected CPU
      supports BX, MOV PC,R14 otherwise }
    procedure emit_lr_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    var
      framesize : longint;
      sr : byte;
      popregs : tcpuregisterset;
      savedbytes : DWord;
      misalign : pint;
      has_stack_paras : Boolean;
    begin
      if nostackframe then
        begin
          emit_lr_return;
          exit;
        end;

      has_stack_paras:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore: the used non-volatile ones, the PC, and the
        frame pointer when it is a separate register }
      popregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(popregs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(popregs,getsupreg(current_procinfo.framepointer));

      savedbytes:=0;
      for sr:=RS_R0 to RS_R15 do
        if sr in popregs then
          inc(savedbytes,4);
      misalign:=savedbytes mod current_settings.alignment.localalignmax;

      framesize:=current_procinfo.calc_stackframe_size;
      if has_stack_paras then
        framesize:=tarmprocinfo(current_procinfo).stackframesize-savedbytes
      else
        framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;

      { nothing is generated for a separate frame pointer on non-Darwin targets }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      { release the local area; a zero-sized area needs no instruction }
      if framesize<>0 then
        begin
          if framesize<=508 then
            list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize))
          else if framesize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,framesize-508));
            end
          else
            begin
              { too large for immediates: go through R3 }
              a_reg_alloc(list,NR_R3);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R3);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
              a_reg_dealloc(list,NR_R3);
            end;
        end;

      if popregs=[] then
        emit_lr_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,popregs));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg.

    When fromsize is at least as wide as tosize, the load is done with
    tosize. References whose alignment (1 or 2) is smaller than the access
    size are read piecewise (bytes or halfwords) and combined with LSL/ORR;
    the piece order follows target_info.endian. Everything else goes
    through handle_load_store.

    Raises InternalError(200308298) for sizes other than 8/16/32 bit. }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      step : integer;

    { Sets partref to a reference suitable for the piecewise loads: ref
      itself when it is a plain base+small offset that does not involve
      reg, otherwise a fresh register holding the address of ref. }
    procedure prepare_partref;
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-124) or
           (ref.offset>124) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.alignment);
          end
        else
          partref:=ref;
      end;

    { moves partref by advance bytes and loads the piece found there
      into partreg }
    procedure load_next_part(partsize : tcgsize; advance : longint);
      begin
        inc(partref.offset,advance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
      end;

    { emits LSL partreg,#bits followed by ORR reg,partreg }
    procedure merge_part(bits : longint);
      begin
        list.concat(taicpu.op_reg_const(A_LSL,partreg,bits));
        list.concat(taicpu.op_reg_reg(A_ORR,reg,partreg));
      end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      case FromSize of
        { signed integer registers }
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { direction in which the pieces are walked }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                prepare_partref;
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                partreg:=getintregister(list,OS_INT);
                { first byte goes straight into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { second byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  load_next_part(OS_8,step)
                else
                  load_next_part(OS_S8,step);
                merge_part(8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                prepare_partref;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    load_next_part(OS_16,step*2);
                    merge_part(16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    load_next_part(OS_8,step);
                    merge_part(8);
                    load_next_part(OS_8,step);
                    merge_part(16);
                    load_next_part(OS_8,step);
                    merge_part(24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg.

    Values accepted by is_thumb_imm are moved directly with MOV. Any other
    value is appended as a 32 bit constant to the local data of the current
    procedure and fetched with a PC relative LDR.

    Raises internalerror(2002090902) for sizes other than 8/16/32 bit. }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { constant does not fit an immediate: place it in the local data }
      reference_reset(poolref,4);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry just appended to the local data }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef, wherever the
    caller side parameter locations place it (register or memory).

    When ioffset is accepted by is_thumb_imm the subtraction uses the
    constant directly. Otherwise R4 is saved with PUSH, loaded with ioffset
    from a constant placed in the local data, used as subtrahend and
    restored with POP.

    Raises internalerror(200305251) when no 'self' parameter symbol exists
    and internalerror(200309189) for an unsupported location kind. }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);

    { emits PUSH of R4, then loads ioffset into R4 through a PC relative
      LDR from a freshly emitted, 4 byte aligned constant }
    procedure push_r4_and_load_ioffset;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    { emits the POP that restores R4 }
    procedure pop_r4;
      begin
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
      end;

    var
      selfsym : tsym;
      selfref : treference;
      locitem : Pcgparalocation;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      selfsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(selfsym) and
             (selfsym.typ=paravarsym)) then
        internalerror(200305251);

      { walk every location piece of the self parameter }
      locitem:=tparavarsym(selfsym).paraloc[callerside].location;
      while locitem<>nil do
        begin
          case locitem^.loc of
            LOC_REGISTER:
              begin
                if is_thumb_imm(ioffset) then
                  a_op_const_reg(list,OP_SUB,locitem^.size,ioffset,locitem^.register)
                else
                  begin
                    push_r4_and_load_ioffset;
                    a_op_reg_reg(list,OP_SUB,locitem^.size,NR_R4,locitem^.register);
                    pop_r4;
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,locitem^.reference.index,locitem^.reference.offset+sizeof(aint),sizeof(pint));
                if is_thumb_imm(ioffset) then
                  a_op_const_ref(list,OP_SUB,locitem^.size,ioffset,selfref)
                else
                  begin
                    { NOTE(review): selfref is set up before R4 is pushed;
                      if reference.index is the stack pointer the PUSH moves
                      the slot by 4 bytes - confirm against callers }
                    push_r4_and_load_ioffset;
                    a_op_reg_ref(list,OP_SUB,locitem^.size,NR_R4,selfref);
                    pop_r4;
                  end;
              end
            else
              internalerror(200309189);
          end;
          locitem:=locitem^.next;
        end;
    end;
  { Thumb front end of handle_load_store.

    Checks whether ref can be used directly by the requested load/store
    (op together with oppostfix). If not, the address of ref is computed
    into a fresh register and a plain register based reference is used
    instead. The actual instruction is produced by the inherited
    implementation, whose result is returned unchanged. }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      href : treference;
      tmpreg : TRegister;
      word_access,
      byte_access,
      half_access,
      touches_sp,
      need_address : boolean;
    begin
      href:=ref;

      { classify the access by its width }
      word_access:=(oppostfix=PF_None) and ((op=A_LDR) or (op=A_STR));
      byte_access:=((oppostfix=PF_B) and ((op=A_LDR) or (op=A_STR))) or
                   ((oppostfix=PF_None) and ((op=A_LDRB) or (op=A_STRB)));
      half_access:=((oppostfix=PF_H) and ((op=A_LDR) or (op=A_STR))) or
                   ((oppostfix=PF_None) and ((op=A_LDRH) or (op=A_STRH)));
      touches_sp:=(ref.base=NR_STACK_POINTER_REG) or
                  (ref.index=NR_STACK_POINTER_REG);

      need_address:=
        { LDR/STR limitations }
        (word_access and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byte_access and
         (touches_sp or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (half_access and
         (touches_sp or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0)));

      { additional restrictions that only apply to loads }
      if (not need_address) and (op=A_LDR) then
        need_address:=
          { SP relative word load with an offset above 1020 }
          ((oppostfix in [PF_None]) and
           (ref.base=NR_STACK_POINTER_REG) and
           (abs(ref.offset)>1020)) or
          { sign extending loads and any remaining offset above 124 }
          (oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124);

      if need_address then
        begin
          tmpreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,tmpreg);
          reference_reset_base(href,tmpreg,0,ref.alignment);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  { Emits the two operand operation dst := dst op src (for OP_NEG and
    OP_NOT: dst := op src) and afterwards lets maybeadjustresult post
    process dst for the given size.

    OP_ROL has no instruction in the opcode tables used here; it is
    synthesised as a rotate right by (32 - src) and is only accepted for
    32 bit sizes.

    Raises internalerror(200308284) for OP_DIV/OP_IDIV and
    internalerror(2008072801) for OP_ROL on a non 32 bit size. }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            { tmpreg := 32 - src }
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            { rotate by the complemented count held in tmpreg; rotating by
              src itself would turn the ROL into a ROR by the same amount }
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := dst op a for a constant operand a.

    ADD and SUB are swapped (with a negated) when the negated constant is
    accepted by is_thumb_imm. ADD/SUB with a usable immediate are emitted
    directly; a few trivial multiplications, divisions and AND masks are
    simplified; shifts use the immediate form. Everything else loads the
    constant into a temporary register and defers to a_op_reg_reg.
    maybeadjustresult is applied to dst at the end in every case.

    The sections enclosed in DUMMY conditionals are not compiled unless
    that symbol is defined. }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      constreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { prefer the opposite operation when only -a is a valid immediate }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if (op in [OP_ADD,OP_SUB]) and is_thumb_imm(a) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
          { overflow location reporting is not implemented here yet:
            for cgsetflags with an unsigned size it would be
              //!!! ovloc.loc:=LOC_FLAGS;
              //!!! OP_ADD: ovloc.resflags:=F_CS;
              //!!! OP_SUB: ovloc.resflags:=F_CC; }
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF leaves the register as it is }
          else if (op = OP_AND) and (not(dword(a))=0) then
            begin
              { do nothing }
            end
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { generic path: constant into a register, then reg/reg form }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg(list,op,size,constreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src op a.

    Special case: an addition of a positive multiple of 4 not above 1020 to
    R13, with a destination other than R13, is emitted as a single
    three operand ADD. All other combinations are handled by the inherited
    implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    var
      direct_sp_add : boolean;
    begin
      direct_sp_add:=(op=OP_ADD) and
        (src=NR_R13) and
        (dst<>NR_R13) and
        ((a mod 4)=0) and
        (a>0) and
        (a<=1020);

      if not direct_sp_add then
        inherited a_op_const_reg_reg(list,op,size,a,src,dst)
      else
        list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the condition described by f as 0 or 1 in reg.

    Emitted shape:
          B<cond> is_set
          MOV reg,#0
          B   finished
      is_set:
          MOV reg,#1
      finished:
    The flags register is marked as released before the final label. }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      is_set,finished : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(is_set);
      current_asmdata.getjumplabel(finished);

      { conditional branch taken when the flags match f }
      condjump:=setcondition(taicpu.op_sym(A_B,is_set),flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition false }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,finished));

      { condition true }
      cg.a_label(list,is_set);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));

      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,finished);
    end;
  { Creates the integer, FPU and MM register allocators for Thumb-2.

    The integer set omits R9 on Darwin. The MM register set is chosen from
    current_settings.fputype: D0..D31 for fpu_vfpv3, D0..D15 for
    fpu_fpv4_s16 and fpu_vfpv3_d16, and a small fixed set otherwise. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_darwin then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      case current_settings.fputype of
        fpu_vfpv3:
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        fpu_fpv4_s16,
        fpu_vfpv3_d16:
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
        else
          { NOTE(review): RS_R2,RS_R3,RS_R4 are integer register constants
            inside an MM register list - presumably intentional, confirm }
          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
      end;
    end;
  { Frees the integer, FPU and MM register allocator objects and then runs
    the inherited cleanup. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      { TObject.free checks for nil itself, so an unassigned slot is skipped }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg using BLX and records in the flags
    of the current procedure that it performs a call. }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_BLX, reg);
      list.concat(callinstr);
{
      the compiler does not properly set this flag anymore in pass 1, and
      for now we only need it after pass 2 (I hope) (JM)
      if not(pi_do_call in current_procinfo.flags) then
        internalerror(2003060703);
}
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg, trying in this order:
      1. MOV  when a is accepted by is_thumb32_imm
      2. MVN  when not(a) is accepted by is_thumb32_imm
      3. MOVW when a fits into the low 16 bits
      4. PC relative LDR from a 32 bit constant appended to the local data
         of the current procedure

    Raises internalerror(2002090902) for sizes other than 8/16/32 bit. }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form applies: place the constant in the local data }
      reference_reset(poolref,4);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      { remember the label entry just appended to the local data }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from memory reference Ref into reg.

    When fromsize is at least as wide as tosize, the load is done with
    tosize. References whose alignment (1 or 2) is smaller than the access
    size are read piecewise (bytes or halfwords) and combined with
    ORR reg,reg,piece,LSL #n; the piece order follows target_info.endian.
    Everything else goes through handle_load_store.

    Raises InternalError(200308299) for sizes other than 8/16/32 bit. }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      shiftop : tshifterop;
      step : integer;

    { Sets partref to a reference suitable for the piecewise loads: ref
      itself when it is a plain base+offset within -255..maxoffset that
      does not involve reg, otherwise a fresh register holding the address
      of ref. }
    procedure prepare_partref(maxoffset : longint);
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-255) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            addrreg:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,addrreg);
            reference_reset_base(partref,addrreg,0,ref.alignment);
          end
        else
          partref:=ref;
      end;

    { moves partref by advance bytes and loads the piece found there
      into partreg }
    procedure load_next_part(partsize : tcgsize; advance : longint);
      begin
        inc(partref.offset,advance);
        a_internal_load_ref_reg(list,partsize,partsize,partref,partreg);
      end;

    { emits ORR reg,reg,partreg with partreg shifted left by bits }
    procedure merge_part(bits : longint);
      begin
        shiftop.shiftimm:=bits;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
      end;

    begin
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      case FromSize of
        { signed integer registers }
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;

      if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
        begin
          { direction in which the pieces are walked }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                prepare_partref(4094);
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                partreg:=getintregister(list,OS_INT);
                { first byte goes straight into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                { second byte carries the sign for OS_S16 }
                if FromSize=OS_16 then
                  load_next_part(OS_8,step)
                else
                  load_next_part(OS_S8,step);
                merge_part(8);
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);
                prepare_partref(4092);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    load_next_part(OS_16,step*2);
                    merge_part(16);
                  end
                else
                  begin
                    { four single bytes }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    load_next_part(OS_8,step);
                    merge_part(8);
                    load_next_part(OS_8,step);
                    merge_part(16);
                    load_next_part(OS_8,step);
                    merge_part(24);
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Emits dst := dst op src.

    Only OP_NOT is handled here: an MVN into dst followed, for 8 and 16 bit
    sizes, by the zero or sign extension matching size. All other
    operations are passed to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
      { bring the inverted value back into the range of the operand size;
        32 bit sizes need no extension }
      case size of
        OS_8:
          list.concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
        OS_S8:
          list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
        OS_16:
          list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
        OS_S16:
          list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
      end;
    end;
  { Emits Thumb-2 code for "dst := src <op> a" with a constant operand.

    list     - instruction list the code is appended to
    op       - operation to perform (may be flipped between ADD and SUB below)
    size     - operand size, also used to decide whether ovloc is filled
    a        - the constant operand
    src, dst - source and destination register
    setflags - request a flag setting instruction (combined with cgsetflags)
    ovloc    - receives LOC_FLAGS plus the flag to test for ADD/SUB when flags
               were requested and size is OS_8/OS_16/OS_32; LOC_VOID otherwise
               (the register/register fallback may set it as well)

    The result is finally passed through maybeadjustresult. }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      constreg : tregister;
      shiftop : tshifterop;
      power : longint;
    begin
      ovloc.loc:=LOC_VOID;

      { if the negated constant is a shifter constant, swap ADD and SUB and
        continue with the negated value }
      if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          { these operations make no sense with a constant operand here }
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);

          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_LSL;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  { a rotate left by a is emitted as a rotate right by 32-a }
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_ROR;
                  shiftop.shiftimm:=32-a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_ROR;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_LSR;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              if a=0 then
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
              else
                begin
                  shifterop_reset(shiftop);
                  shiftop.shiftmode:=SM_ASR;
                  shiftop.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                end;
            end;

          else
            begin
              if (op in [OP_SUB, OP_ADD]) and
                 ((a < 0) or
                  (a > 4095)) then
                begin
                  { ADD/SUB constants outside 0..4095 are materialised in a
                    register and the three register form is used }
                  constreg:=getintregister(list,size);
                  a_load_const_reg(list, size, a, constreg);
                  if cgsetflags or setflags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,constreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
                end
              else
                begin
                  if cgsetflags or setflags then
                    a_reg_alloc(list,NR_DEFAULTFLAGS);
                  list.concat(setoppostfix(
                    taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
                end;

              { tell the caller which flag to test after an ADD or SUB }
              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                  end;
                end;
            end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { done here rather than in the peephole optimizer because it saves
            a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,power) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,power,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,power) and not(cgsetflags or setflags) then
            begin
              if power>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=power;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,power) and not(cgsetflags or setflags) then
            begin
              if power>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=power;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { the helper already emitted the code }
            end
          { x := y and 0 just clears a register; this sometimes gets generated
            on 64 bit ops. A plain mov x,#0 might allow easier optimizations
            down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y and $FFFFFFFF just copies the register, so use mov }
          else if (op = OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { disabled alternative kept from the original code:
            BIC clears the specified bits while AND keeps them, so BIC allows
            a broader range of shifter constants:
              else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
                list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a)))) }
          else if (op = OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          else if (op = OP_AND) and (a = $FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else if (op = OP_AND) and is_continuous_mask(not(a), shift, width) then
            begin
              { copy the source and clear the bit field described by shift/width }
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
            end
          else
            begin
              { generic fallback: load the constant and use the register form }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 opcode used for each generic code generator operation, indexed
      by TOpCG. A_NONE marks operations without a direct three register
      instruction.
      NOTE(review): the per-entry operation names below assume the usual
      TOpCG declaration order, which is not visible here - confirm against
      cgbase. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
      (A_NONE,   { presumably OP_NONE }
       A_MOV,    { presumably OP_MOVE }
       A_ADD,    { presumably OP_ADD  }
       A_AND,    { presumably OP_AND  }
       A_UDIV,   { presumably OP_DIV  }
       A_SDIV,   { presumably OP_IDIV }
       A_MUL,    { presumably OP_IMUL }
       A_MUL,    { presumably OP_MUL  }
       A_NONE,   { presumably OP_NEG  }
       A_MVN,    { presumably OP_NOT  }
       A_ORR,    { presumably OP_OR   }
       A_ASR,    { presumably OP_SAR  }
       A_LSL,    { presumably OP_SHL  }
       A_LSR,    { presumably OP_SHR  }
       A_SUB,    { presumably OP_SUB  }
       A_EOR,    { presumably OP_XOR  }
       A_NONE,   { presumably OP_ROL  }
       A_ROR);   { presumably OP_ROR  }
  { Emits Thumb-2 code for "dst := src2 <op> src1" on registers.

    list        - instruction list the code is appended to
    op          - operation; OP_NEG and OP_NOT are rejected with an internal error
    size        - operand size (rotates accept only OS_32/OS_S32)
    src1, src2  - source registers; neither is modified by the generated code
    dst         - destination register
    setflags    - request overflow information (combined with cgsetflags)
    ovloc       - set to LOC_FLAGS/F_NE for multiplications when flags were
                  requested, LOC_VOID otherwise

    The result is finally passed through maybeadjustresult. }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value; the rotate count is built in a
              scratch register so that the caller's src1 is left intact (the
              previous code wrote the adjusted count back into src1) }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { a long multiply delivers the upper 32 bits in overflowreg,
                  which are inspected afterwards to detect an overflow }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { all three registers are identical: the operand has to
                          be copied into the scratch register, otherwise the
                          multiply would read an uninitialised tmpreg }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper word must equal the sign extension of
                      the lower word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { see above: copy the operand into the scratch register }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { plain three register instruction, with the S postfix when flags
              were requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the flag result f in reg: reg becomes 1 when the condition
    holds and 0 otherwise. The two conditional moves are preceded by an ITE
    instruction. The size parameter is not used. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE, cond));
      { "then" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      { "else" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Generates the procedure prologue.

    list         - instruction list the prologue is appended to
    localsize    - size of the local variable area, rounded up to 4 here
    nostackframe - when true no prologue code is generated at all

    Steps: determine the used non-volatile FPU registers, store the integer
    registers that have to be preserved, set up the frame pointer if one is
    used, reserve the (alignment adjusted) local area and finally store the
    FPU registers. }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      regno : byte;
      savedregs : tcpuregisterset;
      stackmisalignment: pint;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if nostackframe then
        exit;

      { find the range of floating point registers that must be saved }
      firstfloatreg:=RS_NO;
      lastfloatreg:=RS_NO;
      for regno:=RS_F0 to RS_F7 do
        if regno in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfloatreg=RS_NO then
              firstfloatreg:=regno;
            lastfloatreg:=regno;
            inc(stackmisalignment,12);
          end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { keep the entry value of the stack pointer in R12 }
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { save int registers }
      reference_reset(ref,4);
      ref.index:=NR_STACK_POINTER_REG;
      ref.addressmode:=AM_PREINDEXED;
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        savedregs:=savedregs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (savedregs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(savedregs,RS_R14);
      if savedregs<>[] then
        begin
          { every stored register moves the stack pointer by four bytes }
          for regno:=RS_R0 to RS_R15 do
            if (regno in savedregs) then
              inc(stackmisalignment,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,savedregs),PF_FD));
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { size of the local area including the padding that realigns the stack }
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(localsize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { the size cannot be encoded as immediate: go through R12 }
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if firstfloatreg<>RS_NO then
        begin
          reference_reset(ref,4);
          if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the store: compute the address in R12 }
              a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              ref.base:=NR_R12;
            end
          else
            begin
              ref.base:=current_procinfo.framepointer;
              ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
            lastfloatreg-firstfloatreg+1,ref));
        end;
    end;
  { Generates the procedure epilogue.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true only "mov pc,r14" is emitted

    Otherwise the saved FPU registers are reloaded, the local area is
    released and the saved integer registers are loaded back, with R15 taking
    the place of R14 so that the load also returns to the caller. }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      firstfloatreg,lastfloatreg,
      regno : byte;
      shift : byte;
      savedregs : tcpuregisterset;
      LocalSize : longint;
      stackmisalignment: pint;
    begin
      if nostackframe then
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
          exit;
        end;

      stackmisalignment:=0;
      { find the range of floating point registers that were saved }
      firstfloatreg:=RS_NO;
      lastfloatreg:=RS_NO;
      for regno:=RS_F0 to RS_F7 do
        if regno in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfloatreg=RS_NO then
              firstfloatreg:=regno;
            lastfloatreg:=regno;
            { floating point register space is already included in
              localsize below by calc_stackframe_size, so no
              inc(stackmisalignment,12) here }
          end;

      { restore floating point registers }
      if firstfloatreg<>RS_NO then
        begin
          reference_reset(ref,4);
          if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the load: compute the address in R12 }
              a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              ref.base:=NR_R12;
            end
          else
            begin
              ref.base:=current_procinfo.framepointer;
              ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
            lastfloatreg-firstfloatreg+1,ref));
        end;

      { integer registers to load back; the return address goes into R15 }
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (savedregs<>[]) then
        begin
          exclude(savedregs,RS_R14);
          include(savedregs,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        savedregs:=savedregs+[RS_FRAME_POINTER_REG,RS_R15];

      for regno:=RS_R0 to RS_R15 do
        if (regno in savedregs) then
          inc(stackmisalignment,4);
      stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;

      LocalSize:=current_procinfo.calc_stackframe_size;
      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { release the local area including the alignment padding }
          localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
          if is_shifter_const(LocalSize,shift) then
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
            end
          else
            begin
              { the size cannot be encoded as immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end;
        end;

      if savedregs=[] then
        { nothing was stored: return through the link register }
        list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
      else
        begin
          reference_reset(ref,4);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,savedregs),PF_FD));
        end;
    end;
  { Emits the load or store "op reg,ref" after rewriting ref into a form the
    instruction accepts.

    list      - instruction list the code is appended to
    op        - load/store opcode
    oppostfix - postfix attached to the final instruction
    reg       - register that is loaded or stored
    ref       - reference to access (passed by value and modified locally)

    Returns the reference that was finally used by the instruction. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      scratch : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      fpuaccess : boolean;
    begin
      scratch:=NR_NO;
      { floating point/vfp transfers have tighter addressing restrictions }
      fpuaccess:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol
        reference in the text segment and access it pc relative.

        For now, we assume that references where base or index equals to PC
        are already relative, all other references are assumed to be absolute
        and thus they need to be handled extra.

        A proper solution would be to change refoptions to a set and store
        the information if the symbol is absolute or relative there. }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         { the original code has "(ref.offset<-4095)" disabled at this place }
         (ref.offset<-255) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         (fpuaccess and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0) or
           { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
           assigned(ref.symbol)
          )
         ) then
        begin
          reference_reset(poolref,4);
          { load symbol }
          scratch:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              { place symbol+offset in the local data and load it pc relative }
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;
              current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              { load consts entry }
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,scratch,poolref));
              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            a_load_const_reg(list,OS_ADDR,ref.offset,scratch);

          { merge the scratch register into the reference }
          if (ref.base<>NR_NO) then
            begin
              if ref.index<>NR_NO then
                begin
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
                  ref.base:=scratch;
                end
              else
                begin
                  ref.index:=scratch;
                  ref.shiftimm:=0;
                  ref.signindex:=1;
                  ref.shiftmode:=SM_None;
                end;
            end
          else
            ref.base:=scratch;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base, index and offset cannot be combined: fold the offset away }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if scratch<>NR_NO then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,scratch,scratch)
          else
            begin
              scratch:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,scratch);
              ref.base:=scratch;
            end;
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          scratch:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, scratch, NR_R15));
          ref.base := scratch;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if fpuaccess and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if scratch<>NR_NO then
            begin
              if ref.base=scratch then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,scratch,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,scratch,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>scratch then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,scratch,ref.base,scratch))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,scratch));
                  ref.base:=scratch;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              scratch:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,scratch,ref.base,ref.index));
              ref.base:=scratch;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Copies the multimedia/vfp register reg1 to reg2.

    Only the single precision case (OS_F32 to OS_F32) generates code: a
    VMOV.F32 that is also registered as a move instruction. For every other
    size combination nothing is emitted.

    NOTE(review): the OS_F64->OS_F64 and OS_F32->OS_F64 cases were empty in
    the original code as well, with these statements disabled:
      OS_F64->OS_F64:
        list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
        list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
      OS_F32->OS_F64:
        list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32))
    Confirm that silently emitting nothing is intended for these cases. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      if (fromsize=OS_F32) and
         (tosize=OS_F32) then
        begin
          movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
          list.Concat(movinstr);
          add_move_instruction(movinstr);
        end;
    end;
  { Loads the multimedia/vfp register reg from memory with VLDR. The reference
    is legalised by handle_load_store; its returned reference is not needed.
    fromsize, tosize and shuffle are not evaluated here. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VLDR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Stores the multimedia/vfp register reg to memory with VSTR. The reference
    is legalised by handle_load_store; its returned reference is not needed.
    fromsize, tosize and shuffle are not evaluated here. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VSTR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Transfers an integer register into a multimedia/vfp register with VMOV.
    Only a single precision target (tosize=OS_F32) is supported, anything
    else raises internal error 2012100813. The shuffle parameter is not
    checked (the original "(shuffle=nil) and" test is disabled). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers a multimedia/vfp register into an integer register with VMOV.
    Only a single precision source (fromsize=OS_F32) is supported, anything
    else raises internal error 2012100814. The shuffle parameter is not
    checked (the original "(shuffle=nil) and" test is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { 64 bit register/register operation for Thumb-2.

    Only OP_NEG is implemented here: the low word is negated with a flag
    setting RSB from zero, the high word is computed as 0 - high - borrow
    using SBC with a zeroed scratch register. All other operations are
    delegated to the inherited implementation. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
    end;
  { 64 bit register/register operation for Thumb (16 bit instruction set),
    built from two operand instructions: regdst := regdst <op> regsrc,
    respectively regdst := <op> regsrc for OP_NEG and OP_NOT.

    list   - instruction list the code is appended to
    op     - OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any
             other operation raises internal error 2003083101
    size   - not evaluated here
    regsrc - source register pair
    regdst - destination register pair

    The flags are allocated around every low/high instruction pair that
    passes a carry or borrow and released again afterwards. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    begin
      case op of
        OP_NEG:
          begin
            { computes 0 - regsrc.
              NOTE(review): the destination is cleared before the source is
              read, so this sequence yields zero when regdst and regsrc are
              the same registers - confirm callers never pass aliased pairs }
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reglo,0));
            list.concat(taicpu.op_reg_const(A_MOV,regdst.reghi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { the carry has been consumed: release the flags again, as the
              OP_NEG branch does }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { the borrow has been consumed: release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { 64 bit constant/register operation for Thumb (16 bit instruction set):
    reg := reg <op> value.

    list  - instruction list the code is appended to
    op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; any other operation
            raises internal error 2003083101
    size  - not evaluated here
    value - the 64 bit constant operand
    reg   - register pair that is both operand and destination

    For OP_ADD/OP_SUB the low word is processed first (with an immediate when
    it lies in 0..255, otherwise through a scratch register), then the high
    word is combined with the carry/borrow using ADC/SBC. The flags are
    allocated before the low word instruction and released after the high
    word instruction. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      tmpreg : tregister;
      lowop, highop : tasmop;
      lowval : aint;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            { bitwise operations work on both halves independently }
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD,OP_SUB:
          begin
            if op=OP_ADD then
              begin
                lowop:=A_ADD;
                highop:=A_ADC;
              end
            else
              begin
                lowop:=A_SUB;
                highop:=A_SBC;
              end;

            lowval:=aint(lo(value));
            if (lowval>=0) and (lowval<=255) then
              begin
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(lowop,reg.reglo,lowval));
              end
            else
              begin
                tmpreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,lowval,tmpreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(lowop,reg.reglo,tmpreg));
              end;

            { NOTE(review): the high word constant is loaded while the flags
              of the low word operation are live - this relies on
              a_load_const_reg not changing the carry; confirm }
            tmpreg:=cg.getintregister(list,OS_32);
            { the high word is converted with aint() for both operations; the
              OP_SUB branch used to pass the unsigned hi(value) directly }
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
            list.concat(taicpu.op_reg_reg(highop,reg.reghi,tmpreg));
            { the carry/borrow has been consumed: release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Instantiates the code generator objects matching the selected instruction
    set: Thumb-2, Thumb or plain ARM. The global variables cg and cg64 are
    always assigned; casmoptimizer is assigned for Thumb-2 and ARM only. }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end
      else if GenerateThumbCode then
        begin
          cg:=tthumbcgarm.create;
          cg64:=tthumbcg64farm.create;
          { no assembler optimizer is selected for Thumb; the original code
            has "casmoptimizer:=TCpuThumbAsmOptimizer;" disabled here }
        end
      else
        begin
          cg:=tarmcgarm.create;
          cg64:=tarmcg64farm.create;
          casmoptimizer:=TCpuAsmOptimizer;
        end;
    end;
  4871. end.