cgcpu.pas 216 KB


  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { tbasecgarm collects the code generator parts that all ARM flavours in
    this unit build on (tcgarm and tthumbcgarm both descend from it) }
  tbasecgarm = class(tcg)
    { true, if the next arithmetic operation should modify the flags }
    cgsetflags: boolean;

    procedure a_load_const_cgpara(list: TAsmList; size: tcgsize; a: tcgint; const paraloc: TCGPara); override;
  protected
    procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint;
      const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
  public
    procedure a_loadaddr_ref_cgpara(list: TAsmList; const r: treference; const paraloc: TCGPara); override;

    { calls }
    procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
    procedure a_call_reg(list: TAsmList; reg: tregister); override;

    { move instructions }
    procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    { non-virtual load/store workers; both hand back a treference to the caller }
    function a_internal_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference): treference;
    function a_internal_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister): treference;

    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_loadfpu_ref_cgpara(list: TAsmList; size: tcgsize; const ref: treference; const paraloc: TCGPara); override;

    { comparison operations }
    procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister;
      l: tasmlabel); override;
    procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;

    { jumps and flag handling }
    procedure a_jmp_name(list: TAsmList; const s: string); override;
    procedure a_jmp_always(list: TAsmList; l: tasmlabel); override;
    procedure a_jmp_flags(list: TAsmList; const f: TResFlags; l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { procedure level code }
    procedure g_profilecode(list: TAsmList); override;
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;
    procedure g_maybe_got_init(list: TAsmList); override;

    procedure a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister); override;

    { block copies }
    procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint); override;
    procedure g_concatcopy_unaligned(list: TAsmList; const source, dest: treference; len: tcgint); override;
    procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
    procedure g_concatcopy_internal(list: TAsmList; const source, dest: treference; len: tcgint; aligned: boolean);

    { overflow checking }
    procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
    procedure g_overflowCheck_loc(List: TAsmList; const Loc: TLocation; def: TDef; ovloc: tlocation); override;

    procedure g_save_registers(list: TAsmList); override;
    procedure g_restore_registers(list: TAsmList); override;

    procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);
    procedure fixref(list: TAsmList; var ref: treference);
    { virtual: tthumbcgarm and tthumb2cgarm provide their own versions }
    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; virtual;

    { mm register moves and operations }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;

    { Transform unsupported methods into Internal errors }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;

    { try to generate an optimized 32 bit multiplication by a constant,
      returns true if code was generated successfully }
    function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister): boolean;
    { clear the upper 24/16 bits of a register after an 8 or 16 bit
      operation, as they may hold overflow bits of that operation }
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    { mla for thumb requires that none of the registers is equal to r13/r15,
      this method ensures this }
    procedure safe_mla(list: TAsmList; op1, op2, op3, op4: TRegister);

    procedure g_maybe_tls_init(list: TAsmList); override;
  end;
  { tcgarm is the common ancestor of the normal arm and the thumb-2 code
    generators }
  tcgarm = class(tbasecgarm)
    { integer operations with a constant operand }
    procedure a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list: TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;

    { two and three operand integer operations }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); override;

    { variants which additionally take setflags and report through ovloc }
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint;
      src, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize;
      src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation); override;

    { loads }
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;

    procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint); override;

    { multiply two 32-bit registers into lo and hi 32-bit registers }
    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1, src2, dstlo, dsthi: tregister); override;
  end;
  { code generator for normal (non-thumb) arm; it only adds the set up and
    tear down of the register allocators }
  tarmcgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;
  end;
  { 64 bit code generator base for all arm flavours; declares no members of
    its own and only serves as common ancestor }
  tbasecg64farm = class(tcg64f32)
  end;
  { tcg64farm: 64 bit code generator shared between normal arm and thumb-2 }
  tcg64farm = class(tbasecg64farm)
    { 64 bit integer operations on register pairs }
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
    procedure a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64); override;
    procedure a_op64_const_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64;
      regsrc, regdst: tregister64); override;
    procedure a_op64_reg_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize;
      regsrc1, regsrc2, regdst: tregister64); override;

    { variants which additionally take setflags and report through ovloc }
    procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCG; size: tcgsize; value: int64;
      regsrc, regdst: tregister64; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCG; size: tcgsize;
      regsrc1, regsrc2, regdst: tregister64; setflags: boolean; var ovloc: tlocation); override;

    { moves between a 64 bit integer register pair and an mm register }
    procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister); override;
    procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64); override;
  end;
  { 64 bit code generator class for normal arm; adds nothing to tcg64farm }
  tarmcg64farm = class(tcg64farm)
  end;
  { thumb code generator; descends directly from tbasecgarm and therefore
    carries its own versions of the integer operations and loads }
  tthumbcgarm = class(tbasecgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    { procedure entry and exit code }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;

    { integer operations }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister); override;
    procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister); override;

    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { loads }
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;

    procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint); override;

    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
  end;
  { 64 bit code generator for thumb }
  tthumbcg64farm = class(tbasecg64farm)
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
    procedure a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64); override;
  end;
  { thumb-2 code generator; builds on tcgarm and replaces the parts listed
    below }
  tthumb2cgarm = class(tcgarm)
    procedure init_register_allocators; override;
    procedure done_register_allocators; override;

    procedure a_call_reg(list: TAsmList; reg: tregister); override;

    { loads }
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister); override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override;

    { integer operations }
    procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint;
      src, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize;
      src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation); override;

    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

    { procedure entry and exit code }
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean); override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;

    function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;

    { mm register moves }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle); override;
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
  end;
  { 64 bit code generator for thumb-2; only a_op64_reg_reg differs from
    tcg64farm }
  tthumb2cg64farm = class(tcg64farm)
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
  end;
  const
    { assembler condition belonging to each generic comparison operator;
      the entries follow the declaration order of topcmp }
    OpCmp2AsmCond : array[topcmp] of TAsmCond = (
      C_NONE,
      C_EQ,C_GT,C_LT,C_GE,C_LE,C_NE,
      C_LS,C_CC,C_CS,C_HI
    );

    { NOTE(review): presumably the stack page size in bytes on Windows
      targets -- its users are outside this part of the file, confirm there }
    winstackpagesize = 4096;

  { returns the instruction postfix (PF_S, PF_D or PF_E) for a float definition }
  function get_fpu_postfix(def : tdef) : toppostfix;

  { NOTE(review): judging by the name this instantiates the code generator
    objects; the implementation is outside this part of the file }
  procedure create_codegen;
  174. implementation
  175. uses
  176. globals,verbose,systems,cutils,
  177. aopt,aoptcpu,
  178. fmodule,
  179. symconst,symsym,symtable,
  180. tgobj,
  181. procinfo,cpupi,
  182. paramgr;
  183. { Range check must be disabled explicitly as conversions between signed and unsigned
  184. 32-bit values are done without explicit typecasts }
  185. {$R-}
  { Returns the instruction postfix matching the float type of def:
    PF_S for s32real, PF_D for s64real and PF_E for s80real.
    Raises internal error 200401271 if def is not a float definition and
    200401272 for any other float type. }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real: result:=PF_S;
          s64real: result:=PF_D;
          s80real: result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the integer, fpu and mm register allocators for normal arm.
    The order of the registers inside each list is passed on unchanged to
    the allocator constructors. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we always save R14, so we can use it }
      if target_info.system=system_arm_darwin then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
           RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
        { R11 is not the frame pointer of this procedure, so hand it out too }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        { same list without R11 }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
           RS_R9,RS_R10,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if not(current_settings.fputype in [fpu_vfpv3,fpu_vfpv4]) then
        { D0..D15 only }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[])
      else
        { vfpv3/vfpv4: D16..D31 are listed as well, ahead of D8..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
          [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
           RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
           RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
           RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
          ],first_mm_imreg,[]);
    end;
  { Frees the three register allocators set up by init_register_allocators
    and then lets the inherited implementation run. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { integer, fpu and mm allocator, in this order }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into reg. The cheapest form that fits is used, tried
    in this order: a single mov, a single mvn, mov+orr, mvn+bic and finally
    a pc relative ldr from a 32 bit constant that is appended to the local
    data of the current procedure.
    Raises internal error 2002090902 for sizes other than 8, 16 or 32 bit. }
  procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      shift : byte;
      lab : tasmlabel;
      poolref : treference;
      part1, part2 : DWord;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      { the constant itself is a shifter constant: mov }
      if is_shifter_const(a,shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { its complement is a shifter constant: mvn }
      if is_shifter_const(not(a),shift) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      { loading of constants with mov and orr }
      if split_into_shifter_const(a,part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
          exit;
        end;

      { loading of constants with mvn and bic }
      if split_into_shifter_const(not(a),part1,part2) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
          exit;
        end;

      { last resort: put the value behind a fresh label in the local data
        and load it relative to the pc }
      current_asmdata.getjumplabel(lab);
      cg.a_label(current_procinfo.aktlocaldata,lab);
      reference_reset(poolref,4,[]);
      poolref.symbol:=lab;
      poolref.base:=NR_PC;
      { must be taken before the constant is appended, so that it refers to
        the label entry which is the last element at this point }
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value of size fromsize from Ref into reg.
    If fromsize is not smaller than tosize, the load is done with tosize.
    References that are not sufficiently aligned for the access, and half
    word accesses on cpus without CPUARM_HAS_ALL_MEM, are assembled from
    smaller loads which are combined with orr and a shift.
    Raises internal error 200308297 for sizes other than 8, 16 or 32 bit. }
  procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      postfix : toppostfix;
      partref : treference;
      hreg,hreg2 : tregister;
      shift : tshifterop;
      step : integer;
      noallmem,
      bigendian : boolean;

    { Sets partref up for the partial loads: a plain copy of ref if ref is
      simple enough, otherwise the address of ref is loaded into a new
      register (kept in hreg2) and partref becomes a reference to that
      register with offset 0. maxoffset is the largest offset of ref that is
      still accepted without loading the address. }
    procedure setup_partref(maxoffset : longint);
      begin
        { only complicated references need an extra loadaddr }
        if assigned(ref.symbol) or
           (ref.index<>NR_NO) or
           (ref.offset<-4095) or
           (ref.offset>maxoffset) or
           { sometimes the compiler reused registers }
           (reg=ref.index) or
           (reg=ref.base) then
          begin
            hreg2:=getintregister(list,OS_INT);
            a_loadaddr_ref_reg(list,ref,hreg2);
            reference_reset_base(partref,hreg2,0,ref.temppos,ref.alignment,ref.volatility);
          end
        else
          partref:=ref;
      end;

    begin
      noallmem:=not(CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      bigendian:=target_info.endian=endian_big;

      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      { select the load postfix for the source size }
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          { without CPUARM_HAS_ALL_MEM the byte is loaded unsigned and sign
            extended at the end of this method }
          if noallmem then
            postfix:=PF_B
          else
            postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308297);
      end;

      if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) or
         (noallmem and (postfix in [PF_SH,PF_H])) then
        begin
          { walk through memory from the least significant part upwards }
          if bigendian then
            step:=-1
          else
            step:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                setup_partref(4094);
                if bigendian then
                  inc(partref.offset,1);
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                shift.shiftimm:=8;
                hreg:=getintregister(list,OS_INT);
                { low byte into reg }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                { high byte into hreg; it carries the sign for OS_S16 }
                if FromSize=OS_S16 then
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,hreg)
                else
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,hreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hreg,shift));
              end;
            OS_32,OS_S32:
              begin
                hreg:=getintregister(list,OS_INT);
                setup_partref(4092);
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two half word loads }
                    if bigendian then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,hreg);
                    shift.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hreg,shift));
                  end
                else
                  begin
                    { four byte loads; hreg is used for the second and the
                      fourth byte, hreg2 for the third one }
                    hreg2:=getintregister(list,OS_INT);
                    if bigendian then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,hreg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,hreg2);
                    shift.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hreg,shift));
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,hreg);
                    shift.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hreg2,shift));
                    shift.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hreg,shift));
                  end;
              end
            else
              handle_load_store(list,A_LDR,postfix,reg,ref);
          end;
        end
      else
        handle_load_store(list,A_LDR,postfix,reg,ref);

      { fix up signed byte loads }
      if fromsize=OS_S8 then
        begin
          if noallmem then
            a_load_reg_reg(list,OS_S8,OS_32,reg,reg)
          else if tosize=OS_16 then
            a_load_reg_reg(list,OS_16,OS_32,reg,reg);
        end;
    end;
  { Subtracts ioffset from the self parameter of procdef, in every caller
    side location of that parameter. Offsets which are no shifter constant
    are loaded into R12 first.
    Raises internal error 200305251 if procdef has no 'self' parameter
    symbol and 200309189 if self is neither in a register nor in memory. }
  procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      selfsym : tsym;
      selfref : treference;
      curloc : Pcgparalocation;
      shift : byte;
    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);

      selfsym:=tsym(procdef.parast.Find('self'));
      if (not assigned(selfsym)) or
         (selfsym.typ<>paravarsym) then
        internalerror(200305251);

      curloc:=tparavarsym(selfsym).paraloc[callerside].location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REGISTER:
              begin
                if is_shifter_const(ioffset,shift) then
                  a_op_const_reg(list,OP_SUB,curloc^.size,ioffset,curloc^.register)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_reg(list,OP_SUB,curloc^.size,NR_R12,curloc^.register);
                  end;
              end;
            LOC_REFERENCE:
              begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(selfref,curloc^.reference.index,
                  curloc^.reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                if is_shifter_const(ioffset,shift) then
                  a_op_const_ref(list,OP_SUB,curloc^.size,ioffset,selfref)
                else
                  begin
                    a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
                    a_op_reg_ref(list,OP_SUB,curloc^.size,NR_R12,selfref);
                  end;
              end;
            else
              internalerror(200309189);
          end;
          curloc:=curloc^.next;
        end;
    end;
  { Passes the constant a as parameter: it is loaded into the register or
    stored to the memory slot described by the (single) location of paraloc.
    Raises internal error 2002081101 for any other kind of location. }
  procedure tbasecgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      slot : treference;
      ploc : PCGParaLocation;
    begin
      paraloc.check_simple_location;
      ploc:=paraloc.location;
      paramanager.allocparaloc(list,ploc);
      case ploc^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,ploc^.register);
        LOC_REFERENCE:
          begin
            { address the slot through the index register and offset which
              the parameter location records }
            reference_reset(slot,paraloc.alignment,[]);
            slot.base:=ploc^.reference.index;
            slot.offset:=ploc^.reference.offset;
            a_load_const_ref(list,size,a,slot);
          end;
        else
          internalerror(2002081101);
      end;
    end;
  { Copies a parameter from ref to the memory location paralocref. Only the
    32 bit parts of a 64 bit float parameter are treated here, they are
    copied as plain 4 byte blocks; everything else goes to the inherited
    implementation. }
  procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
    var
      splitdouble : boolean;
    begin
      { doubles in softemu mode have a strange order of registers and references }
      splitdouble:=(cgpara.size=OS_F64) and
                   (location^.size=OS_32);
      if splitdouble then
        g_concatcopy(list,ref,paralocref,4)
      else
        inherited;
    end;
  { Passes the address of r as parameter. For a register location the
    address is computed directly into that register; for a memory location
    it is computed into a temporary register which is then stored.
    Raises internal error 2002080701 for any other kind of location. }
  procedure tbasecgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      slot : treference;
      addrreg : tregister;
      ploc : PCGParaLocation;
    begin
      paraloc.check_simple_location;
      ploc:=paraloc.location;
      paramanager.allocparaloc(list,ploc);
      case ploc^.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,ploc^.register);
        LOC_REFERENCE:
          begin
            reference_reset(slot,paraloc.alignment,[]);
            slot.base:=ploc^.reference.index;
            slot.offset:=ploc^.reference.offset;
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,slot);
          end;
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call (BL) to the function symbol named s; with weak set, a weak
    reference to the symbol is used. The reference is marked addr_pic when
    the target uses a GOT for PIC and PIC code is being created, addr_full
    otherwise. Also records pi_do_call for the current procedure. }
  procedure tbasecgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callop : tasmop;
      target : treference;
      callee : TAsmSymbol;
    begin
      { use always BL as newer binutils do not translate blx apparently
        generating BL is also what clang and gcc do by default }
      callop:=A_BL;

      if weak then
        callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
      else
        callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      reference_reset_symbol(target,callee,0,sizeof(pint),[]);

      if not((tf_pic_uses_got in target_info.flags) and
             (cs_create_pic in current_settings.moduleswitches)) then
        target.refaddr:=addr_full
      else
        target.refaddr:=addr_pic;

      list.concat(taicpu.op_ref(callop,target));

      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg: a single BLX if the cpu has it,
    otherwise the pair "mov r14,pc" / "mov pc,reg". Also records pi_do_call
    for the current procedure. }
  procedure tbasecgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX,reg))
      else
        begin
          { set up the link register by hand, then jump through reg }
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;

      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { reg := reg op a; simply the three operand form with reg used both as
    source and as destination }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  { Applies "op a" to the value stored at ref: the value is loaded into a
    temporary register, the result is computed into a second one and then
    written back through the reference that the load returned. }
  procedure tcgarm.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      valreg,
      resreg : tregister;
      loadedref : treference;
    begin
      valreg:=getintregister(list,size);
      resreg:=getintregister(list,size);
      { keep the reference handed back by the load and store through it }
      loadedref:=a_internal_load_ref_reg(list,size,size,ref,valreg);
      a_op_const_reg_reg(list,op,size,a,valreg,resreg);
      a_load_reg_ref(list,size,size,resreg,loadedref);
    end;
  { dst := op(src) for OP_NEG and OP_NOT, dst := dst op src for all other
    operations (those are passed on to a_op_reg_reg_reg). }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shift : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            { dst := 0 - src }
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
            maybeadjustresult(list,OP_NEG,size,dst);
          end;
        OP_NOT:
          begin
            if not(size in [OS_8,OS_16,OS_S8,OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                { invert with the value moved to the top of the register,
                  then shift the result back down }
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                if size in [OS_8,OS_S8] then
                  shift.shiftimm:=24
                else
                  shift.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8,OS_S16] then
                  shift.shiftmode:=SM_ASR
                else
                  shift.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,size,src,dst,dst);
      end;
    end;
  const
    { lookup tables indexed by the generic operation (TOpCG); A_NONE marks
      operations without an entry in the respective table }

    { same as op_reg_opcg2asmop below, except that the three entries which
      are A_ASR,A_LSL,A_LSR there and the final A_ROR are A_NONE here }
    op_reg_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
      A_NONE,A_NONE,A_NONE,
      A_SUB,A_EOR,A_NONE,A_NONE
    );

    op_reg_opcg2asmop: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,
      A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
      A_ASR,A_LSL,A_LSR,
      A_SUB,A_EOR,A_NONE,A_ROR
    );

    { postfix per operation; currently PF_None for every operation }
    op_reg_postfix: array[TOpCG] of TOpPostfix = (
      PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,
      PF_None,PF_None,PF_None,PF_None
    );
  { dst := src op a, without flag setting: forwards to the checkoverflow
    variant with setflags=false; the overflow location is not used }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      dummyloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,dummyloc);
    end;
  { three register operation without flag setting: forwards to the
    checkoverflow variant with setflags=false; the overflow location is not
    used }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      dummyloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,false,dummyloc);
    end;
  { Translates a generic shift/rotate operation into the shifter operand
    mode. Note that OP_ROL yields SM_ROR as well, there is no separate mode
    for it in this mapping.
    Raises internal error 2012070501 for any other operation. }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        OP_ROR,
        OP_ROL:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end
    end;
  { Tries to replace the 32 bit multiplication dst := src * a by a short
    sequence of MOV/ADD/SUB/RSB instructions with shifted operands.

    Returns true if a replacement sequence was appended to list, false if no
    sequence within the instruction budget was found (nothing is emitted in
    that case). }
  function tbasecgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
    var
      factor : dword;
      pow2 : longint;
      so : tshifterop;
      setbits : byte;
      isneg : boolean;
      isfirst : boolean;
      rot,
      extracost : byte;
      budget : byte;
    begin
      result:=true;
      isneg:=a<0;
      { a negative factor needs one more instruction: the final RSB dst,dst,#0 }
      if isneg then
        extracost:=1
      else
        extracost:=0;
      so.rs:=NR_NO;
      so.shiftmode:=SM_LSL;
      factor:=dword(abs(a));
      { bit 0 is not counted, it is covered by the first instruction }
      setbits:=popcnt(factor and $fffffffe);

      { Budget heuristic, based on XScale timings: loading the constant plus
        the mul itself takes at least 1+4 cycles.  Since the first shifted
        immediate may stall and the replacement needs more instructions, at
        most 3 instructions are spent by default. }
      budget:=3;
      { a constant that is not a shifter operand (neither directly nor
        inverted) is more expensive to load, so allow one more instruction }
      if not(is_shifter_const(dword(a),rot) or is_shifter_const(not(dword(a)),rot)) then
        inc(budget);
      { mul is one cycle faster if the upper 5 bits are all set or all clear }
      if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
        dec(budget);
      { ... and another cycle faster if the upper 17 bits are }
      if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
        dec(budget);

      { trivial factors first }
      if a=1 then
        a_load_reg_reg(list,OS_32,OS_32,src,dst)
      else if a=0 then
        a_load_const_reg(list,OS_32,0,dst)
      else if a=-1 then
        a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
      { Sum of shifted copies: one add per set bit, taken from the most
        significant bit downwards; bit 0 is folded into the first ADD. }
      else if (setbits+extracost<=budget) and
        (setbits<=popcnt(dword(nextpowerof2(factor,pow2)-factor) and $fffffffe)) then
        begin
          isfirst:=true;
          while factor<>0 do
            begin
              so.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  { dst := src + (src shl msb); this also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
                  dec(factor);
                end
              else if isfirst then
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
              else
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,so));
              isfirst:=false;
              { the bit just handled is done }
              dec(factor,1 shl so.shiftimm);
            end;
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      { Otherwise: start from the power of two delivered by nextpowerof2 and
        subtract shifted copies for the difference. }
      else if popcnt(dword(nextpowerof2(factor,pow2)-factor) and $fffffffe)+extracost+1<=budget then
        begin
          isfirst:=true;
          while factor<>0 do
            begin
              if isfirst then
                begin
                  { switch over to the amount that has to be subtracted }
                  factor:=(1 shl pow2)-factor;
                  so.shiftimm:=pow2;
                end
              else
                so.shiftimm:=BsrDWord(factor);
              if odd(factor) then
                begin
                  { dst := (src shl shiftimm) - src; this also consumes bit 0 }
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
                  dec(factor);
                end
              else if isfirst then
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so))
              else
                begin
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,so));
                  dec(factor,1 shl so.shiftimm);
                end;
              isfirst:=false;
            end;
          if isneg then
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
        end
      else
        result:=false;
    end;
  { Emits code for "dst := src <op> a".

    list     - instruction list the code is appended to
    setflags - request a flag-setting instruction (also forced by the
               cgsetflags field)
    ovloc    - set to LOC_VOID, or to LOC_FLAGS together with the flag to
               test when a flag-setting ADD/SUB was emitted for an 8/16/32
               bit unsigned size; the register fallback at the end lets
               a_op_reg_reg_reg_checkoverflow fill it in

    Constants encodable as shifter operand are used directly; otherwise a
    number of special cases are tried before the constant is loaded into a
    temporary register. }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      rot, maskstart, masklen : byte;
      constreg : tregister;
      shop : tshifterop;
      shiftcnt : longint;
      part1, part2: DWord;
    begin
      optimize_op_const(size, op, a);
      { plain copy / plain constant load need no arithmetic at all }
      if op=OP_NONE then
        begin
          if src <> dst then
            a_load_reg_reg(list, size, size, src, dst);
          exit;
        end
      else if op=OP_MOVE then
        begin
          a_load_const_reg(list, size, a, dst);
          exit;
        end;

      ovloc.loc:=LOC_VOID;
      { if -a is a shifter constant, swap ADD<->SUB and negate the constant
        (not when flags were requested through setflags) }
      if (a<>-2147483648) and not setflags and is_shifter_const(-a,rot) then
        begin
          if op in [OP_ADD,OP_SUB] then
            begin
              if op=OP_ADD then
                op:=OP_SUB
              else
                op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_shifter_const(a,rot) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT:
            internalerror(200308281);
          OP_SHL,
          OP_SHR,
          OP_ROL,
          OP_ROR,
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308294);
              shifterop_reset(shop);
              shop.shiftmode:=opshift2shiftmode(op);
              { a left rotate is emitted as right rotate by 32-a }
              if op = OP_ROL then
                shop.shiftimm:=32-a
              else
                shop.shiftimm:=a;
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shop));
            end;
          else
            begin
              { (a disabled MOVT/MOV variant for ADD/SUB constants outside
                0..4095 used to be sketched here) }
              if cgsetflags or setflags then
                a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(
                taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                begin
                  ovloc.loc:=LOC_FLAGS;
                  case op of
                    OP_ADD:
                      ovloc.resflags:=F_CS;
                    OP_SUB:
                      ovloc.resflags:=F_CC;
                    else
                      internalerror(2019050922);
                  end;
                end;
            end;
        end
      else
        begin
          { room for more sophisticated optimizations }
          if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { multiplication by a power of two: done here rather than in the
            peephole optimizer because it saves a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,shiftcnt) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,shiftcnt,src,dst)
          { 2^n+1, e.g. b=a*5 -> b=a+(a shl 2) }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,shiftcnt) and not(cgsetflags or setflags) then
            begin
              if shiftcnt>32 then
                internalerror(200308296);
              shifterop_reset(shop);
              shop.shiftmode:=SM_LSL;
              shop.shiftimm:=shiftcnt;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shop));
            end
          { 2^n-1, e.g. b=a*7 -> b=(a shl 3)-a using RSB }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,shiftcnt) and not(cgsetflags or setflags) then
            begin
              if shiftcnt>32 then
                internalerror(201205181);
              shifterop_reset(shop);
              shop.shiftmode:=SM_LSL;
              shop.shiftimm:=shiftcnt;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { the helper already emitted the code }
            end
          { AND with a mask whose complement is a shifter constant: BIC
            clears the given bits, which widens the range of usable masks }
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),rot) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          { AND with a contiguous mask touching bit 0 or bit 31: two shifts
            instead of two BICs may allow the peephole optimizer to fold the
            second shift into the following instruction }
          else if (op = OP_AND) and
            is_continuous_mask(aword(a), maskstart, masklen) and
            ((maskstart = 0) or ((maskstart + masklen) = 32)) then
            begin
              shifterop_reset(shop);
              if (masklen = 16) and
                (maskstart = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
              else if (masklen = 8) and
                (maskstart = 0) and
                (current_settings.cputype >= cpu_armv6) then
                list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
              else if maskstart = 0 then
                begin
                  { shift the unwanted upper bits out and back in as zeros }
                  shop.shiftmode:=SM_LSL;
                  shop.shiftimm:=32-masklen;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shop));
                  shop.shiftmode:=SM_LSR;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shop));
                end
              else
                begin
                  { shift the unwanted lower bits out and back in as zeros }
                  shop.shiftmode:=SM_LSR;
                  shop.shiftimm:=maskstart;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shop));
                  shop.shiftmode:=SM_LSL;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shop));
                end;
            end
          { AND whose inverted mask splits into two shifter constants }
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,part2));
            end
          { ADD/SUB/OR/XOR whose constant splits into two shifter constants }
          else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
            not(cgsetflags or setflags) and
            split_into_shifter_const(a, part1, part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,part2));
            end
          else
            begin
              { general case: materialize the constant in a register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits code for "dst := src2 <op> src1".

    list     - instruction list the code is appended to
    setflags - request flag-setting code (also forced by the cgsetflags
               field)
    ovloc    - set to LOC_VOID; for a flag-checked multiplication on a CPU
               with UMULL it becomes LOC_FLAGS/F_NE, i.e. "the upper half of
               the 64 bit product is not the plain extension of the lower
               half"

    OP_NEG, OP_NOT, OP_DIV and OP_IDIV are not supported here and raise an
    internal error. }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308283);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst,src2,<shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if (cgsetflags or setflags) and
              (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
              begin
                { compute the full 64 bit product, overflowreg receives the
                  upper half }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register: the
                          operand has to be copied into the temporary, which
                          then serves as rm (copying into dst, as was done
                          before, left tmpreg uninitialized) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the upper half must equal the sign extension
                      of the lower half (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the upper half must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { all three registers are identical: use a copy of
                          the operand in tmpreg as rm (see above) }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { plain data processing instruction, with S postfix if flags are
              requested }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Multiplies src1 by src2 and delivers the result as register pair
    dstlo/dsthi.  With UMULL support a UMULL (OS_32) or SMULL (OS_S32) is
    emitted; without it only the low half can be produced by a plain MUL, so
    a request for dsthi raises internal error 2015083022.  Either destination
    may be passed as NR_NO if the caller does not need it. }
  procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
    var
      mulop: tasmop;
    begin
      if not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
        begin
          if dsthi<>NR_NO then
            begin
              internalerror(2015083022);
            end
          else
            begin
              { 32x32=32 bit multiplication is sufficient }
              if (dstlo = NR_NO) then
                dstlo:=getintregister(list,size);
              list.concat(taicpu.op_reg_reg_reg(A_MUL, dstlo, src1,src2));
            end;
        end
      else
        begin
          list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
          case size of
            OS_S32:
              mulop:=A_SMULL;
            OS_32:
              mulop:=A_UMULL;
            else
              InternalError(2014060802);
          end;
          { The long multiply needs valid registers for both halves, even
            for a half the caller omitted.  (With dsthi = NR_NO a fall back
            to a 32x32=32 bit multiplication would be possible.) }
          if (dstlo = NR_NO) then
            dstlo:=getintregister(list,size);
          if (dsthi = NR_NO) then
            dsthi:=getintregister(list,size);
          list.concat(taicpu.op_reg_reg_reg_reg(mulop, dstlo, dsthi, src1,src2));
        end;
    end;
  { Emits the load/store instruction op (with postfix oppostfix) for register
    reg and memory reference ref.  Before the instruction is emitted, ref
    (a local copy) is rewritten wherever the checks below consider it not
    directly usable.  The reference finally used in the instruction is
    returned. }
  function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      addrreg,sumreg : tregister;
      is_vfp_mem,is_fpu_mem,needfix : boolean;
    begin
      addrreg:=NR_NO;
      { instruction classes with restricted addressing }
      is_vfp_mem:=(op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);
      is_fpu_mem:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020701);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { Absolute symbols can't be addressed directly: the symbol reference
        has to be stored in the text segment and accessed pc relative.  For
        now references using PC as base or index are assumed to be relative
        already, everything else is assumed to be absolute.  A proper
        solution would be to turn refoptions into a set that records whether
        the symbol is absolute or relative. }
      needfix:=assigned(ref.symbol) and
        not(is_pc(ref.base)) and
        not(is_pc(ref.index));
      { [#xxx] isn't a valid address operand }
      if not needfix then
        needfix:=(ref.base=NR_NO) and (ref.index=NR_NO);
      { general offset range }
      if not needfix then
        needfix:=(ref.offset<-4095) or (ref.offset>4095);
      { narrower offset range of the SB/H/SH forms }
      if not needfix then
        needfix:=(oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or (ref.offset>255));
      { floating point loads/stores: limited, word aligned offsets }
      if not needfix then
        needfix:=is_fpu_mem and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0));
      { Thumb restrictions, depending on the access size }
      if (not needfix) and GenerateThumbCode then
        case oppostfix of
          PF_SB,PF_SH:
            needfix:=ref.offset<>0;
          PF_None:
            needfix:=(ref.offset<0) or
              ((ref.base<>NR_STACK_POINTER_REG) and (ref.offset>124)) or
              ((ref.base=NR_STACK_POINTER_REG) and (ref.offset>1020)) or
              ((ref.offset mod 4)<>0);
          PF_H:
            needfix:=(ref.offset<0) or (ref.offset>62) or ((ref.offset mod 2)<>0) or
              ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0));
          PF_B:
            needfix:=(ref.offset<0) or (ref.offset>31) or
              ((getsupreg(ref.base) in [RS_R8..RS_R15]) and (ref.offset<>0));
          else
            ;
        end;
      if needfix then
        begin
          fixref(list,ref);
        end;

      if GenerateThumbCode then
        begin
          { certain thumb load require base and index }
          if (oppostfix in [PF_SB,PF_SH]) and
            (ref.base<>NR_NO) and (ref.index=NR_NO) then
            begin
              addrreg:=getintregister(list,OS_ADDR);
              a_load_const_reg(list,OS_ADDR,0,addrreg);
              ref.index:=addrreg;
            end;
          { "hi" registers cannot be used as base or index }
          if (getsupreg(ref.base) in [RS_R8..RS_R12,RS_R14]) or
            ((ref.base=NR_R13) and (ref.index<>NR_NO)) then
            begin
              addrreg:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,addrreg);
              ref.base:=addrreg;
            end;
          if getsupreg(ref.index) in [RS_R8..RS_R14] then
            begin
              addrreg:=getintregister(list,OS_ADDR);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,addrreg);
              ref.index:=addrreg;
            end;
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not(is_vfp_mem) and
        (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if addrreg<>NR_NO then
            begin
              { NOTE(review): the sum lands in a fresh register that is not
                stored back into ref here -- confirm this is intended }
              sumreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,addrreg,sumreg);
              addrreg:=sumreg;
            end
          else
            begin
              addrreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,addrreg);
              ref.base:=addrreg;
            end;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if is_fpu_mem and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if addrreg=NR_NO then
            begin
              { no temporary yet: base+index goes into a new register }
              addrreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,ref.base,ref.index));
              ref.base:=addrreg;
              ref.index:=NR_NO;
            end
          else if ref.base=addrreg then
            begin
              { the temporary is the base: add/subtract the index in place }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,addrreg,addrreg,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,addrreg,ref.index));
              ref.index:=NR_NO;
            end
          else
            begin
              { otherwise the temporary has to be the index }
              if ref.index<>addrreg then
                internalerror(200403161);
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,addrreg,ref.base,addrreg))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,ref.base,addrreg));
              ref.base:=addrreg;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register reg to memory reference ref with size tosize.  If the
    reference is aligned to only 1 or 2 bytes and that is less than the store
    size, or if a halfword store is needed on a CPU lacking
    CPUARM_HAS_ALL_MEM, the value is written in smaller pieces; the piece
    order depends on the target endianness. }
  procedure tbasecgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      postfix : toppostfix;
      partref : treference;
      shifted : tregister;
      step : integer;
      i : integer;
      bigendian : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32,
        { for vfp value stored in integer register }
        OS_F32:
          postfix:=PF_None;
        else
          InternalError(200308299);
      end;

      { the common case: a single store instruction is enough }
      if not(((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
             ((not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype])) and
              (postfix =PF_H))) then
        begin
          handle_load_store(list,A_STR,postfix,reg,ref);
          exit;
        end;

      { piecewise store: the least significant piece is written first; on
        big endian targets it lives at the highest address, so the offset
        walks downwards }
      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;
      case FromSize of
        OS_16,OS_S16:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if bigendian then
              inc(partref.offset,1);
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            inc(partref.offset,step);
            a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
            a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
          end;
        OS_32,OS_S32:
          begin
            shifted:=getintregister(list,OS_INT);
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                if bigendian then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,16,reg,shifted);
                inc(partref.offset,step*2);
                a_internal_load_reg_ref(list,OS_16,OS_16,shifted,partref);
              end
            else
              begin
                { four byte stores }
                if bigendian then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,shifted);
                inc(partref.offset,step);
                a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                for i:=1 to 2 do
                  begin
                    a_op_const_reg(list,OP_SHR,OS_INT,8,shifted);
                    inc(partref.offset,step);
                    a_internal_load_reg_ref(list,OS_8,OS_8,shifted,partref);
                  end;
              end;
          end
        else
          handle_load_store(list,A_STR,postfix,reg,ref);
      end;
    end;
  { Stores reg to ref with size tosize and returns the reference as used by
    handle_load_store.  On CPUs lacking CPUARM_HAS_ALL_MEM a 16 bit store is
    split into two byte stores: the low byte at the returned reference, the
    byte shifted down from bits 8..15 at offset+1. }
  function tbasecgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      postfix : toppostfix;
      hiref : treference;
      hibyte : TRegister;
    begin
      case ToSize of
        OS_8,
        OS_S8:
          postfix:=PF_B;
        OS_16,
        OS_S16:
          postfix:=PF_H;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(2003082910);
      end;
      if not(tosize in [OS_S16,OS_16]) or
        (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]) then
        result:=handle_load_store(list,A_STR,postfix,reg,ref)
      else
        begin
          result:=handle_load_store(list,A_STR,PF_B,reg,ref);
          hibyte:=getintregister(list,OS_INT);
          a_op_const_reg_reg(list,OP_SHR,OS_INT,8,reg,hibyte);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_STR,PF_B,hibyte,hiref);
        end;
    end;
  { Loads reg from ref and returns the reference as used by handle_load_store.
    The load postfix is chosen from fromsize.  On CPUs lacking
    CPUARM_HAS_ALL_MEM, a load with tosize=OS_S8 is done as byte load followed
    by an OS_S8->OS_32 register conversion, and a 16 bit load is assembled
    from two byte loads combined with ORR ... LSL #8. }
  function tbasecgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      postfix : toppostfix;
      shop : tshifterop;
      hibyte : TRegister;
      hiref : treference;
      limitedmem : boolean;
    begin
      case FromSize of
        OS_8:
          postfix:=PF_B;
        OS_S8:
          postfix:=PF_SB;
        OS_16:
          postfix:=PF_H;
        OS_S16:
          postfix:=PF_SH;
        OS_32,
        OS_S32:
          postfix:=PF_None;
        else
          InternalError(200308291);
      end;
      limitedmem:=not (CPUARM_HAS_ALL_MEM in cpu_capabilities[current_settings.cputype]);
      if limitedmem and (tosize=OS_S8) then
        begin
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          a_load_reg_reg(list,OS_S8,OS_32,reg,reg);
        end
      else if limitedmem and (tosize in [OS_S16,OS_16]) then
        begin
          { low byte into reg, next byte into a temporary ... }
          result:=handle_load_store(list,A_LDR,PF_B,reg,ref);
          hibyte:=getintregister(list,OS_INT);
          hiref:=result;
          inc(hiref.offset);
          handle_load_store(list,A_LDR,PF_B,hibyte,hiref);
          { ... then reg := reg or (hibyte shl 8) }
          shifterop_reset(shop);
          shop.shiftmode:=SM_LSL;
          shop.shiftimm:=8;
          list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,hibyte,shop));
        end
      else
        result:=handle_load_store(list,A_LDR,postfix,reg,ref);
    end;
  { Copies reg1 to reg2, converting from fromsize to tosize.  If the sizes
    differ, an explicit zero/sign extension is emitted for 8 and 16 bit
    source sizes: shift/AND sequences before ARMv6, UXTB/SXTB/UXTH/SXTH (and
    SXTB16 for OS_S8->OS_16) from ARMv6 on.  If no conversion was emitted and
    the registers differ, a plain MOV is generated and reported to the
    register allocator. }
  procedure tbasecgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      so : tshifterop;

    { emits reg2 := reg <shiftmode> shiftimm; Thumb code goes through the
      generic constant-shift routine }
    procedure do_shift(shiftmode : tshiftmode; shiftimm : byte; reg : tregister);
      begin
        if not GenerateThumbCode then
          begin
            so.shiftmode:=shiftmode;
            so.shiftimm:=shiftimm;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,reg,so));
            exit;
          end;
        case shiftmode of
          SM_LSL:
            a_op_const_reg_reg(list,OP_SHL,OS_32,shiftimm,reg,reg2);
          SM_LSR:
            a_op_const_reg_reg(list,OP_SHR,OS_32,shiftimm,reg,reg2);
          SM_ASR:
            a_op_const_reg_reg(list,OP_SAR,OS_32,shiftimm,reg,reg2);
          else
            internalerror(2013090301);
        end;
      end;

    var
      movinstr: taicpu;
      converted: boolean;
    begin
      { NOTE(review): tcgsize2size is compared against ref.alignment elsewhere
        in this unit, which suggests a byte count; ">32" may have been meant
        as a bit width -- confirm }
      if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
        internalerror(2002090901);

      converted:=tosize<>fromsize;
      if converted then
        begin
          shifterop_reset(so);
          { when narrowing (or keeping the width) the target size decides }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          if current_settings.cputype<cpu_armv6 then
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  a_op_const_reg_reg(list,OP_AND,OS_32,$ff,reg1,reg2)
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  do_shift(SM_LSL,24,reg1);
                  if tosize=OS_16 then
                    begin
                      { sign extend to 16 bit only, upper halfword cleared }
                      do_shift(SM_ASR,8,reg2);
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    do_shift(SM_ASR,24,reg2);
                end;
              OS_16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_LSR,16,reg2);
                end;
              OS_S16:
                begin
                  do_shift(SM_LSL,16,reg1);
                  do_shift(SM_ASR,16,reg2)
                end;
              else
                converted:=false;
            end
          else
            case fromsize of
              OS_8:
                if GenerateThumbCode then
                  list.concat(taicpu.op_reg_reg(A_UXTB,reg2,reg1))
                else
                  list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              OS_S8:
                begin
                  if tosize=OS_16 then
                    begin
                      so.shiftmode:=SM_ROR;
                      so.shiftimm:=16;
                      list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,so));
                      do_shift(SM_LSR,16,reg2);
                    end
                  else
                    list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
                end;
              OS_16:
                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              OS_S16:
                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              else
                converted:=false;
            end
        end;

      if not converted and (reg1<>reg2) then
        begin
          { no extension needed, a register move does the job }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { tell the register allocator about the move so that it can try to
            eliminate it }
          add_move_instruction(movinstr);
        end;
    end;
  { Passes the floating point value stored at ref as parameter paraloc.  All
    locations of the parameter are walked: FPU registers are loaded with
    a_loadfpu_ref_reg, integer registers with a_load_ref_reg (64 bit sizes via
    cg64.a_load64_ref_cgpara), stack locations are filled with g_concatcopy.
    The source offset advances by the size of each location handled. }
  procedure tbasecgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      srcref,dstref : treference;
      curloc : pcgparalocation;
    begin
      srcref:=ref;
      curloc:=paraloc.location;
      while assigned(curloc) do
        begin
          case curloc^.loc of
            LOC_REFERENCE :
              begin
                reference_reset_base(dstref,curloc^.reference.index,curloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                { concatcopy should choose the best way to copy the data }
                g_concatcopy(list,srcref,dstref,tcgsize2size[curloc^.size]);
              end;
            LOC_REGISTER :
              case curloc^.size of
                OS_64,
                OS_F64:
                  cg64.a_load64_ref_cgpara(list,srcref,paraloc);
                OS_32,
                OS_F32:
                  begin
                    paramanager.allocparaloc(list,paraloc.location);
                    a_load_ref_reg(list,OS_32,OS_32,srcref,curloc^.register);
                  end;
                else
                  a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
              end;
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                paramanager.allocparaloc(list,paraloc.location);
                { note: loads from the original ref, not the advancing copy }
                a_loadfpu_ref_reg(list,size,size,ref,curloc^.register);
              end;
            else
              internalerror(200408241);
          end;
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Moves FPU register reg1 to reg2 with an MVF instruction whose postfix is
    looked up from tosize. }
  procedure tbasecgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads FPU register reg from ref with an LDF instruction; the precision
    postfix (S, D or E) is derived from fromsize.  If tosize differs, a
    register-to-register conversion of reg follows.  Unsupported sizes raise
    internal error 200309021. }
  procedure tbasecgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision:toppostfix;
    begin
      case fromsize of
        OS_F80:
          precision:=PF_E;
        OS_64,
        OS_F64:
          precision:=PF_D;
        OS_32,
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309021);
      end;
      handle_load_store(list,A_LDF,precision,reg,ref);
      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores FPU register reg to ref with an STF instruction; the precision
    postfix (S, D or E) is derived from tosize.  Unsupported sizes raise
    internal error 200309022. }
  procedure tbasecgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision:toppostfix;
    begin
      case tosize of
        OS_F80:
          precision:=PF_E;
        OS_F64:
          precision:=PF_D;
        OS_F32:
          precision:=PF_S;
        else
          InternalError(200309022);
      end;
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  1549. { comparison operations }
  { Compares reg with the constant a and jumps to label l if cmp_op holds.
    An immediate CMP is used if a is encodable, a CMN with -a if only the
    negated value is encodable (ARM mode only); otherwise a is loaded into a
    temporary register first.  The flags are allocated for the duration of
    the compare and jump. }
  procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot : byte;
      encodable : boolean;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      if GenerateThumbCode then
        encodable:=is_thumb_imm(a)
      else
        encodable:=is_shifter_const(a,rot);
      if encodable then
        list.concat(taicpu.op_reg_const(A_CMP,reg,a))
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if (a<>$7fffffff) and (a<>-1) and not(GenerateThumbCode) and is_shifter_const(-a,rot) then
        list.concat(taicpu.op_reg_const(A_CMN,reg,-a))
      else
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          list.concat(taicpu.op_reg_reg(A_CMP,reg,constreg));
        end;
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
    { Emits a bit scan of src into dst. The forward scan uses RBIT+CLZ and
      replaces a CLZ result of 32 by $ff; the reverse scan computes
      (31-CLZ(src)) and 255. srcsize and dstsize are not inspected here. }
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this code is used or not
            so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { Thumb-2 needs an IT instruction in front of the conditional MOV }
          if GenerateThumb2Code then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  procedure tbasecgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    { Emits "CMP reg2,reg1" followed by a conditional jump to l for cmp_op,
      with the flags register marked allocated in between. }
    var
      compare : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      { note the operand order: reg2 is the first CMP operand }
      compare:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(compare);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  procedure tbasecgarm.a_jmp_name(list : TAsmList;const s : string);
    { Emits an unconditional jump to the function symbol named s:
      BL when generating Thumb code, B otherwise. }
    var
      opcode : tasmop;
      target : tasmsymbol;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      target:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
      jump:=taicpu.op_sym(opcode,target);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  procedure tbasecgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    { Emits an unconditional jump to label l:
      BL when generating Thumb code, B otherwise. }
    var
      opcode : tasmop;
      jump : taicpu;
    begin
      { generate far jump, leave it to the optimizer to get rid of it }
      if GenerateThumbCode then
        opcode:=A_BL
      else
        opcode:=A_B;
      jump:=taicpu.op_sym(opcode,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  procedure tbasecgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    { Emits a jump to l that is taken when the flags f are set. In Thumb code
      this is built as an inverted conditional branch over an unconditional
      jump; otherwise a single conditional B is emitted. }
    var
      branch : taicpu;
      negated : TResFlags;
      skip : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=setcondition(taicpu.op_sym(A_B,l),flags_to_cond(f));
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;
      negated:=f;
      inverse_flags(negated);
      { the optimizer has to fix this if jump range is sufficient short }
      current_asmdata.getjumplabel(skip);
      branch:=setcondition(taicpu.op_sym(A_B,skip),flags_to_cond(negated));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skip);
    end;
  procedure tbasecgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    { Converts the flag state f into 1/0 in reg using two conditional MOVs:
      MOV reg,1 under the condition for f and MOV reg,0 under its inverse.
      size is not inspected here. }
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  procedure tbasecgarm.g_profilecode(list : TAsmList);
    { Emits the profiling hook: on arm-linux R14 is pushed and
      __gnu_mcount_nc is called; every other target raises an internal error. }
    begin
      if target_info.system<>system_arm_linux then
        internalerror(2014091201)
      else
        begin
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
          a_call_name(list,'__gnu_mcount_nc',false);
        end;
    end;
  1666. procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
        { Emits the procedure prologue into list.
            localsize    : requested size of the local area; rounded up to a
                           multiple of 4 first and possibly enlarged below so
                           that register area + locals honour localalignmax
            nostackframe : when true no instructions are emitted; only the
                           bookkeeping assignments in front of the big "if"
                           are executed
          Steps when a stack frame is built:
            1. determine which FPA / VFP registers must be preserved
            2. push the integer registers with STM (separate layouts for
               Darwin and non-Darwin targets)
            3. subtract the local area from SP
            4. store the FPA (SFM) or VFP (VSTM) registers at floatregstart
          Side effect: sets tcpuprocinfo(current_procinfo).stackpaddingreg. }
  1667. var
  1668. ref : treference;
  1669. shift : byte;
  1670. firstfloatreg,lastfloatreg,
  1671. r : byte;
  1672. mmregs,
  1673. regs, saveregs : tcpuregisterset;
  1674. registerarea,
  1675. r7offset,
  1676. stackmisalignment : pint;
  1677. imm1, imm2: DWord;
  1678. stack_parameters : Boolean;
  1679. begin
  1680. LocalSize:=align(LocalSize,4);
  1681. stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
  1682. { call instruction does not put anything on the stack }
  1683. registerarea:=0;
        { High(TSuperRegister) means "no padding register"; it is overwritten
          below only if an extra register is pushed for alignment }
  1684. tcpuprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
  1685. lastfloatreg:=RS_NO;
  1686. if not(nostackframe) then
  1687. begin
  1688. firstfloatreg:=RS_NO;
  1689. mmregs:=[];
  1690. case current_settings.fputype of
  1691. fpu_none,
  1692. fpu_soft,
  1693. fpu_libgcc:
  1694. ;
  1695. fpu_fpa,
  1696. fpu_fpa10,
  1697. fpu_fpa11:
  1698. begin
  1699. { save floating point registers? }
  1700. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
        { remember the first and last used non-volatile FPA register and
          account 12 bytes of register area for each of them }
  1701. for r:=RS_F0 to RS_F7 do
  1702. if r in regs then
  1703. begin
  1704. if firstfloatreg=RS_NO then
  1705. firstfloatreg:=r;
  1706. lastfloatreg:=r;
  1707. inc(registerarea,12);
  1708. end;
  1709. end;
  1710. fpu_vfpv2,
  1711. fpu_vfpv3,
  1712. fpu_vfpv4,
  1713. fpu_vfpv3_d16:
  1714. begin;
  1715. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1716. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1717. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1718. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1719. end;
  1720. else
  1721. internalerror(2019050924);
  1722. end;
  1723. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1724. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1725. a_reg_alloc(list,NR_FRAME_POINTER_REG);
  1726. { save int registers }
        { ref describes a pre-indexed access through SP and is reused by all
          integer STM instructions below }
  1727. reference_reset(ref,4,[]);
  1728. ref.index:=NR_STACK_POINTER_REG;
  1729. ref.addressmode:=AM_PREINDEXED;
  1730. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
  1731. if not(target_info.system in systems_darwin) then
  1732. begin
        { NOTE(review): SP was already marked allocated a few lines above;
          this second a_reg_alloc looks redundant - confirm before removing }
  1733. a_reg_alloc(list,NR_STACK_POINTER_REG);
  1734. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1735. begin
        { keep the entry value of SP in R12; it is pushed as part of the
          frame and used to set up the frame pointer afterwards }
  1736. a_reg_alloc(list,NR_R12);
  1737. list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
  1738. end;
  1739. { the (old) ARM APCS requires saving both the stack pointer (to
  1740. crawl the stack) and the PC (to identify the function this
  1741. stack frame belongs to) -> also save R12 (= copy of R13 on entry)
  1742. and R15 -- still needs updating for EABI and Darwin, they don't
  1743. need that }
  1744. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1745. regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
  1746. else
  1747. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1748. include(regs,RS_R14);
  1749. if regs<>[] then
  1750. begin
        { 4 bytes of register area per pushed integer register }
  1751. for r:=RS_R0 to RS_R15 do
  1752. if r in regs then
  1753. inc(registerarea,4);
  1754. { if the stack is not 8 byte aligned, try to add an extra register,
  1755. so we can avoid the extra sub/add ...,#4 later (KB) }
  1756. if ((registerarea mod current_settings.alignment.localalignmax) <> 0) then
  1757. for r:=RS_R3 downto RS_R0 do
  1758. if not(r in regs) then
  1759. begin
  1760. regs:=regs+[r];
  1761. inc(registerarea,4);
  1762. tcpuprocinfo(current_procinfo).stackpaddingreg:=r;
  1763. break;
  1764. end;
  1765. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  1766. end;
  1767. if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
  1768. begin
  1769. { the framepointer now points to the saved R15, so the saved
  1770. framepointer is at R11-12 (for get_caller_frame) }
  1771. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
  1772. a_reg_dealloc(list,NR_R12);
  1773. end;
  1774. end
  1775. else
  1776. begin
  1777. { always save r14 if we use r7 as the framepointer, because
  1778. the parameter offsets are hardcoded in advance and always
  1779. assume that r14 sits on the stack right behind the saved r7
  1780. }
  1781. if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
  1782. include(regs,RS_FRAME_POINTER_REG);
  1783. if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
  1784. include(regs,RS_R14);
  1785. if regs<>[] then
  1786. begin
  1787. { on Darwin, you first have to save [r4-r7,lr], and then
  1788. [r8,r10,r11] and make r7 point to the previously saved
  1789. r7 so that you can perform a stack crawl based on it
  1790. ([r7] is previous stack frame, [r7+4] is return address
  1791. }
  1792. include(regs,RS_FRAME_POINTER_REG);
  1793. saveregs:=regs-[RS_R8,RS_R10,RS_R11];
        { r7offset = 4 bytes for every pushed register with a number below
          the frame pointer register; added to SP below to form R7 }
  1794. r7offset:=0;
  1795. for r:=RS_R0 to RS_R15 do
  1796. if r in saveregs then
  1797. begin
  1798. inc(registerarea,4);
  1799. if r<RS_FRAME_POINTER_REG then
  1800. inc(r7offset,4);
  1801. end;
  1802. { save the registers }
  1803. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1804. { make r7 point to the saved r7 (regardless of whether this
  1805. frame uses the framepointer, for backtrace purposes) }
  1806. if r7offset<>0 then
  1807. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
  1808. else
  1809. list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
  1810. { now save the rest (if any) }
  1811. saveregs:=regs-saveregs;
  1812. if saveregs<>[] then
  1813. begin
  1814. for r:=RS_R8 to RS_R11 do
  1815. if r in saveregs then
  1816. inc(registerarea,4);
  1817. list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  1818. end;
  1819. end;
  1820. end;
        { number of bytes by which the pushed register area misses a multiple
          of the maximum local alignment }
  1821. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  1822. if (LocalSize<>0) or
  1823. ((stackmisalignment<>0) and
  1824. ((pi_do_call in current_procinfo.flags) or
  1825. (po_assembler in current_procinfo.procdef.procoptions))) then
  1826. begin
        { grow localsize so that localsize+stackmisalignment is a multiple of
          localalignmax }
  1827. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
  1828. if stack_parameters and (pi_estimatestacksize in current_procinfo.flags) then
  1829. begin
        { with an estimated stack size the frame must have exactly the
          estimated size; a larger actual need is an internal error }
  1830. if localsize>tcpuprocinfo(current_procinfo).stackframesize then
  1831. internalerror(2014030901)
  1832. else
  1833. localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
  1834. end;
        { SP:=SP-localsize: one SUB if the value is a shifter constant, two
          SUBs if it can be split into two such constants, otherwise the
          value is loaded into R12 and subtracted from SP }
  1835. if is_shifter_const(localsize,shift) then
  1836. begin
  1837. a_reg_dealloc(list,NR_R12);
  1838. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
  1839. end
  1840. else if split_into_shifter_const(localsize, imm1, imm2) then
  1841. begin
  1842. a_reg_dealloc(list,NR_R12);
  1843. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  1844. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  1845. end
  1846. else
  1847. begin
  1848. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1849. a_reg_alloc(list,NR_R12);
  1850. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  1851. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  1852. a_reg_dealloc(list,NR_R12);
  1853. end;
  1854. end;
        { finally store the floating point / vector registers collected above }
  1855. if (mmregs<>[]) or
  1856. (firstfloatreg<>RS_NO) then
  1857. begin
  1858. reference_reset(ref,4,[]);
        { for VFP, or when the float save area is 1023 bytes or more away,
          the address framepointer+floatregstart is computed into R12
          (as framepointer - (-floatregstart)); otherwise the frame pointer
          plus offset is used directly }
  1859. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1860. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1861. begin
  1862. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1863. begin
  1864. a_reg_alloc(list,NR_R12);
  1865. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1866. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1867. a_reg_dealloc(list,NR_R12);
  1868. end
  1869. else
  1870. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1871. ref.base:=NR_R12;
  1872. end
  1873. else
  1874. begin
  1875. ref.base:=current_procinfo.framepointer;
  1876. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1877. end;
  1878. case current_settings.fputype of
  1879. fpu_fpa,
  1880. fpu_fpa10,
  1881. fpu_fpa11:
  1882. begin
        { one SFM stores the whole range firstfloatreg..lastfloatreg }
  1883. list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1884. lastfloatreg-firstfloatreg+1,ref));
  1885. end;
  1886. fpu_vfpv2,
  1887. fpu_vfpv3,
  1888. fpu_vfpv4,
  1889. fpu_vfpv3_d16:
  1890. begin
        { VSTM is given the address register in ref.index, not ref.base }
  1891. ref.index:=ref.base;
  1892. ref.base:=NR_NO;
  1893. { FSTMX is deprecated on ARMv6 and later }
  1894. {if (current_settings.cputype<cpu_armv6) then
  1895. postfix:=PF_IAX
  1896. else
  1897. postfix:=PF_IAD;}
  1898. if mmregs<>[] then
  1899. list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  1900. end;
  1901. else
  1902. internalerror(2019050923);
  1903. end;
  1904. end;
  1905. end;
  1906. end;
  1907. procedure tbasecgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
        { Emits the procedure epilogue into list.
            parasize     : not referenced in this routine
            nostackframe : when true only a plain return is emitted
                           (MOV PC,R14 if the CPU lacks BX, BX R14 otherwise)
          Steps when a stack frame exists:
            1. reload the preserved FPA (LFM) or VFP (VLDM) registers
            2. build the set of integer registers to pop
            3. without a frame pointer, or on Darwin: add the local area back
               to SP and pop with LDM through SP (or return via R14 when
               there is nothing to pop)
            4. otherwise: a single LDM through the frame pointer }
  1908. var
  1909. ref : treference;
  1910. LocalSize : longint;
  1911. firstfloatreg,lastfloatreg,
  1912. r,
  1913. shift : byte;
  1914. mmregs,
  1915. saveregs,
  1916. regs : tcpuregisterset;
  1917. registerarea,
  1918. stackmisalignment: pint;
  1919. paddingreg: TSuperRegister;
  1920. imm1, imm2: DWord;
  1921. begin
  1922. if not(nostackframe) then
  1923. begin
  1924. registerarea:=0;
  1925. firstfloatreg:=RS_NO;
  1926. lastfloatreg:=RS_NO;
  1927. mmregs:=[];
  1928. saveregs:=[];
  1929. case current_settings.fputype of
  1930. fpu_none,
  1931. fpu_soft,
  1932. fpu_libgcc:
  1933. ;
  1934. fpu_fpa,
  1935. fpu_fpa10,
  1936. fpu_fpa11:
  1937. begin
  1938. { restore floating point registers? }
  1939. regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
  1940. for r:=RS_F0 to RS_F7 do
  1941. if r in regs then
  1942. begin
  1943. if firstfloatreg=RS_NO then
  1944. firstfloatreg:=r;
  1945. lastfloatreg:=r;
  1946. { floating point register space is already included in
  1947. localsize below by calc_stackframe_size
  1948. inc(registerarea,12);
  1949. }
  1950. end;
  1951. end;
  1952. fpu_vfpv2,
  1953. fpu_vfpv3,
  1954. fpu_vfpv4,
  1955. fpu_vfpv3_d16:
  1956. begin;
  1957. { restore vfp registers? }
  1958. { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
  1959. they have numbers>$1f which is not really correct as they should simply have the same numbers
  1960. as the even ones by with a different subtype as it is done on x86 with al/ah }
  1961. mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
  1962. end;
  1963. else
  1964. internalerror(2019050926);
  1965. end;
  1966. if (firstfloatreg<>RS_NO) or
  1967. (mmregs<>[]) then
  1968. begin
  1969. reference_reset(ref,4,[]);
        { same addressing decision as in the prologue: for VFP, or when the
          save area is 1023 bytes or more away, framepointer+floatregstart is
          computed into R12; otherwise frame pointer plus offset is used }
  1970. if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
  1971. (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
  1972. begin
  1973. if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
  1974. begin
  1975. a_reg_alloc(list,NR_R12);
  1976. a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
  1977. list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
  1978. a_reg_dealloc(list,NR_R12);
  1979. end
  1980. else
  1981. list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tcpuprocinfo(current_procinfo).floatregstart));
  1982. ref.base:=NR_R12;
  1983. end
  1984. else
  1985. begin
  1986. ref.base:=current_procinfo.framepointer;
  1987. ref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
  1988. end;
  1989. case current_settings.fputype of
  1990. fpu_fpa,
  1991. fpu_fpa10,
  1992. fpu_fpa11:
  1993. begin
        { one LFM reloads the whole range firstfloatreg..lastfloatreg }
  1994. list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
  1995. lastfloatreg-firstfloatreg+1,ref));
  1996. end;
  1997. fpu_vfpv2,
  1998. fpu_vfpv3,
  1999. fpu_vfpv4,
  2000. fpu_vfpv3_d16:
  2001. begin
        { VLDM is given the address register in ref.index, not ref.base }
  2002. ref.index:=ref.base;
  2003. ref.base:=NR_NO;
  2004. { FLDMX is deprecated on ARMv6 and later }
  2005. {if (current_settings.cputype<cpu_armv6) then
  2006. mmpostfix:=PF_IAX
  2007. else
  2008. mmpostfix:=PF_IAD;}
  2009. if mmregs<>[] then
  2010. list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
  2011. end;
  2012. else
  2013. internalerror(2019050921);
  2014. end;
  2015. end;
  2016. regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
        { when something was pushed, R14 is replaced by R15 in the pop set,
          so the final LDM also performs the return }
  2017. if (pi_do_call in current_procinfo.flags) or
  2018. (regs<>[]) or
  2019. ((target_info.system in systems_darwin) and
  2020. (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
  2021. begin
  2022. exclude(regs,RS_R14);
  2023. include(regs,RS_R15);
  2024. if (target_info.system in systems_darwin) then
  2025. include(regs,RS_FRAME_POINTER_REG);
  2026. end;
  2027. if not(target_info.system in systems_darwin) then
  2028. begin
  2029. { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
  2030. The saved PC came after that but is discarded, since we restore
  2031. the stack pointer }
  2032. if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
  2033. regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
  2034. end
  2035. else
  2036. begin
  2037. { restore R8-R11 already if necessary (they've been stored
  2038. before the others) }
        { the LDM for saveregs itself is emitted further below, after SP has
          been adjusted; here only ref and the register area are prepared }
  2039. saveregs:=regs*[RS_R8,RS_R10,RS_R11];
  2040. if saveregs<>[] then
  2041. begin
  2042. reference_reset(ref,4,[]);
  2043. ref.index:=NR_STACK_POINTER_REG;
  2044. ref.addressmode:=AM_PREINDEXED;
  2045. for r:=RS_R8 to RS_R11 do
  2046. if r in saveregs then
  2047. inc(registerarea,4);
  2048. regs:=regs-saveregs;
  2049. end;
  2050. end;
        { 4 bytes of register area per integer register that will be popped }
  2051. for r:=RS_R0 to RS_R15 do
  2052. if r in regs then
  2053. inc(registerarea,4);
  2054. { reapply the stack padding reg, in case there was one, see the complementary
  2055. comment in g_proc_entry() (KB) }
  2056. paddingreg:=tcpuprocinfo(current_procinfo).stackpaddingreg;
        { only R0..R3 can be padding registers; any other value (such as the
          High(TSuperRegister) set by g_proc_entry) means none was pushed }
  2057. if paddingreg < RS_R4 then
  2058. if paddingreg in regs then
  2059. internalerror(201306190)
  2060. else
  2061. begin
  2062. regs:=regs+[paddingreg];
  2063. inc(registerarea,4);
  2064. end;
  2065. stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
  2066. if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
  2067. (target_info.system in systems_darwin) then
  2068. begin
  2069. LocalSize:=current_procinfo.calc_stackframe_size;
  2070. if (LocalSize<>0) or
  2071. ((stackmisalignment<>0) and
  2072. ((pi_do_call in current_procinfo.flags) or
  2073. (po_assembler in current_procinfo.procdef.procoptions))) then
  2074. begin
        { recompute the amount that g_proc_entry subtracted from SP }
  2075. if pi_estimatestacksize in current_procinfo.flags then
  2076. LocalSize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
  2077. else
  2078. localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
        { SP:=SP+localsize: one ADD, two ADDs, or load into R12 and add,
          depending on how the constant can be encoded }
  2079. if is_shifter_const(LocalSize,shift) then
  2080. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
  2081. else if split_into_shifter_const(localsize, imm1, imm2) then
  2082. begin
  2083. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
  2084. list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
  2085. end
  2086. else
  2087. begin
  2088. a_reg_alloc(list,NR_R12);
  2089. a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
  2090. list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
  2091. a_reg_dealloc(list,NR_R12);
  2092. end;
  2093. end;
        { Darwin: pop the second register group (subset of R8,R10,R11) first;
          ref was prepared above when saveregs was computed }
  2094. if (target_info.system in systems_darwin) and
  2095. (saveregs<>[]) then
  2096. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
  2097. if regs=[] then
  2098. begin
        { nothing to pop: return through R14 }
  2099. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2100. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2101. else
  2102. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2103. end
  2104. else
  2105. begin
  2106. reference_reset(ref,4,[]);
  2107. ref.index:=NR_STACK_POINTER_REG;
  2108. ref.addressmode:=AM_PREINDEXED;
  2109. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
  2110. end;
  2111. end
  2112. else
  2113. begin
  2114. { restore int registers and return }
        { frame pointer based frame: LDM with PF_EA through the frame pointer;
          ref.addressmode is left as set by reference_reset }
  2115. reference_reset(ref,4,[]);
  2116. ref.index:=NR_FRAME_POINTER_REG;
  2117. list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
  2118. end;
  2119. end
  2120. else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  2121. list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
  2122. else
  2123. list.concat(taicpu.op_reg(A_BX,NR_R14))
  2124. end;
  procedure tbasecgarm.g_maybe_got_init(list : TAsmList);
    { Emits the code that loads the GOT address into current_procinfo.got.
      Nothing is emitted unless PIC is enabled, the procedure needs the GOT
      and the target reaches PIC data through a GOT. }
    var
      poolref : treference;
      poollabel,
      anchorlabel : TAsmLabel;
      usedregs : tcpuregisterset;
      sr : byte;
    begin
      if not(cs_create_pic in current_settings.moduleswitches) or
         not(pi_needs_got in current_procinfo.flags) or
         not(tf_pic_uses_got in target_info.flags) then
        exit;
      { Procedure parameters are not initialized at this stage.
        Before the GOT initialization code, allocate the registers used for
        procedure parameters to prevent usage of these registers for temp
        operations in later stages of code generation. }
      usedregs:=rg[R_INTREGISTER].used_in_proc;
      for sr:=RS_R0 to RS_R3 do
        if sr in usedregs then
          a_reg_alloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));
      { Allocate scratch register R12 and use it for GOT calculations directly.
        Otherwise the init code can be distorted in later stages of code generation. }
      a_reg_alloc(list,NR_R12);
      { constant pool entry, loaded PC-relative into R12 }
      reference_reset(poolref,4,[]);
      current_asmdata.getglobaldatalabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      list.concat(Taicpu.op_reg_ref(A_LDR,NR_R12,poolref));
      { the pool entry holds the GOT symbol relative to the anchor label
        placed right in front of the ADD below }
      current_asmdata.getaddrlabel(anchorlabel);
      current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_32bit,anchorlabel,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_',AT_DATA),-8));
      cg.a_label(list,anchorlabel);
      list.concat(Taicpu.op_reg_reg_reg(A_ADD,NR_R12,NR_PC,NR_R12));
      list.concat(Taicpu.op_reg_reg(A_MOV,current_procinfo.got,NR_R12));
      { Deallocate registers }
      a_reg_dealloc(list,NR_R12);
      for sr:=RS_R3 downto RS_R0 do
        if sr in usedregs then
          a_reg_dealloc(list, newreg(R_INTREGISTER,sr,R_SUBWHOLE));
    end;
  procedure tbasecgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    { Loads the address described by ref into register r. Only plain offset
      addressing without a shifted index is supported; anything else raises
      an internal error. }
    var
      rot : byte;
      addr : treference;
      mov : taicpu;
      indexop : topcg;
    begin
      if ref.addressmode<>AM_OFFSET then
        internalerror(200309071);
      addr:=ref;
      { Be sure to have a base register: promote the index if necessary }
      if addr.base=NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(2014020702);
          if addr.signindex<0 then
            internalerror(200312023);
          addr.base:=addr.index;
          addr.index:=NR_NO;
        end;
      { symbols and offsets that neither fit as +imm nor as -imm go through fixref }
      if assigned(addr.symbol) or
         (not(is_shifter_const(addr.offset,rot)) and
          not(is_shifter_const(-addr.offset,rot))) then
        fixref(list,addr);
      { expect a base here if there is an index }
      if (addr.base=NR_NO) and (addr.index<>NR_NO) then
        internalerror(200312022);
      if addr.index<>NR_NO then
        begin
          if addr.shiftmode<>SM_None then
            internalerror(200312021);
          if addr.signindex<0 then
            indexop:=OP_SUB
          else
            indexop:=OP_ADD;
          a_op_reg_reg_reg(list,indexop,OS_ADDR,addr.base,addr.index,r);
          if addr.offset<>0 then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,r,r);
        end
      else if addr.base=NR_NO then
        a_load_const_reg(list,OS_ADDR,addr.offset,r)
      else if addr.offset<>0 then
        a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addr.offset,addr.base,r)
      else
        begin
          { plain register copy; registered as a move instruction }
          mov:=taicpu.op_reg_reg(A_MOV,r,addr.base);
          list.concat(mov);
          add_move_instruction(mov);
        end;
    end;
  2219. procedure tbasecgarm.fixref(list : TAsmList;var ref : treference);
        { Rewrites ref so that its symbol and offset no longer have to be
          encoded in the instruction: their value is materialised in a
          register and ref is changed to use that register as base or index.
          On exit ref.offset is 0 and ref.symbol is nil.
          The value normally comes from a new entry appended to
          current_procinfo.aktlocaldata that is loaded with a PC-relative LDR;
          for external symbols on arm-darwin g_indirect_sym_load is used
          instead. Instructions are appended to list. }
  2220. var
  2221. tmpreg, tmpreg2 : tregister;
  2222. tmpref : treference;
  2223. l, piclabel : tasmlabel;
  2224. indirection_done : boolean;
  2225. begin
  2226. { absolute symbols can't be handled directly, we've to store the symbol reference
  2227. in the text segment and access it pc relative
  2228. For now, we assume that references where base or index equals to PC are already
  2229. relative, all other references are assumed to be absolute and thus they need
  2230. to be handled extra.
  2231. A proper solution would be to change refoptions to a set and store the information
  2232. if the symbol is absolute or relative there.
  2233. }
  2234. { create consts entry }
        { NOTE(review): the label l is emitted to aktlocaldata before the
          branch is chosen; in the Darwin indirect-symbol branch below no
          data is appended after it and l is not referenced - confirm this
          is harmless }
  2235. reference_reset(tmpref,4,[]);
  2236. current_asmdata.getjumplabel(l);
  2237. cg.a_label(current_procinfo.aktlocaldata,l);
  2238. tmpref.symboldata:=current_procinfo.aktlocaldata.last;
  2239. piclabel:=nil;
  2240. tmpreg:=NR_NO;
  2241. indirection_done:=false;
        { choose what goes into the pool entry, depending on symbol/PIC mode }
  2242. if assigned(ref.symbol) then
  2243. begin
  2244. if (target_info.system=system_arm_darwin) and
  2245. (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
  2246. begin
        { Darwin external symbol: tmpreg receives the address through
          g_indirect_sym_load, the offset is added with an explicit ADD }
  2247. tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
  2248. if ref.offset<>0 then
  2249. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2250. indirection_done:=true;
  2251. end
  2252. else if ref.refaddr=addr_gottpoff then
  2253. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
  2254. else if (cs_create_pic in current_settings.moduleswitches) then
  2255. if (tf_pic_uses_got in target_info.flags) then
        { PIC via GOT: the pool entry is a GOT constant for the symbol; the
          offset is added after the GOT load further below }
  2256. current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
  2257. else
  2258. begin
  2259. { ideally, we would want to generate
  2260. ldr r1, LPICConstPool
  2261. LPICLocal:
  2262. ldr/str r2,[pc,r1]
  2263. ...
  2264. LPICConstPool:
  2265. .long _globsym-(LPICLocal+8)
  2266. However, we cannot be sure that the ldr/str will follow
  2267. right after the call to fixref, so we have to load the
  2268. complete address already in a register.
  2269. }
  2270. current_asmdata.getaddrlabel(piclabel);
  2271. current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,ref.symbol,ref.offset-8));
  2272. end
  2273. else
        { non-PIC: absolute symbol+offset }
  2274. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  2275. end
  2276. else
        { no symbol: the pool entry is just the 32 bit offset }
  2277. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  2278. { load consts entry }
  2279. if not indirection_done then
  2280. begin
  2281. tmpreg:=getintregister(list,OS_INT);
  2282. tmpref.symbol:=l;
  2283. tmpref.base:=NR_PC;
  2284. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2285. if (cs_create_pic in current_settings.moduleswitches) and
  2286. (tf_pic_uses_got in target_info.flags) and
  2287. assigned(ref.symbol) then
  2288. begin
  2289. {$ifdef EXTDEBUG}
  2290. if not (pi_needs_got in current_procinfo.flags) then
  2291. Comment(V_warning,'pi_needs_got not included');
  2292. {$endif EXTDEBUG}
        { second load: [got + tmpreg] replaces the value loaded from the
          pool in tmpreg; then the offset is applied }
  2293. Include(current_procinfo.flags,pi_needs_got);
  2294. reference_reset(tmpref,4,[]);
  2295. tmpref.base:=current_procinfo.got;
  2296. tmpref.index:=tmpreg;
  2297. list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
  2298. if ref.offset<>0 then
  2299. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
  2300. end;
  2301. end;
        { GOT-less PIC: add PC at piclabel to the relative value from the
          pool; the result lives in a fresh address register }
  2302. if assigned(piclabel) then
  2303. begin
  2304. cg.a_label(list,piclabel);
  2305. tmpreg2:=getaddressregister(list);
  2306. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpreg,NR_PC,tmpreg2);
  2307. tmpreg:=tmpreg2
  2308. end;
  2309. { This routine can be called with PC as base/index in case the offset
  2310. was too large to encode in a load/store. In that case, the entire
  2311. absolute expression has been re-encoded in a new constpool entry, and
  2312. we have to remove the use of PC from the original reference (the code
  2313. above made everything relative to the value loaded from the new
  2314. constpool entry) }
  2315. if is_pc(ref.base) then
  2316. ref.base:=NR_NO;
  2317. if is_pc(ref.index) then
  2318. ref.index:=NR_NO;
        { merge tmpreg into the reference:
            base and index present : tmpreg:=base+tmpreg becomes the new base
            base only              : tmpreg becomes an unshifted positive index
            no base                : tmpreg becomes the base }
  2319. if (ref.base<>NR_NO) then
  2320. begin
  2321. if ref.index<>NR_NO then
  2322. begin
  2323. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  2324. ref.base:=tmpreg;
  2325. end
  2326. else
  2327. if ref.base<>NR_PC then
  2328. begin
  2329. ref.index:=tmpreg;
  2330. ref.shiftimm:=0;
  2331. ref.signindex:=1;
  2332. ref.shiftmode:=SM_None;
  2333. end
  2334. else
  2335. ref.base:=tmpreg;
  2336. end
  2337. else
  2338. ref.base:=tmpreg;
        { symbol and offset are now fully contained in tmpreg }
  2339. ref.offset:=0;
  2340. ref.symbol:=nil;
  2341. end;
  procedure tbasecgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
    { Copies len bytes from source to dest by emitting a call to FPC_MOVE.
      The parameter locations are taken from the system procedure MOVE. }
    var
      srcpara,
      dstpara,
      lenpara : TCGPara;
      movedef : tprocdef;
    begin
      movedef:=search_system_proc('MOVE');
      srcpara.init;
      dstpara.init;
      lenpara.init;
      paramanager.getintparaloc(list,movedef,1,srcpara);
      paramanager.getintparaloc(list,movedef,2,dstpara);
      paramanager.getintparaloc(list,movedef,3,lenpara);
      { the parameters are loaded last to first: length, destination, source }
      a_load_const_cgpara(list,OS_SINT,len,lenpara);
      a_loadaddr_ref_cgpara(list,dest,dstpara);
      a_loadaddr_ref_cgpara(list,source,srcpara);
      paramanager.freecgpara(list,lenpara);
      paramanager.freecgpara(list,dstpara);
      paramanager.freecgpara(list,srcpara);
      { the volatile int and fpu registers are marked allocated around the call }
      alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      a_call_name(list,'FPC_MOVE',false);
      dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
      dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
      lenpara.done;
      dstpara.done;
      srcpara.done;
    end;
  { Emits inline code that copies len bytes from source to dest.

    list    - assembler list the generated instructions are appended to
    source  - reference to the first byte to read
    dest    - reference to the first byte to write
    len     - number of bytes to copy; nothing is emitted when len=0
    aligned - true if halfword/word sized accesses may be used

    Strategy, as implemented below:
      * aligned, len=4 or len=2: one load/store pair
      * aligned, len<=helpsize: up to maxtmpreg words are loaded into
        temporary registers first and stored afterwards; whatever is left
        is copied with 4/2/1 byte accesses
      * not aligned, len<=4: unrolled byte copies
      * everything else: a byte-wise copy loop (genloop/genloop_thumb) }
  procedure tbasecgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
    const
      { maximum number of word temporaries used by the unrolled aligned copy }
      maxtmpreg_arm = 10; {roozbeh: can be reduced to 8 or lower if it might conflict with reserved ones, also +2 is used because of regs required for referencing}
      maxtmpreg_thumb = 5;
    var
      srcref,dstref,usedtmpref,usedtmpref2:treference;
      srcreg,destreg,countreg,r,tmpreg:tregister;
      helpsize:aint;
      copysize:byte;
      cgsize:Tcgsize;
      tmpregisters:array[1..maxtmpreg_arm] of tregister;
      maxtmpreg,
      tmpregi,tmpregi2:byte;

    { Emits a post-indexed copy loop for ARM mode: count div size iterations
      of size bytes each, followed by an unrolled copy of the remaining
      count mod size bytes. Uses srcref/dstref/countreg of the enclosing
      procedure.

      will never be called with count<=4 }
    procedure genloop(count : aword;size : byte);
      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        { post-indexed addressing advances the base registers by size }
        srcref.addressmode:=AM_POSTINDEXED;
        dstref.addressmode:=AM_POSTINDEXED;
        srcref.offset:=size;
        dstref.offset:=size;
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        { decrement the counter with the S postfix; the branch below tests the result }
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        srcref.offset:=1;
        dstref.offset:=1;
        { copy the bytes which did not fit into a full iteration }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                srcref.offset:=2;
                dstref.offset:=2;
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    { safe estimation whether creating a separate ref is needed or
      whether we can keep the original reference while copying.
      Note: reads len of the enclosing procedure at the time of the call }
    function SimpleRef(const ref : treference) : boolean;
      begin
        result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
          ((ref.symbol=nil) and
           (ref.addressmode=AM_OFFSET) and
           (((ref.offset>=0) and (ref.offset+len<=31)) or
            (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
            { ldrh has a limited offset range }
            (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
           )
          );
      end;

    { Thumb variant of genloop: the base registers of srcref/dstref are
      advanced with explicit additions (refincofs) instead of post-indexed
      addressing.

      will never be called with count<=4 }
    procedure genloop_thumb(count : aword;size : byte);

      { adds value to the base register of ref }
      procedure refincofs(const ref : treference;const value : longint = 1);
        begin
          a_op_const_reg(list,OP_ADD,OS_ADDR,value,ref.base);
        end;

      const
        size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
      var
        l : tasmlabel;
      begin
        current_asmdata.getjumplabel(l);
        if count<size then size:=1;
        a_load_const_reg(list,OS_INT,count div size,countreg);
        cg.a_label(list,l);
        r:=getintregister(list,size2opsize[size]);
        a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
        refincofs(srcref);
        a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
        refincofs(dstref);
        a_reg_alloc(list,NR_DEFAULTFLAGS);
        list.concat(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1));
        a_jmp_flags(list,F_NE,l);
        a_reg_dealloc(list,NR_DEFAULTFLAGS);
        { copy the bytes which did not fit into a full iteration }
        case count mod size of
          1:
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
          2:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
          3:
            if aligned then
              begin
                a_load_ref_reg(list,OS_16,OS_16,srcref,r);
                refincofs(srcref,2);
                a_load_reg_ref(list,OS_16,OS_16,r,dstref);
                refincofs(dstref,2);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end
            else
              begin
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                refincofs(srcref);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
                refincofs(dstref);
                a_load_ref_reg(list,OS_8,OS_8,srcref,r);
                a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              end;
        end;
        { keep the registers alive }
        list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
        list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
        list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
      end;

    begin
      if len=0 then
        exit;
      if GenerateThumbCode then
        maxtmpreg:=maxtmpreg_thumb
      else
        maxtmpreg:=maxtmpreg_arm;
      { largest length which is still copied with unrolled code }
      helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
      dstref:=dest;
      srcref:=source;
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=8;
      if aligned and (len=4) then
        begin
          tmpreg:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
          a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
        end
      else if aligned and (len=2) then
        begin
          tmpreg:=getintregister(list,OS_16);
          a_load_ref_reg(list,OS_16,OS_16,source,tmpreg);
          a_load_reg_ref(list,OS_16,OS_16,tmpreg,dest);
        end
      else if (len<=helpsize) and aligned then
        begin
          tmpregi:=0;
          { loading address in a separate register needed? }
          if SimpleRef(source) then
            begin
              { ... then we don't need a loadaddr }
              srcref:=source;
            end
          else
            begin
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
            end;
          { first load as many words as we have temporaries for ... }
          while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
            begin
              inc(tmpregi);
              tmpregisters[tmpregi]:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
              inc(srcref.offset,4);
              dec(len,4);
            end;
          { loading address in a separate register needed? }
          if SimpleRef(dest) then
            dstref:=dest
          else
            begin
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
            end;
          { ... then store them in the same order }
          tmpregi2:=1;
          while (tmpregi2<=tmpregi) do
            begin
              a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
              inc(dstref.offset,4);
              inc(tmpregi2);
            end;
          { copy whatever is left with the widest access that still fits }
          copysize:=4;
          cgsize:=OS_32;
          while len<>0 do
            begin
              if len<2 then
                begin
                  copysize:=1;
                  cgsize:=OS_8;
                end
              else if len<4 then
                begin
                  copysize:=2;
                  cgsize:=OS_16;
                end;
              dec(len,copysize);
              r:=getintregister(list,cgsize);
              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
              inc(srcref.offset,copysize);
              inc(dstref.offset,copysize);
            end;{end of while}
        end
      else
        begin
          cgsize:=OS_32;
          if (len<=4) then{len<=4 and not aligned}
            begin
              r:=getintregister(list,cgsize);
              usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
              if Len=1 then
                a_load_reg_ref(list,OS_8,OS_8,r,dstref)
              else
                begin
                  tmpreg:=getintregister(list,cgsize);
                  usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                  inc(usedtmpref.offset,1);
                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                  inc(usedtmpref2.offset,1);
                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                  if len>2 then
                    begin
                      inc(usedtmpref.offset,1);
                      a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                      inc(usedtmpref2.offset,1);
                      a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      if len>3 then
                        begin
                          inc(usedtmpref.offset,1);
                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                          inc(usedtmpref2.offset,1);
                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                        end;
                    end;
                end;
            end{end of if len<=4}
          else
            begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
              destreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,dest,destreg);
              reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
              srcreg:=getintregister(list,OS_ADDR);
              a_loadaddr_ref_reg(list,source,srcreg);
              { the source reference carries the properties of source (including
                its temppos), matching the unrolled path above }
              reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
              countreg:=getintregister(list,OS_32);
              // if cs_opt_size in current_settings.optimizerswitches then
              { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
              {if aligned then
                genloop(len,4)
              else}
              if GenerateThumbCode then
                genloop_thumb(len,1)
              else
                genloop(len,1);
            end;
        end;
    end;
  { Copies len bytes from source to dest without assuming any alignment:
    forwards to g_concatcopy_internal with aligned=false. }
  procedure tbasecgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
    begin
      g_concatcopy_internal(list,source,dest,len,{aligned}false);
    end;
  { Copies len bytes from source to dest. The copy is treated as unaligned
    when the alignment of either reference is 1 or 3, otherwise as aligned. }
  procedure tbasecgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
    var
      treat_as_aligned : boolean;
    begin
      treat_as_aligned:=not((source.alignment in [1,3]) or
                            (dest.alignment in [1,3]));
      g_concatcopy_internal(list,source,dest,len,treat_as_aligned);
    end;
  { Emits an overflow check for l/def without a dedicated overflow location:
    g_overflowCheck_loc is called with a location whose loc is LOC_VOID. }
  procedure tbasecgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
    var
      voidloc : tlocation;
    begin
      { only the loc field is initialised; it is the field the callee dispatches on }
      voidloc.loc:=LOC_VOID;
      g_overflowCheck_loc(list,l,def,voidloc);
    end;
  { Emits code calling FPC_OVERFLOW if the preceding operation overflowed.
    Nothing is emitted unless cs_check_overflow is set in the local switches.

    A conditional branch over the call is generated:
      * ovloc.loc=LOC_FLAGS: the branch is taken on the inverse of
        ovloc.resflags and the flags register is released afterwards
      * ovloc.loc=LOC_VOID: the condition is derived from def and, for
        pointers and the listed unsigned/boolean ordinal types, from the
        opcode of the last instruction in the list
    Any other ovloc.loc triggers internalerror 200409281. }
  procedure tbasecgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
    var
      skiplabel : tasmlabel;
      branch : TAiCpu;
      okflags : tresflags;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      case ovloc.loc of
        LOC_FLAGS:
          begin
            okflags:=ovloc.resflags;
            inverse_flags(okflags);
            cg.a_jmp_flags(list,okflags,skiplabel);
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        LOC_VOID:
          begin
            branch:=taicpu.op_sym(A_B,skiplabel);
            branch.is_jmp:=true;
            if (def.typ=pointerdef) or
               ((def.typ=orddef) and
                (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                          pasbool1,pasbool8,pasbool16,pasbool32,pasbool64])) then
              begin
                { carry based check; subtract-like instructions use the
                  opposite carry condition }
                if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                  branch.SetCondition(C_CS)
                else
                  branch.SetCondition(C_CC);
              end
            else
              { all remaining types: skip the call when V is clear }
              branch.SetCondition(C_VC);
            list.concat(branch);
          end;
        else
          internalerror(200409281);
      end;
      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Intentionally empty: emits nothing. }
  procedure tbasecgarm.g_save_registers(list : TAsmList);
    begin
      { saving the registers is handled by g_proc_entry }
    end;
  { Intentionally empty: emits nothing. }
  procedure tbasecgarm.g_restore_registers(list : TAsmList);
    begin
      { restoring the registers is handled by g_proc_exit }
    end;
  { Emits a jump to l which is taken when comparison condition cond holds.

    Outside of Thumb mode this is a single conditional branch. In Thumb mode
    a conditional branch with the inverted condition skips an unconditional
    jump to l. }
  procedure tbasecgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      branch : taicpu;
      skiplabel : TAsmLabel;
    begin
      if not GenerateThumbCode then
        begin
          branch:=Taicpu.Op_sym(A_B,l);
          branch.SetCondition(OpCmp2AsmCond[cond]);
          branch.is_jmp:=true;
          list.concat(branch);
          exit;
        end;
      { the optimizer has to fix this if the jump range is sufficiently short }
      current_asmdata.getjumplabel(skiplabel);
      branch:=Taicpu.Op_sym(A_B,skiplabel);
      branch.SetCondition(inverse_cond(OpCmp2AsmCond[cond]));
      branch.is_jmp:=true;
      list.concat(branch);
      a_jmp_always(list,l);
      a_label(list,skiplabel);
    end;
  { Returns the opcode found in the table below at [fromsize,tosize]:
    A_VMOV where both indices are the same of OS_F32/OS_F64, A_VCVT where
    they are the two different ones. Every other combination in the range
    OS_F32..OS_F128 is A_NONE and raises internalerror 200312205. }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      scalar_op : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
        (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      result:=scalar_op[fromsize,tosize];
      if result<>A_NONE then
        exit;
      internalerror(200312205);
    end;
  { Returns the instruction postfix found in the table below at
    [fromsize,tosize]. Only the OS_F32/OS_F64 combinations carry a postfix;
    every other entry in the range OS_F32..OS_F128 is PF_None. No error is
    raised for those. }
  function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
    const
      scalar_postfix : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
        (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
        (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None),
        (PF_None,  PF_None,  PF_None,PF_None,PF_None));
    begin
      get_scalar_mm_prefix:=scalar_postfix[fromsize,tosize];
    end;
  { Emits a scalar move/conversion from mm register reg1 (fromsize) to mm
    register reg2 (tosize). Only scalar shuffles (or none) are accepted,
    anything else raises internalerror 2009112407.

    Note that both table lookups are indexed with (tosize,fromsize), i.e.
    with the destination size first. }
  procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
    var
      mov: taicpu;
      opcode: tasmop;
      postfix: TOpPostfix;
    begin
      if assigned(shuffle) and not shufflescalar(shuffle) then
        internalerror(2009112407);
      opcode:=get_scalar_mm_op(tosize,fromsize);
      postfix:=get_scalar_mm_prefix(tosize,fromsize);
      mov:=setoppostfix(taicpu.op_reg_reg(opcode,reg2,reg1),postfix);
      list.concat(mov);
      { plain moves are registered as move instructions }
      if mov.opcode=A_VMOV then
        add_move_instruction(mov);
    end;
  { Loads a scalar value of size fromsize from memory ref into mm register
    reg, converting it to tosize if the sizes differ.

    Integer sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as the float size
    of the same width and must then equal tosize. References with an
    alignment of 1 or 2 are loaded through integer registers, all others
    with VLDR. Non-scalar shuffles and unsupported sizes raise an
    internalerror. }
  procedure tbasecgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      gpr,
      loadreg : tregister;
      gprpair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112413);

      { map integer sizes to the float size of the same width }
      if fromsize in [OS_32,OS_S32] then
        begin
          fromsize:=OS_F32;
          { raw integer data is loaded, so no conversion is possible }
          if (tosize<>fromsize) then
            internalerror(2009112801);
        end
      else if fromsize in [OS_64,OS_S64] then
        begin
          fromsize:=OS_F64;
          { raw integer data is loaded, so no conversion is possible }
          if (tosize<>fromsize) then
            internalerror(2009112901);
        end
      else if not(fromsize in [OS_F32,OS_F64]) then
        internalerror(2019050920);

      { load directly into reg unless a size conversion has to follow }
      if (fromsize=tosize) then
        loadreg:=reg
      else
        loadreg:=getmmregister(list,fromsize);

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VLDR,PF_None,loadreg,ref)
      else if fromsize=OS_F32 then
        begin
          gpr:=getintregister(list,OS_32);
          a_load_ref_reg(list,OS_32,OS_32,ref,gpr);
          a_loadmm_intreg_reg(list,OS_32,OS_F32,gpr,loadreg,mms_movescalar);
        end
      else if fromsize=OS_F64 then
        begin
          gprpair.reglo:=getintregister(list,OS_32);
          gprpair.reghi:=getintregister(list,OS_32);
          cg64.a_load64_ref_reg(list,ref,gprpair);
          cg64.a_loadmm_intreg64_reg(list,OS_F64,gprpair,loadreg);
        end
      else
        internalerror(2009112412);

      if (loadreg<>reg) then
        a_loadmm_reg_reg(list,fromsize,tosize,loadreg,reg,shuffle);
    end;
  { Stores the scalar value in mm register reg (fromsize) to memory ref with
    size tosize, converting it first if the sizes differ.

    Integer target sizes (OS_32/OS_S32, OS_64/OS_S64) are treated as the
    float size of the same width and must then equal fromsize. References
    with an alignment of 1 or 2 are stored through integer registers, all
    others with VSTR. Non-scalar shuffles and unsupported sizes raise an
    internalerror. }
  procedure tbasecgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      gpr,
      storereg : tregister;
      gprpair : tregister64;
    begin
      if assigned(shuffle) and
         not(shufflescalar(shuffle)) then
        internalerror(2009112416);

      { map integer sizes to the float size of the same width }
      if tosize in [OS_32,OS_S32] then
        begin
          tosize:=OS_F32;
          { raw integer data is stored, so no conversion is possible }
          if (tosize<>fromsize) then
            internalerror(2009112801);
        end
      else if tosize in [OS_64,OS_S64] then
        begin
          tosize:=OS_F64;
          { raw integer data is stored, so no conversion is possible }
          if (tosize<>fromsize) then
            internalerror(2009112901);
        end
      else if not(tosize in [OS_F32,OS_F64]) then
        internalerror(2019050919);

      { convert into a temporary register first if the sizes differ }
      if (fromsize=tosize) then
        storereg:=reg
      else
        begin
          storereg:=getmmregister(list,tosize);
          a_loadmm_reg_reg(list,fromsize,tosize,reg,storereg,shuffle);
        end;

      if not(ref.alignment in [1,2]) then
        handle_load_store(list,A_VSTR,PF_None,storereg,ref)
      else if tosize=OS_F32 then
        begin
          gpr:=getintregister(list,OS_32);
          a_loadmm_reg_intreg(list,OS_F32,OS_32,storereg,gpr,shuffle);
          a_load_reg_ref(list,OS_32,OS_32,gpr,ref);
        end
      else if tosize=OS_F64 then
        begin
          gprpair.reglo:=getintregister(list,OS_32);
          gprpair.reghi:=getintregister(list,OS_32);
          cg64.a_loadmm_reg_intreg64(list,OS_F64,storereg,gprpair);
          cg64.a_load64_reg_ref(list,gprpair,ref);
        end
      else
        internalerror(2009112417);
    end;
  { Transfers the raw 32 bit contents of integer register intreg into mm
    register mmreg with VMOV. No conversion is performed: tosize must be
    OS_F32, fromsize must be OS_32 or OS_S32 and only scalar shuffles (or
    none) are accepted, otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if (tosize<>OS_F32) then
        internalerror(2009112419);
      if (fromsize<>OS_32) and (fromsize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112516);
      { destination operand first }
      list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the raw 32 bit contents of mm register mmreg into integer
    register intreg with VMOV. No conversion is performed: fromsize must be
    OS_F32, tosize must be OS_32 or OS_S32 and only scalar shuffles (or
    none) are accepted, otherwise an internalerror is raised. }
  procedure tbasecgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    begin
      if (fromsize<>OS_F32) then
        internalerror(2009112430);
      if (tosize<>OS_32) and (tosize<>OS_S32) then
        internalerror(2009112420);
      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112514);
      { destination operand first }
      list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { Only supports OP_XOR of a register with itself, which is emitted as
    "load zero": the vfp has neither xor nor any other logical operation,
    but this routine is used to initialise global mm regvars, and an mm
    register can easily be set to 0 via an integer register.

    Requirements (otherwise an internalerror is raised): op=OP_XOR, src=dst,
    reg_cgsize(src)=size, no shuffle, size is OS_F32 or OS_F64. }
  procedure tbasecgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op=OP_XOR then
        begin
          if (src<>dst) or
             (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2009112907);
          zeroreg:=getintregister(list,OS_32);
          a_load_const_reg(list,OS_32,0,zeroreg);
          if size=OS_F32 then
            list.concat(taicpu.op_reg_reg(A_VMOV,dst,zeroreg))
          else if size=OS_F64 then
            { the zero register supplies both halves }
            list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,zeroreg,zeroreg))
          else
            internalerror(2009112908);
        end
      else
        internalerror(2009112906);
    end;
  { After one of the operations MUL, SHL, ADD, SUB or NEG on an 8 or 16 bit
    value, re-loads dst from OS_32 to size so that the register holds a
    value of that size again. Does nothing for other operations or sizes. }
  procedure tbasecgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      ops_needing_adjust = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      subword_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in ops_needing_adjust) then
        exit;
      if size in subword_sizes then
        a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Emits "MLA op1,op2,op3,op4". Before that, every operand which is r15,
    or r13 while generating Thumb or Thumb-2 code, is replaced by a fresh
    integer register that is loaded from the original one. }
  procedure tbasecgarm.safe_mla(list : TAsmList; op1,op2,op3,op4 : TRegister);

    { replaces reg by a copy in a new register if it is one of the
      registers described above }
    procedure substitute_if_needed(var reg : TRegister);
      var
        copyreg : TRegister;
        must_copy : boolean;
      begin
        must_copy:=((GenerateThumbCode or GenerateThumb2Code) and (getsupreg(reg)=RS_R13)) or
                   (getsupreg(reg)=RS_R15);
        if not must_copy then
          exit;
        copyreg:=getintregister(list,OS_INT);
        a_load_reg_reg(list,OS_INT,OS_INT,reg,copyreg);
        reg:=copyreg;
      end;

    begin
      substitute_if_needed(op1);
      substitute_if_needed(op2);
      substitute_if_needed(op3);
      substitute_if_needed(op4);
      list.concat(taicpu.op_reg_reg_reg_reg(A_MLA,op1,op2,op3,op4));
    end;
  { Emits a call to fpc_read_tp and copies r0 afterwards into
    current_procinfo.tlsoffset. r0 is marked as allocated around the
    sequence. }
  procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
    var
      tlsreg : tregister;
    begin
      tlsreg:=current_procinfo.tlsoffset;
      list.concat(tai_regalloc.alloc(NR_R0,nil));
      a_call_name(list,'fpc_read_tp',false);
      a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,tlsreg);
      list.concat(tai_regalloc.dealloc(NR_R0,nil));
    end;
  { 64 bit operation "regdst := op regsrc" (unary) or
    "regdst := regdst op regsrc" (everything else).

    OP_NEG is emitted as RSBS on the low words followed by RSC on the high
    words (both with constant 0), OP_NOT as two independent 32 bit NOTs.
    All other operations are forwarded to a_op64_reg_reg_reg. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { the flags carry the borrow from the low to the high word }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64 bit "reg := reg op value": forwards to a_op64_const_reg_reg with reg
    used as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,op,size,value,{regsrc}reg,{regdst}reg);
    end;
  { 64 bit "regdst := regsrc op value" without requesting overflow flags:
    forwards to a_op64_const_reg_reg_checkoverflow with setflags=false and
    discards the returned overflow location. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { 64 bit three-operand operation without requesting overflow flags:
    forwards to a_op64_reg_reg_reg_checkoverflow with setflags=false and
    discards the returned overflow location. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Transfers the raw contents of the integer register pair intreg into mm
    register mmreg with a single VMOV (operands: mmreg, low word, high
    word). No conversion is performed; mmsize must be OS_F64, otherwise
    internalerror 2009112405 is raised. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers the raw contents of mm register mmreg into the integer
    register pair intreg with a single VMOV (operands: low word, high word,
    mmreg). No conversion is performed; mmsize must be OS_F64, otherwise
    internalerror 2009112406 is raised. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
    end;
  { 64 bit "regdst := regsrc op value" with optional overflow information.

    list     - assembler list the instructions are appended to
    op       - OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT raise
               internalerror 2012022501, anything else 2003083101
    size     - operand size; only OS_64 yields an overflow location
    value    - the constant operand; its low and high halves are used as
               immediates where is_shifter_const accepts them and are
               loaded into a temporary register otherwise
    setflags - request flag setting on the high word operation as well
               (tbasecgarm(cg).cgsetflags has the same effect)
    ovloc    - set to LOC_VOID, or for ADD/SUB with flag setting and
               size=OS_64 to LOC_FLAGS with F_CS (ADD) or F_CC (SUB)

    In the flag setting path the flags register is left allocated on
    purpose; g_overflowCheck_loc releases it in its LOC_FLAGS branch. In the
    path without overflow information the flags are only needed to pass the
    carry from the low to the high word and are released right after the
    high word instruction. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(2012022501);
        else
          ;
      end;
      if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { both halves set the flags so that the caller can test them }
          case op of
            OP_ADD:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the arm has a weird opinion on how flags for SUB/ADD are handled }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
                else
                  internalerror(2019050918);
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { the halves are independent of each other }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed, nobody reads the flags anymore }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;
                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the borrow has been consumed, nobody reads the flags anymore }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { 64 bit three-operand operation with optional overflow information:
    "regdst := regsrc2 op regsrc1" (for OP_SUB regsrc1 is subtracted from
    regsrc2).

    Supported are OP_ADD, OP_SUB, OP_AND, OP_OR and OP_XOR; OP_NEG/OP_NOT
    raise internalerror 2012022502, anything else 2003083101.

    If setflags or tbasecgarm(cg).cgsetflags is set, ADD/SUB set the flags
    on both halves, the flags register stays allocated and, for size=OS_64,
    ovloc becomes LOC_FLAGS with F_CS (ADD) or F_CC (SUB). Otherwise ovloc
    is LOC_VOID and the flags are released after the high word operation. }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      wantflags : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(2012022502);

      wantflags:=(setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);
      if not wantflags then
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { the halves are independent of each other }
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
                cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                { the flags only transport the carry between the halves }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                { the flags only transport the borrow between the halves }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
          exit;
        end;

      { flag setting variant: both halves carry the S postfix }
      if op=OP_ADD then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
        end
      else if op=OP_SUB then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
        end
      else
        internalerror(2003083101);

      if size=OS_64 then
        begin
          { the arm has a weird opinion on how flags for SUB/ADD are handled }
          ovloc.loc:=LOC_FLAGS;
          if op=OP_ADD then
            ovloc.resflags:=F_CS
          else if op=OP_SUB then
            ovloc.resflags:=F_CC
          else
            internalerror(2019050917);
        end;
    end;
  { Creates the integer register allocator for Thumb code after running the
    inherited initialisation. R7 is only handed to the allocator when the
    current procedure does not use it as frame pointer. }
  procedure tthumbcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      if not(assigned(current_procinfo)) or
         (current_procinfo.framepointer<>NR_R7) then
        { R7 is free for general use }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7],first_int_imreg,[])
      else
        { R7 is the frame pointer, keep it out of the allocatable set }
        rg[R_INTREGISTER]:=trgintcputhumb.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6],first_int_imreg,[]);
    end;
  { Releases the integer, fpu and mm register allocators and then lets the
    inherited implementation finish the clean up. }
  procedure tthumbcgarm.done_register_allocators;
    begin
      { same order as in the other ARM code generators of this unit }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Generates the Thumb procedure prologue: pushes the registers that have to
    be preserved, reserves the local stack area and, if a frame pointer other
    than the stack pointer is used, initialises it from the stack pointer.

    list         - instruction list receiving the prologue
    localsize    - requested size of the local area (rounded up to 4 first)
    nostackframe - when true no code is generated at all }
  procedure tthumbcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment : pint;
      registerarea: DWord;
      stack_parameters: Boolean;
      separate_fp : boolean;
    begin
      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;

      if nostackframe then
        exit;

      separate_fp:=current_procinfo.framepointer<>NR_STACK_POINTER_REG;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if separate_fp then
        a_reg_alloc(list,NR_FRAME_POINTER_REG);

      { save int registers }
      reference_reset(ref,4,[]);
      ref.index:=NR_STACK_POINTER_REG;
      ref.addressmode:=AM_PREINDEXED;
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);

      { disabled code for the separate frame pointer case:
        //!!!! a_reg_alloc(list,NR_R12);
        //!!!! list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); }

      { the (old) ARM APCS requires saving both the stack pointer (to
        crawl the stack) and the PC (to identify the function this
        stack frame belongs to) -- still needs updating for EABI and
        Darwin, they don't need that.
        Here: R14 is always saved, R7 additionally with a separate frame
        pointer }
      if separate_fp then
        regs:=regs+[RS_R7,RS_R14]
      else
        // if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(regs,RS_R14);

      { safely estimate stack size: big frames need R4 as scratch register
        below, so it must be marked as used and saved }
      if localsize+current_settings.alignment.localalignmax+4>508 then
        begin
          include(rg[R_INTREGISTER].used_in_proc,RS_R4);
          include(regs,RS_R4);
        end;

      { push the registers and remember how many bytes that took }
      registerarea:=0;
      if regs<>[] then
        begin
          for r:=RS_R0 to RS_R15 do
            if r in regs then
              inc(registerarea,4);
          list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,regs));
        end;

      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
      if stack_parameters or (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { do we access stack parameters?
            if yes, the previously estimated stacksize must be used }
          if stack_parameters then
            begin
              if localsize>tcpuprocinfo(current_procinfo).stackframesize then
                begin
                  writeln(localsize);
                  writeln(tcpuprocinfo(current_procinfo).stackframesize);
                  internalerror(2013040601);
                end
              else
                localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea;
            end
          else
            localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

          { lower the stack pointer with one immediate SUB, two immediate
            SUBs or, for large frames, via the constant -localsize in R4 }
          if localsize<508 then
            list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
          else if localsize<=1016 then
            begin
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize-508));
            end
          else
            begin
              a_load_const_reg(list,OS_ADDR,-localsize,NR_R4);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R4));
              include(regs,RS_R4);
              //!!!! if current_procinfo.framepointer=NR_STACK_POINTER_REG then
              //!!!!   a_reg_alloc(list,NR_R12);
              //!!!! a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
              //!!!! list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              //!!!! a_reg_dealloc(list,NR_R12);
            end;
        end;

      { frame pointer := stack pointer }
      if separate_fp then
        list.concat(taicpu.op_reg_reg_const(A_ADD,current_procinfo.framepointer,NR_STACK_POINTER_REG,0));
    end;
  { Generates the Thumb procedure epilogue.

    Without a stack frame only a plain return is emitted. Otherwise the size
    of the local area is recomputed the same way the prologue did, the stack
    pointer is raised again and the saved registers are popped (the register
    set contains R15). Code for the stack frame case is only generated when
    the stack pointer is the frame pointer or the target is a Darwin system.

    list         - instruction list receiving the epilogue
    parasize     - not used by this implementation
    nostackframe - when true only the return instruction is generated }
  procedure tthumbcgarm.g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);
    var
      LocalSize : longint;
      r: byte;
      regs : tcpuregisterset;
      registerarea : DWord;
      stackmisalignment: pint;
      stack_parameters : Boolean;

    { return through R14: BX if the selected cpu has it, MOV PC,R14 otherwise }
    procedure emit_plain_return;
      begin
        if CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype] then
          list.concat(taicpu.op_reg(A_BX,NR_R14))
        else
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
      end;

    begin
      if nostackframe then
        begin
          emit_plain_return;
          exit;
        end;

      stack_parameters:=current_procinfo.procdef.stack_tainting_parameter(calleeside);

      { registers to restore; R15 is part of the set }
      regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      include(regs,RS_R15);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        include(regs,getsupreg(current_procinfo.framepointer));

      registerarea:=0;
      for r:=RS_R0 to RS_R15 do
        if r in regs then
          inc(registerarea,4);

      { recompute the size of the local area }
      stackmisalignment:=registerarea mod current_settings.alignment.localalignmax;
      LocalSize:=current_procinfo.calc_stackframe_size;
      if stack_parameters then
        localsize:=tcpuprocinfo(current_procinfo).stackframesize-registerarea
      else
        localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;

      { nothing is generated for the remaining configurations }
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) and
         not(target_info.system in systems_darwin) then
        exit;

      if (LocalSize<>0) or
         ((stackmisalignment<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          if LocalSize<>0 then
            begin
              { one immediate ADD, two immediate ADDs or the size in R3 }
              if LocalSize<=508 then
                list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
              else if LocalSize<=1016 then
                begin
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,508));
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize-508));
                end
              else
                begin
                  a_reg_alloc(list,NR_R3);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R3);
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R3));
                  a_reg_dealloc(list,NR_R3);
                end;
            end;
        end;

      if regs=[] then
        emit_plain_return
      else
        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,regs));
    end;
  { Loads a value from memory into a register (Thumb).

    If the reference is aligned to 1 or 2 bytes only and that is less than
    the size being loaded, the value is assembled from byte or halfword loads
    that are shifted and or'ed together; in every other case the load is
    handed to handle_load_store.

    list     - instruction list receiving the code
    fromsize - size of the value in memory; replaced by tosize if it is not
               smaller than tosize
    tosize   - size of the value in the register
    Ref      - memory location
    reg      - destination register }
  procedure tthumbcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix : toppostfix;
      usedtmpref : treference;
      tmpreg,tmpreg2 : tregister;
      dir : integer;
      complexref : boolean;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix matching the memory size }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308298);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { direction in which the pieces are visited in memory }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;

          { only complicated references need an extra loadaddr }
          complexref:=assigned(ref.symbol) or
            (ref.index<>NR_NO) or
            (ref.offset<-124) or
            (ref.offset>124) or
            { sometimes the compiler reused registers }
            (reg=ref.index) or
            (reg=ref.base);

          case FromSize of
            OS_16,OS_S16:
              begin
                if complexref then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;

                { start at the least significant byte }
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);

                tmpreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { the upper byte is loaded signed for OS_S16 }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
                list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                if complexref then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;

                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
                    list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
                  end
                else
                  begin
                    { four single bytes, shifted by 8, 16 and 24 bits }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);

                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,8));
                    list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));

                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,16));
                    list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));

                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    list.concat(taicpu.op_reg_const(A_LSL,tmpreg,24));
                    list.concat(taicpu.op_reg_reg(A_ORR,reg,tmpreg));
                  end;
              end
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Loads the constant a into reg (Thumb). Values accepted by is_thumb_imm
    are moved directly, everything else is stored as a 32 bit constant in the
    local data of the current procedure and loaded pc relative.

    list - instruction list receiving the code
    size - must be an 8, 16 or 32 bit integer size
    a    - constant to load
    reg  - destination register }
  procedure tthumbcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      { place the value behind a fresh label in the local data ... }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      { ... and load it relative to the program counter }
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Subtracts ioffset from the 'self' parameter of procdef, wherever the
    caller side parameter location places it (register or memory).

    Offsets accepted by is_thumb_imm are subtracted directly. Other offsets
    are loaded into R4 from the local data of the current procedure; R4 is
    pushed before and popped after the subtraction.

    list    - instruction list receiving the code
    procdef - procedure whose 'self' parameter is adjusted
    ioffset - value to subtract }
  procedure tthumbcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
    var
      hsym : tsym;
      href : treference;
      paraloc : Pcgparalocation;

    { saves R4 on the stack and loads ioffset into it from a new, 4 byte
      aligned constant in the local data; the caller has to pop R4 again }
    procedure load_ioffset_into_r4;
      var
        poolref : treference;
        poollabel : TAsmLabel;
      begin
        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
        reference_reset(poolref,4,[]);
        current_asmdata.getjumplabel(poollabel);
        current_procinfo.aktlocaldata.Concat(tai_align.Create(4));
        cg.a_label(current_procinfo.aktlocaldata,poollabel);
        poolref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ioffset));
        poolref.symbol:=poollabel;
        poolref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,NR_R4,poolref));
      end;

    begin
      { calculate the parameter info for the procdef }
      procdef.init_paraloc_info(callerside);
      hsym:=tsym(procdef.parast.Find('self'));
      if not(assigned(hsym) and
             (hsym.typ=paravarsym)) then
        internalerror(200305251);

      { walk over all locations the parameter is made of }
      paraloc:=tparavarsym(hsym).paraloc[callerside].location;
      while paraloc<>nil do
        with paraloc^ do
          begin
            case loc of
              LOC_REGISTER:
                begin
                  if is_thumb_imm(ioffset) then
                    a_op_const_reg(list,OP_SUB,size,ioffset,register)
                  else
                    begin
                      load_ioffset_into_r4;
                      a_op_reg_reg(list,OP_SUB,size,NR_R4,register);
                      list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    end;
                end;
              LOC_REFERENCE:
                begin
                  { offset in the wrapper needs to be adjusted for the stored
                    return address }
                  reference_reset_base(href,reference.index,reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                  if is_thumb_imm(ioffset) then
                    a_op_const_ref(list,OP_SUB,size,ioffset,href)
                  else
                    begin
                      { NOTE(review): href was set up before R4 is pushed; if
                        reference.index is the stack pointer the push may
                        shift the slot by 4 bytes - TODO confirm }
                      load_ioffset_into_r4;
                      a_op_reg_ref(list,OP_SUB,size,NR_R4,href);
                      list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R4]));
                    end;
                end
              else
                internalerror(200309189);
            end;
            paraloc:=next;
          end;
    end;
  { Thumb front end of the inherited handle_load_store.

    If the reference does not satisfy the offset and base register limits
    checked below for the given opcode and postfix, its address is first
    loaded into a new register and the access is done through that register
    with offset 0. The (possibly replaced) reference is then passed on to the
    inherited implementation, whose result is returned.

    list      - instruction list receiving the code
    op        - load or store opcode
    oppostfix - size postfix of the opcode
    reg       - register loaded or stored
    ref       - memory location }
  function tthumbcgarm.handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
    var
      href : treference;
      tmpreg : TRegister;
      wordaccess,
      byteaccess,
      halfaccess,
      uses_sp,
      needs_address : boolean;
    begin
      href:=ref;

      { classify the access by opcode and postfix }
      wordaccess:=((op=A_LDR) and (oppostfix=PF_None)) or
                  ((op=A_STR) and (oppostfix=PF_None));
      byteaccess:=((op=A_LDR) and (oppostfix=PF_B)) or
                  ((op=A_LDRB) and (oppostfix=PF_None)) or
                  ((op=A_STR) and (oppostfix=PF_B)) or
                  ((op=A_STRB) and (oppostfix=PF_None));
      halfaccess:=((op=A_LDR) and (oppostfix=PF_H)) or
                  ((op=A_LDRH) and (oppostfix=PF_None)) or
                  ((op=A_STR) and (oppostfix=PF_H)) or
                  ((op=A_STRH) and (oppostfix=PF_None));
      uses_sp:=(ref.base=NR_STACK_POINTER_REG) or
               (ref.index=NR_STACK_POINTER_REG);

      needs_address:=
        { LDR/STR limitations }
        (wordaccess and
         (ref.base<>NR_STACK_POINTER_REG) and
         (abs(ref.offset)>124)) or
        { LDRB/STRB limitations }
        (byteaccess and
         (uses_sp or (abs(ref.offset)>31))) or
        { LDRH/STRH limitations }
        (halfaccess and
         (uses_sp or
          (abs(ref.offset)>62) or
          ((abs(ref.offset) mod 2)<>0))) or
        { word loads relative to the stack pointer }
        ((op=A_LDR) and
         (oppostfix in [PF_None]) and
         (ref.base=NR_STACK_POINTER_REG) and
         (abs(ref.offset)>1020)) or
        { signed loads and remaining loads with a big offset }
        ((op=A_LDR) and
         ((oppostfix in [PF_SH,PF_SB]) or
          (abs(ref.offset)>124)));

      if needs_address then
        begin
          tmpreg:=getintregister(list,OS_ADDR);
          a_loadaddr_ref_reg(list,ref,tmpreg);
          reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
        end;

      Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
    end;
  { Emits the two operand Thumb instruction for dst := dst <op> src
    (dst := <op> src for OP_NEG and OP_NOT) and lets maybeadjustresult fix up
    the result for the given size afterwards.

    list - instruction list receiving the code
    op   - operation; OP_DIV and OP_IDIV raise an internal error
    size - operand size; OP_ROL only accepts OS_32 and OS_S32
    src  - source register
    dst  - destination (and for binary operations first source) register }
  procedure tthumbcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      tmpreg : tregister;
    begin
      case op of
        OP_NEG:
          list.concat(taicpu.op_reg_reg(A_NEG,dst,src));
        OP_NOT:
          list.concat(taicpu.op_reg_reg(A_MVN,dst,src));
        OP_DIV,OP_IDIV:
          internalerror(200308284);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value: tmpreg:=32-src, then rotate
              dst right by tmpreg. The rotate has to use tmpreg as count;
              rotating by src itself would be a plain ROR and leave the
              computed count unused. }
            tmpreg:=getintregister(list,OS_32);
            a_load_const_reg(list,OS_32,32,tmpreg);
            list.concat(taicpu.op_reg_reg(A_SUB,tmpreg,src));
            list.concat(taicpu.op_reg_reg(A_ROR,dst,tmpreg));
          end;
        else
          begin
            { all remaining operations map directly onto one instruction with
              the postfix from op_reg_postfix; the flags are allocated here
              and not released by this routine }
            a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg(op_reg_opcg2asmop[op],dst,src),op_reg_postfix[op]));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits code for dst := dst <op> a (Thumb).

    An ADD/SUB whose negated constant passes is_thumb_imm is turned into the
    opposite operation first. ADD/SUB with a constant accepted by
    is_thumb_imm become a single instruction; a few trivial
    multiplications, divisions and AND masks are special cased, shifts use
    the constant as shift count, and everything else loads the constant
    into a temporary register and falls back to a_op_reg_reg.
    maybeadjustresult is called on dst at the end in all cases.

    The sections guarded by DUMMY are disabled code kept from the ARM
    version of this routine.

    list - instruction list receiving the code
    op   - operation to perform
    size - operand size
    a    - constant operand
    dst  - register that is both source and destination }
  procedure tthumbcgarm.a_op_const_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; dst: tregister);
    var
      tmpreg : tregister;
{$ifdef DUMMY}
      l1 : longint;
{$endif DUMMY}
    begin
      //!!! ovloc.loc:=LOC_VOID;
      { prefer the operation whose constant can be encoded directly }
      if {$ifopt R+}(a<>-2147483648) and{$endif} {!!!!!! not setflags and } is_thumb_imm(-a) then
        begin
          if op=OP_ADD then
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end
          else if op=OP_SUB then
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
        begin
          // if cgsetflags or setflags then
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(
            taicpu.op_reg_const(op_reg_opcg2asmop[op],dst,a),op_reg_postfix[op]));
          // disabled overflow location reporting:
          //!!! if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
          //!!!   ovloc.loc:=LOC_FLAGS;
          //!!!   OP_ADD: ovloc.resflags:=F_CS;
          //!!!   OP_SUB: ovloc.resflags:=F_CC;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,dst,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,dst,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
{$ifdef DUMMY}
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
            begin
              { nothing to do on success }
            end
{$endif DUMMY}
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := x AND $FFFFFFFF leaves the register unchanged, so no
            instruction is generated for it }
          else if (op = OP_AND) and (not(dword(a))=0) then
            // do nothing
          { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
            broader range of shifterconstants.}
{$ifdef DUMMY}
          else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
          else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
            end
          else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                  not(cgsetflags or setflags) and
                  split_into_shifter_const(a, imm1, imm2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
            end
{$endif DUMMY}
          else if (op in [OP_SHL, OP_SHR, OP_SAR]) then
            begin
              { shifts take the constant as shift count }
              list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
            end
          else
            begin
              { generic case: constant goes through a temporary register }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg(list,op,size,tmpreg,dst);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits dst := src <op> a (Thumb). Only an addition of a positive multiple
    of 4 that is not above 1020 to R13, with a destination other than R13, is
    turned into a single three operand ADD here; every other request is
    forwarded to the inherited implementation. }
  procedure tthumbcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
    begin
      if (op<>OP_ADD) or (src<>NR_R13) or (dst=NR_R13) or
         ((a mod 4)<>0) or (a<=0) or (a>1020) then
        begin
          inherited a_op_const_reg_reg(list,op,size,a,src,dst);
          exit;
        end;
      list.concat(taicpu.op_reg_reg_const(A_ADD,dst,src,a));
    end;
  { Materialises the flag condition f as 0 or 1 in reg using a conditional
    branch: the branch is taken to the code storing 1 when the condition
    holds, otherwise 0 is stored and the store of 1 is jumped over. The
    default flags are released afterwards.

    list - instruction list receiving the code
    size - not used by this implementation
    f    - flags to test
    reg  - register receiving 0 or 1 }
  procedure tthumbcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      truelabel,
      donelabel : tasmlabel;
      condjump : taicpu;
    begin
      current_asmdata.getjumplabel(truelabel);
      current_asmdata.getjumplabel(donelabel);

      { condition holds -> truelabel }
      condjump:=setcondition(taicpu.op_sym(A_B,truelabel),flags_to_cond(f));
      condjump.is_jmp:=true;
      list.concat(condjump);

      { condition does not hold }
      list.concat(taicpu.op_reg_const(A_MOV,reg,0));
      list.concat(taicpu.op_sym(A_B,donelabel));

      cg.a_label(list,truelabel);
      list.concat(taicpu.op_reg_const(A_MOV,reg,1));
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      cg.a_label(list,donelabel);
    end;
  { Creates the integer, fpu and mm register allocators for Thumb-2 code
    after running the inherited initialisation. The integer register set
    depends on the target system, the mm register set on the selected fpu
    type. }
  procedure tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { currently, we save R14 always, so we can use it }
      if (target_info.system=system_arm_darwin) then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      if current_settings.fputype in [fpu_vfpv3,fpu_vfpv4] then
        { D0..D7 first, then D16..D31, D8..D15 last }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[])
      else if current_settings.fputype in [fpu_fpv4_s16,fpu_vfpv3_d16] then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[])
      else
        { NOTE(review): RS_R2..RS_R4 inside a list of S registers looks
          unintended - TODO confirm before changing }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
    end;
  { Releases the three register allocators created by
    init_register_allocators and then runs the inherited clean up. }
  procedure tthumb2cgarm.done_register_allocators;
    begin
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg using BLX and records in the flags
    of the current procedure that it performs a call.

    list - instruction list receiving the call
    reg  - register passed to BLX as call target }
  procedure tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      list.concat(taicpu.op_reg(A_BLX, reg));
      {
        the compiler does not properly set this flag anymore in pass 1, and
        for now we only need it after pass 2 (I hope) (JM)
          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);
      }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg (Thumb-2). The following forms are tried
    in this order: MOV if is_thumb32_imm accepts a, MVN if it accepts not(a),
    MOVW if a fits into the lower 16 bits, and finally a pc relative load of
    a 32 bit constant placed in the local data of the current procedure.

    list - instruction list receiving the code
    size - must be an 8, 16 or 32 bit integer size
    a    - constant to load
    reg  - destination register }
  procedure tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      poollabel : tasmlabel;
      poolref : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_thumb32_imm(a) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
          exit;
        end;

      if is_thumb32_imm(not(a)) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
          exit;
        end;

      if (a and $FFFF)=a then
        begin
          list.concat(taicpu.op_reg_const(A_MOVW,reg,a));
          exit;
        end;

      { no immediate form fits: go through the local data }
      reference_reset(poolref,4,[]);
      current_asmdata.getjumplabel(poollabel);
      cg.a_label(current_procinfo.aktlocaldata,poollabel);
      poolref.symboldata:=current_procinfo.aktlocaldata.last;
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
      poolref.symbol:=poollabel;
      poolref.base:=NR_PC;
      list.concat(taicpu.op_reg_ref(A_LDR,reg,poolref));
    end;
  { Loads a value from memory into a register (Thumb-2).

    If the reference is aligned to 1 or 2 bytes only and that is less than
    the size being loaded, the value is assembled from byte or halfword
    loads which are combined with ORR and a shifted second operand; in every
    other case the load is handed to handle_load_store.

    list     - instruction list receiving the code
    fromsize - size of the value in memory; replaced by tosize if it is not
               smaller than tosize
    tosize   - size of the value in the register
    Ref      - memory location
    reg      - destination register }
  procedure tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      oppostfix : toppostfix;
      usedtmpref : treference;
      tmpreg,tmpreg2 : tregister;
      so : tshifterop;
      dir : integer;
    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      { select the load postfix matching the memory size }
      case FromSize of
        OS_8:
          oppostfix:=PF_B;
        OS_S8:
          oppostfix:=PF_SB;
        OS_16:
          oppostfix:=PF_H;
        OS_S16:
          oppostfix:=PF_SH;
        OS_32,
        OS_S32:
          oppostfix:=PF_None;
        else
          InternalError(200308299);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        handle_load_store(list,A_LDR,oppostfix,reg,ref)
      else
        begin
          { direction in which the pieces are visited in memory }
          if target_info.endian=endian_big then
            dir:=-1
          else
            dir:=1;

          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                   (ref.index<>NR_NO) or
                   (ref.offset<-255) or
                   (ref.offset>4094) or
                   { sometimes the compiler reused registers }
                   (reg=ref.index) or
                   (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;

                { start at the least significant byte }
                if target_info.endian=endian_big then
                  inc(usedtmpref.offset,1);

                shifterop_reset(so);
                so.shiftmode:=SM_LSL;
                so.shiftimm:=8;

                tmpreg:=getintregister(list,OS_INT);
                a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
                inc(usedtmpref.offset,dir);
                { the upper byte is loaded signed for OS_S16 }
                if FromSize=OS_16 then
                  a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
                else
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
              end;
            OS_32,OS_S32:
              begin
                tmpreg:=getintregister(list,OS_INT);
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                   (ref.index<>NR_NO) or
                   (ref.offset<-255) or
                   (ref.offset>4092) or
                   { sometimes the compiler reused registers }
                   (reg=ref.index) or
                   (reg=ref.base) then
                  begin
                    tmpreg2:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,tmpreg2);
                    reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                  end
                else
                  usedtmpref:=ref;

                shifterop_reset(so);
                so.shiftmode:=SM_LSL;

                if ref.alignment=2 then
                  begin
                    { two halfwords }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
                    inc(usedtmpref.offset,dir*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end
                else
                  begin
                    { four single bytes, shifted by 8, 16 and 24 bits }
                    if target_info.endian=endian_big then
                      inc(usedtmpref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);

                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    so.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));

                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    so.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));

                    inc(usedtmpref.offset,dir);
                    a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    so.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
                  end;
              end
            else
              handle_load_store(list,A_LDR,oppostfix,reg,ref);
          end;
        end;

      if (fromsize=OS_S8) and (tosize = OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Two-operand register operation for Thumb-2.
    Only OP_NOT is handled here: the bitwise complement is produced with MVN
    and then, for sizes narrower than 32 bit, re-extended so that the register
    holds a valid value of the requested size.  Every other operation is
    forwarded to the inherited implementation. }
  procedure tthumb2cgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      extendop : tasmop;
    begin
      if op<>OP_NOT then
        begin
          inherited a_op_reg_reg(list, op, size, src, dst);
          exit;
        end;

      list.concat(taicpu.op_reg_reg(A_MVN,dst,src));

      { pick the zero/sign extension matching the operand size;
        A_NONE means that no extension is required }
      extendop:=A_NONE;
      case size of
        OS_8:
          extendop:=A_UXTB;
        OS_S8:
          extendop:=A_SXTB;
        OS_16:
          extendop:=A_UXTH;
        OS_S16:
          extendop:=A_SXTH;
        OS_32,
        OS_S32:
          ;
        else
          internalerror(2019050916);
      end;
      if extendop<>A_NONE then
        list.concat(taicpu.op_reg_reg(extendop,dst,dst));
    end;
  { Emits "dst := src <op> a" for a constant operand a on Thumb-2.

    list     - instruction list the generated code is appended to
    op       - operation; OP_ADD/OP_SUB may be swapped internally when the
               negated constant is encodable
    size     - operand size, forwarded to the helpers and to maybeadjustresult
    a        - the constant operand
    src, dst - source and destination register
    setflags - request a flag-setting instruction (also honoured when the
               cgsetflags field is set)
    ovloc    - reset to LOC_VOID; set to LOC_FLAGS on the paths that emit a
               flag-setting instruction for an unsigned size }
  procedure tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift, width : byte;
      scratch : tregister;
      shiftop : tshifterop;
      log2val : longint;
      instr : taicpu;

    { Emits "MOV dst,src,<mode> #amount"; when the constant a is zero a plain
      "MOV dst,src" is emitted instead. }
    procedure emit_const_shift(mode : tshiftmode; amount : tcgint);
      var
        sh : tshifterop;
      begin
        if a<>0 then
          begin
            shifterop_reset(sh);
            sh.shiftmode:=mode;
            sh.shiftimm:=amount;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,sh));
          end
        else
          list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
      end;

    begin
      ovloc.loc:=LOC_VOID;

      { if -a is encodable as shifter constant, turn ADD into SUB (and vice
        versa) of the negated constant }
      if (a<>-2147483648) and is_shifter_const(-a,shift) and (op in [OP_ADD,OP_SUB]) then
        begin
          if op=OP_ADD then
            op:=OP_SUB
          else
            op:=OP_ADD;
          a:=aint(dword(-a));
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308285);
          OP_SHL:
            begin
              if a>32 then
                internalerror(2014020703);
              emit_const_shift(SM_LSL,a);
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(2014020704);
              { rotate left by a is emitted as rotate right by 32-a }
              emit_const_shift(SM_ROR,32-a);
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(2014020705);
              emit_const_shift(SM_ROR,a);
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              emit_const_shift(SM_LSR,a);
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              emit_const_shift(SM_ASR,a);
            end;
          else
            if (op in [OP_SUB, OP_ADD]) and
               ((a < 0) or
                (a > 4095)) then
              begin
                { constant outside 0..4095: load it into a register first }
                scratch:=getintregister(list,size);
                a_load_const_reg(list, size, a, scratch);
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                instr:=taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,scratch);
                list.concat(setoppostfix(instr,toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end
            else
              begin
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                instr:=taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a);
                list.concat(setoppostfix(instr,toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end;
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                  else
                    ;
                end;
              end;
        end
      else
        begin
          { multiplication by special constants and AND with special masks
            get cheaper sequences; everything else goes through a register }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { done here rather than in the peephole optimizer because it saves
            a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,log2val) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,log2val,src,dst)
          { e.g. b=a*5 -> b=a+(a shl 2) }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,log2val) and not(cgsetflags or setflags) then
            begin
              if log2val>32 then
                internalerror(200308296);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { e.g. b=a*7 -> b=(a shl 3)-a, using RSB }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,log2val) and not(cgsetflags or setflags) then
            begin
              if log2val>32 then
                internalerror(201205181);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { the helper already emitted the code }
            end
          { x := y and 0 just clears the register }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y and $FFFFFFFF just copies the register }
          else if (op = OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { a BIC variant guarded by is_shifter_const(not(dword(a)),shift)
            used to be tried at this point; it is disabled }
          else if (op = OP_AND) and is_thumb32_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          else if (op = OP_AND) and (a = $FFFF) then
            list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
          { BIC clears the given bits where AND keeps them, so the inverted
            mask can be used when only that one is encodable }
          else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
            begin
              a_load_reg_reg(list,size,size,src,dst);
              list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
            end
          else
            begin
              scratch:=getintregister(list,size);
              a_load_const_reg(list,size,a,scratch);
              a_op_reg_reg_reg_checkoverflow(list,op,size,scratch,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  const
    { Thumb-2 opcode used for each generic operation, indexed by TOpCG.
      NOTE(review): entries appear to follow the declaration order of TOpCG,
      with A_NONE presumably marking operations that have no direct
      three-register instruction -- confirm against the TOpCG declaration. }
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop = (
      A_NONE,A_MOV,A_ADD,A_AND,
      A_UDIV,A_SDIV,A_MUL,A_MUL,
      A_NONE,A_MVN,A_ORR,
      A_ASR,A_LSL,A_LSR,
      A_SUB,A_EOR,A_NONE,A_ROR
    );
  { Emits "dst := src2 <op> src1" for Thumb-2.

    list       - instruction list the generated code is appended to
    op         - operation; OP_NEG and OP_NOT are rejected with an internal error
    size       - operand size; rotates require OS_32/OS_S32
    src1, src2 - input registers; they are never written by this routine
    dst        - destination register
    setflags   - request flag-setting code (also honoured when the cgsetflags
                 field is set)
    ovloc      - reset to LOC_VOID; for multiplications with flags requested it
                 is set to LOC_FLAGS/F_NE after comparing the high word of the
                 64 bit product }
  procedure tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308286);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing by (32-count); the adjusted count is
              kept in a temporary so that the caller's src1 stays intact }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: the high word lands in overflowreg and is
                  inspected below to detect overflow }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand so
                          that the first source differs from dst }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: high word must equal the sign extension of the
                      low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: multiply a copy of the operand }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
{$ifdef dummy}
            { R13 is not allowed for certain instruction operands }
            if op_reg_reg_opcg2asmopThumb2[op] in [A_ADD,A_SUB,A_AND,A_BIC,A_EOR] then
              begin
                if getsupreg(dst)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,dst,tmpreg);
                    dst:=tmpreg;
                  end;
                if getsupreg(src1)=RS_R13 then
                  begin
                    tmpreg:=getintregister(list,OS_INT);
                    a_load_reg_reg(list,OS_INT,OS_INT,src1,tmpreg);
                    src1:=tmpreg;
                  end;
              end;
{$endif}
            { generic case: table lookup, with the S postfix when flags are
              requested }
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Materialises the flag condition f in reg as 1 (condition holds) or 0
    (condition does not hold), using an ITE block followed by two
    conditional MOVs. }
  procedure tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      cond : tasmcond;
    begin
      cond:=flags_to_cond(f);
      list.concat(taicpu.op_cond(A_ITE,cond));
      { "then" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),cond));
      { "else" slot }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(cond)));
    end;
  { Generates the procedure prologue for Thumb-2.

    list         - instruction list the prologue is appended to
    localsize    - size of the local stack area; rounded up to a multiple of 4
                   and padded to the configured local alignment
    nostackframe - when true, no prologue code is emitted at all }
  procedure tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      memref : treference;
      shiftdummy : byte;
      firstfpu,lastfpu,
      supreg : byte;
      savedregs : tcpuregisterset;
      misalign : pint;
    begin
      localsize:=align(localsize,4);
      { call instruction does not put anything on the stack }
      misalign:=0;
      if nostackframe then
        exit;

      { determine the range of non-volatile FPU registers used by the
        procedure; each one accounts for 12 bytes }
      firstfpu:=RS_NO;
      lastfpu:=RS_NO;
      for supreg:=RS_F0 to RS_F7 do
        if supreg in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfpu=RS_NO then
              firstfpu:=supreg;
            lastfpu:=supreg;
            inc(misalign,12);
          end;

      a_reg_alloc(list,NR_STACK_POINTER_REG);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { remember the incoming stack pointer in R12 }
          a_reg_alloc(list,NR_FRAME_POINTER_REG);
          a_reg_alloc(list,NR_R12);
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
        end;

      { push the integer registers that have to be preserved }
      reference_reset(memref,4,[]);
      memref.index:=NR_STACK_POINTER_REG;
      memref.addressmode:=AM_PREINDEXED;
      savedregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        savedregs:=savedregs+[RS_FRAME_POINTER_REG,RS_R14]
      else if (savedregs<>[]) or (pi_do_call in current_procinfo.flags) then
        include(savedregs,RS_R14);
      if savedregs<>[] then
        begin
          for supreg:=RS_R0 to RS_R15 do
            if (supreg in savedregs) then
              inc(misalign,4);
          list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,memref,R_INTREGISTER,R_SUBWHOLE,savedregs),PF_FD));
        end;

      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { the framepointer now points to the saved R15, so the saved
            framepointer is at R11-12 (for get_caller_frame) }
          list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
          a_reg_dealloc(list,NR_R12);
        end;

      { reserve the local area, padded so that the stack stays aligned }
      misalign:=misalign mod current_settings.alignment.localalignmax;
      if (localsize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          localsize:=align(localsize+misalign,current_settings.alignment.localalignmax)-misalign;
          if not(is_shifter_const(localsize,shiftdummy)) then
            begin
              { size is not encodable as immediate: go through R12 }
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,localsize,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end
          else
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,localsize));
            end;
        end;

      { store the FPU registers found above with a single SFM }
      if firstfpu<>RS_NO then
        begin
          reference_reset(memref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the instruction: compute the address in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfpu,R_SUBWHOLE),
            lastfpu-firstfpu+1,memref));
        end;
    end;
  { Generates the procedure epilogue for Thumb-2.

    list         - instruction list the epilogue is appended to
    parasize     - not used by this implementation
    nostackframe - when true, only "BX R14" is emitted }
  procedure tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      memref : treference;
      firstfpu,lastfpu,
      supreg : byte;
      shiftdummy : byte;
      restoreregs : tcpuregisterset;
      framesize : longint;
      misalign : pint;
    begin
      if nostackframe then
        begin
          list.concat(taicpu.op_reg(A_BX,NR_R14));
          exit;
        end;

      misalign:=0;
      { determine the range of non-volatile FPU registers to restore.
        Their space is not added to misalign here: it is already included
        in the frame size returned by calc_stackframe_size below. }
      firstfpu:=RS_NO;
      lastfpu:=RS_NO;
      for supreg:=RS_F0 to RS_F7 do
        if supreg in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
          begin
            if firstfpu=RS_NO then
              firstfpu:=supreg;
            lastfpu:=supreg;
          end;

      { reload the FPU registers with a single LFM }
      if firstfpu<>RS_NO then
        begin
          reference_reset(memref,4,[]);
          if tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023 then
            begin
              { offset too large for the instruction: compute the address in R12 }
              a_load_const_reg(list,OS_ADDR,-tcpuprocinfo(current_procinfo).floatregstart,NR_R12);
              list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
              memref.base:=NR_R12;
            end
          else
            begin
              memref.base:=current_procinfo.framepointer;
              memref.offset:=tcpuprocinfo(current_procinfo).floatregstart;
            end;
          list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfpu,R_SUBWHOLE),
            lastfpu-firstfpu+1,memref));
        end;

      { integer registers to pop; R15 replaces R14 so that the load also
        performs the return }
      restoreregs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
      if (pi_do_call in current_procinfo.flags) or (restoreregs<>[]) then
        begin
          exclude(restoreregs,RS_R14);
          include(restoreregs,RS_R15);
        end;
      if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
        restoreregs:=restoreregs+[RS_FRAME_POINTER_REG,RS_R15];

      for supreg:=RS_R0 to RS_R15 do
        if (supreg in restoreregs) then
          inc(misalign,4);
      misalign:=misalign mod current_settings.alignment.localalignmax;

      { release the local area, using the same padding rule as the prologue }
      framesize:=current_procinfo.calc_stackframe_size;
      if (framesize<>0) or
         ((misalign<>0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          framesize:=align(framesize+misalign,current_settings.alignment.localalignmax)-misalign;
          if not(is_shifter_const(framesize,shiftdummy)) then
            begin
              { size is not encodable as immediate: go through R12 }
              a_reg_alloc(list,NR_R12);
              a_load_const_reg(list,OS_ADDR,framesize,NR_R12);
              list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
              a_reg_dealloc(list,NR_R12);
            end
          else
            begin
              a_reg_dealloc(list,NR_R12);
              list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,framesize));
            end;
        end;

      if restoreregs=[] then
        list.concat(taicpu.op_reg(A_BX,NR_R14))
      else
        begin
          reference_reset(memref,4,[]);
          memref.index:=NR_STACK_POINTER_REG;
          memref.addressmode:=AM_PREINDEXED;
          list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,memref,R_INTREGISTER,R_SUBWHOLE,restoreregs),PF_FD));
        end;
    end;
  { Emits the load/store "op<oppostfix> reg,ref" for Thumb-2, first rewriting
    ref into a form the instruction can encode.

    list      - instruction list the generated code is appended to
    op        - load/store opcode
    oppostfix - postfix attached to the final instruction
    reg       - register that is loaded or stored
    ref       - memory reference (local copy, modified while being legalised)

    Returns the reference that was finally used in the instruction. }
  function tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      addrreg : tregister;
      poolref : treference;
      poollabel : tasmlabel;
      fpumemop,
      absolutesym,
      noregs,
      badoffset,
      badnarrowoffset,
      badfpuref : boolean;
    begin
      addrreg:=NR_NO;
      { floating point/vector loads and stores have tighter addressing limits }
      fpumemop:=(op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR);

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(2014020706);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, the symbol reference has
        to be stored in the text segment and accessed pc relative.
        For now, references where base or index equals PC are assumed to be
        relative already, all other symbol references are assumed to be
        absolute and need the extra handling below.
        A proper solution would be to change refoptions to a set and store
        there whether the symbol is absolute or relative. }
      absolutesym:=assigned(ref.symbol) and
        not(is_pc(ref.base)) and
        not(is_pc(ref.index));
      { [#xxx] isn't a valid address operand }
      noregs:=(ref.base=NR_NO) and (ref.index=NR_NO);
      badoffset:=(ref.offset<-255) or (ref.offset>4095);
      badnarrowoffset:=(oppostfix in [PF_SB,PF_H,PF_SH]) and
        ((ref.offset<-255) or (ref.offset>255));
      { the usual pc relative symbol handling assumes possible offsets of
        +/- 4095, so symbols are rejected for these instructions as well }
      badfpuref:=fpumemop and
        ((ref.offset<-1020) or
         (ref.offset>1020) or
         ((abs(ref.offset) mod 4)<>0) or
         assigned(ref.symbol));

      if absolutesym or noregs or badoffset or badnarrowoffset or badfpuref then
        begin
          reference_reset(poolref,4,[]);
          addrreg:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              { place symbol+offset in the local data and load it pc relative }
              current_asmdata.getjumplabel(poollabel);
              cg.a_label(current_procinfo.aktlocaldata,poollabel);
              poolref.symboldata:=current_procinfo.aktlocaldata.last;
              if ref.refaddr=addr_gottpoff then
                current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
              else
                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
              poolref.symbol:=poollabel;
              poolref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,addrreg,poolref));
              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            a_load_const_reg(list,OS_ADDR,ref.offset,addrreg);

          { merge the loaded value into the reference }
          if (ref.base<>NR_NO) then
            begin
              if ref.index<>NR_NO then
                begin
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,ref.base,addrreg));
                  ref.base:=addrreg;
                end
              else
                begin
                  ref.index:=addrreg;
                  ref.shiftimm:=0;
                  ref.signindex:=1;
                  ref.shiftmode:=SM_None;
                end;
            end
          else
            ref.base:=addrreg;
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base, index and offset cannot be combined: fold the offset into a
        register }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if addrreg<>NR_NO then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,addrreg,addrreg)
          else
            begin
              addrreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,addrreg);
              ref.base:=addrreg;
            end;
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes (although it
        does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          addrreg:=getintregister(list,OS_ADDR);
          list.concat(taicpu.op_reg_reg(A_MOV, addrreg, NR_R15));
          ref.base := addrreg;
        end;

      { floating point operations have only limited references;
        a base is expected to be set already at this point }
      if fpumemop and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if addrreg<>NR_NO then
            begin
              if ref.base=addrreg then
                begin
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,addrreg,addrreg,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,addrreg,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>addrreg then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,addrreg,ref.base,addrreg))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,ref.base,addrreg));
                  ref.base:=addrreg;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              addrreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,addrreg,ref.base,ref.index));
              ref.base:=addrreg;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Copies the multimedia/VFP register reg1 into reg2.
    Only the OS_F32 -> OS_F32 case generates code (VMOV.F32, which is also
    registered as a move instruction).  Every other size combination,
    including OS_F64 -> OS_F64 and OS_F32 -> OS_F64, currently emits nothing. }
  procedure tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      { disabled code kept for reference:
          OS_F64 -> OS_F64:
            list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
            list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
          OS_F32 -> OS_F64:
            list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32)) }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
    end;
  { Loads the multimedia/VFP register reg from memory with VLDR.
    fromsize, tosize and shuffle are not evaluated; the reference is
    legalised by handle_load_store, whose result is discarded. }
  procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VLDR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Stores the multimedia/VFP register reg to memory with VSTR.
    fromsize, tosize and shuffle are not evaluated; the reference is
    legalised by handle_load_store, whose result is discarded. }
  procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    begin
      handle_load_store(list,
                        A_VSTR,
                        PF_None,
                        reg,
                        ref);
    end;
  { Transfers the integer register intreg into the multimedia/VFP register
    mmreg with VMOV.  Only a destination size of OS_F32 is supported, any
    other size raises internal error 2012100813.  The shuffle argument is
    not checked (the "shuffle=nil" test is disabled). }
  procedure tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Transfers the multimedia/VFP register mmreg into the integer register
    intreg with VMOV.  Only a source size of OS_F32 is supported, any other
    size raises internal error 2012100814.  The shuffle argument is not
    checked (the "shuffle=nil" test is disabled). }
  procedure tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { 64 bit register/register operation for Thumb-2.
    OP_NEG is expanded here as "RSBS lo,srclo,#0" followed by
    "SBC hi,zero,srchi" with a zeroed temporary register; all other
    operations are delegated to the inherited implementation. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          { low word: 0-lo, setting the flags for the following SBC }
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          { high word: 0-hi with the borrow taken from the flags }
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
    end;
  { 64 bit register/register operation for Thumb: regdst := regdst <op> regsrc
    (for OP_NEG and OP_NOT: regdst := <op> regsrc).

    list           - instruction list the generated code is appended to
    op             - OP_NEG, OP_NOT, OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB;
                     anything else raises internal error 2003083101
    size           - not evaluated by this implementation
    regsrc, regdst - source and destination register pair

    The flags are marked as allocated around every carry/borrow chain and
    released again afterwards. }
  procedure tthumbcg64farm.a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64);
    var
      worklo,workhi : tregister;
    begin
      case op of
        OP_NEG:
          begin
            { the result is built as 0-src, so the work registers are zeroed
              before the source is read: when source and destination pair
              overlap, fresh temporaries must be used or the source would be
              destroyed first }
            if (regdst.reglo=regsrc.reglo) or (regdst.reglo=regsrc.reghi) or
               (regdst.reghi=regsrc.reglo) or (regdst.reghi=regsrc.reghi) then
              begin
                worklo:=cg.getintregister(list,OS_32);
                workhi:=cg.getintregister(list,OS_32);
              end
            else
              begin
                worklo:=regdst.reglo;
                workhi:=regdst.reghi;
              end;
            list.concat(taicpu.op_reg_const(A_MOV,worklo,0));
            list.concat(taicpu.op_reg_const(A_MOV,workhi,0));
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,worklo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,workhi,regsrc.reghi));
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            { copy back only when temporaries were needed }
            if worklo<>regdst.reglo then
              begin
                cg.a_load_reg_reg(list,OS_32,OS_32,worklo,regdst.reglo);
                cg.a_load_reg_reg(list,OS_32,OS_32,workhi,regdst.reghi);
              end;
          end;
        OP_NOT:
          begin
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
          end;
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
          end;
        OP_ADD:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_ADD,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_ADC,regdst.reghi,regsrc.reghi));
            { the carry has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        OP_SUB:
          begin
            cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(taicpu.op_reg_reg(A_SUB,regdst.reglo,regsrc.reglo));
            list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
            { the borrow has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { 64 bit constant/register operation for Thumb: reg := reg <op> value.

    list  - instruction list the generated code is appended to
    op    - OP_AND, OP_OR, OP_XOR, OP_ADD or OP_SUB; anything else raises
            internal error 2003083101
    size  - not evaluated by this implementation
    value - 64 bit constant operand
    reg   - register pair that is both source and destination

    For OP_ADD/OP_SUB the high word constant is loaded into a register before
    the flag-setting low word instruction, so that no constant loading code
    sits between the instruction producing the carry/borrow and the one
    consuming it.  The flags are released once the chain is complete. }
  procedure tthumbcg64farm.a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64);
    var
      loreg,hireg : tregister;
      loval : aint;
      lo_op,hi_op : tasmop;
    begin
      case op of
        OP_AND,OP_OR,OP_XOR:
          begin
            cg.a_op_const_reg(list,op,OS_32,aint(lo(value)),reg.reglo);
            cg.a_op_const_reg(list,op,OS_32,aint(hi(value)),reg.reghi);
          end;
        OP_ADD,OP_SUB:
          begin
            if op=OP_ADD then
              begin
                lo_op:=A_ADD;
                hi_op:=A_ADC;
              end
            else
              begin
                lo_op:=A_SUB;
                hi_op:=A_SBC;
              end;
            loval:=aint(lo(value));

            { high word operand first, outside of the flag dependent part }
            hireg:=cg.getintregister(list,OS_32);
            cg.a_load_const_reg(list,OS_32,aint(hi(value)),hireg);

            if (loval>=0) and (loval<=255) then
              begin
                { small constants are used as immediate operand }
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_const(lo_op,reg.reglo,loval));
              end
            else
              begin
                loreg:=cg.getintregister(list,OS_32);
                cg.a_load_const_reg(list,OS_32,loval,loreg);
                cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(taicpu.op_reg_reg(lo_op,reg.reglo,loreg));
              end;

            list.concat(taicpu.op_reg_reg(hi_op,reg.reghi,hireg));
            { carry/borrow has been consumed, release the flags again }
            cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
          end;
        else
          internalerror(2003083101);
      end;
    end;
  { Instantiates the code generator objects (cg, cg64) matching the selected
    instruction set and selects the assembler optimizer class.
    For plain Thumb code no optimizer class is assigned here (the
    assignment of TCpuThumbAsmOptimizer is disabled). }
  procedure create_codegen;
    begin
      if GenerateThumb2Code then
        begin
          { Thumb-2 }
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end
      else
        if GenerateThumbCode then
          begin
            { Thumb }
            cg:=tthumbcgarm.create;
            cg64:=tthumbcg64farm.create;
            // casmoptimizer:=TCpuThumbAsmOptimizer;
          end
        else
          begin
            { ARM }
            cg:=tarmcgarm.create;
            cg64:=tarmcg64farm.create;
            casmoptimizer:=TCpuAsmOptimizer;
          end;
    end;
  4936. end.