thumb2.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. ARM
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$asmmode divided}
  13. {$ifndef FPC_SYSTEM_HAS_MOVE}
  14. {$define FPC_SYSTEM_FPC_MOVE}
  15. {$endif FPC_SYSTEM_HAS_MOVE}
  16. {$ifdef FPC_SYSTEM_FPC_MOVE}
  17. const
  18. cpu_has_edsp : boolean = false;
  19. in_edsp_test : boolean = false;
  20. {$endif FPC_SYSTEM_FPC_MOVE}
  21. {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
  22. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  23. Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
  24. begin
  25. { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  26. asm
  27. movw r0, #(0xed88)
  28. movt r0, #(0xe000)
  29. ldr r1, [r0]
  30. orr r1, r1, #(0xF << 20)
  31. str r1, [r0]
  32. end;
  33. softfloat_exception_mask:=[float_flag_underflow,float_flag_inexact,float_flag_denormal];
  34. softfloat_exception_flags:=[];
  35. end;
  36. {$endif}
  37. {$ifdef wince}
  38. function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';
  39. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  40. Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
  41. begin
  42. softfloat_exception_flags:=[];
  43. end;
  44. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  45. Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
  46. begin
  47. softfloat_exception_mask:=[float_flag_underflow,float_flag_inexact,float_flag_denormal];
  48. softfloat_exception_flags:=[];
  49. { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  50. { FPU precision 64 bit, rounding to nearest, affine infinity }
  51. _controlfp($000C0003, $030F031F);
  52. end;
  53. {$endif wince}
  54. {$define FPC_SYSTEM_HAS_GETSETNATIVEFPUCONTROLWORD}
  55. function GetNativeFPUControlWord: TNativeFPUControlWord;
  56. begin
  57. result:=_controlfp(0,0);
  58. end;
  59. procedure SetNativeFPUControlWord(const cw: TNativeFPUControlWord);
  60. begin
  61. _controlfp(cw,$ffffffff);
  62. end;
  63. {****************************************************************************
  64. stack frame related stuff
  65. ****************************************************************************}
{$IFNDEF INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_FRAME}
{ Returns the current contents of r11 in r0.
  NOTE(review): r11 is presumably the frame pointer register for this target,
  matching the FP referred to by get_caller_addr/get_caller_frame - confirm.
  nostackframe keeps the compiler from emitting a prologue around the single
  instruction. }
function get_frame:pointer;assembler;nostackframe;
asm
  mov r0,r11
end;
{$ENDIF not INTERNAL_BACKTRACE}
  73. {
  74. Stack frame on Thumb2:
  75. LR <- FP
  76. Old FP
  77. }
  78. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  79. function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;
  80. asm
  81. movs r0,r0
  82. beq .Lg_a_null
  83. ldr r0,[r0]
  84. .Lg_a_null:
  85. end;
  86. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  87. function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;
  88. asm
  89. movs r0,r0
  90. beq .Lgnf_null
  91. ldr r0,[r0,#-4]
  92. .Lgnf_null:
  93. end;
  94. {$define FPC_SYSTEM_HAS_SPTR}
  95. Function Sptr : pointer;assembler;
  96. asm
  97. mov r0,sp
  98. end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR}
{ Fills count bytes starting at x with value; returns at once if count<=0.

  Register use, as read from the code:
    r0  address of x on entry, afterwards scratch
    r1  count on entry, then the end address (x+count)
    r2  value; replicated into all four bytes on the word-store path
    r3  current write pointer
    ip, r4, r5  further copies of the fill word for the stmia stores

  WARNING: the routine performs computed jumps ("add pc,rX") into runs of
  store instructions. The jump offsets are scaled by 2 for the strb runs and
  by 4 for the stmia run, i.e. they rely on those stores being encoded in
  2 and 4 bytes respectively. Do not insert, remove or reorder instructions
  between an "add pc" and the end of the run it jumps into. }
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm
  // count <= 0: nothing to do
  cmp r1,#0
  it le
  movle pc,lr
  mov r3,r0
  cmp r1,#8 // at least 8 bytes to do?
  // r1 := end address. The blt below still tests the cmp above, so this add
  // must not alter the flags.
  add r1, r0
  // fewer than 8 bytes: byte stores only, r2 needs no replication
  blt .LFillchar3
  // replicate the fill byte into all four bytes of r2
  orr r2,r2,r2,lsl #8
  orr r2,r2,r2,lsl #16
.LFillchar0:
  // ip := write pointer modulo 4; zero means already word aligned
  ands ip, r3, #3
  beq .LAligned
  // Jump over (ip-1) of the three byte stores below so that 4-ip bytes get
  // written. NOTE(review): the nop presumably makes the pc value read by
  // "add pc" coincide with the first strb - confirm against the encoding.
  subs r0, ip, #1
  lsls r0, r0, #1
  add pc, r0
  nop
  strb r2,[r3,#2]
  strb r2,[r3,#1]
  strb r2,[r3,#0]
  // advance the write pointer to the next word boundary
  rsb r0, ip, #4
  add r3, r0
.LAligned:
  // r2, r4, r5 and ip all carry the fill word: 16 bytes per stmia
  mov ip,r2
  push {r4,r5,lr}
  mov r4,r2
  mov r5,r2
.LFillchar1:
  // Use calculated jump to do fills of x*16 bytes
  subs r0, r1, r3          // r0 := bytes still to write
  cmp r0, #128
  bge .LFillchar1_128      // 128 or more: run all eight stores
  lsrs r0, #4              // r0 := number of whole 16 byte chunks (0..7)
  beq .LFillchar2
  // skip 8-r0 of the stmia instructions below (4 bytes each)
  rsb r0, #8
  lsls r0, #2
  add pc, r0
  nop
.LFillchar1_128:
  stmia r3!,{r2,r4,r5,ip}
  stmia r3!,{r2,r4,r5,ip}
  stmia r3!,{r2,r4,r5,ip}
  stmia r3!,{r2,r4,r5,ip}
  stmia r3!,{r2,r4,r5,ip}
  stmia r3!,{r2,r4,r5,ip}
  stmia r3!,{r2,r4,r5,ip}
  stmia r3!,{r2,r4,r5,ip}
  b .LFillchar1
.LFillchar2:
  // Fewer than 16 bytes remain, so at most one 8 byte chunk is left to mop
  // up. We are still aligned at this point.
  pop {r4,r5,lr}
  sub r0, r1, r3
  cmp r0, #8
  it ge
  stmgeia r3!,{r2,ip}
.LFillchar3:
  // Write any remaining bytes (0..7): skip 7-remaining of the byte stores
  subs r0, r3, r1
  adds r0, #7 // 7-(e-s) = 7+(s-e)
  lsls r0, #1
  add pc, r0
  nop
  strb r2,[r3,#6]
  strb r2,[r3,#5]
  strb r2,[r3,#4]
  strb r2,[r3,#3]
  strb r2,[r3,#2]
  strb r2,[r3,#1]
  strb r2,[r3,#0]
end;
{$endif FPC_SYSTEM_HAS_FILLCHAR}
  174. {$ifndef FPC_SYSTEM_HAS_MOVE}
  175. {$define FPC_SYSTEM_HAS_MOVE}
  176. procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
  177. asm
  178. pld [r0]
  179. pld [r1]
  180. // count <=0 ?
  181. cmp r2,#0
  182. it le
  183. movle pc,lr
  184. // overlap?
  185. cmp r1,r0
  186. bls .Lnooverlap
  187. add r3,r0,r2
  188. cmp r3,r1
  189. bls .Lnooverlap
  190. // overlap, copy backward
  191. .Loverlapped:
  192. subs r2,r2,#1
  193. ldrb r3,[r0,r2]
  194. strb r3,[r1,r2]
  195. bne .Loverlapped
  196. mov pc,lr
  197. .Lnooverlap:
  198. // less then 16 bytes to copy?
  199. cmp r2,#8
  200. // yes, the forget about the whole optimizations
  201. // and do a bytewise copy
  202. blt .Lbyteloop
  203. // both aligned?
  204. orr r3,r0,r1
  205. tst r3,#3
  206. bne .Lbyteloop
  207. (*
  208. // yes, then align
  209. // alignment to 4 byte boundries is enough
  210. ldrb ip,[r0],#1
  211. sub r2,r2,#1
  212. stb ip,[r1],#1
  213. tst r3,#2
  214. bne .Ldifferentaligned
  215. ldrh ip,[r0],#2
  216. sub r2,r2,#2
  217. sth ip,[r1],#2
  218. .Ldifferentaligned
  219. // qword aligned?
  220. orrs r3,r0,r1
  221. tst r3,#7
  222. bne .Ldwordloop
  223. *)
  224. pld [r0,#32]
  225. pld [r1,#32]
  226. .Ldwordloop:
  227. sub r2,r2,#4
  228. ldr r3,[r0],#4
  229. // preload
  230. pld [r0,#64]
  231. pld [r1,#64]
  232. cmp r2,#4
  233. str r3,[r1],#4
  234. bcs .Ldwordloop
  235. cmp r2,#0
  236. it eq
  237. moveq pc,lr
  238. .Lbyteloop:
  239. subs r2,r2,#1
  240. ldrb r3,[r0],#1
  241. strb r3,[r1],#1
  242. bne .Lbyteloop
  243. mov pc,lr
  244. end;
  245. procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
  246. asm
  247. // count <=0 ?
  248. cmp r2,#0
  249. it le
  250. movle pc,lr
  251. // overlap?
  252. cmp r1,r0
  253. bls .Lnooverlap
  254. add r3,r0,r2
  255. cmp r3,r1
  256. bls .Lnooverlap
  257. // overlap, copy backward
  258. .Loverlapped:
  259. subs r2,r2,#1
  260. ldrb r3,[r0,r2]
  261. strb r3,[r1,r2]
  262. bne .Loverlapped
  263. mov pc,lr
  264. .Lnooverlap:
  265. // less then 16 bytes to copy?
  266. cmp r2,#8
  267. // yes, the forget about the whole optimizations
  268. // and do a bytewise copy
  269. blt .Lbyteloop
  270. // both aligned?
  271. orr r3,r0,r1
  272. tst r3,#3
  273. bne .Lbyteloop
  274. (*
  275. // yes, then align
  276. // alignment to 4 byte boundries is enough
  277. ldrb ip,[r0],#1
  278. sub r2,r2,#1
  279. stb ip,[r1],#1
  280. tst r3,#2
  281. bne .Ldifferentaligned
  282. ldrh ip,[r0],#2
  283. sub r2,r2,#2
  284. sth ip,[r1],#2
  285. .Ldifferentaligned
  286. // qword aligned?
  287. orrs r3,r0,r1
  288. tst r3,#7
  289. bne .Ldwordloop
  290. *)
  291. .Ldwordloop:
  292. sub r2,r2,#4
  293. ldr r3,[r0],#4
  294. cmp r2,#4
  295. str r3,[r1],#4
  296. bcs .Ldwordloop
  297. cmp r2,#0
  298. it eq
  299. moveq pc,lr
  300. .Lbyteloop:
  301. subs r2,r2,#1
  302. ldrb r3,[r0],#1
  303. strb r3,[r1],#1
  304. bne .Lbyteloop
  305. mov pc,lr
  306. end;
const
  { Address of the routine that Move dispatches to. It starts out as
    move_blended; fpc_cpucodeinit assigns it again during start-up. }
  moveproc : pointer = @move_blended;

{ Public Move entry point (alias FPC_MOVE): jumps to whichever routine
  moveproc currently points at. r0..r2 and lr are left untouched, so the
  target sees the original arguments and returns to Move's caller. }
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
asm
  // ip := address of the moveproc variable (taken from the literal below)
  ldr ip,.Lmoveproc
  // pc := moveproc^ - control does not come back here
  ldr pc,[ip]
.Lmoveproc:
  .long moveproc
end;
  316. {$endif FPC_SYSTEM_HAS_MOVE}
  317. {****************************************************************************
  318. String
  319. ****************************************************************************}
  320. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  321. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
(* Copies sstr into res, clamping the length to the value passed in r1.

   Registers on entry:
     r0: __RESULT (destination string)
     r1: len      (upper bound for the stored length; becomes scratch
                   once the clamp is done)
     r2: sstr     (source string)
   Scratch: r3 (alignment / word counter), r12 (bytes still to copy).

   The alignment sequence is a chain of IT blocks whose conditions depend on
   the flags produced by the preceding ands/subs instructions; keep the
   instruction order intact. *)
asm
  (* r12 := length byte of the source, r2 now points at its first char *)
  ldrb r12,[r2],#1
  (* clamp the length to r1 *)
  cmp r12,r1
  it gt
  movgt r12,r1
  (* store the resulting length byte, r0 now points at the first char *)
  strb r12,[r0],#1
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  (* r3 := number of bytes (0..3) needed to reach a 4 byte boundary in r0 *)
  rsb r3,r0,#0
  ands r3,r3,#3
  (* those bytes no longer count as remaining; the IT block below still
     tests the result of the ands above *)
  sub r12,r12,r3
  (* copy up to three bytes; every subnes updates the flags so that the
     rest of the chain is skipped as soon as r3 reaches zero *)
  itttt ne
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  itttt ne
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  it ne
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r2,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  (* r3 := number of whole words, r12 := bytes left over after them; the beq
     tests the result of the movs, not of the and *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ittt ne
  ldrne r1,[r2],#4
  strne r1,[r0],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r1,[r2],#1
  strb r1,[r0],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
(* Copies the short string at sstr to dstr, clamping the length to len.

   Registers on entry:
     r0: len  (upper bound for the stored length; becomes scratch once the
               clamp is done)
     r1: sstr (source string)
     r2: dstr (destination string)
   Scratch: r3 (alignment / word counter), r12 (bytes still to copy).

   The alignment sequence is a chain of IT blocks whose conditions depend on
   the flags produced by the preceding ands/subs instructions; keep the
   instruction order intact. *)
asm
  (* r12 := length byte of the source, r1 now points at its first char *)
  ldrb r12,[r1],#1
  (* clamp the length to len *)
  cmp r12,r0
  it gt
  movgt r12,r0
  (* store the resulting length byte, r2 now points at the first char *)
  strb r12,[r2],#1
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  (* r3 := number of bytes (0..3) needed to reach a 4 byte boundary in r2 *)
  rsb r3,r2,#0
  ands r3,r3,#3
  (* those bytes no longer count as remaining; the IT block below still
     tests the result of the ands above *)
  sub r12,r12,r3
  (* copy up to three bytes; every subnes updates the flags so that the
     rest of the chain is skipped as soon as r3 reaches zero *)
  itttt ne
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  itttt ne
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  it ne
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r1,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  (* r3 := number of whole words, r12 := bytes left over after them; the beq
     tests the result of the movs, not of the and *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ittt ne
  ldrne r0,[r1],#4
  strne r0,[r2],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r0,[r1],#1
  strb r0,[r2],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
  442. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  443. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  444. {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  445. function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
  446. asm
  447. cmp r0,#0
  448. mov r1,r0
  449. beq .Ldone
  450. .Lnextchar:
  451. (*Are we aligned?*)
  452. tst r1,#3
  453. bne .Ltest_unaligned (*No, do byte per byte.*)
  454. ldr r3,.L01010101
  455. .Ltest_aligned:
  456. (*Aligned, load 4 bytes at a time.*)
  457. ldr r12,[r1],#4
  458. (*Check wether r12 contains a 0 byte.*)
  459. sub r2,r12,r3
  460. mvn r12,r12
  461. and r2,r2,r12
  462. ands r2,r2,r3,lsl #7 (*r3 lsl 7 = $80808080*)
  463. beq .Ltest_aligned (*No 0 byte, repeat.*)
  464. sub r1,r1,#4
  465. .Ltest_unaligned:
  466. ldrb r12,[r1],#1
  467. cmp r12,#1 (*r12<1 same as r12=0, but result in carry flag*)
  468. bcs .Lnextchar
  469. (*Dirty trick: we need to subtract 1 extra because we have counted the
  470. terminating 0, due to the known carry flag sbc can do this.*)
  471. sbc r0,r1,r0
  472. .Ldone:
  473. mov pc,lr
  474. .L01010101:
  475. .long 0x01010101
  476. end;
  477. {$endif}
  478. function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
  479. asm
  480. .Lloop:
  481. ldrex ip, [r0]
  482. sub ip, #1
  483. strex r3, ip, [r0]
  484. cmp r3, #0
  485. bne .Lloop
  486. mov r0, ip
  487. end;
  488. function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
  489. asm
  490. .Lloop:
  491. ldrex ip, [r0]
  492. add ip, #1
  493. strex r3, ip, [r0]
  494. cmp r3, #0
  495. bne .Lloop
  496. mov r0, ip
  497. end;
  498. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  499. asm
  500. .Lloop:
  501. ldrex ip, [r0]
  502. strex r3, r1, [r0]
  503. cmp r3, #0
  504. bne .Lloop
  505. mov r0, ip
  506. end;
  507. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  508. asm
  509. .Lloop:
  510. ldrex ip, [r0]
  511. add r2, ip, r1
  512. strex r3, r2, [r0]
  513. cmp r3, #0
  514. bne .Lloop
  515. mov r0, ip
  516. end;
  517. function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
  518. asm
  519. .Lloop:
  520. ldrex ip, [r0]
  521. cmp ip, r2
  522. ite eq
  523. strexeq r3, r1, [r0]
  524. movne r3, #0
  525. cmp r3, #0
  526. bne .Lloop
  527. mov r0, ip
  528. end;
  529. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  530. function declocked(var l: longint) : boolean; inline;
  531. begin
  532. Result:=InterLockedDecrement(l) = 0;
  533. end;
  534. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  535. procedure inclocked(var l: longint); inline;
  536. begin
  537. InterLockedIncrement(l);
  538. end;
{ CPU specific start-up hook. When the RTL's own Move is in use
  (FPC_SYSTEM_FPC_MOVE) it executes one ldrd as a probe and then points
  moveproc at move_pld or move_blended depending on cpu_has_edsp.

  NOTE(review): nothing in this routine ever clears cpu_has_edsp.
  Presumably a fault handler defined elsewhere does so when the ldrd traps
  while in_edsp_test is set - confirm against that handler. }
procedure fpc_cpucodeinit;
begin
{$ifdef FPC_SYSTEM_FPC_MOVE}
  { assume the instruction is available until shown otherwise }
  cpu_has_edsp:=true;
  { mark the probe window }
  in_edsp_test:=true;
  asm
    mov r1,sp
    bic r0,r1,#7      // r0 := sp rounded down to a multiple of 8
    ldrd r0,r1,[r0]   // the probe itself; the loaded values are not used
  end;
  in_edsp_test:=false;
  if cpu_has_edsp then
    moveproc:=@move_pld
  else
    moveproc:=@move_blended;
{$endif FPC_SYSTEM_FPC_MOVE}
end;