thumb2.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. ARM
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$asmmode gas}
  13. {$ifndef FPC_SYSTEM_HAS_MOVE}
      { FPC_SYSTEM_FPC_MOVE records that FPC_SYSTEM_HAS_MOVE was not defined on
        entry to this file. It gates the two flags below and the ldrd probe in
        fpc_cpucodeinit. }
  14. {$define FPC_SYSTEM_FPC_MOVE}
  15. {$endif FPC_SYSTEM_HAS_MOVE}
  16. {$ifdef FPC_SYSTEM_FPC_MOVE}
  17. const
      { cpu_has_edsp: set to true by fpc_cpucodeinit just before it executes an
        ldrd instruction; its value afterwards selects Move_pld (true) or
        Move_blended (false).
        NOTE(review): nothing in this file resets it to false after the probe;
        presumably an exception handler elsewhere does so while in_edsp_test is
        set - confirm. }
  18. cpu_has_edsp : boolean = false;
      { in_edsp_test: true only while fpc_cpucodeinit runs the ldrd probe. }
  19. in_edsp_test : boolean = false;
  20. {$endif FPC_SYSTEM_FPC_MOVE}
  21. {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
  22. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  23. Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
      { Initialises the floating point unit.
        FPUFPV4_S16: sets bits 20..23 of the word at 0xE000ED88 (the ARMv7-M
        Coprocessor Access Control Register), i.e. grants full access to
        coprocessors CP10 and CP11, which enables the FPU.
        Otherwise: rewrites the FPA status register via rfs/wfs, clearing
        bits 16..20 and then setting bits 16..18.
        Both variants overwrite r0; the FPUFPV4_S16 variant also overwrites r1. }
  24. begin
  25. { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  26. asm
  27. {$IFDEF FPUFPV4_S16}
  28. movw r0, #(0xed88)
  29. movt r0, #(0xe000) // r0 = 0xE000ED88
  30. ldr r1, [r0]
  31. orr r1, r1, #(0xF << 20) // set bits 20..23, keep all other bits
  32. str r1, [r0]
      // No explicit "bx lr" here: this asm statement sits inside a regular
      // begin..end body, so returning directly would skip the exit code the
      // compiler generates for the procedure (and would return from the
      // caller if the body were ever inlined). Fall through instead.
  34. {$ELSE FPUFPV4_S16}
  35. rfs r0 // read FPA status register
  36. and r0,r0,#0xffe0ffff // clear bits 16..20
  37. orr r0,r0,#0x00070000 // set bits 16..18
  38. wfs r0 // write it back
  39. {$endif FPUFPV4_S16}
  40. end;
  41. end;
  42. {$endif}
  43. procedure fpc_cpuinit;
      { CPU-specific start-up hook: all it does is call SysInitFPU. }
  44. begin
  45. SysInitFPU;
  46. end;
  47. {$ifdef wince}
  48. function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';
      { Imported from the 'coredll' library with the cdecl calling convention;
        used by the wince SysInitFPU below to program the FPU control word. }
  49. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  50. Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
      { wince variant: clears the soft-float exception flags (declared outside
        this file); no hardware register is touched. }
  51. begin
  52. softfloat_exception_flags:=0;
  53. end;
  54. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  55. Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
      { wince variant: masks the underflow, inexact and denormal soft-float
        exceptions, then passes a new control word and bit mask to the imported
        _controlfp. The function result of _controlfp is discarded. }
  56. begin
  57. softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
  58. { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  59. { FPU precision 64 bit, rounding to nearest, affine infinity }
  60. _controlfp($000C0003, $030F031F);
  61. end;
  62. {$endif wince}
  63. {****************************************************************************
  64. stack frame related stuff
  65. ****************************************************************************}
  66. {$IFNDEF INTERNAL_BACKTRACE}
  67. {$define FPC_SYSTEM_HAS_GET_FRAME}
  68. function get_frame:pointer;assembler;nostackframe;
      { Returns the current contents of r11. No stack frame is set up
        (nostackframe), so this is the value r11 had in the caller.
        NOTE(review): presumably r11 is the frame pointer register for this
        target - confirm against the code generator. }
  69. asm
  70. mov r0,r11
  71. end;
  72. {$ENDIF not INTERNAL_BACKTRACE}
  73. {
  74. Stack frame on Thumb2:
  75. LR <- FP
  76. Old FP
  77. }
  78. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  79. function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;
      { Returns the word stored at [framebp], or nil when framebp is nil.
        According to the stack frame layout described above, that word is the
        saved LR, i.e. the return address. The addr parameter is not used. }
  80. asm
  81. movs r0,r0 // sets Z when framebp is nil
  82. beq .Lg_a_null // nil in, nil out
  83. ldr r0,[r0] // result := word at framebp
  84. .Lg_a_null:
  85. end;
  86. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  87. function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;
      { Returns the word stored at [framebp-4], or nil when framebp is nil.
        According to the stack frame layout described above, that word is the
        previous frame pointer ("Old FP"). The addr parameter is not used. }
  88. asm
  89. movs r0,r0 // sets Z when framebp is nil
  90. beq .Lgnf_null // nil in, nil out
  91. ldr r0,[r0,#-4] // result := word just below framebp
  92. .Lgnf_null:
  93. end;
  94. {$define FPC_SYSTEM_HAS_SPTR}
  95. Function Sptr : pointer;assembler;
      { Returns the value of the stack pointer register at the point where the
        mov below executes. }
  96. asm
  97. mov r0,sp
  98. end;
  99. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  100. {$define FPC_SYSTEM_HAS_FILLCHAR}
  101. Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
       { Stores the byte value into count consecutive bytes starting at x.
         Register use in the body: r0 = address of x, r1 = count, r2 = value,
         r3 = running write pointer, ip = second copy of the fill word.
         A negative or zero count stores nothing.
         Strategy: for count >= 8 the byte is replicated into a 32-bit word,
         single bytes are written until r3 is 4-byte aligned, then 8 bytes per
         stmia are written in a loop unrolled four times. The last 1..7 bytes
         are written by jumping into a run of seven strb instructions. }
  102. asm
  103. // less than 0?
  104. cmp r1,#0
  105. it lt
  106. movlt pc,lr
  107. mov r3,r0 // r3 = write pointer
  108. cmp r1,#8 // at least 8 bytes to do?
  109. blt .LFillchar2
       // replicate the fill byte into all four bytes of r2
       // (assumes bits 8..31 of r2 are zero on entry - TODO confirm)
  110. orr r2,r2,r2,lsl #8
  111. orr r2,r2,r2,lsl #16
  112. .LFillchar0:
  113. tst r3,#3 // aligned yet?
  114. itt ne
  115. strneb r2,[r3],#1
  116. subne r1,r1,#1
  117. bne .LFillchar0
  118. mov ip,r2 // second register so one stmia writes 8 bytes
  119. .LFillchar1:
  120. cmp r1,#8 // 8 bytes still to do?
  121. blt .LFillchar2
  122. stmia r3!,{r2,ip}
  123. sub r1,r1,#8
  124. cmp r1,#8 // 8 bytes still to do?
  125. blt .LFillchar2
  126. stmia r3!,{r2,ip}
  127. sub r1,r1,#8
  128. cmp r1,#8 // 8 bytes still to do?
  129. blt .LFillchar2
  130. stmia r3!,{r2,ip}
  131. sub r1,r1,#8
  132. cmp r1,#8 // 8 bytes still to do?
  133. itt ge
  134. stmgeia r3!,{r2,ip}
  135. subge r1,r1,#8
  136. bge .LFillchar1
  137. .LFillchar2:
  138. movs r1,r1 // anything left?
  139. it eq
  140. moveq pc,lr
       // 1..7 bytes remain: skip (7 - remaining) of the seven strb below.
       // NOTE(review): the byte offset (7 - remaining)*4 only lands on the
       // intended strb if every post-indexed strb is a 4-byte encoding and
       // "add pc" / "mov r0,r0" are 2-byte encodings - confirm what the
       // assembler emits.
  141. rsb r1,r1,#7
  142. mov r1,r1,lsl #2
  143. add pc,r1
  144. mov r0,r0 // filler instruction (no effect)
  145. strb r2,[r3],#1
  146. strb r2,[r3],#1
  147. strb r2,[r3],#1
  148. strb r2,[r3],#1
  149. strb r2,[r3],#1
  150. strb r2,[r3],#1
  151. strb r2,[r3],#1
  152. mov pc,lr
  153. end;
  154. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  155. {$ifndef FPC_SYSTEM_HAS_MOVE}
  156. {$define FPC_SYSTEM_HAS_MOVE}
  157. procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
       { Copies count bytes from source to dest and issues pld preload hints
         along the way. Register use in the body: r0 = address of source,
         r1 = address of dest, r2 = count, r3 = scratch.
         count <= 0 copies nothing.
         If dest lies inside [source, source+count) the copy runs backwards,
         one byte at a time, so overlapping data is not overwritten before it
         is read. Otherwise the copy runs forwards: word by word when count >= 8
         and both addresses are 4-byte aligned, byte by byte in all other
         cases. }
  158. asm
  159. pld [r0]
  160. pld [r1]
  161. // count <=0 ?
  162. cmp r2,#0
  163. it le
  164. movle pc,lr
  165. // overlap?
  166. cmp r1,r0
  167. bls .Lnooverlap // dest <= source (unsigned): forward copy is safe
  168. add r3,r0,r2
  169. cmp r3,r1
  170. bls .Lnooverlap // source+count <= dest: the ranges do not overlap
  171. // overlap, copy backward
  172. .Loverlapped:
  173. subs r2,r2,#1 // r2 = index of the byte to copy; Z set on the last one
  174. ldrb r3,[r0,r2]
  175. strb r3,[r1,r2]
  176. bne .Loverlapped
  177. mov pc,lr
  178. .Lnooverlap:
  179. // fewer than 8 bytes to copy?
  180. cmp r2,#8
  181. // yes, then forget about all the optimizations
  182. // and do a bytewise copy
  183. blt .Lbyteloop
  184. // both aligned?
  185. orr r3,r0,r1
  186. tst r3,#3
  187. bne .Lbyteloop // at least one address is not 4-byte aligned
  188. (*
  189. // yes, then align
  190. // alignment to 4 byte boundries is enough
  191. ldrb ip,[r0],#1
  192. sub r2,r2,#1
  193. stb ip,[r1],#1
  194. tst r3,#2
  195. bne .Ldifferentaligned
  196. ldrh ip,[r0],#2
  197. sub r2,r2,#2
  198. sth ip,[r1],#2
  199. .Ldifferentaligned
  200. // qword aligned?
  201. orrs r3,r0,r1
  202. tst r3,#7
  203. bne .Ldwordloop
  204. *)
  205. pld [r0,#32]
  206. pld [r1,#32]
  207. .Ldwordloop:
  208. sub r2,r2,#4
  209. ldr r3,[r0],#4
  210. // preload
  211. pld [r0,#64]
  212. pld [r1,#64]
  213. cmp r2,#4 // carry set while at least 4 bytes remain
  214. str r3,[r1],#4
  215. bcs .Ldwordloop
  216. cmp r2,#0 // 0..3 bytes are left here
  217. it eq
  218. moveq pc,lr
  219. .Lbyteloop:
  220. subs r2,r2,#1
  221. ldrb r3,[r0],#1
  222. strb r3,[r1],#1
  223. bne .Lbyteloop
  224. mov pc,lr
  225. end;
  226. procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
       { Copies count bytes from source to dest. Same algorithm as Move_pld but
         without any pld preload hints. Register use in the body: r0 = address
         of source, r1 = address of dest, r2 = count, r3 = scratch.
         count <= 0 copies nothing.
         If dest lies inside [source, source+count) the copy runs backwards,
         one byte at a time. Otherwise it runs forwards: word by word when
         count >= 8 and both addresses are 4-byte aligned, byte by byte in all
         other cases. }
  227. asm
  228. // count <=0 ?
  229. cmp r2,#0
  230. it le
  231. movle pc,lr
  232. // overlap?
  233. cmp r1,r0
  234. bls .Lnooverlap // dest <= source (unsigned): forward copy is safe
  235. add r3,r0,r2
  236. cmp r3,r1
  237. bls .Lnooverlap // source+count <= dest: the ranges do not overlap
  238. // overlap, copy backward
  239. .Loverlapped:
  240. subs r2,r2,#1 // r2 = index of the byte to copy; Z set on the last one
  241. ldrb r3,[r0,r2]
  242. strb r3,[r1,r2]
  243. bne .Loverlapped
  244. mov pc,lr
  245. .Lnooverlap:
  246. // fewer than 8 bytes to copy?
  247. cmp r2,#8
  248. // yes, then forget about all the optimizations
  249. // and do a bytewise copy
  250. blt .Lbyteloop
  251. // both aligned?
  252. orr r3,r0,r1
  253. tst r3,#3
  254. bne .Lbyteloop // at least one address is not 4-byte aligned
  255. (*
  256. // yes, then align
  257. // alignment to 4 byte boundries is enough
  258. ldrb ip,[r0],#1
  259. sub r2,r2,#1
  260. stb ip,[r1],#1
  261. tst r3,#2
  262. bne .Ldifferentaligned
  263. ldrh ip,[r0],#2
  264. sub r2,r2,#2
  265. sth ip,[r1],#2
  266. .Ldifferentaligned
  267. // qword aligned?
  268. orrs r3,r0,r1
  269. tst r3,#7
  270. bne .Ldwordloop
  271. *)
  272. .Ldwordloop:
  273. sub r2,r2,#4
  274. ldr r3,[r0],#4
  275. cmp r2,#4 // carry set while at least 4 bytes remain
  276. str r3,[r1],#4
  277. bcs .Ldwordloop
  278. cmp r2,#0 // 0..3 bytes are left here
  279. it eq
  280. moveq pc,lr
  281. .Lbyteloop:
  282. subs r2,r2,#1
  283. ldrb r3,[r0],#1
  284. strb r3,[r1],#1
  285. bne .Lbyteloop
  286. mov pc,lr
  287. end;
  288. const
       { Address of the routine that Move jumps to. Starts out as Move_blended;
         fpc_cpucodeinit assigns either Move_pld or Move_blended to it. }
  289. moveproc : pointer = @move_blended;
  290. procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
       { Public entry point, exported as FPC_MOVE. It jumps to the routine whose
         address is currently stored in moveproc. r0..r2 and lr are left
         untouched, so the target sees the original arguments and returns
         straight to Move's caller. ip is overwritten. }
  291. asm
  292. ldr ip,.Lmoveproc // ip = address of the moveproc variable
  293. ldr pc,[ip] // jump to the routine stored in moveproc
  294. .Lmoveproc:
  295. .long moveproc // literal: address of moveproc
  296. end;
  297. {$endif FPC_SYSTEM_HAS_MOVE}
  298. {****************************************************************************
  299. String
  300. ****************************************************************************}
  301. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  302. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  303. {$ifndef FPC_STRTOSHORTSTRINGPROC}
  304. function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
  305. {$else}
  306. procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
  307. {$endif}
  308. {r0: __RESULT
  309. r1: len
  310. r2: sstr}
       { Copies the short string at r2 to the destination at r0, truncating it
         to at most r1 characters. The (possibly reduced) length byte is written
         first, followed by that many characters.
         Strings shorter than 6 characters are copied byte by byte. Longer ones
         first copy up to 3 bytes to bring the destination to a 4-byte boundary;
         if the source pointer is then 4-byte aligned too, the bulk is copied
         one word at a time and the remaining 0..3 bytes byte by byte; if not,
         everything that is left is copied byte by byte.
         r12 = number of characters still to copy, r3 = scratch counter,
         r1 is reused as the data register once the length has been clamped.
         NOTE(review): the register list above is the existing comment; for the
         procedure variant it presumably relies on a hidden length argument in
         r1 - confirm. }
  311. asm
  312. ldrb r12,[r2],#1 // r12 = length byte of sstr
  313. cmp r12,r1
  314. it gt
  315. movgt r12,r1 // clamp to len
  316. strb r12,[r0],#1 // store the resulting length byte
  317. cmp r12,#6 (* 6 seems to be the break even point. *)
  318. blt .LStartTailCopy
  319. (* Align destination on 32bits. This is the only place where unrolling
  320. really seems to help, since in the common case, sstr is aligned on
  321. 32 bits, therefore in the common case we need to copy 3 bytes to
  322. align, i.e. in the case of a loop, you wouldn't branch out early.*)
  323. rsb r3,r0,#0
  324. ands r3,r3,#3 // r3 = bytes needed to align the destination (0..3)
  325. sub r12,r12,r3 // no flag update: the IT block below tests the ands result
       // Each subnes below updates the flags, so the conditional instructions
       // that follow it execute only while alignment bytes remain.
  326. itttt ne
  327. ldrneb r1,[r2],#1
  328. strneb r1,[r0],#1
  329. subnes r3,r3,#1
  330. ldrneb r1,[r2],#1
  331. itttt ne
  332. strneb r1,[r0],#1
  333. subnes r3,r3,#1
  334. ldrneb r1,[r2],#1
  335. strneb r1,[r0],#1
  336. it ne
  337. subnes r3,r3,#1
  338. .LDoneAlign:
  339. (* Destination should be aligned now, but source might not be aligned,
  340. if this is the case, do a byte-per-byte copy. *)
  341. tst r2,#3
  342. bne .LStartTailCopy
  343. (* Start the main copy, 32 bit at a time. *)
  344. movs r3,r12,lsr #2 // r3 = number of whole words; Z set when there are none
  345. and r12,r12,#3 // r12 = bytes left after the word copy (flags unchanged)
  346. beq .LStartTailCopy
  347. .LNext4bytes:
  348. (* Unrolling this loop would save a little bit of time for long strings
  349. (>20 chars), but alas, it hurts for short strings and they are the
  350. common case.*)
  351. ittt ne
  352. ldrne r1,[r2],#4
  353. strne r1,[r0],#4
  354. subnes r3,r3,#1
  355. bne .LNext4bytes
  356. .LStartTailCopy:
  357. (* Do remaining bytes. *)
  358. cmp r12,#0
  359. beq .LDoneTail
  360. .LNextChar3:
  361. ldrb r1,[r2],#1
  362. strb r1,[r0],#1
  363. subs r12,r12,#1
  364. bne .LNextChar3
  365. .LDoneTail:
  366. end;
  367. procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
  368. {r0: len
  369. r1: sstr
  370. r2: dstr}
       { Copies the short string at sstr to dstr, truncating it to at most len
         characters. The (possibly reduced) length byte is written first,
         followed by that many characters.
         Same algorithm as fpc_shortstr_to_shortstr with different registers:
         strings shorter than 6 characters are copied byte by byte; longer ones
         first copy up to 3 bytes to bring the destination to a 4-byte boundary,
         then copy whole words if the source pointer is 4-byte aligned as well,
         and finish byte by byte.
         r12 = number of characters still to copy, r3 = scratch counter,
         r0 is reused as the data register once the length has been clamped. }
  371. asm
  372. ldrb r12,[r1],#1 // r12 = length byte of the source string
  373. cmp r12,r0
  374. it gt
  375. movgt r12,r0 // clamp to len
  376. strb r12,[r2],#1 // store the resulting length byte
  377. cmp r12,#6 (* 6 seems to be the break even point. *)
  378. blt .LStartTailCopy
  379. (* Align destination on 32bits. This is the only place where unrolling
  380. really seems to help, since in the common case, sstr is aligned on
  381. 32 bits, therefore in the common case we need to copy 3 bytes to
  382. align, i.e. in the case of a loop, you wouldn't branch out early.*)
  383. rsb r3,r2,#0
  384. ands r3,r3,#3 // r3 = bytes needed to align the destination (0..3)
  385. sub r12,r12,r3 // no flag update: the IT block below tests the ands result
       // Each subnes below updates the flags, so the conditional instructions
       // that follow it execute only while alignment bytes remain.
  386. itttt ne
  387. ldrneb r0,[r1],#1
  388. strneb r0,[r2],#1
  389. subnes r3,r3,#1
  390. ldrneb r0,[r1],#1
  391. itttt ne
  392. strneb r0,[r2],#1
  393. subnes r3,r3,#1
  394. ldrneb r0,[r1],#1
  395. strneb r0,[r2],#1
  396. it ne
  397. subnes r3,r3,#1
  398. .LDoneAlign:
  399. (* Destination should be aligned now, but source might not be aligned,
  400. if this is the case, do a byte-per-byte copy. *)
  401. tst r1,#3
  402. bne .LStartTailCopy
  403. (* Start the main copy, 32 bit at a time. *)
  404. movs r3,r12,lsr #2 // r3 = number of whole words; Z set when there are none
  405. and r12,r12,#3 // r12 = bytes left after the word copy (flags unchanged)
  406. beq .LStartTailCopy
  407. .LNext4bytes:
  408. (* Unrolling this loop would save a little bit of time for long strings
  409. (>20 chars), but alas, it hurts for short strings and they are the
  410. common case.*)
  411. ittt ne
  412. ldrne r0,[r1],#4
  413. strne r0,[r2],#4
  414. subnes r3,r3,#1
  415. bne .LNext4bytes
  416. .LStartTailCopy:
  417. (* Do remaining bytes. *)
  418. cmp r12,#0
  419. beq .LDoneTail
  420. .LNextChar3:
  421. ldrb r0,[r1],#1
  422. strb r0,[r2],#1
  423. subs r12,r12,#1
  424. bne .LNextChar3
  425. .LDoneTail:
  426. end;
  427. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  428. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  429. {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  430. function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
       { Returns the number of bytes before the first zero byte at p, or 0 when
         p is nil.
         r0 keeps the start address, r1 is the scan pointer. While r1 is not
         4-byte aligned the string is examined byte by byte. Once aligned, whole
         words are loaded and tested with (w - $01010101) and (not w) and
         $80808080, which is non-zero when the word contains a zero byte. The
         scan then steps back to the start of that word and locates the
         terminator byte by byte. }
  431. asm
  432. cmp r0,#0
  433. mov r1,r0
  434. beq .Ldone // nil pointer: r0 is already 0
  435. .Lnextchar:
  436. (*Are we aligned?*)
  437. tst r1,#3
  438. bne .Ltest_unaligned (*No, do byte per byte.*)
  439. ldr r3,.L01010101
  440. .Ltest_aligned:
  441. (*Aligned, load 4 bytes at a time.*)
  442. ldr r12,[r1],#4
  443. (*Check whether r12 contains a 0 byte.*)
  444. sub r2,r12,r3
  445. mvn r12,r12
  446. and r2,r2,r12
  447. ands r2,r2,r3,lsl #7 (*r3 lsl 7 = $80808080*)
  448. beq .Ltest_aligned (*No 0 byte, repeat.*)
  449. sub r1,r1,#4 // go back to the start of the word just tested
  450. .Ltest_unaligned:
  451. ldrb r12,[r1],#1
  452. cmp r12,#1 (*r12<1 same as r12=0, but result in carry flag*)
  453. bcs .Lnextchar
  454. (*Dirty trick: we need to subtract 1 extra because we have counted the
  455. terminating 0, due to the known carry flag sbc can do this.*)
  456. sbc r0,r1,r0 // carry is clear here, so r0 = r1 - r0 - 1
  457. .Ldone:
  458. mov pc,lr
  459. .L01010101:
  460. .long 0x01010101
  461. end;
  462. {$endif}
  463. function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
       { Decrements Target by one using an ldrex/strex retry loop and returns
         the decremented value. r0 = address of Target; ip and r3 are
         overwritten. No memory barrier instruction is issued here. }
  464. asm
  465. .Lloop:
  466. ldrex ip, [r0] // exclusive load of the current value
  467. sub ip, #1
  468. strex r3, ip, [r0] // try to store; r3 receives the status
  469. cmp r3, #0
  470. bne .Lloop // non-zero status: store did not happen, retry
  471. mov r0, ip // result = new value
  472. end;
  473. function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
       { Increments Target by one using an ldrex/strex retry loop and returns
         the incremented value. r0 = address of Target; ip and r3 are
         overwritten. No memory barrier instruction is issued here. }
  474. asm
  475. .Lloop:
  476. ldrex ip, [r0] // exclusive load of the current value
  477. add ip, #1
  478. strex r3, ip, [r0] // try to store; r3 receives the status
  479. cmp r3, #0
  480. bne .Lloop // non-zero status: store did not happen, retry
  481. mov r0, ip // result = new value
  482. end;
  483. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
       { Stores Source into Target using an ldrex/strex retry loop and returns
         the value Target held before the store. r0 = address of Target,
         r1 = Source; ip and r3 are overwritten. No memory barrier instruction
         is issued here. }
  484. asm
  485. .Lloop:
  486. ldrex ip, [r0] // ip = previous value
  487. strex r3, r1, [r0] // try to store Source; r3 receives the status
  488. cmp r3, #0
  489. bne .Lloop // non-zero status: store did not happen, retry
  490. mov r0, ip // result = previous value
  491. end;
  492. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
       { Adds Source to Target using an ldrex/strex retry loop and returns the
         value Target held before the addition. r0 = address of Target,
         r1 = Source; r2, r3 and ip are overwritten. No memory barrier
         instruction is issued here. }
  493. asm
  494. .Lloop:
  495. ldrex ip, [r0] // ip = previous value
  496. add r2, ip, r1 // r2 = previous value + Source
  497. strex r3, r2, [r0] // try to store the sum; r3 receives the status
  498. cmp r3, #0
  499. bne .Lloop // non-zero status: store did not happen, retry
  500. mov r0, ip // result = previous value
  501. end;
  502. function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
       { Stores NewValue into Target only if Target currently equals Comperand,
         and returns the value that was read from Target in either case.
         r0 = address of Target, r1 = NewValue, r2 = Comperand; r3 and ip are
         overwritten. No memory barrier instruction is issued here. }
  503. asm
  504. .Lloop:
  505. ldrex ip, [r0] // ip = current value
  506. cmp ip, r2
  507. ite eq
  508. strexeq r3, r1, [r0] // equal: try to store NewValue; r3 receives the status
  509. movne r3, #0 // not equal: nothing to store, make the test below fall through
  510. cmp r3, #0
  511. bne .Lloop // non-zero status: store did not happen, retry
  512. mov r0, ip // result = value read from Target
  513. end;
  514. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  515. function declocked(var l: longint) : boolean; inline;
       { Decrements l through InterLockedDecrement and reports whether the
         decremented value is zero. }
       var
       remaining : longint;
  516. begin
  517. remaining:=InterLockedDecrement(l);
       declocked:=(remaining = 0);
  518. end;
  519. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  520. procedure inclocked(var l: longint); inline;
       { Increments l through InterLockedIncrement; the returned value is
         discarded. }
  521. begin
  522. InterLockedIncrement(l);
  523. end;
  524. procedure fpc_cpucodeinit;
       { Selects the Move implementation. When FPC_SYSTEM_FPC_MOVE is defined it
         sets cpu_has_edsp and in_edsp_test, executes one ldrd from an 8-byte
         aligned address at or just below the stack pointer, clears
         in_edsp_test and then points moveproc at Move_pld if cpu_has_edsp is
         still true, otherwise at Move_blended. Without FPC_SYSTEM_FPC_MOVE the
         procedure does nothing.
         NOTE(review): nothing visible here clears cpu_has_edsp; presumably an
         exception handler elsewhere does so when ldrd is not supported and
         in_edsp_test is set - confirm. The asm statement overwrites r0 and r1. }
  525. begin
  526. {$ifdef FPC_SYSTEM_FPC_MOVE}
  527. cpu_has_edsp:=true;
  528. in_edsp_test:=true;
  529. asm
  530. mov r1,sp
  531. bic r0,r1,#7 // r0 = sp with the low three bits cleared (8-byte aligned)
  532. ldrd r0,[r0] // probe instruction: doubleword load from that address
  533. end;
  534. in_edsp_test:=false;
  535. if cpu_has_edsp then
  536. moveproc:=@move_pld
  537. else
  538. moveproc:=@move_blended;
  539. {$endif FPC_SYSTEM_FPC_MOVE}
  540. end;