thumb2.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. ARM
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$asmmode divided}
  { Use the Move implementation from this file unless the target has
    already supplied its own. }
  {$ifndef FPC_SYSTEM_HAS_MOVE}
    {$define FPC_SYSTEM_FPC_MOVE}
  {$endif FPC_SYSTEM_HAS_MOVE}

  {$ifdef FPC_SYSTEM_FPC_MOVE}
  const
    { Result of the LDRD probe in fpc_cpucodeinit: that routine sets it to
      true before executing LDRD and afterwards uses it to choose between
      Move_pld and Move_blended.
      NOTE(review): presumably cleared by a fault handler outside this
      file when the probe traps - confirm. }
    cpu_has_edsp : boolean = false;
    { True only while the LDRD probe in fpc_cpucodeinit is running. }
    in_edsp_test : boolean = false;
  {$endif FPC_SYSTEM_FPC_MOVE}
  {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
  {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { Bring the floating point unit into its start-up state.

    FPA builds (FPUFPA/FPUFPA10/FPUFPA11):
      the FPA status register is read, bits 16..20 are cleared and bits
      16..18 are set again, i.e. FPU exceptions are enabled except for
      INEXACT and UNDERFLOW (and DENORMAL, per the original author's note).

    All other builds:
      bits 20..23 of the word at 0xE000ED88 are set.  On ARMv7-M this is
      the Coprocessor Access Control Register and the bits grant full
      access to CP10/CP11, which switches the FPU on.  The write is
      followed by DSB and ISB so that it has completed and the pipeline
      is refetched before any floating point instruction can execute.

    Modifies r0, r1. }
  Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    asm
  {$if defined(FPUFPA) or defined(FPUFPA10) or defined(FPUFPA11)}
      rfs     r0                        // r0 := FPA status register
      and     r0,r0,#0xffe0ffff         // clear bits 16..20
      orr     r0,r0,#0x00070000         // set bits 16..18
      wfs     r0
  {$else}
      movw    r0, #(0xed88)             // r0 := 0xE000ED88
      movt    r0, #(0xe000)
      ldr     r1, [r0]
      orr     r1, r1, #(0xF << 20)      // set bits 20..23
      str     r1, [r0]
      dsb                               // wait until the store has completed
      isb                               // refetch with the new access rights
  {$endif}
    end;
  end;
  {$endif}
  { CPU specific start-up hook; its only action is to initialise the FPU
    through SysInitFPU. }
  procedure fpc_cpuinit;
    begin
      SysInitFPU;
    end;
  46. {$ifdef wince}
  { Floating point control word routine imported from 'coredll'. }
  function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';

  {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  { Reset the FPU state: empties the set of raised soft-float exception
    flags. }
  Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      softfloat_exception_flags:=[];
    end;
  {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { WinCE FPU initialisation.

    Masks the underflow, inexact and denormal soft-float exceptions and
    then programs the control word through _controlfp.  According to the
    original author's note the chosen value/mask pair enables the FPU
    exceptions except INEXACT, UNDERFLOW and DENORMAL, and selects 64 bit
    precision, round-to-nearest and affine infinity. }
  Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      softfloat_exception_mask:=[float_flag_denormal,
                                 float_flag_inexact,
                                 float_flag_underflow];
      _controlfp($000C0003, $030F031F);
    end;
  61. {$endif wince}
  62. {****************************************************************************
  63. stack frame related stuff
  64. ****************************************************************************}
  {$IFNDEF INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_FRAME}
  { Returns the current contents of r11 (the register the helpers in this
    file treat as frame pointer). }
  function get_frame:pointer;assembler;nostackframe;
  asm
          mov     r0,r11                  // result := r11
  end;
  {$ENDIF not INTERNAL_BACKTRACE}
  72. {
  73. Stack frame on Thumb2:
  74. LR <- FP
  75. Old FP
  76. }
  {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { Returns the word stored at framebp (the saved LR in the frame layout
    used by this file), or nil when framebp is nil.
    The addr parameter is not used. }
  function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;
  asm
          movs    r0,r0                   // framebp = nil ?
          beq     .LNoFrame               // yes: result stays nil
          ldr     r0,[r0]                 // result := [framebp]
  .LNoFrame:
  end;
  {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { Returns the word stored four bytes below framebp (the previous frame
    pointer in the frame layout used by this file), or nil when framebp
    is nil.  The addr parameter is not used. }
  function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;
  asm
          movs    r0,r0                   // framebp = nil ?
          beq     .LNoFrame               // yes: result stays nil
          ldr     r0,[r0,#-4]             // result := [framebp-4]
  .LNoFrame:
  end;
  {$define FPC_SYSTEM_HAS_SPTR}
  { Returns the stack pointer value seen inside this routine. }
  Function Sptr : pointer;assembler;
  asm
          mov     r0,sp                   // result := sp
  end;
  {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  {$define FPC_SYSTEM_HAS_FILLCHAR}
  { Fill count bytes starting at x with value.

    In:   r0 = address of x, r1 = count, r2 = value.
    A count of zero or less leaves memory untouched.

    Strategy: for eight or more bytes the destination is first brought to
    a 4 byte boundary with single byte stores, then filled eight bytes at
    a time (loop unrolled four times); whatever remains (at most seven
    bytes) is written by a plain byte loop.

    The tail used to be dispatched with "add pc,r1" into a run of strb
    instructions.  That only works while the assembler encodes the
    padding "mov r0,r0" in 16 bits and every post-indexed strb in 32 bits,
    so it has been replaced by a loop that does not depend on instruction
    encodings.

    Modifies r1, r2, r3, ip and the flags. }
  Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
  asm
          // negative count: nothing to do
          cmp     r1,#0
          it      lt
          movlt   pc,lr
          mov     r3,r0                   // r3 = write cursor
          cmp     r1,#8                   // at least 8 bytes to do?
          blt     .LFillTail
          // replicate the fill byte into all four bytes of r2
          orr     r2,r2,r2,lsl #8
          orr     r2,r2,r2,lsl #16
  .LFillAlign:
          tst     r3,#3                   // aligned yet?
          itt     ne
          strneb  r2,[r3],#1
          subne   r1,r1,#1
          bne     .LFillAlign             // flags still come from the tst
          mov     ip,r2                   // second register for the 8 byte stores
  .LFillBlocks:
          cmp     r1,#8                   // 8 bytes still to do?
          blt     .LFillTail
          stmia   r3!,{r2,ip}
          sub     r1,r1,#8
          cmp     r1,#8                   // 8 bytes still to do?
          blt     .LFillTail
          stmia   r3!,{r2,ip}
          sub     r1,r1,#8
          cmp     r1,#8                   // 8 bytes still to do?
          blt     .LFillTail
          stmia   r3!,{r2,ip}
          sub     r1,r1,#8
          cmp     r1,#8                   // 8 bytes still to do?
          itt     ge
          stmgeia r3!,{r2,ip}
          subge   r1,r1,#8
          bge     .LFillBlocks
  .LFillTail:
          movs    r1,r1                   // anything left?
          it      eq
          moveq   pc,lr
  .LFillTailLoop:
          // here 1 <= r1 <= 7
          strb    r2,[r3],#1
          subs    r1,r1,#1
          bne     .LFillTailLoop
          mov     pc,lr
  end;
  {$endif FPC_SYSTEM_HAS_FILLCHAR}
  154. {$ifndef FPC_SYSTEM_HAS_MOVE}
  155. {$define FPC_SYSTEM_HAS_MOVE}
  { Move implementation that issues PLD prefetch hints.

    In:   r0 = address of source, r1 = address of dest, r2 = count.
    A count of zero or less copies nothing.

    When dest lies above source and inside the source range, the bytes
    are copied one at a time from the last to the first.  In every other
    case the copy runs forwards: word by word if at least 8 bytes are
    requested and both addresses are 4 byte aligned, byte by byte
    otherwise.

    Modifies r0..r3 and the flags. }
  procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
  asm
          pld     [r0]
          pld     [r1]
          // count <= 0 ?
          cmp     r2,#0
          it      le
          movle   pc,lr
          // dest <= source: copying forwards is always safe
          cmp     r1,r0
          bls     .LForward
          // source+count <= dest: the two ranges do not touch
          add     r3,r0,r2
          cmp     r3,r1
          bls     .LForward
          // overlapping with dest above source: copy backwards
  .LBackward:
          subs    r2,r2,#1
          ldrb    r3,[r0,r2]
          strb    r3,[r1,r2]
          bne     .LBackward              // flags from the subs
          mov     pc,lr
  .LForward:
          // fewer than 8 bytes: not worth anything but the byte loop
          cmp     r2,#8
          blt     .LBytes
          // word copy needs both pointers 4 byte aligned
          orr     r3,r0,r1
          tst     r3,#3
          bne     .LBytes
          // NOTE: a pre-alignment step was drafted here once but is
          // disabled, so unaligned buffers always use the byte loop.
          pld     [r0,#32]
          pld     [r1,#32]
  .LWords:
          sub     r2,r2,#4
          ldr     r3,[r0],#4
          // prefetch ahead of both cursors
          pld     [r0,#64]
          pld     [r1,#64]
          cmp     r2,#4
          str     r3,[r1],#4
          bcs     .LWords                 // another full word left
          cmp     r2,#0
          it      eq
          moveq   pc,lr
  .LBytes:
          subs    r2,r2,#1
          ldrb    r3,[r0],#1
          strb    r3,[r1],#1
          bne     .LBytes
          mov     pc,lr
  end;
  { Move implementation without prefetch hints.

    In:   r0 = address of source, r1 = address of dest, r2 = count.
    A count of zero or less copies nothing.

    When dest lies above source and inside the source range, the bytes
    are copied one at a time from the last to the first.  In every other
    case the copy runs forwards: word by word if at least 8 bytes are
    requested and both addresses are 4 byte aligned, byte by byte
    otherwise.

    Modifies r0..r3 and the flags. }
  procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
  asm
          // count <= 0 ?
          cmp     r2,#0
          it      le
          movle   pc,lr
          // dest <= source: copying forwards is always safe
          cmp     r1,r0
          bls     .LForward
          // source+count <= dest: the two ranges do not touch
          add     r3,r0,r2
          cmp     r3,r1
          bls     .LForward
          // overlapping with dest above source: copy backwards
  .LBackward:
          subs    r2,r2,#1
          ldrb    r3,[r0,r2]
          strb    r3,[r1,r2]
          bne     .LBackward              // flags from the subs
          mov     pc,lr
  .LForward:
          // fewer than 8 bytes: not worth anything but the byte loop
          cmp     r2,#8
          blt     .LBytes
          // word copy needs both pointers 4 byte aligned
          orr     r3,r0,r1
          tst     r3,#3
          bne     .LBytes
          // NOTE: a pre-alignment step was drafted here once but is
          // disabled, so unaligned buffers always use the byte loop.
  .LWords:
          sub     r2,r2,#4
          ldr     r3,[r0],#4
          cmp     r2,#4
          str     r3,[r1],#4
          bcs     .LWords                 // another full word left
          cmp     r2,#0
          it      eq
          moveq   pc,lr
  .LBytes:
          subs    r2,r2,#1
          ldrb    r3,[r0],#1
          strb    r3,[r1],#1
          bne     .LBytes
          mov     pc,lr
  end;
  const
    { Address of the Move implementation currently in use.  Starts out as
      Move_blended; fpc_cpucodeinit may repoint it. }
    moveproc : pointer = @move_blended;

  { Public Move entry point (exported as 'FPC_MOVE').

    Jumps straight to the routine whose address is stored in moveproc,
    leaving r0..r2 and lr untouched so that the target sees the original
    arguments and returns directly to the caller.  Modifies ip. }
  procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
  asm
          ldr     ip,.LMoveProcAddr       // ip := address of moveproc
          ldr     pc,[ip]                 // jump to the routine it points to
  .LMoveProcAddr:
          .long   moveproc
  end;
  296. {$endif FPC_SYSTEM_HAS_MOVE}
  297. {****************************************************************************
  298. String
  299. ****************************************************************************}
  300. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  301. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  { Copy the short string sstr into res, truncating to the capacity of res.

    Register use on entry:
      r0: address of res
      r1: maximum length of res
      r2: address of sstr

    The stored length is min(length(sstr), r1).  For fewer than 6
    characters a byte loop is used.  Longer strings first copy up to
    three bytes so that the destination is 4 byte aligned; if the source
    is then aligned as well the bulk is moved one word at a time, and the
    remaining bytes are copied singly.

    Modifies r0..r3, r12 and the flags. }
  procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
  asm
          ldrb    r12,[r2],#1             // r12 := length(sstr)
          cmp     r12,r1
          it      gt
          movgt   r12,r1                  // clamp to the capacity of res
          strb    r12,[r0],#1             // store the new length byte
          cmp     r12,#6                  // 6 seems to be the break even point
          blt     .LTail
          // Align the destination on 32 bits.  Unrolled on purpose: the
          // source is usually aligned, so normally all three bytes are
          // needed and a loop would not be able to leave early.
          rsb     r3,r0,#0
          ands    r3,r3,#3                // r3 := bytes needed to align dest
          sub     r12,r12,r3              // they no longer count as remaining
          itttt   ne
          ldrneb  r1,[r2],#1
          strneb  r1,[r0],#1
          subnes  r3,r3,#1
          ldrneb  r1,[r2],#1
          itttt   ne
          strneb  r1,[r0],#1
          subnes  r3,r3,#1
          ldrneb  r1,[r2],#1
          strneb  r1,[r0],#1
          it      ne
          subnes  r3,r3,#1                // result is not used afterwards
  .LAligned:
          // Destination is aligned now; if the source is not, fall back
          // to the byte loop.
          tst     r2,#3
          bne     .LTail
          // Main copy, 32 bits at a time.
          movs    r3,r12,lsr #2           // r3 := number of whole words
          and     r12,r12,#3              // r12 := bytes left for the tail
          beq     .LTail
  .LWordLoop:
          // Not unrolled: that would help long strings (>20 chars) a
          // little but hurts the common short ones.
          ittt    ne
          ldrne   r1,[r2],#4
          strne   r1,[r0],#4
          subnes  r3,r3,#1
          bne     .LWordLoop
  .LTail:
          // Remaining bytes.
          cmp     r12,#0
          beq     .LDone
  .LByteLoop:
          ldrb    r1,[r2],#1
          strb    r1,[r0],#1
          subs    r12,r12,#1
          bne     .LByteLoop
  .LDone:
  end;
  { Copy the short string at sstr to dstr, truncating to len characters.

    Register use on entry:
      r0: len  (maximum length of the destination)
      r1: sstr (address of the source string)
      r2: dstr (address of the destination string)

    The stored length is min(length of source, len).  For fewer than 6
    characters a byte loop is used.  Longer strings first copy up to
    three bytes so that the destination is 4 byte aligned; if the source
    is then aligned as well the bulk is moved one word at a time, and the
    remaining bytes are copied singly.

    Modifies r0..r3, r12 and the flags. }
  procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
  asm
          ldrb    r12,[r1],#1             // r12 := length of the source
          cmp     r12,r0
          it      gt
          movgt   r12,r0                  // clamp to len
          strb    r12,[r2],#1             // store the new length byte
          cmp     r12,#6                  // 6 seems to be the break even point
          blt     .LTail
          // Align the destination on 32 bits.  Unrolled on purpose: the
          // source is usually aligned, so normally all three bytes are
          // needed and a loop would not be able to leave early.
          rsb     r3,r2,#0
          ands    r3,r3,#3                // r3 := bytes needed to align dest
          sub     r12,r12,r3              // they no longer count as remaining
          itttt   ne
          ldrneb  r0,[r1],#1
          strneb  r0,[r2],#1
          subnes  r3,r3,#1
          ldrneb  r0,[r1],#1
          itttt   ne
          strneb  r0,[r2],#1
          subnes  r3,r3,#1
          ldrneb  r0,[r1],#1
          strneb  r0,[r2],#1
          it      ne
          subnes  r3,r3,#1                // result is not used afterwards
  .LAligned:
          // Destination is aligned now; if the source is not, fall back
          // to the byte loop.
          tst     r1,#3
          bne     .LTail
          // Main copy, 32 bits at a time.
          movs    r3,r12,lsr #2           // r3 := number of whole words
          and     r12,r12,#3              // r12 := bytes left for the tail
          beq     .LTail
  .LWordLoop:
          // Not unrolled: that would help long strings (>20 chars) a
          // little but hurts the common short ones.
          ittt    ne
          ldrne   r0,[r1],#4
          strne   r0,[r2],#4
          subnes  r3,r3,#1
          bne     .LWordLoop
  .LTail:
          // Remaining bytes.
          cmp     r12,#0
          beq     .LDone
  .LByteLoop:
          ldrb    r0,[r1],#1
          strb    r0,[r2],#1
          subs    r12,r12,#1
          bne     .LByteLoop
  .LDone:
  end;
  422. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  { Length of the zero terminated string p, not counting the terminator;
    0 when p is nil.

    While the cursor is 4 byte aligned the string is scanned one word at
    a time, using (w - $01010101) and (not w) and $80808080 to detect a
    zero byte; the exact position is then found byte by byte.

    Modifies r1, r2, r3, r12 and the flags. }
  function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
  asm
          cmp     r0,#0
          mov     r1,r0                   // r1 = scan cursor, r0 keeps the start
          beq     .LExit                  // nil: r0 is already the result 0
  .LScan:
          // Is the cursor word aligned?
          tst     r1,#3
          bne     .LByteStep              // no: look at a single byte
          ldr     r3,.LOnes               // r3 := $01010101
  .LWordStep:
          // Aligned: examine four bytes at once.
          ldr     r12,[r1],#4
          sub     r2,r12,r3
          mvn     r12,r12
          and     r2,r2,r12
          ands    r2,r2,r3,lsl #7         // r3 lsl 7 = $80808080
          beq     .LWordStep              // no zero byte in this word
          sub     r1,r1,#4                // step back, locate it bytewise
  .LByteStep:
          ldrb    r12,[r1],#1
          cmp     r12,#1                  // carry clear only when the byte is 0
          bcs     .LScan
          // r1 is one past the terminator; carry is known to be clear, so
          // sbc subtracts the extra 1 for us.
          sbc     r0,r1,r0
  .LExit:
          mov     pc,lr
  .LOnes:
          .long   0x01010101
  end;
  {$endif}
  { Atomically subtract 1 from Target and return the new value.
    Implemented as a load-exclusive/store-exclusive retry loop.
    r0 = address of Target; modifies r3, ip and the flags. }
  function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
  asm
  .LRetry:
          ldrex   ip, [r0]
          sub     ip, ip, #1
          strex   r3, ip, [r0]            // r3 = 0 when the store succeeded
          cmp     r3, #0
          bne     .LRetry
          mov     r0, ip                  // result := decremented value
  end;
  { Atomically add 1 to Target and return the new value.
    Implemented as a load-exclusive/store-exclusive retry loop.
    r0 = address of Target; modifies r3, ip and the flags. }
  function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
  asm
  .LRetry:
          ldrex   ip, [r0]
          add     ip, ip, #1
          strex   r3, ip, [r0]            // r3 = 0 when the store succeeded
          cmp     r3, #0
          bne     .LRetry
          mov     r0, ip                  // result := incremented value
  end;
  { Atomically store Source in Target and return the value Target held
    before.  Implemented as a load-exclusive/store-exclusive retry loop.
    r0 = address of Target, r1 = Source; modifies r3, ip and the flags. }
  function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
  .LRetry:
          ldrex   ip, [r0]                // ip := old value
          strex   r3, r1, [r0]            // r3 = 0 when the store succeeded
          cmp     r3, #0
          bne     .LRetry
          mov     r0, ip                  // result := old value
  end;
  { Atomically add Source to Target and return the value Target held
    before the addition.  Implemented as a load-exclusive/store-exclusive
    retry loop.
    r0 = address of Target, r1 = Source; modifies r2, r3, ip and the
    flags. }
  function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
  .LRetry:
          ldrex   ip, [r0]                // ip := old value
          add     r2, ip, r1              // r2 := old value + Source
          strex   r3, r2, [r0]            // r3 = 0 when the store succeeded
          cmp     r3, #0
          bne     .LRetry
          mov     r0, ip                  // result := old value
  end;
  { Atomically replace Target by NewValue if Target equals Comperand.
    Always returns the value Target held before the call.
    r0 = address of Target, r1 = NewValue, r2 = Comperand;
    modifies r3, ip and the flags. }
  function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
  asm
  .LRetry:
          ldrex   ip, [r0]                // ip := current value
          cmp     ip, r2
          ite     eq
          strexeq r3, r1, [r0]            // equal: try to store, r3 = 0 on success
          movne   r3, #0                  // different: no store, leave the loop
          cmp     r3, #0
          bne     .LRetry
          mov     r0, ip                  // result := value read
  end;
  {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  { Atomically decrement l; returns true when the new value is zero. }
  function declocked(var l: longint) : boolean; inline;
    begin
      declocked:=(InterLockedDecrement(l)=0);
    end;
  {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  { Atomically increment l; the new value is discarded. }
  procedure inclocked(var l: longint); inline;
    begin
      InterLockedIncrement(l);
    end;
  { Select the Move implementation at start-up (only when this file
    provides Move).

    cpu_has_edsp is optimistically set and an LDRD from the 8 byte aligned
    stack address is executed while in_edsp_test is true.  If cpu_has_edsp
    is still true afterwards, Move is routed to Move_pld, otherwise to
    Move_blended.
    NOTE(review): nothing in this file clears cpu_has_edsp; presumably an
    illegal-instruction handler elsewhere does so while in_edsp_test is
    set - confirm. }
  procedure fpc_cpucodeinit;
    begin
  {$ifdef FPC_SYSTEM_FPC_MOVE}
      cpu_has_edsp:=true;
      in_edsp_test:=true;
      asm
          mov     r1,sp
          bic     r0,r1,#7                // r0 := sp rounded down to 8 bytes
          ldrd    r0,r1,[r0]              // the probe itself
      end;
      in_edsp_test:=false;
      if not cpu_has_edsp then
        moveproc:=@move_blended
      else
        moveproc:=@move_pld;
  {$endif FPC_SYSTEM_FPC_MOVE}
    end;