thumb2.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. ARM
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$asmmode gas}
  { When no Move implementation has been announced yet, this file provides
    one itself and flags that fact with FPC_SYSTEM_FPC_MOVE. }
  {$ifndef FPC_SYSTEM_HAS_MOVE}
    {$define FPC_SYSTEM_FPC_MOVE}
  {$endif FPC_SYSTEM_HAS_MOVE}

  {$ifdef FPC_SYSTEM_FPC_MOVE}
  const
    { Both flags are driven by fpc_cpucodeinit: it raises them, executes an
      ldrd probe, lowers in_edsp_test again and then picks the Move variant
      according to cpu_has_edsp.
      NOTE(review): presumably a fault handler outside this file clears
      cpu_has_edsp when the probe traps while in_edsp_test is set - confirm. }
    cpu_has_edsp : boolean = false;
    in_edsp_test : boolean = false;
  {$endif FPC_SYSTEM_FPC_MOVE}
  {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
  {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { Rewrites the FPU status register (read with rfs, written back with wfs):
    bits 16..20 are cleared first, then bits 16..18 are set.
    Stated intent: enable FPU exceptions, but keep INEXACT, UNDERFLOW and
    DENORMAL disabled. }
  Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    asm
      rfs r0                        // r0 := current status word
      and r0,r0,#0xffe0ffff         // drop bits 16..20
      orr r0,r0,#0x00070000         // raise bits 16..18
      wfs r0                        // write the new status word back
    end;
  end;
  {$endif}
  { CPU specific start-up hook; in this file it only initialises the FPU. }
  procedure fpc_cpuinit;
  begin
    SysInitFPU;
  end;
  {$ifdef wince}
  { C runtime entry point imported from coredll, used to program the
    floating point control word. }
  function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';

  {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  { Clears every pending soft-float exception flag. }
  Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    softfloat_exception_flags:=0;
  end;

  {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { Masks underflow, inexact and denormal in the soft-float layer and then
    programs the control word through _controlfp.
    Stated intent of the control word: FPU exceptions enabled except INEXACT,
    UNDERFLOW and DENORMAL; 64 bit precision, round to nearest, affine
    infinity. }
  Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
    _controlfp($000C0003, $030F031F);
  end;
  {$endif wince}
  54. {****************************************************************************
  55. stack frame related stuff
  56. ****************************************************************************}
  {$IFNDEF INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_FRAME}
  { Returns the current value of r11, the register this file treats as the
    frame pointer. No stack frame is set up, so the value is the caller's. }
  function get_frame:pointer;assembler;nostackframe;
  asm
    mov r0,r11                      // result := r11
  end;
  {$ENDIF not INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { Returns the word stored 4 bytes below framebp, or nil when framebp is nil.
    In/Out: r0 = framebp on entry, result on exit. }
  function get_caller_addr(framebp:pointer):pointer;assembler;
  asm
    cmp r0,#0                       // nil frame: hand nil straight back
    it ne
    ldrne r0,[r0,#-4]               // otherwise fetch the slot at framebp-4
  end;
  {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { Returns the word stored 12 bytes below framebp, or nil when framebp is nil.
    In/Out: r0 = framebp on entry, result on exit. }
  function get_caller_frame(framebp:pointer):pointer;assembler;
  asm
    cmp r0,#0                       // nil frame: hand nil straight back
    it ne
    ldrne r0,[r0,#-12]              // otherwise fetch the slot at framebp-12
  end;
  {$define FPC_SYSTEM_HAS_SPTR}
  { Returns the stack pointer as seen inside this routine. }
  Function Sptr : pointer;assembler;
  asm
    mov r0,sp                       // result := sp
  end;
  85. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  86. {$define FPC_SYSTEM_HAS_FILLCHAR}
  { Stores value into count consecutive bytes starting at x.
    In:      r0 = address of x, r1 = count, r2 = value
    Scratch: r1, r2, r3, ip and the condition flags
    A negative or zero count returns without writing anything. Fills of
    8 bytes or more first byte-align the destination to a word boundary and
    then store two words per stmia; whatever is left (0..7 bytes) is written
    by jumping into a run of seven single-byte stores. }
  Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
  asm
    // negative count: nothing to do
    cmp r1,#0
    it lt
    movlt pc,lr

    mov r3,r0                       // r3 = write cursor
    cmp r1,#8                       // fewer than 8 bytes: byte stores only
    blt .Lfill_tail

    // copy the fill byte into all four byte lanes of r2
    orr r2,r2,r2,lsl #8
    orr r2,r2,r2,lsl #16

  .Lalign_dest:
    tst r3,#3                       // cursor on a word boundary yet?
    itt ne
    strneb r2,[r3],#1               // no: emit one byte ...
    subne r1,r1,#1                  // ... and account for it
    bne .Lalign_dest                // flags still come from the tst above

    mov ip,r2                       // second pattern register for stmia

  .Lfill_words:
    // four unrolled rounds of 8 bytes each; r1 = bytes still to write
    cmp r1,#8
    blt .Lfill_tail
    stmia r3!,{r2,ip}
    sub r1,r1,#8

    cmp r1,#8
    blt .Lfill_tail
    stmia r3!,{r2,ip}
    sub r1,r1,#8

    cmp r1,#8
    blt .Lfill_tail
    stmia r3!,{r2,ip}
    sub r1,r1,#8

    cmp r1,#8
    itt ge
    stmgeia r3!,{r2,ip}
    subge r1,r1,#8
    bge .Lfill_words                // flags still come from the cmp above

  .Lfill_tail:
    movs r1,r1                      // r1 = 0..7 here; zero means finished
    it eq
    moveq pc,lr

    // Skip (7 - r1) of the seven byte stores below so exactly r1 execute.
    // NOTE(review): this relies on every strb occupying 4 bytes and on pc
    // reading as the address of the first strb at the add - confirm for the
    // encodings the assembler selects.
    rsb r1,r1,#7
    mov r1,r1,lsl #2
    add pc,r1
    mov r0,r0                       // filler between the add and the stores
    strb r2,[r3],#1
    strb r2,[r3],#1
    strb r2,[r3],#1
    strb r2,[r3],#1
    strb r2,[r3],#1
    strb r2,[r3],#1
    strb r2,[r3],#1
    mov pc,lr
  end;
  140. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  141. {$ifndef FPC_SYSTEM_HAS_MOVE}
  142. {$define FPC_SYSTEM_HAS_MOVE}
  { Copies count bytes from source to dest, issuing pld prefetch hints.
    In:      r0 = address of source, r1 = address of dest, r2 = count
    Scratch: r0..r3 and the condition flags
    count <= 0 returns immediately. When dest lies above source and inside
    [source, source+count) the copy runs backwards one byte at a time;
    otherwise it runs forwards, word-wise if at least 8 bytes are requested
    and both pointers are word aligned, byte-wise in every other case. }
  procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
  asm
    pld [r0]
    pld [r1]

    // nothing to copy?
    cmp r2,#0
    it le
    movle pc,lr

    // a forward copy is safe if dest <= source or source+count <= dest
    cmp r1,r0
    bls .Lforward
    add r3,r0,r2
    cmp r3,r1
    bls .Lforward

    // regions overlap with dest above source: copy from the last byte down
  .Lbackward:
    subs r2,r2,#1                   // r2 doubles as index of the next byte
    ldrb r3,[r0,r2]
    strb r3,[r1,r2]
    bne .Lbackward
    mov pc,lr

  .Lforward:
    // fewer than 8 bytes: not worth anything smarter than a byte loop
    cmp r2,#8
    blt .Lbytes
    // word copies need both pointers word aligned
    orr r3,r0,r1
    tst r3,#3
    bne .Lbytes
    // (a prologue that would align mismatched pointers was sketched at this
    //  point in the past but is disabled)
    pld [r0,#32]
    pld [r1,#32]

  .Lwords:
    sub r2,r2,#4
    ldr r3,[r0],#4
    // prefetch further ahead while the load is in flight
    pld [r0,#64]
    pld [r1,#64]
    cmp r2,#4                       // at least one more full word left?
    str r3,[r1],#4
    bcs .Lwords

    cmp r2,#0                       // no stragglers: finished
    it eq
    moveq pc,lr

  .Lbytes:
    subs r2,r2,#1
    ldrb r3,[r0],#1
    strb r3,[r1],#1
    bne .Lbytes
    mov pc,lr
  end;
  { Copies count bytes from source to dest without any prefetch hints.
    In:      r0 = address of source, r1 = address of dest, r2 = count
    Scratch: r0..r3 and the condition flags
    count <= 0 returns immediately. When dest lies above source and inside
    [source, source+count) the copy runs backwards one byte at a time;
    otherwise it runs forwards, word-wise if at least 8 bytes are requested
    and both pointers are word aligned, byte-wise in every other case. }
  procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
  asm
    // nothing to copy?
    cmp r2,#0
    it le
    movle pc,lr

    // a forward copy is safe if dest <= source or source+count <= dest
    cmp r1,r0
    bls .Lforward
    add r3,r0,r2
    cmp r3,r1
    bls .Lforward

    // regions overlap with dest above source: copy from the last byte down
  .Lbackward:
    subs r2,r2,#1                   // r2 doubles as index of the next byte
    ldrb r3,[r0,r2]
    strb r3,[r1,r2]
    bne .Lbackward
    mov pc,lr

  .Lforward:
    // fewer than 8 bytes: not worth anything smarter than a byte loop
    cmp r2,#8
    blt .Lbytes
    // word copies need both pointers word aligned
    orr r3,r0,r1
    tst r3,#3
    bne .Lbytes
    // (a prologue that would align mismatched pointers was sketched at this
    //  point in the past but is disabled)

  .Lwords:
    sub r2,r2,#4
    ldr r3,[r0],#4
    cmp r2,#4                       // at least one more full word left?
    str r3,[r1],#4
    bcs .Lwords

    cmp r2,#0                       // no stragglers: finished
    it eq
    moveq pc,lr

  .Lbytes:
    subs r2,r2,#1
    ldrb r3,[r0],#1
    strb r3,[r1],#1
    bne .Lbytes
    mov pc,lr
  end;
  const
    { Copy routine that Move forwards to. It starts out as the variant
      without prefetch; fpc_cpucodeinit may repoint it. }
    moveproc : pointer = @move_blended;

  { Public Move entry point (alias FPC_MOVE). It leaves r0..r2 and lr
    untouched and jumps to the routine currently stored in moveproc, so that
    routine returns directly to Move's caller. Scratch: ip. }
  procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
  asm
    ldr ip,.Lmoveproc_addr          // ip := address of the moveproc variable
    ldr pc,[ip]                     // continue at the routine it points to
  .Lmoveproc_addr:
    .long moveproc
  end;
  283. {$endif FPC_SYSTEM_HAS_MOVE}
  284. {****************************************************************************
  285. String
  286. ****************************************************************************}
  287. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  288. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
  {$else}
  procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
  {$endif}
  { Copies sstr into the result, truncating it to at most len characters.
    In:      r0 = destination string (__RESULT), r1 = len, r2 = sstr
    Scratch: r0..r3, r12 and the condition flags
    r12 always holds the number of characters that still have to be copied. }
  asm
    ldrb r12,[r2],#1                // r12 := length byte of sstr
    cmp r12,r1
    it gt
    movgt r12,r1                    // clamp it to len
    strb r12,[r0],#1                // write the length byte of the result
    cmp r12,#6                      (* 6 seems to be the break even point. *)
    blt .Ltail

    (* Bring the destination onto a 32 bit boundary. This prologue is unrolled
       on purpose: sstr usually starts word aligned, so three bytes are the
       common case and a loop would rarely leave early. *)
    rsb r3,r0,#0
    ands r3,r3,#3                   // r3 := bytes missing to the boundary (0..3)
    sub r12,r12,r3
    itttt ne
    ldrneb r1,[r2],#1
    strneb r1,[r0],#1
    subnes r3,r3,#1
    ldrneb r1,[r2],#1
    itttt ne
    strneb r1,[r0],#1
    subnes r3,r3,#1
    ldrneb r1,[r2],#1
    strneb r1,[r0],#1
    it ne
    subnes r3,r3,#1

    (* The destination is aligned now. If the source is not, everything
       that is left goes through the byte loop. *)
    tst r2,#3
    bne .Ltail

    (* Main copy, 32 bits at a time. *)
    movs r3,r12,lsr #2              // r3 := number of whole words
    and r12,r12,#3                  // r12 := bytes left over afterwards
    beq .Ltail                      // flags still come from the movs
  .Lwords:
    (* Unrolling would help strings longer than about 20 characters a bit,
       but costs time for short ones, which are the common case. *)
    ittt ne
    ldrne r1,[r2],#4
    strne r1,[r0],#4
    subnes r3,r3,#1
    bne .Lwords

  .Ltail:
    (* Remaining bytes, one at a time. *)
    cmp r12,#0
    beq .Ldone
  .Lbytes:
    ldrb r1,[r2],#1
    strb r1,[r0],#1
    subs r12,r12,#1
    bne .Lbytes
  .Ldone:
  end;
  { Copies the short string at sstr to dstr, truncating it to at most len
    characters.
    In:      r0 = len, r1 = sstr, r2 = dstr
    Scratch: r0..r3, r12 and the condition flags
    r12 always holds the number of characters that still have to be copied. }
  procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
  asm
    ldrb r12,[r1],#1                // r12 := length byte of the source
    cmp r12,r0
    it gt
    movgt r12,r0                    // clamp it to len
    strb r12,[r2],#1                // write the length byte of the destination
    cmp r12,#6                      (* 6 seems to be the break even point. *)
    blt .Ltail

    (* Bring the destination onto a 32 bit boundary. This prologue is unrolled
       on purpose: the source usually starts word aligned, so three bytes are
       the common case and a loop would rarely leave early. *)
    rsb r3,r2,#0
    ands r3,r3,#3                   // r3 := bytes missing to the boundary (0..3)
    sub r12,r12,r3
    itttt ne
    ldrneb r0,[r1],#1
    strneb r0,[r2],#1
    subnes r3,r3,#1
    ldrneb r0,[r1],#1
    itttt ne
    strneb r0,[r2],#1
    subnes r3,r3,#1
    ldrneb r0,[r1],#1
    strneb r0,[r2],#1
    it ne
    subnes r3,r3,#1

    (* The destination is aligned now. If the source is not, everything
       that is left goes through the byte loop. *)
    tst r1,#3
    bne .Ltail

    (* Main copy, 32 bits at a time. *)
    movs r3,r12,lsr #2              // r3 := number of whole words
    and r12,r12,#3                  // r12 := bytes left over afterwards
    beq .Ltail                      // flags still come from the movs
  .Lwords:
    (* Unrolling would help strings longer than about 20 characters a bit,
       but costs time for short ones, which are the common case. *)
    ittt ne
    ldrne r0,[r1],#4
    strne r0,[r2],#4
    subnes r3,r3,#1
    bne .Lwords

  .Ltail:
    (* Remaining bytes, one at a time. *)
    cmp r12,#0
    beq .Ldone
  .Lbytes:
    ldrb r0,[r1],#1
    strb r0,[r2],#1
    subs r12,r12,#1
    bne .Lbytes
  .Ldone:
  end;
  413. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  414. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  415. {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  { Returns the number of characters in front of the terminating #0 of p,
    or 0 when p is nil.
    In:      r0 = p
    Out:     r0 = length
    Scratch: r1 (read cursor), r2, r3, r12 and the condition flags
    Word aligned stretches are examined four bytes per load; the word that
    contains a zero byte, and any unaligned start, are examined byte-wise. }
  function fpc_Pchar_length(p:Pchar):longint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
  asm
    cmp r0,#0
    mov r1,r0                       // r1 := read cursor (mov keeps the flags)
    beq .Ldone                      // nil: r0 is already the answer, 0

  .Lrescan:
    tst r1,#3                       (* cursor word aligned? *)
    bne .Lscan_bytes                (* no: go byte by byte *)
    ldr r3,.Lbyte_ones              // r3 := $01010101

  .Lscan_words:
    ldr r12,[r1],#4                 (* aligned: fetch 4 bytes at once *)
    (* Zero byte test: (w - $01010101) and (not w) and $80808080 *)
    sub r2,r12,r3
    mvn r12,r12
    and r2,r2,r12
    ands r2,r2,r3,lsl #7            (* r3 lsl 7 = $80808080 *)
    beq .Lscan_words                (* no zero byte in this word: next one *)
    sub r1,r1,#4                    // step back and locate the zero byte-wise

  .Lscan_bytes:
    ldrb r12,[r1],#1
    cmp r12,#1                      (* carry clear exactly when the byte is 0 *)
    bcs .Lrescan
    (* The cursor already moved past the terminator, so one too many was
       counted. Carry is known to be clear here, so sbc subtracts that
       extra 1 for free. *)
    sbc r0,r1,r0

  .Ldone:
    mov pc,lr
  .Lbyte_ones:
    .long 0x01010101
  end;
  448. {$endif}
  449. var
  450. fpc_system_lock: longint; export name 'fpc_system_lock';
  { Decrements Target by one while holding fpc_system_lock and returns the
    new value.
    In:      r0 = address of Target
    Out:     r0 = Target after the decrement
    Scratch: r1, r2, r3 and the condition flags }
  function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
  asm
    // take the lock: spin until it reads 0 and our exclusive store of 1 lands
    ldr r3, .Llock_addr
    mov r1, #1
  .Lacquire:
    ldrex r2, [r3]
    cmp r2, #0
    itt eq
    strexeq r2, r1, [r3]            // r2 := 0 when the store succeeded
    cmpeq r2, #0
    bne .Lacquire

    // guarded section: Target := Target - 1
    ldr r1, [r0]
    sub r1, r1, #1
    str r1, [r0]
    mov r0, r1

    // release the lock; r2 is known to be 0 once the loop has been left
    str r2, [r3]
    mov pc, lr
  .Llock_addr:
    .long fpc_system_lock
  end;
  { Increments Target by one while holding fpc_system_lock and returns the
    new value.
    In:      r0 = address of Target
    Out:     r0 = Target after the increment
    Scratch: r1, r2, r3 and the condition flags }
  function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
  asm
    // take the lock: spin until it reads 0 and our exclusive store of 1 lands
    ldr r3, .Llock_addr
    mov r1, #1
  .Lacquire:
    ldrex r2, [r3]
    cmp r2, #0
    itt eq
    strexeq r2, r1, [r3]            // r2 := 0 when the store succeeded
    cmpeq r2, #0
    bne .Lacquire

    // guarded section: Target := Target + 1
    ldr r1, [r0]
    add r1, r1, #1
    str r1, [r0]
    mov r0, r1

    // release the lock; r2 is known to be 0 once the loop has been left
    str r2, [r3]
    mov pc, lr
  .Llock_addr:
    .long fpc_system_lock
  end;
  { Stores Source into Target while holding fpc_system_lock and returns the
    value Target held before.
    In:      r0 = address of Target, r1 = Source
    Out:     r0 = previous value of Target
    Scratch: r2, r3, r12 and the condition flags }
  function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
    // take the lock: spin until it reads 0 and our exclusive store of 1 lands.
    // The marker lives in r12 because r1 carries Source and r2 is needed as
    // scratch for ldrex/strex.
    ldr r3, .Lfpc_system_lock
    mov r12, #1
  .Lloop:
    ldrex r2, [r3]
    cmp r2, #0
    itt eq
    strexeq r2, r12, [r3]           // r2 := 0 when the store succeeded
    cmpeq r2, #0
    bne .Lloop

    // do the job
    ldr r2, [r0]
    str r1, [r0]
    mov r0, r2

    // unlock and return
    mov r2, #0
    str r2, [r3]
    mov pc, lr
  .Lfpc_system_lock:
    .long fpc_system_lock
  end;
  { Adds Source to Target while holding fpc_system_lock and returns the value
    Target held before the addition.
    In:      r0 = address of Target, r1 = Source
    Out:     r0 = previous value of Target
    Scratch: r1, r2, r3, r12 and the condition flags }
  function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
    // take the lock: spin until it reads 0 and our exclusive store of 1 lands.
    // The marker lives in r12 because r1 carries Source and r2 is needed as
    // scratch for ldrex/strex.
    ldr r3, .Lfpc_system_lock
    mov r12, #1
  .Lloop:
    ldrex r2, [r3]
    cmp r2, #0
    itt eq
    strexeq r2, r12, [r3]           // r2 := 0 when the store succeeded
    cmpeq r2, #0
    bne .Lloop

    // do the job
    ldr r2, [r0]
    add r1, r1, r2
    str r1, [r0]
    mov r0, r2

    // unlock and return
    mov r2, #0
    str r2, [r3]
    mov pc, lr
  .Lfpc_system_lock:
    .long fpc_system_lock
  end;
  { While holding fpc_system_lock: if Target equals Comperand, stores NewValue
    into Target. Returns the value Target held before in either case.
    In:      r0 = address of Target, r1 = NewValue, r2 = Comperand
    Out:     r0 = previous value of Target
    Scratch: r3, r12 and the condition flags; r4 is saved and restored
    All three arguments stay live until the lock is held, and the acquire
    loop needs a lock pointer, a lock marker and an ldrex/strex scratch
    register on top of them, so r4 is borrowed via the stack. }
  function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
  asm
    stmfd sp!, {r4, lr}             // two words keep sp 8-byte aligned

    // take the lock: spin until it reads 0 and our exclusive store of 1 lands
    ldr r12, .Lfpc_system_lock
    mov r4, #1
  .Lloop:
    ldrex r3, [r12]
    cmp r3, #0
    itt eq
    strexeq r3, r4, [r12]           // r3 := 0 when the store succeeded
    cmpeq r3, #0
    bne .Lloop

    // do the job
    ldr r3, [r0]
    cmp r3, r2                      // r2 still holds Comperand
    it eq
    streq r1, [r0]
    mov r0, r3

    // unlock and return
    mov r3, #0
    str r3, [r12]
    ldmfd sp!, {r4, pc}
  .Lfpc_system_lock:
    .long fpc_system_lock
  end;
  {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  { Decrements l through InterLockedDecrement and reports whether the
    decremented value is zero. }
  function declocked(var l: longint) : boolean; inline;
  begin
    declocked:=(InterLockedDecrement(l) = 0);
  end;
  {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  { Increments l through InterLockedIncrement; the new value is discarded. }
  procedure inclocked(var l: longint); inline;
  begin
    InterLockedIncrement(l);
  end;
  { Selects the Move implementation. With FPC_SYSTEM_FPC_MOVE defined it
    executes one ldrd from an 8-byte aligned address derived from sp and then
    points moveproc at move_pld or move_blended depending on cpu_has_edsp.
    NOTE(review): nothing visible here ever clears cpu_has_edsp; presumably a
    fault handler elsewhere does so when the ldrd traps while in_edsp_test is
    set - confirm. }
  procedure fpc_cpucodeinit;
  begin
  {$ifdef FPC_SYSTEM_FPC_MOVE}
    { assume the instruction exists and mark the probe window as open }
    cpu_has_edsp:=true;
    in_edsp_test:=true;
    asm
      mov r1,sp
      bic r0,r1,#7                  // r0 := sp rounded down to a multiple of 8
      ldrd r0,[r0]                  // the probe itself
    end;
    in_edsp_test:=false;
    if not cpu_has_edsp then
      moveproc:=@move_blended
    else
      moveproc:=@move_pld;
  {$endif FPC_SYSTEM_FPC_MOVE}
  end;
  596. {include hand-optimized assembler division code}
  597. {$i divide.inc}