{ arm.inc — scrape artifacts (file-size banner and concatenated line numbers) removed }
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. ARM
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
{$asmmode gas}

{ If the platform does not provide its own Move, enable the FPC move
  machinery below (Move_blended/Move_pld, selected at run time). }
{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_FPC_MOVE}
{$endif FPC_SYSTEM_HAS_MOVE}

{$ifdef FPC_SYSTEM_FPC_MOVE}
const
  { True when the CPU supports the EDSP extension (ldrd etc.);
    set optimistically by fpc_cpucodeinit before probing. }
  cpu_has_edsp : boolean = false;
  { True only while the ldrd probe in fpc_cpucodeinit runs, so that
    error handling can recognise the probe — presumably the run time's
    illegal-instruction handling clears cpu_has_edsp while this is set
    (handler is not visible in this file; TODO confirm). }
  in_edsp_test : boolean = false;
{$endif FPC_SYSTEM_FPC_MOVE}
{$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
{$define FPC_SYSTEM_HAS_SYSINITFPU}
{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV3_D16)}
{ FPA variant: configure the FPA status register via rfs/wfs. }
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    rfs r0                  // read FPA status register
    and r0,r0,#0xffe0ffff   // clear all trap-enable bits
    orr r0,r0,#0x00070000   // re-enable three traps (per the note above:
                            // everything except inexact/underflow/denormal)
    wfs r0                  // write it back
  end;
end;
{$else}
{ VFP variant: configure FPSCR via fmrx/fmxr. }
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    fmrx r0,fpscr
    // set "round to nearest" mode
    and r0,r0,#0xff3fffff
    // mask "exception happened" and overflow flags
    and r0,r0,#0xffffff20
    // mask exception flags
    and r0,r0,#0xffff40ff
{$ifndef darwin}
    // Floating point exceptions cause kernel panics on iPhoneOS 2.2.1...
    // disable flush-to-zero mode (IEEE math compliant)
    and r0,r0,#0xfeffffff
    // enable invalid operation, div-by-zero and overflow exceptions
    orr r0,r0,#0x00000700
{$endif}
    fmxr fpscr,r0
  end;
end;
{$endif}
{$endif}
  58. procedure fpc_cpuinit;
  59. begin
  60. { don't let libraries influence the FPU cw set by the host program }
  61. if not IsLibrary then
  62. SysInitFPU;
  63. end;
{$ifdef wince}
{ FPU control-word manipulation provided by the WinCE core DLL. }
function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';

{$define FPC_SYSTEM_HAS_SYSRESETFPU}
Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { clear the softfloat sticky exception flags }
  softfloat_exception_flags:=0;
end;

{$define FPC_SYSTEM_HAS_SYSINITFPU}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { mask in the software emulator the exceptions we never want raised }
  softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  { FPU precision 64 bit, rounding to nearest, affine infinity }
  _controlfp($000C0003, $030F031F);
end;
{$endif wince}
  80. {****************************************************************************
  81. stack frame related stuff
  82. ****************************************************************************}
{$IFNDEF INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_FRAME}
{ Return the current frame pointer.
  darwin uses r7 as frame pointer; other ABIs here use r11. }
function get_frame:pointer;assembler;nostackframe;
asm
{$ifndef darwin}
  mov r0,r11
{$else}
  mov r0,r7
{$endif}
end;
{$ENDIF not INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
{ Given a frame pointer, return the saved return address stored in that
  frame, or nil when framebp is nil.  The saved-lr offset differs between
  the r11-based layout and darwin's r7-based frames. }
function get_caller_addr(framebp:pointer):pointer;assembler;nostackframe;
asm
  cmp r0,#0               // nil frame? then return r0 (nil) unchanged
{$ifndef darwin}
  ldrne r0,[r0,#-4]
{$else}
  ldrne r0,[r0,#4]
{$endif}
end;
{$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
{ Given a frame pointer, return the caller's frame pointer stored in that
  frame, or nil when framebp is nil. }
function get_caller_frame(framebp:pointer):pointer;assembler;nostackframe;
asm
  cmp r0,#0               // nil frame? then return r0 (nil) unchanged
{$ifndef darwin}
  ldrne r0,[r0,#-12]
{$else}
  ldrne r0,[r0]
{$endif}
end;
{$define FPC_SYSTEM_HAS_SPTR}
{ Return the current stack pointer. }
Function Sptr : pointer;assembler;nostackframe;
asm
  mov r0,sp
end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR}
{ Fill count bytes at x with value.
  In:  r0 = @x, r1 = count, r2 = value.
  Strategy: byte-fill up to a 4-byte boundary, then store 8 bytes per
  stmia (partially unrolled), then finish the 0..7 remaining bytes by a
  computed jump into a ladder of strb instructions. }
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm
  // less than 0?
  cmp r1,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  movlt pc,lr
{$else}
  bxlt lr
{$endif}
  mov r3,r0
  cmp r1,#8 // at least 8 bytes to do?
  blt .LFillchar2
  // replicate the fill byte into all four bytes of r2
  orr r2,r2,r2,lsl #8
  orr r2,r2,r2,lsl #16
.LFillchar0:
  tst r3,#3 // aligned yet?
  strneb r2,[r3],#1
  subne r1,r1,#1
  bne .LFillchar0
  mov ip,r2             // second word for the 8-byte stmia stores
.LFillchar1:
  cmp r1,#8 // 8 bytes still to do?
  blt .LFillchar2
  stmia r3!,{r2,ip}
  sub r1,r1,#8
  cmp r1,#8 // 8 bytes still to do?
  blt .LFillchar2
  stmia r3!,{r2,ip}
  sub r1,r1,#8
  cmp r1,#8 // 8 bytes still to do?
  blt .LFillchar2
  stmia r3!,{r2,ip}
  sub r1,r1,#8
  cmp r1,#8 // 8 bytes still to do?
  stmgeia r3!,{r2,ip}
  subge r1,r1,#8
  bge .LFillchar1
.LFillchar2:
  movs r1,r1 // anything left?
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  moveq pc,lr
{$else}
  bxeq lr
{$endif}
  // computed jump: skip (7-r1) of the strb instructions below, so that
  // exactly r1 of them execute.  Reading pc yields the address of the
  // current instruction + 8; the mov r0,r0 nop accounts for that slot.
  rsb r1,r1,#7
  add pc,pc,r1,lsl #2
  mov r0,r0
  strb r2,[r3],#1
  strb r2,[r3],#1
  strb r2,[r3],#1
  strb r2,[r3],#1
  strb r2,[r3],#1
  strb r2,[r3],#1
  strb r2,[r3],#1
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
end;
{$endif FPC_SYSTEM_HAS_FILLCHAR}
{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE}
{ Move variant for CPUs supporting the pld (cache preload) hint.
  In:  r0 = source, r1 = dest, r2 = count (bytes).
  Overlapping forward copies are handled by copying backwards bytewise;
  otherwise words are copied (with preloading) when source and dest are
  both 4-byte aligned, and the remainder bytewise. }
procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
asm
  pld [r0]
  // count <=0 ?
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  movle pc,lr
{$else}
  bxle lr
{$endif}
  // overlap?  (dest inside [source,source+count) requires backward copy)
  cmp r1,r0
  bls .Lnooverlap
  add r3,r0,r2
  cmp r3,r1
  bls .Lnooverlap
  // overlap, copy backward
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
.Lnooverlap:
  // less then 16 bytes to copy?
  cmp r2,#8
  // yes, the forget about the whole optimizations
  // and do a bytewise copy
  blt .Lbyteloop
  // both aligned?
  orr r3,r0,r1
  tst r3,#3
  bne .Lbyteloop
(*
  // yes, then align
  // alignment to 4 byte boundries is enough
  ldrb ip,[r0],#1
  sub r2,r2,#1
  stb ip,[r1],#1
  tst r3,#2
  bne .Ldifferentaligned
  ldrh ip,[r0],#2
  sub r2,r2,#2
  sth ip,[r1],#2
.Ldifferentaligned
  // qword aligned?
  orrs r3,r0,r1
  tst r3,#7
  bne .Ldwordloop
*)
  pld [r0,#32]
.Ldwordloop:
  sub r2,r2,#4
  ldr r3,[r0],#4
  // preload
  pld [r0,#64]
  cmp r2,#4
  str r3,[r1],#4
  bcs .Ldwordloop
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  moveq pc,lr
{$else}
  bxeq lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
end;
{ Generic Move fallback for CPUs without pld; otherwise identical in
  structure to Move_pld above.
  In:  r0 = source, r1 = dest, r2 = count (bytes). }
procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
asm
  // count <=0 ?
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  movle pc,lr
{$else}
  bxle lr
{$endif}
  // overlap?  (dest inside [source,source+count) requires backward copy)
  cmp r1,r0
  bls .Lnooverlap
  add r3,r0,r2
  cmp r3,r1
  bls .Lnooverlap
  // overlap, copy backward
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
.Lnooverlap:
  // less then 16 bytes to copy?
  cmp r2,#8
  // yes, the forget about the whole optimizations
  // and do a bytewise copy
  blt .Lbyteloop
  // both aligned?
  orr r3,r0,r1
  tst r3,#3
  bne .Lbyteloop
(*
  // yes, then align
  // alignment to 4 byte boundries is enough
  ldrb ip,[r0],#1
  sub r2,r2,#1
  stb ip,[r1],#1
  tst r3,#2
  bne .Ldifferentaligned
  ldrh ip,[r0],#2
  sub r2,r2,#2
  sth ip,[r1],#2
.Ldifferentaligned
  // qword aligned?
  orrs r3,r0,r1
  tst r3,#7
  bne .Ldwordloop
*)
.Ldwordloop:
  sub r2,r2,#4
  ldr r3,[r0],#4
  cmp r2,#4
  str r3,[r1],#4
  bcs .Ldwordloop
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  moveq pc,lr
{$else}
  bxeq lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
end;
const
  { Run-time selected Move implementation; fpc_cpucodeinit switches this
    to @Move_pld when the CPU passes the EDSP probe. }
  moveproc : pointer = @move_blended;

{ Public Move entry point: tail-jump through moveproc, leaving the
  argument registers r0..r2 untouched for the selected implementation. }
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
asm
  ldr ip,.Lmoveproc
  ldr pc,[ip]
.Lmoveproc:
  .long moveproc
end;
{$endif FPC_SYSTEM_HAS_MOVE}
  350. {****************************************************************************
  351. String
  352. ****************************************************************************}
{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{ Copy shortstring sstr into the result, truncating to len characters.
  Copies the length byte first, then the payload: bytewise for short
  strings, otherwise dest is aligned and words are copied when the
  source is word-aligned too. }
{$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{$else}
procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{$endif}
{r0: __RESULT
 r1: len
 r2: sstr}
asm
  ldrb r12,[r2],#1          // r12 = source length byte
  cmp r12,r1
  movgt r12,r1              // clamp to destination capacity (len)
  strb r12,[r0],#1          // store result length byte
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  rsb r3,r0,#0
  ands r3,r3,#3             // r3 = bytes needed to align dest
  sub r12,r12,r3            // r12 = bytes remaining after alignment
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r2,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2        // r3 = whole words to copy
  and r12,r12,#3            // r12 = tail bytes
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ldrne r1,[r2],#4
  strne r1,[r0],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r1,[r2],#1
  strb r1,[r0],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
{ Assign shortstring sstr to dstr, truncating to len characters.
  Same algorithm as fpc_shortstr_to_shortstr above, with source/dest
  passed as raw pointers. }
procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
{r0: len
 r1: sstr
 r2: dstr}
asm
  ldrb r12,[r1],#1          // r12 = source length byte
  cmp r12,r0
  movgt r12,r0              // clamp to destination capacity (len)
  strb r12,[r2],#1          // store destination length byte
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  rsb r3,r2,#0
  ands r3,r3,#3             // r3 = bytes needed to align dest
  sub r12,r12,r3            // r12 = bytes remaining after alignment
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r1,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2        // r3 = whole words to copy
  and r12,r12,#3            // r12 = tail bytes
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ldrne r0,[r1],#4
  strne r0,[r2],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r0,[r1],#1
  strb r0,[r2],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{ Length of a #0-terminated string; returns 0 for a nil pointer.
  Scans a word at a time once aligned, using the classic
  (x - $01010101) and not(x) and $80808080 zero-byte detection. }
function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
asm
  cmp r0,#0
  mov r1,r0                 // r1 = running scan pointer, r0 keeps start
  beq .Ldone                // nil -> result is already 0 in r0
.Lnextchar:
  (*Are we aligned?*)
  tst r1,#3
  bne .Ltest_unaligned (*No, do byte per byte.*)
  ldr r3,.L01010101
.Ltest_aligned:
  (*Aligned, load 4 bytes at a time.*)
  ldr r12,[r1],#4
  (*Check wether r12 contains a 0 byte.*)
  sub r2,r12,r3
  mvn r12,r12
  and r2,r2,r12
  ands r2,r2,r3,lsl #7 (*r3 lsl 7 = $80808080*)
  beq .Ltest_aligned (*No 0 byte, repeat.*)
  sub r1,r1,#4              // word had a 0 byte: rescan it bytewise
.Ltest_unaligned:
  ldrb r12,[r1],#1
  cmp r12,#1 (*r12<1 same as r12=0, but result in carry flag*)
  bcs .Lnextchar
  (*Dirty trick: we need to subtract 1 extra because we have counted the
    terminating 0, due to the known carry flag sbc can do this.*)
  sbc r0,r1,r0
.Ldone:
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
.L01010101:
  .long 0x01010101
end;
{$endif}
var
  { Global spinlock word for the swp-based fallback paths of the
    InterLocked* routines below (0 = free, non-zero = taken). }
  fpc_system_lock: longint; export name 'fpc_system_lock';
{ Atomically decrement Target and return the new value.
  Three implementations: ldrex/strex on ARMv6+, the Linux kernel's
  kuser_cmpxchg helper on linux-armel, and a swp-based spinlock fallback. }
function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r1, [r0]            // load-exclusive current value
  sub r1, r1, #1
  strex r2, r1, [r0]        // r2 = 0 on successful store
  cmp r2, #0
  bne .Lloop
  mov r0, r1                // return the decremented value
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {lr}
  mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
.Latomic_dec_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normaly this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and substract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  sub r1, r0, #1 // Decrement value
  blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  movcs r0, r1 // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_dec_loop // kuser_cmpxchg sets C flag on error
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]          // spin until the previous lock value was 0
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  sub r1, r1, #1
  str r1, [r0]
  mov r0, r1
  // unlock and return
  str r2, [r3]              // r2 is known to be 0 here
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
{ Atomically increment Target and return the new value.
  Mirror image of InterLockedDecrement: ldrex/strex on ARMv6+,
  kuser_cmpxchg on linux-armel, swp spinlock otherwise. }
function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r1, [r0]            // load-exclusive current value
  add r1, r1, #1
  strex r2, r1, [r0]        // r2 = 0 on successful store
  cmp r2, #0
  bne .Lloop
  mov r0, r1                // return the incremented value
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {lr}
  mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
.Latomic_inc_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normaly this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and substract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, #1 // Increment value
  blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  movcs r0, r1 // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_inc_loop // kuser_cmpxchg sets C flag on error
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]          // spin until the previous lock value was 0
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  add r1, r1, #1
  str r1, [r0]
  mov r0, r1
  // unlock and return
  str r2, [r3]              // r2 is known to be 0 here
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
{ Atomically store Source into Target and return Target's previous value. }
function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
  // swp is deprecated on ARMv6 and above
.Lloop:
  ldrex r2, [r0]            // r2 = old value
  strex r3, r1, [r0]        // try to store Source; r3 = 0 on success
  cmp r3, #0
  bne .Lloop
  mov r0, r2                // return old value
  bx lr
{$else}
  swp r1, r1, [r0]          // single-instruction atomic swap
  mov r0,r1
{$endif}
end;
{ Atomically add Source to Target, returning Target's previous value.
  ldrex/strex on ARMv6+, kuser_cmpxchg on linux-armel, swp spinlock
  otherwise. }
function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r2, [r0]            // r2 = old value
  add r12, r1, r2
  strex r3, r12, [r0]       // try to store the sum; r3 = 0 on success
  cmp r3, #0
  bne .Lloop
  mov r0, r2                // return old value
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {r4, lr}
  mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
  mov r4, r1 // Save addend
.Latomic_add_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normaly this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and substract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, r4 // Add to value
  blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  // r1 does not get clobbered, so just get back the original value
  // Otherwise we would have to allocate one more register and store the
  // temporary value
  subcs r0, r1, r4
  ldmcsfd r13!, {r4, pc}
  b .Latomic_add_loop // kuser_cmpxchg failed, loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r2, #1
.Lloop:
  swp r2, r2, [r3]          // spin until the previous lock value was 0
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r2, [r0]
  add r1, r1, r2
  str r1, [r0]
  mov r0, r2                // return old value
  // unlock and return
  mov r2, #0
  str r2, [r3]
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
{ Atomic compare-and-swap: if Target = Comperand, store NewValue.
  Returns Target's previous value in every case. }
function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r3, [r0]            // r3 = current value
  mov r12, #0
  cmp r3, r2
  strexeq r12, r1, [r0]     // store NewValue only on match; r12 = 0 on success
  cmp r12, #0
  bne .Lloop
  mov r0, r3                // return previous value
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {r4, lr}
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F         // r3 = kuser_cmpxchg entry (0xffff0fc0)
  mov r4, r2 // Swap parameters around
  mov r2, r0
  mov r0, r4 // Use r4 because we'll need the new value for later
  // r1 and r2 will not be clobbered by kuser_cmpxchg
  // If we have to loop, r0 will be set to the original Comperand
.Linterlocked_compare_exchange_loop:
  blx r3 // Call kuser_cmpxchg sets C-Flag on success
  movcs r0, r4 // Return the previous value on success
  ldmcsfd r13!, {r4, pc}
  // The error case is a bit tricky, kuser_cmpxchg does not return the current value
  // So we may need to loop to avoid race conditions
  // The loop case is HIGHLY unlikely, it would require that we got rescheduled between
  // calling kuser_cmpxchg and the ldr. While beeing rescheduled another process/thread
  // would have the set the value to our comperand
  ldr r0, [r2] // Load the currently set value
  cmp r0, r4 // Return if Comperand != current value, otherwise loop again
  ldmnefd r13!, {r4, pc}
  // If we need to loop here, we have to
  b .Linterlocked_compare_exchange_loop
{$else}
  // lock
  ldr r12, .Lfpc_system_lock
  mov r3, #1
.Lloop:
  swp r3, r3, [r12]         // spin until the previous lock value was 0
  cmp r3, #0
  bne .Lloop
  // do the job
  ldr r3, [r0]
  cmp r3, r2
  streq r1, [r0]            // store NewValue only when Target = Comperand
  mov r0, r3                // return previous value
  // unlock and return
  mov r3, #0
  str r3, [r12]
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
{ Atomically decrement l; true when the new value reached zero. }
function declocked(var l: longint) : boolean; inline;
begin
  Result:=InterLockedDecrement(l) = 0;
end;
{$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
{ Atomically increment l; the new value is not needed by callers. }
procedure inclocked(var l: longint); inline;
begin
  InterLockedIncrement(l);
end;
procedure fpc_cpucodeinit;
begin
{$ifdef FPC_SYSTEM_FPC_MOVE}
  { Probe for the EDSP extension: claim support, then execute an ldrd,
    which is undefined on pre-EDSP cores.  The run time's
    illegal-instruction handling is presumably expected to clear
    cpu_has_edsp while in_edsp_test is set (handler not visible in this
    file — TODO confirm). }
  cpu_has_edsp:=true;
  in_edsp_test:=true;
  asm
    bic r0,sp,#7      // 8-byte-aligned address to load from
    ldrd r0,[r0]      // the probe instruction
  end;
  in_edsp_test:=false;
  { select the Move implementation accordingly }
  if cpu_has_edsp then
    moveproc:=@move_pld
  else
    moveproc:=@move_blended;
{$endif FPC_SYSTEM_FPC_MOVE}
end;
  775. {$define FPC_SYSTEM_HAS_SWAPENDIAN}
  776. { SwapEndian(<16 Bit>) being inlined is faster than using assembler }
  777. function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  778. begin
  779. { the extra Word type cast is necessary because the "AValue shr 8" }
  780. { is turned into "longint(AValue) shr 8", so if AValue < 0 then }
  781. { the sign bits from the upper 16 bits are shifted in rather than }
  782. { zeroes. }
  783. Result := SmallInt((Word(AValue) shr 8) or (Word(AValue) shl 8));
  784. end;
  785. function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
  786. begin
  787. Result := Word((AValue shr 8) or (AValue shl 8));
  788. end;
  789. (*
  790. This is kept for reference. Thats what the compiler COULD generate in these cases.
  791. But FPC currently does not support inlining of asm-functions, so the whole call-overhead
  792. is bigger than the gain of the optimized function.
  793. function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
  794. asm
  795. // We're starting with 4321
  796. {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
mov r0, r0, lsl #16     // Shift to make that 2100
mov r0, r0, ror #24     // Rotate to 1002
orr r0, r0, r0, lsr #16 // Shift and combine into 0012
{$else}
rev r0, r0              // Reverse byteorder r0 = 1234
mov r0, r0, lsr #16     // Shift down to 16bits r0 = 0012
  803. {$endif}
  804. end;
  805. *)
  806. {
  807. These used to be an assembler-function, but with newer improvements to the compiler this
  808. generates a perfect 4 cycle code sequence and can be inlined.
  809. }
  810. function SwapEndian(const AValue: LongWord): LongWord;{$ifdef SYSTEMINLINE}inline;{$endif}
  811. begin
  812. Result:= AValue xor rordword(AValue,16);
  813. Result:= Result and $FF00FFFF;
  814. Result:= (Result shr 8) xor rordword(AValue,8);
  815. end;
{ Signed 32-bit variant: reuse the unsigned version; the casts only
  reinterpret the bit pattern. }
function SwapEndian(const AValue: LongInt): LongInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=LongInt(SwapEndian(DWord(AValue)));
end;
{
  Currently freepascal will not generate a good assembler sequence for
    Result:=(SwapEndian(longword(lo(AValue))) shl 32) or
            (SwapEndian(longword(hi(AValue)));
  So we keep an assembly version for now
}
{ Byte-reverse a 64-bit value: each 32-bit half (r0:r1) is byte-swapped
  and the two halves are exchanged. }
function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
asm
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov ip, r1                // keep the high half while we swap the low one
  // We're starting with r0 = $87654321
  eor r1, r0, r0, ror #16   // r1 = $C444C444
  bic r1, r1, #16711680     // r1 = r1 and $ff00ffff = $C400C444
  mov r0, r0, ror #8        // r0 = $21876543
  eor r1, r0, r1, lsr #8    // r1 = $21436587
  // same xor/rotate sequence for the other half (saved in ip)
  eor r0, ip, ip, ror #16
  bic r0, r0, #16711680
  mov ip, ip, ror #8
  eor r0, ip, r0, lsr #8
{$else}
  // ARMv6+: rev byte-reverses a word; swap halves via r2
  rev r2, r0
  rev r0, r1
  mov r1, r2
{$endif}
end;
{ Unsigned 64-bit variant: reuse the Int64 version; the casts only
  reinterpret the bit pattern. }
function SwapEndian(const AValue: QWord): QWord; {$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=QWord(SwapEndian(Int64(AValue)));
end;
  849. {include hand-optimized assembler division code}
  850. {$i divide.inc}