arm.inc 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. ARM
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$asmmode gas}
  { Select the Move implementation of this file unless the target already
    announced one of its own through FPC_SYSTEM_HAS_MOVE. }
  {$ifndef FPC_SYSTEM_HAS_MOVE}
    {$define FPC_SYSTEM_FPC_MOVE}
  {$endif FPC_SYSTEM_HAS_MOVE}

  {$ifdef FPC_SYSTEM_FPC_MOVE}
  const
    { Writable typed constants used by fpc_cpucodeinit: cpu_has_edsp is set to
      true before a probing ldrd is executed and decides afterwards which Move
      variant is installed; in_edsp_test is true only while that probe runs.
      NOTE(review): whatever clears cpu_has_edsp when the probe faults is not
      part of this file - confirm against the platform's fault handler. }
    cpu_has_edsp : boolean = false;
    in_edsp_test : boolean = false;
  {$endif FPC_SYSTEM_FPC_MOVE}
  21. {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
  22. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  23. {$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3)}
  { FPA variant: reprogram the FPA status register.
    Bits 16..20 of the status word are cleared and bits 16..18 are set again,
    which - per the original author's note - enables FPU exceptions while
    leaving INEXACT, UNDERFLOW and DENORMAL disabled. }
  Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      asm
        rfs     r0                      // r0 := current FPA status word
        bic     r0,r0,#0x001f0000       // same as: and r0,r0,#0xffe0ffff
        orr     r0,r0,#0x00070000       // switch bits 16..18 back on
        wfs     r0                      // write the status word back
      end;
    end;
  34. {$else}
  { VFP variant: reprogram FPSCR.
    Enables FPU exceptions but keeps INEXACT, UNDERFLOW and DENORMAL disabled;
    on darwin no exception is enabled at all (see the note below). }
  Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      asm
        fmrx    r0,fpscr
        // rounding mode field := 0, i.e. "round to nearest"
        bic     r0,r0,#0x00c00000       // same as: and r0,r0,#0xff3fffff
        // drop the "exception happened" and overflow status flags
        // (bits 0..4, 6 and 7; bit 5 is left as it was)
        bic     r0,r0,#0x000000df       // same as: and r0,r0,#0xffffff20
        // drop the exception bits 8..13 and 15 (bit 14 is left as it was)
        bic     r0,r0,#0x0000bf00       // same as: and r0,r0,#0xffff40ff
  {$ifndef darwin}
        // Not done on darwin: floating point exceptions cause kernel panics
        // on iPhoneOS 2.2.1...
        // flush-to-zero mode off (IEEE math compliant)
        bic     r0,r0,#0x01000000       // same as: and r0,r0,#0xfeffffff
        // invalid operation, div-by-zero and overflow exceptions on
        orr     r0,r0,#0x00000700
  {$endif}
        fmxr    fpscr,r0
      end;
    end;
  56. {$endif}
  57. {$endif}
  { CPU specific start-up hook: initialises the FPU, except inside a library. }
  procedure fpc_cpuinit;
    begin
      { a library must not touch the FPU control word chosen by its host
        program }
      if IsLibrary then
        exit;
      SysInitFPU;
    end;
  64. {$ifdef wince}
  { Imported from coredll; called by the WinCE SysInitFPU below with a new
    control value and the mask of bits to change. }
  function _controlfp(new, mask: DWORD): DWORD; cdecl; external 'coredll';
  {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  { WinCE: resetting the FPU state means clearing the accumulated softfloat
    exception flags. }
  Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      softfloat_exception_flags := 0;
    end;
  {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { WinCE: set up both the softfloat exception mask and the control word
    managed by coredll. }
  Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
    begin
      { INEXACT, UNDERFLOW and DENORMAL stay masked in the softfloat layer }
      softfloat_exception_mask :=
        float_flag_inexact or float_flag_underflow or float_flag_denormal;
      { Enable FPU exceptions, except INEXACT, UNDERFLOW and DENORMAL;
        64 bit precision, round to nearest, affine infinity.
        First argument: new control value, second: mask of affected bits. }
      _controlfp($000C0003, $030F031F);
    end;
  79. {$endif wince}
  80. {****************************************************************************
  81. stack frame related stuff
  82. ****************************************************************************}
  {$IFNDEF INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_FRAME}
  { Returns the current content of r11, which the stack walking helpers in
    this file treat as the frame pointer. }
  function get_frame:pointer;assembler;nostackframe;
  asm
          mov     r0,r11                  // result := r11
  end;
  {$ENDIF not INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { Returns the word stored at framebp-4 (the saved return address slot of
    that frame); a nil framebp is handed back unchanged. }
  function get_caller_addr(framebp:pointer):pointer;assembler;nostackframe;
  asm
          cmp     r0,#0                   // nil frame?
          ldrne   r0,[r0,#-4]             // no: result := pointer at framebp-4
  end;
  {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { Returns the word stored at framebp-12 (the saved frame pointer slot of
    that frame); a nil framebp is handed back unchanged. }
  function get_caller_frame(framebp:pointer):pointer;assembler;nostackframe;
  asm
          cmp     r0,#0                   // nil frame?
          ldrne   r0,[r0,#-12]            // no: result := pointer at framebp-12
  end;
  {$define FPC_SYSTEM_HAS_SPTR}
  { Returns the current value of the stack pointer. }
  Function Sptr : pointer;assembler;nostackframe;
  asm
          mov     r0,r13                  // r13 is the stack pointer (sp)
  end;
  {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  {$define FPC_SYSTEM_HAS_FILLCHAR}
  { Stores "value" into "count" consecutive bytes starting at x.
    Register use: r0 = address of x, r1 = count, r2 = value,
                  r3 = write cursor, r12 = copy of the replicated fill word.
    A negative or zero count writes nothing.  Buffers of eight bytes or more
    are first aligned to a word boundary and then filled eight bytes per
    store; the last 1..7 bytes are written by jumping into a run of seven
    single byte stores. }
  Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
  asm
          // negative count: return immediately
          cmp     r1,#0
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          movlt   pc,lr
  {$else}
          bxlt    lr
  {$endif}
          mov     r3,r0                   // r3 := write cursor
          cmp     r1,#8                   // short buffer: byte stores only
          blt     .LFillTail
          // replicate the fill byte into all four byte lanes of r2
          orr     r2,r2,r2,lsl #8
          orr     r2,r2,r2,lsl #16
  .LFillAlign:
          // byte stores until the cursor sits on a word boundary
          tst     r3,#3
          strneb  r2,[r3],#1
          subne   r1,r1,#1
          bne     .LFillAlign
          mov     r12,r2                  // second register for stmia
  .LFillBlocks:
          // four times unrolled: eight bytes per step while count >= 8
          cmp     r1,#8
          blt     .LFillTail
          stmia   r3!,{r2,r12}
          sub     r1,r1,#8
          cmp     r1,#8
          blt     .LFillTail
          stmia   r3!,{r2,r12}
          sub     r1,r1,#8
          cmp     r1,#8
          blt     .LFillTail
          stmia   r3!,{r2,r12}
          sub     r1,r1,#8
          cmp     r1,#8
          stmgeia r3!,{r2,r12}
          subge   r1,r1,#8
          bge     .LFillBlocks
  .LFillTail:
          movs    r1,r1                   // nothing left? then we are done
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          moveq   pc,lr
  {$else}
          bxeq    lr
  {$endif}
          // 1..7 bytes left: skip (7 - count) of the seven stores below.
          // pc reads as "this instruction + 8", i.e. the first strb, so the
          // filler instruction after the add must stay exactly one word.
          rsb     r1,r1,#7
          add     pc,pc,r1,lsl #2
          mov     r0,r0                   // filler, never executed
          strb    r2,[r3],#1
          strb    r2,[r3],#1
          strb    r2,[r3],#1
          strb    r2,[r3],#1
          strb    r2,[r3],#1
          strb    r2,[r3],#1
          strb    r2,[r3],#1
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc,lr
  {$else}
          bx      lr
  {$endif}
  end;
  {$endif FPC_SYSTEM_HAS_FILLCHAR}
  170. {$ifndef FPC_SYSTEM_HAS_MOVE}
  171. {$define FPC_SYSTEM_HAS_MOVE}
  { Move variant that issues pld cache preload hints.
    Register use: r0 = address of source, r1 = address of dest, r2 = count,
                  r3 = scratch.
    count <= 0 copies nothing.  When dest lies inside source..source+count-1
    the data is copied backwards byte by byte; otherwise it is copied
    forwards, a word at a time if count >= 8 and both addresses are word
    aligned, else byte by byte. }
  procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
  asm
          pld     [r0]
          // nothing to copy?
          cmp     r2,#0
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          movle   pc,lr
  {$else}
          bxle    lr
  {$endif}
          // forward copy is safe if dest <= source ...
          cmp     r1,r0
          bls     .LPldForward
          // ... or if source+count <= dest
          add     r3,r0,r2
          cmp     r3,r1
          bls     .LPldForward
  .LPldBackward:
          // regions overlap with dest above source: last byte first
          subs    r2,r2,#1
          ldrb    r3,[r0,r2]
          strb    r3,[r1,r2]
          bne     .LPldBackward
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc,lr
  {$else}
          bx      lr
  {$endif}
  .LPldForward:
          // fewer than 8 bytes: not worth anything but a plain byte loop
          cmp     r2,#8
          blt     .LPldBytes
          // word copy only if source and dest are both word aligned
          orr     r3,r0,r1
          tst     r3,#3
          bne     .LPldBytes
          // (an alignment prologue for unaligned pointers was sketched here
          //  in the original source but never enabled)
          pld     [r0,#32]
  .LPldWords:
          sub     r2,r2,#4
          ldr     r3,[r0],#4
          pld     [r0,#64]                // preload ahead of the read cursor
          cmp     r2,#4
          str     r3,[r1],#4
          bcs     .LPldWords              // another full word left
          cmp     r2,#0
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          moveq   pc,lr
  {$else}
          bxeq    lr
  {$endif}
  .LPldBytes:
          subs    r2,r2,#1
          ldrb    r3,[r0],#1
          strb    r3,[r1],#1
          bne     .LPldBytes
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc,lr
  {$else}
          bx      lr
  {$endif}
  end;
  { Move variant without cache preload hints; the initial value of moveproc.
    Register use: r0 = address of source, r1 = address of dest, r2 = count,
                  r3 = scratch.
    count <= 0 copies nothing.  When dest lies inside source..source+count-1
    the data is copied backwards byte by byte; otherwise it is copied
    forwards, a word at a time if count >= 8 and both addresses are word
    aligned, else byte by byte. }
  procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
  asm
          // nothing to copy?
          cmp     r2,#0
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          movle   pc,lr
  {$else}
          bxle    lr
  {$endif}
          // forward copy is safe if dest <= source ...
          cmp     r1,r0
          bls     .LBlendForward
          // ... or if source+count <= dest
          add     r3,r0,r2
          cmp     r3,r1
          bls     .LBlendForward
  .LBlendBackward:
          // regions overlap with dest above source: last byte first
          subs    r2,r2,#1
          ldrb    r3,[r0,r2]
          strb    r3,[r1,r2]
          bne     .LBlendBackward
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc,lr
  {$else}
          bx      lr
  {$endif}
  .LBlendForward:
          // fewer than 8 bytes: not worth anything but a plain byte loop
          cmp     r2,#8
          blt     .LBlendBytes
          // word copy only if source and dest are both word aligned
          orr     r3,r0,r1
          tst     r3,#3
          bne     .LBlendBytes
          // (an alignment prologue for unaligned pointers was sketched here
          //  in the original source but never enabled)
  .LBlendWords:
          sub     r2,r2,#4
          ldr     r3,[r0],#4
          cmp     r2,#4
          str     r3,[r1],#4
          bcs     .LBlendWords            // another full word left
          cmp     r2,#0
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          moveq   pc,lr
  {$else}
          bxeq    lr
  {$endif}
  .LBlendBytes:
          subs    r2,r2,#1
          ldrb    r3,[r0],#1
          strb    r3,[r1],#1
          bne     .LBlendBytes
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc,lr
  {$else}
          bx      lr
  {$endif}
  end;
  const
    { Address of the Move implementation in use.  Starts out as the variant
      without preload hints; fpc_cpucodeinit stores the final choice. }
    moveproc : pointer = @move_blended;
  { Public entry point (FPC_MOVE): forwards to whichever implementation the
    moveproc variable currently points to.  r0..r2 and lr are left untouched,
    so the selected routine returns straight to Move's caller. }
  procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
  asm
          ldr     r12,.LMoveProcAddr      // r12 := address of moveproc
          ldr     pc,[r12]                // jump to the routine stored there
  .LMoveProcAddr:
          .long   moveproc
  end;
  337. {$endif FPC_SYSTEM_HAS_MOVE}
  338. {****************************************************************************
  339. String
  340. ****************************************************************************}
  341. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  342. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$ifndef FPC_STRTOSHORTSTRINGPROC}
  function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
  {$else}
  procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
  {$endif}
  { Copies sstr into the result, truncated to at most len characters.
    Register use as documented by the original author:
      r0: __RESULT (destination), r1: len, r2: sstr
    NOTE(review): for the procedure variant r1 is presumably the hidden
    maximum length of res - confirm against the compiler's calling convention.
    ip (r12) counts the characters still to copy, r3 is a scratch counter and
    r1 is reused as the data register once the length has been clamped. }
  asm
          ldrb    ip,[r2],#1              // ip := length(sstr)
          cmp     ip,r1
          movgt   ip,r1                   // clamp to len
          strb    ip,[r0],#1              // store the length byte
          cmp     ip,#6                   (* 6 seems to be the break even point. *)
          blt     .LCopyTail
          (* Bring the destination to a 32 bit boundary with up to three
             unrolled byte copies.  In the common case sstr is aligned on
             32 bits, so three bytes have to be copied here and a loop would
             never be left early - unrolling is what pays off. *)
          rsb     r3,r0,#0
          ands    r3,r3,#3                // r3 := bytes needed to align r0
          sub     ip,ip,r3
          ldrneb  r1,[r2],#1
          strneb  r1,[r0],#1
          subnes  r3,r3,#1
          ldrneb  r1,[r2],#1
          strneb  r1,[r0],#1
          subnes  r3,r3,#1
          ldrneb  r1,[r2],#1
          strneb  r1,[r0],#1
          subnes  r3,r3,#1
          (* The destination is aligned now; if the source is not, fall back
             to copying everything byte by byte. *)
          tst     r2,#3
          bne     .LCopyTail
          (* Main copy, 32 bits at a time: r3 := number of whole words,
             ip := bytes remaining after them. *)
          movs    r3,ip,lsr #2
          and     ip,ip,#3
          beq     .LCopyTail
  .LCopyWords:
          (* Not unrolled on purpose: it would only help long strings
             (>20 chars) and hurt the short ones, which are the common case. *)
          ldrne   r1,[r2],#4
          strne   r1,[r0],#4
          subnes  r3,r3,#1
          bne     .LCopyWords
  .LCopyTail:
          (* Remaining bytes, if any. *)
          cmp     ip,#0
          beq     .LCopyDone
  .LCopyByte:
          ldrb    r1,[r2],#1
          strb    r1,[r0],#1
          subs    ip,ip,#1
          bne     .LCopyByte
  .LCopyDone:
  end;
  { Copies the shortstring at sstr to dstr, truncated to at most len
    characters.
    Register use:
      r0: len (reused as data register after the length has been clamped)
      r1: sstr (read cursor)
      r2: dstr (write cursor)
      r3: scratch counter, ip (r12): characters still to copy }
  procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
  asm
          ldrb    ip,[r1],#1              // ip := length byte of the source
          cmp     ip,r0
          movgt   ip,r0                   // clamp to len
          strb    ip,[r2],#1              // store the length byte
          cmp     ip,#6                   (* 6 seems to be the break even point. *)
          blt     .LCopyTail
          (* Bring the destination to a 32 bit boundary with up to three
             unrolled byte copies.  In the common case sstr is aligned on
             32 bits, so three bytes have to be copied here and a loop would
             never be left early - unrolling is what pays off. *)
          rsb     r3,r2,#0
          ands    r3,r3,#3                // r3 := bytes needed to align r2
          sub     ip,ip,r3
          ldrneb  r0,[r1],#1
          strneb  r0,[r2],#1
          subnes  r3,r3,#1
          ldrneb  r0,[r1],#1
          strneb  r0,[r2],#1
          subnes  r3,r3,#1
          ldrneb  r0,[r1],#1
          strneb  r0,[r2],#1
          subnes  r3,r3,#1
          (* The destination is aligned now; if the source is not, fall back
             to copying everything byte by byte. *)
          tst     r1,#3
          bne     .LCopyTail
          (* Main copy, 32 bits at a time: r3 := number of whole words,
             ip := bytes remaining after them. *)
          movs    r3,ip,lsr #2
          and     ip,ip,#3
          beq     .LCopyTail
  .LCopyWords:
          (* Not unrolled on purpose: it would only help long strings
             (>20 chars) and hurt the short ones, which are the common case. *)
          ldrne   r0,[r1],#4
          strne   r0,[r2],#4
          subnes  r3,r3,#1
          bne     .LCopyWords
  .LCopyTail:
          (* Remaining bytes, if any. *)
          cmp     ip,#0
          beq     .LCopyDone
  .LCopyByte:
          ldrb    r0,[r1],#1
          strb    r0,[r2],#1
          subs    ip,ip,#1
          bne     .LCopyByte
  .LCopyDone:
  end;
  457. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  { Returns the number of characters in front of the terminating #0 of p;
    a nil p yields 0.
    Register use: r0 = p (start), r1 = read cursor, r3 = $01010101,
                  r2 and ip (r12) = scratch.
    Word aligned parts of the string are scanned four bytes per load, the
    rest byte by byte. }
  function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
  asm
          cmp     r0,#0
          mov     r1,r0
          beq     .LLenReturn             // nil: r0 is already the result 0
  .LLenScan:
          (* Word aligned cursor? If not, look at a single byte. *)
          tst     r1,#3
          bne     .LLenByte
          ldr     r3,.LLenOnes
  .LLenWord:
          (* Aligned: fetch four bytes at once. *)
          ldr     ip,[r1],#4
          (* (w - $01010101) and (not w) and $80808080 is non-zero when w
             contains a zero byte. *)
          sub     r2,ip,r3
          mvn     ip,ip
          and     r2,r2,ip
          ands    r2,r2,r3,lsl #7         (* r3 lsl 7 = $80808080 *)
          beq     .LLenWord               (* no zero byte, next word *)
          sub     r1,r1,#4                // step back, locate it bytewise
  .LLenByte:
          ldrb    ip,[r1],#1
          cmp     ip,#1                   (* carry clear <=> byte = 0 *)
          bcs     .LLenScan
          (* The cursor has moved past the terminator.  The carry flag is
             known to be clear here, so sbc subtracts the extra 1 for us. *)
          sbc     r0,r1,r0
  .LLenReturn:
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc,lr
  {$else}
          bx      lr
  {$endif}
  .LLenOnes:
          .long   0x01010101
  end;
  {$endif}
  var
    { Lock word of the swp based fallback paths of the InterLocked* routines
      in this file: 0 = free, 1 = taken.  Exported as 'fpc_system_lock'. }
    fpc_system_lock: longint; export name 'fpc_system_lock';
  { Atomically decrements Target by one and returns the new value.
    r0 = address of Target on entry, result in r0.
    Three implementations, selected at compile time:
      - ARMv6 / ARMv7-M / Cortex-M3: ldrex/strex retry loop
      - older cores on Linux: the kernel's kuser_cmpxchg helper at 0xffff0fc0
      - everything else: spin lock on fpc_system_lock taken with swp
    The two pre-ARMv6 paths can run on cores for which the rest of this file
    avoids bx, so the helper call and the return are guarded in the same way
    as everywhere else in the file. }
  function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
  asm
  {$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
  .Lloop:
          ldrex   r1, [r0]
          sub     r1, r1, #1
          strex   r2, r1, [r0]            // r2 = 0 if the store succeeded
          cmp     r2, #0
          bne     .Lloop
          mov     r0, r1
          bx      lr
  {$else}
  {$if defined(linux)}
          stmfd   r13!, {lr}
          mov     r2, r0                  // kuser_cmpxchg does not clobber r2 by definition
  .Latomic_dec_loop:
          ldr     r0, [r2]                // Load the current value
          // We expect this to work without looping most of the time.
          // r3 gets clobbered in kuser_cmpxchg, so in the unlikely case that
          // we have to loop it has to be rebuilt.  It is built with mvn/sub
          // instead of a literal load to avoid cacheline trashing:
          // 0xffff0fff - 0x3f = 0xffff0fc0, the kuser_cmpxchg entry point.
          mvn     r3, #0x0000f000
          sub     r3, r3, #0x3F
          sub     r1, r0, #1              // Decremented value
          // Call kuser_cmpxchg(old = r0, new = r1, ptr = r2)
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     lr, pc                  // pc reads as the address after "mov pc, r3"
          mov     pc, r3
  {$else}
          blx     r3
  {$endif}
          // C flag set: the exchange happened
          movcs   r0, r1                  // We expect that to work most of the time so keep it pipeline friendly
          ldmcsfd r13!, {pc}
          b       .Latomic_dec_loop       // C flag clear: Target changed meanwhile, try again
  {$else}
          // lock
          ldr     r3, .Lfpc_system_lock
          mov     r1, #1
  .Lloop:
          swp     r2, r1, [r3]
          cmp     r2, #0
          bne     .Lloop
          // do the job
          ldr     r1, [r0]
          sub     r1, r1, #1
          str     r1, [r0]
          mov     r0, r1
          // unlock (r2 is known to be 0 here) and return
          str     r2, [r3]
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc, lr
  {$else}
          bx      lr
  {$endif}
  .Lfpc_system_lock:
          .long   fpc_system_lock
  {$endif}
  {$endif}
  end;
  { Atomically increments Target by one and returns the new value.
    r0 = address of Target on entry, result in r0.
    Three implementations, selected at compile time:
      - ARMv6 / ARMv7-M / Cortex-M3: ldrex/strex retry loop
      - older cores on Linux: the kernel's kuser_cmpxchg helper at 0xffff0fc0
      - everything else: spin lock on fpc_system_lock taken with swp
    The two pre-ARMv6 paths can run on cores for which the rest of this file
    avoids bx, so the helper call and the return are guarded in the same way
    as everywhere else in the file. }
  function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
  asm
  {$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
  .Lloop:
          ldrex   r1, [r0]
          add     r1, r1, #1
          strex   r2, r1, [r0]            // r2 = 0 if the store succeeded
          cmp     r2, #0
          bne     .Lloop
          mov     r0, r1
          bx      lr
  {$else}
  {$if defined(linux)}
          stmfd   r13!, {lr}
          mov     r2, r0                  // kuser_cmpxchg does not clobber r2 by definition
  .Latomic_inc_loop:
          ldr     r0, [r2]                // Load the current value
          // We expect this to work without looping most of the time.
          // r3 gets clobbered in kuser_cmpxchg, so in the unlikely case that
          // we have to loop it has to be rebuilt.  It is built with mvn/sub
          // instead of a literal load to avoid cacheline trashing:
          // 0xffff0fff - 0x3f = 0xffff0fc0, the kuser_cmpxchg entry point.
          mvn     r3, #0x0000f000
          sub     r3, r3, #0x3F
          add     r1, r0, #1              // Incremented value
          // Call kuser_cmpxchg(old = r0, new = r1, ptr = r2)
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     lr, pc                  // pc reads as the address after "mov pc, r3"
          mov     pc, r3
  {$else}
          blx     r3
  {$endif}
          // C flag set: the exchange happened
          movcs   r0, r1                  // We expect that to work most of the time so keep it pipeline friendly
          ldmcsfd r13!, {pc}
          b       .Latomic_inc_loop       // C flag clear: Target changed meanwhile, try again
  {$else}
          // lock
          ldr     r3, .Lfpc_system_lock
          mov     r1, #1
  .Lloop:
          swp     r2, r1, [r3]
          cmp     r2, #0
          bne     .Lloop
          // do the job
          ldr     r1, [r0]
          add     r1, r1, #1
          str     r1, [r0]
          mov     r0, r1
          // unlock (r2 is known to be 0 here) and return
          str     r2, [r3]
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc, lr
  {$else}
          bx      lr
  {$endif}
  .Lfpc_system_lock:
          .long   fpc_system_lock
  {$endif}
  {$endif}
  end;
  { Atomically stores Source into Target and returns the value Target held
    before.  r0 = address of Target, r1 = Source, result in r0. }
  function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
  {$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
          // swp is deprecated on ARMv6 and above, use an exclusive
          // load/store pair instead
  .LXchgRetry:
          ldrex   r2, [r0]                // r2 := previous value
          strex   r3, r1, [r0]            // r3 = 0 if the store succeeded
          cmp     r3, #0
          bne     .LXchgRetry
          mov     r0, r2
          bx      lr
  {$else}
          swp     r1, r1, [r0]            // r1 := previous value, Target := Source
          mov     r0, r1
  {$endif}
  end;
  { Atomically adds Source to Target and returns the value Target held BEFORE
    the addition.  r0 = address of Target, r1 = Source, result in r0.
    Three implementations, selected at compile time:
      - ARMv6 / ARMv7-M / Cortex-M3: ldrex/strex retry loop
      - older cores on Linux: the kernel's kuser_cmpxchg helper at 0xffff0fc0
      - everything else: spin lock on fpc_system_lock taken with swp
    All three paths return the previous value; the kuser_cmpxchg path used to
    hand back the sum instead.  The pre-ARMv6 paths guard the helper call and
    the return for cores without bx/blx like the rest of this file. }
  function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
  {$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
  .Lloop:
          ldrex   r2, [r0]                // r2 := previous value
          add     r12, r1, r2
          strex   r3, r12, [r0]           // r3 = 0 if the store succeeded
          cmp     r3, #0
          bne     .Lloop
          mov     r0, r2
          bx      lr
  {$else}
  {$if defined(linux)}
          stmfd   r13!, {r4, lr}
          mov     r2, r0                  // kuser_cmpxchg does not clobber r2 by definition
          mov     r4, r1                  // Save addend
  .Latomic_add_loop:
          ldr     r0, [r2]                // Load the current value
          // We expect this to work without looping most of the time.
          // r3 gets clobbered in kuser_cmpxchg, so in the unlikely case that
          // we have to loop it has to be rebuilt.  It is built with mvn/sub
          // instead of a literal load to avoid cacheline trashing:
          // 0xffff0fff - 0x3f = 0xffff0fc0, the kuser_cmpxchg entry point.
          mvn     r3, #0x0000f000
          sub     r3, r3, #0x3F
          add     r1, r0, r4              // Add to value
          // Call kuser_cmpxchg(old = r0, new = r1, ptr = r2)
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     lr, pc                  // pc reads as the address after "mov pc, r3"
          mov     pc, r3
  {$else}
          blx     r3
  {$endif}
          // C flag set: the exchange happened.  r0 now holds the helper's
          // status, but r1 (the sum) and r4 (the addend) are intact, so the
          // previous value is recovered as r1 - r4.
          subcs   r0, r1, r4
          ldmcsfd r13!, {r4, pc}
          b       .Latomic_add_loop       // C flag clear: Target changed meanwhile, try again
  {$else}
          // lock
          ldr     r3, .Lfpc_system_lock
          mov     r2, #1
  .Lloop:
          swp     r2, r2, [r3]
          cmp     r2, #0
          bne     .Lloop
          // do the job
          ldr     r2, [r0]
          add     r1, r1, r2
          str     r1, [r0]
          mov     r0, r2
          // unlock and return
          mov     r2, #0
          str     r2, [r3]
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc, lr
  {$else}
          bx      lr
  {$endif}
  .Lfpc_system_lock:
          .long   fpc_system_lock
  {$endif}
  {$endif}
  end;
  { Atomically performs: if Target = Comperand then Target := NewValue.
    Returns the value Target held before the call, whether or not the
    exchange took place.
    r0 = address of Target, r1 = NewValue, r2 = Comperand, result in r0.
    Three implementations, selected at compile time:
      - ARMv6 / ARMv7-M / Cortex-M3: ldrex/strex retry loop
      - older cores on Linux: the kernel's kuser_cmpxchg helper at 0xffff0fc0
      - everything else: spin lock on fpc_system_lock taken with swp
    On the kuser_cmpxchg path the helper only reports success or failure, so
    the previous value is reconstructed here: on success it is Comperand by
    definition; on failure Target is re-read, and if it happens to equal
    Comperand again the exchange is retried, because returning Comperand
    without having stored NewValue would look like a success to the caller.
    The pre-ARMv6 paths guard the helper call and the return for cores
    without bx/blx like the rest of this file. }
  function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
  asm
  {$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
  .Lloop:
          ldrex   r3, [r0]                // r3 := previous value
          mov     r12, #0                 // "store succeeded" if strex is skipped
          cmp     r3, r2
          strexeq r12, r1, [r0]
          cmp     r12, #0
          bne     .Lloop
          mov     r0, r3
          bx      lr
  {$else}
  {$if defined(linux)}
          stmfd   r13!, {r4, lr}
          mov     r4, r2                  // r4 := Comperand, survives the helper call
          mov     r2, r0                  // r2 := address of Target, as kuser_cmpxchg expects
  .Lcmpxchg_loop:
          mov     r0, r4                  // r0 := expected old value
          // r3 is clobbered by the helper, so rebuild the entry point on
          // every iteration: 0xffff0fff - 0x3f = 0xffff0fc0
          mvn     r3, #0x0000f000
          sub     r3, r3, #0x3F
          // Call kuser_cmpxchg(old = r0, new = r1, ptr = r2); r1 and r2 are
          // preserved, the C flag is set on success
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     lr, pc                  // pc reads as the address after "mov pc, r3"
          mov     pc, r3
  {$else}
          blx     r3
  {$endif}
          movcs   r0, r4                  // success: the previous value was Comperand
          ldmcsfd r13!, {r4, pc}
          ldr     r0, [r2]                // failure: report the value stored now
          cmp     r0, r4
          ldmnefd r13!, {r4, pc}
          b       .Lcmpxchg_loop          // equals Comperand again: retry the exchange
  {$else}
          // lock
          ldr     r12, .Lfpc_system_lock
          mov     r3, #1
  .Lloop:
          swp     r3, r3, [r12]
          cmp     r3, #0
          bne     .Lloop
          // do the job
          ldr     r3, [r0]
          cmp     r3, r2
          streq   r1, [r0]
          mov     r0, r3
          // unlock and return
          mov     r3, #0
          str     r3, [r12]
  {$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
          mov     pc, lr
  {$else}
          bx      lr
  {$endif}
  .Lfpc_system_lock:
          .long   fpc_system_lock
  {$endif}
  {$endif}
  end;
  {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  { Atomically decrements l; true if l became zero. }
  function declocked(var l: longint) : boolean; inline;
    begin
      declocked:=(InterLockedDecrement(l) = 0);
    end;
  {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  { Atomically increments l; the new value is not reported. }
  procedure inclocked(var l: longint); inline;
    begin
      InterLockedIncrement(l);
    end;
  { Picks the Move implementation for this CPU.  Does nothing unless the Move
    routines of this file are in use (FPC_SYSTEM_FPC_MOVE). }
  procedure fpc_cpucodeinit;
    begin
  {$ifdef FPC_SYSTEM_FPC_MOVE}
      { Assume the CPU supports ldrd, then execute one while in_edsp_test is
        raised.
        NOTE(review): presumably a fault handler outside this file resets
        cpu_has_edsp when the instruction traps - confirm. }
      cpu_has_edsp:=true;
      in_edsp_test:=true;
      asm
        bic     r0,sp,#7                // 8 byte aligned address at or below sp
        ldrd    r0,[r0]                 // the probe; the loaded data is not used
      end;
      in_edsp_test:=false;
      { the pld based Move is used only if the probe left the flag set }
      if not cpu_has_edsp then
        moveproc:=@move_blended
      else
        moveproc:=@move_pld;
  {$endif FPC_SYSTEM_FPC_MOVE}
    end;
  748. {include hand-optimized assembler division code}
  749. {$i divide.inc}