
{
    This file is part of the Free Pascal run time library.
    Copyright (c) 2003 by the Free Pascal development team.

    Processor dependent implementation for the system unit for
    ARM

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}
{$asmmode gas}

{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_FPC_MOVE}
{$endif FPC_SYSTEM_HAS_MOVE}

{$ifdef FPC_SYSTEM_FPC_MOVE}
const
  cpu_has_edsp : boolean = false;
  in_edsp_test : boolean = false;
{$endif FPC_SYSTEM_FPC_MOVE}

{$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
{$define FPC_SYSTEM_HAS_SYSINITFPU}
{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3)}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    rfs r0
    and r0,r0,#0xffe0ffff
    orr r0,r0,#0x00070000
    wfs r0
  end;
end;
{$else}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    fmrx r0,fpscr
    // set "round to nearest" mode
    and r0,r0,#0xff3fffff
    // mask "exception happened" and overflow flags
    and r0,r0,#0xffffff20
    // mask exception flags
    and r0,r0,#0xffff40ff
{$ifndef darwin}
    // Floating point exceptions cause kernel panics on iPhoneOS 2.2.1...
    // disable flush-to-zero mode (IEEE math compliant)
    and r0,r0,#0xfeffffff
    // enable invalid operation, div-by-zero and overflow exceptions
    orr r0,r0,#0x00000700
{$endif}
    fmxr fpscr,r0
  end;
end;
{$endif}
{$endif}
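
{ For reference, a summary of the VFP FPSCR fields the masks above touch
  (as described in the ARM architecture manual; not verified per core):
  bits 23..22 select the rounding mode (00 = round to nearest), bit 24 is
  flush-to-zero, bits 12..8 are the exception-enable bits (inexact,
  underflow, overflow, div-by-zero, invalid) and bits 4..0 the matching
  cumulative exception flags. The final orr #0x00000700 thus enables the
  invalid, div-by-zero and overflow traps, as the comment says. }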

procedure fpc_cpuinit;
begin
  { don't let libraries influence the FPU cw set by the host program }
  if not IsLibrary then
    SysInitFPU;
end;

{$ifdef wince}
function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';

{$define FPC_SYSTEM_HAS_SYSRESETFPU}
Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  softfloat_exception_flags:=0;
end;

{$define FPC_SYSTEM_HAS_SYSINITFPU}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  { FPU precision 64 bit, rounding to nearest, affine infinity }
  _controlfp($000C0003, $030F031F);
end;
{$endif wince}

{****************************************************************************
                         stack frame related stuff
 ****************************************************************************}

{$IFNDEF INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_FRAME}
function get_frame:pointer;assembler;nostackframe;
asm
  mov r0,r11
end;
{$ENDIF not INTERNAL_BACKTRACE}

{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
function get_caller_addr(framebp:pointer):pointer;assembler;
asm
  movs r0,r0
  beq .Lg_a_null
  ldr r0,[r0,#-4]
.Lg_a_null:
end;

{$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
function get_caller_frame(framebp:pointer):pointer;assembler;
asm
  movs r0,r0
  beq .Lgnf_null
  // see comments in arm/cgcpu.pas, g_proc_entry
  ldr r0,[r0,#-12]
.Lgnf_null:
end;
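
{ Sketch of the APCS-style frame the two routines above assume; the exact
  layout is produced by g_proc_entry (referenced in the comment above),
  so treat the [fp] and [fp-8] slots here as an assumption:
    [fp]     saved pc
    [fp-4]   saved lr  -> what get_caller_addr loads
    [fp-8]   saved sp
    [fp-12]  saved fp  -> what get_caller_frame loads }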

{$define FPC_SYSTEM_HAS_SPTR}
Function Sptr : pointer;assembler;
asm
  mov r0,sp
end;

{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm
  // less than 0?
  cmp r1,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  movle pc,lr
{$else}
  bxle lr
{$endif}
  mov r3,r0
  orr r2,r2,r2,lsl #8
  orr r2,r2,r2,lsl #16
  tst r3, #3 // Aligned?
  bne .LFillchar_do_align
.LFillchar_is_aligned:
  subs r1,r1,#8
  bmi .LFillchar_less_than_8bytes
  mov ip,r2
.LFillchar_at_least_8bytes:
  // Do 16 bytes per loop
  // More unrolling is unnecessary, as we'd just stall on the write buffers
  stmia r3!,{r2,ip}
  subs r1,r1,#8
  stmplia r3!,{r2,ip}
  subpls r1,r1,#8
  bpl .LFillchar_at_least_8bytes
.LFillchar_less_than_8bytes:
  // Do the rest
  adds r1, r1, #8
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  moveq pc,lr
{$else}
  bxeq lr
{$endif}
  tst r1, #4
  strne r2,[r3],#4
  tst r1, #2
  strneh r2,[r3],#2
  tst r1, #1
  strneb r2,[r3],#1
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
  // Special case for unaligned start
  // We make a maximum of 3 loops here
.LFillchar_do_align:
  strb r2,[r3],#1
  subs r1, r1, #1
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  moveq pc,lr
{$else}
  bxeq lr
{$endif}
  tst r3,#3
  bne .LFillchar_do_align
  b .LFillchar_is_aligned
end;
{$endif FPC_SYSTEM_HAS_FILLCHAR}
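
{ FillChar above widens the byte value to a full word before the stmia
  loop; in Pascal terms, the two orr instructions compute (a sketch, not
  RTL code):
    pattern := value or (value shl 8);      // value replicated into 2 bytes
    pattern := pattern or (pattern shl 16); // ...and into all 4 bytes }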

{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE}
procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
asm
  pld [r0]
  // count <=0 ?
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  movle pc,lr
{$else}
  bxle lr
{$endif}
  // overlap?
  cmp r1,r0
  bls .Lnooverlap
  add r3,r0,r2
  cmp r3,r1
  bls .Lnooverlap
  // overlap, copy backward
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
.Lnooverlap:
  // fewer than 8 bytes to copy?
  cmp r2,#8
  // yes, then forget about all the optimizations
  // and do a bytewise copy
  blt .Lbyteloop
  // both aligned?
  orr r3,r0,r1
  tst r3,#3
  bne .Lbyteloop
(*
  // yes, then align
  // alignment to 4 byte boundaries is enough
  ldrb ip,[r0],#1
  sub r2,r2,#1
  strb ip,[r1],#1
  tst r3,#2
  bne .Ldifferentaligned
  ldrh ip,[r0],#2
  sub r2,r2,#2
  strh ip,[r1],#2
.Ldifferentaligned:
  // qword aligned?
  orrs r3,r0,r1
  tst r3,#7
  bne .Ldwordloop
*)
  pld [r0,#32]
.Ldwordloop:
  sub r2,r2,#4
  ldr r3,[r0],#4
  // preload
  pld [r0,#64]
  cmp r2,#4
  str r3,[r1],#4
  bcs .Ldwordloop
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  moveq pc,lr
{$else}
  bxeq lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
end;

procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
asm
  // count <=0 ?
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  movle pc,lr
{$else}
  bxle lr
{$endif}
  // overlap?
  cmp r1,r0
  bls .Lnooverlap
  add r3,r0,r2
  cmp r3,r1
  bls .Lnooverlap
  // overlap, copy backward
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
.Lnooverlap:
  // fewer than 8 bytes to copy?
  cmp r2,#8
  // yes, then forget about all the optimizations
  // and do a bytewise copy
  blt .Lbyteloop
  // both aligned?
  orr r3,r0,r1
  tst r3,#3
  bne .Lbyteloop
(*
  // yes, then align
  // alignment to 4 byte boundaries is enough
  ldrb ip,[r0],#1
  sub r2,r2,#1
  strb ip,[r1],#1
  tst r3,#2
  bne .Ldifferentaligned
  ldrh ip,[r0],#2
  sub r2,r2,#2
  strh ip,[r1],#2
.Ldifferentaligned:
  // qword aligned?
  orrs r3,r0,r1
  tst r3,#7
  bne .Ldwordloop
*)
.Ldwordloop:
  sub r2,r2,#4
  ldr r3,[r0],#4
  cmp r2,#4
  str r3,[r1],#4
  bcs .Ldwordloop
  cmp r2,#0
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  moveq pc,lr
{$else}
  bxeq lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
end;

const
  moveproc : pointer = @move_blended;

procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
asm
  ldr ip,.Lmoveproc
  ldr pc,[ip]
.Lmoveproc:
  .long moveproc
end;
{$endif FPC_SYSTEM_HAS_MOVE}
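
{ Move is a tail jump through the moveproc pointer: fpc_cpucodeinit (below)
  points it at Move_pld when the pld-capable code path is usable and at
  Move_blended otherwise, so the CPU capability test is paid once at
  startup instead of on every call. }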

{****************************************************************************
                                 String
 ****************************************************************************}

{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}

{$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{$else}
procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{$endif}
{r0: __RESULT
 r1: len
 r2: sstr}
asm
  ldrb r12,[r2],#1
  cmp r12,r1
  movgt r12,r1
  strb r12,[r0],#1
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  rsb r3,r0,#0
  ands r3,r3,#3
  sub r12,r12,r3
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r2,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ldrne r1,[r2],#4
  strne r1,[r0],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r1,[r2],#1
  strb r1,[r0],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;

procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
{r0: len
 r1: sstr
 r2: dstr}
asm
  ldrb r12,[r1],#1
  cmp r12,r0
  movgt r12,r0
  strb r12,[r2],#1
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  rsb r3,r2,#0
  ands r3,r3,#3
  sub r12,r12,r3
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r1,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ldrne r0,[r1],#4
  strne r0,[r2],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r0,[r1],#1
  strb r0,[r2],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
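
{ In both routines above, "rsb r3,<dest>,#0 / ands r3,r3,#3" computes
  (-dest) and 3, i.e. the number of bytes to copy before the destination
  reaches the next 4-byte boundary. Worked example: a destination address
  ending in $D gives (-$D) and 3 = 3, and indeed $D needs 3 bytes to reach
  $10 where the word loop can start. }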

{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
asm
  cmp r0,#0
  mov r1,r0
  beq .Ldone
.Lnextchar:
  (*Are we aligned?*)
  tst r1,#3
  bne .Ltest_unaligned (*No, do byte per byte.*)
  ldr r3,.L01010101
.Ltest_aligned:
  (*Aligned, load 4 bytes at a time.*)
  ldr r12,[r1],#4
  (*Check whether r12 contains a 0 byte.*)
  sub r2,r12,r3
  mvn r12,r12
  and r2,r2,r12
  ands r2,r2,r3,lsl #7 (*r3 lsl 7 = $80808080*)
  beq .Ltest_aligned (*No 0 byte, repeat.*)
  sub r1,r1,#4
.Ltest_unaligned:
  ldrb r12,[r1],#1
  cmp r12,#1 (*r12<1 is the same test as r12=0, but it also sets the carry flag*)
  bcs .Lnextchar
  (*Dirty trick: we need to subtract 1 extra because we have counted the
    terminating 0, due to the known carry flag sbc can do this.*)
  sbc r0,r1,r0
.Ldone:
{$if defined(cpuarmv3) or defined(cpuarmv4)}
  mov pc,lr
{$else}
  bx lr
{$endif}
.L01010101:
  .long 0x01010101
end;
{$endif}
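
{ The aligned loop above uses the classic SWAR zero-byte test: for a word x,
  (x - $01010101) and (not x) and $80808080 is non-zero exactly when some
  byte of x is zero. Worked example on a single byte: b = $00 gives
  ($00-$01) and (not $00) and $80 = $FF and $FF and $80 = $80, while for any
  non-zero byte the top bit of the result ends up clear, either because the
  subtract leaves it clear or because "and not x" masks it away. }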

var
  fpc_system_lock: longint; export name 'fpc_system_lock';

function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r1, [r0]
  sub r1, r1, #1
  strex r2, r1, [r0]
  cmp r2, #0
  bne .Lloop
  mov r0, r1
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {lr}
  mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
.Latomic_dec_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr, so in reality we won't see any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  sub r1, r0, #1 // Decrement value
  blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  movcs r0, r1 // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_dec_loop // kuser_cmpxchg failed, loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  sub r1, r1, #1
  str r1, [r0]
  mov r0, r1
  // unlock and return
  str r2, [r3]
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
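
{ Background on the Linux code path above (from the kernel's
  Documentation/arm/kernel_user_helpers.txt): __kernel_cmpxchg lives at the
  fixed address 0xffff0fc0 in the vector page and takes oldval in r0,
  newval in r1 and the target pointer in r2; it returns with the C flag set
  (and r0 = 0) when the store succeeded. That is why r3 is built as
  0xffff0fff - 0x3f and called with blx here and in the routines below. }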

function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r1, [r0]
  add r1, r1, #1
  strex r2, r1, [r0]
  cmp r2, #0
  bne .Lloop
  mov r0, r1
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {lr}
  mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
.Latomic_inc_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr, so in reality we won't see any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, #1 // Increment value
  blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  movcs r0, r1 // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_inc_loop // kuser_cmpxchg failed, loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  add r1, r1, #1
  str r1, [r0]
  mov r0, r1
  // unlock and return
  str r2, [r3]
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;

function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
  // swp is deprecated on ARMv6 and above
.Lloop:
  ldrex r2, [r0]
  strex r3, r1, [r0]
  cmp r3, #0
  bne .Lloop
  mov r0, r2
  bx lr
{$else}
  swp r1, r1, [r0]
  mov r0,r1
{$endif}
end;

function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r2, [r0]
  add r12, r1, r2
  strex r3, r12, [r0]
  cmp r3, #0
  bne .Lloop
  mov r0, r2
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {r4, lr}
  mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
  mov r4, r1 // Save addend
.Latomic_add_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr, so in reality we won't see any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, r4 // Add to value
  blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  // r1 does not get clobbered, so just get back the original value
  // Otherwise we would have to allocate one more register and store the
  // temporary value
  subcs r0, r1, r4
  ldmcsfd r13!, {r4, pc}
  b .Latomic_add_loop // kuser_cmpxchg failed, loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r2, #1
.Lloop:
  swp r2, r2, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r2, [r0]
  add r1, r1, r2
  str r1, [r0]
  mov r0, r2
  // unlock and return
  mov r2, #0
  str r2, [r3]
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;

function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
asm
{$if defined(cpuarmv6) or defined(cpuarmv7m) or defined(cpucortexm3)}
.Lloop:
  ldrex r3, [r0]
  mov r12, #0
  cmp r3, r2
  strexeq r12, r1, [r0]
  cmp r12, #0
  bne .Lloop
  mov r0, r3
  bx lr
{$else}
{$if defined(LINUX) and defined(CPUARMEL)}
  stmfd r13!, {r4, lr}
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  mov r4, r2 // Swap parameters around
  mov r2, r0
  mov r0, r4 // Use r4 because we'll need the new value for later
  // r1 and r2 will not be clobbered by kuser_cmpxchg
  // If we have to loop, r0 will be set to the original Comperand
.Linterlocked_compare_exchange_loop:
  blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  movcs r0, r4 // Return the previous value on success
  ldmcsfd r13!, {r4, pc}
  // The error case is a bit tricky, kuser_cmpxchg does not return the current value
  // So we may need to loop to avoid race conditions
  // The loop case is HIGHLY unlikely, it would require that we got rescheduled between
  // calling kuser_cmpxchg and the ldr. While being rescheduled another process/thread
  // would have had to set the value to our Comperand
  ldr r0, [r2] // Load the currently set value
  cmp r0, r4 // Return if Comperand != current value, otherwise loop again
  ldmnefd r13!, {r4, pc}
  // If we need to loop here, we have to
  b .Linterlocked_compare_exchange_loop
{$else}
  // lock
  ldr r12, .Lfpc_system_lock
  mov r3, #1
.Lloop:
  swp r3, r3, [r12]
  cmp r3, #0
  bne .Lloop
  // do the job
  ldr r3, [r0]
  cmp r3, r2
  streq r1, [r0]
  mov r0, r3
  // unlock and return
  mov r3, #0
  str r3, [r12]
  bx lr
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
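
{ InterlockedCompareExchange returns the previous value of Target and only
  stores NewValue when that previous value equals Comperand. A typical use
  (just a sketch, not part of this file) is a try-lock:
    if InterlockedCompareExchange(lock, 1, 0) = 0 then
      ... we acquired the lock, release it later with lock := 0 ...
}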

{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
function declocked(var l: longint) : boolean; inline;
begin
  Result:=InterLockedDecrement(l) = 0;
end;

{$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
procedure inclocked(var l: longint); inline;
begin
  InterLockedIncrement(l);
end;

procedure fpc_cpucodeinit;
begin
{$ifdef FPC_SYSTEM_FPC_MOVE}
  cpu_has_edsp:=true;
  in_edsp_test:=true;
  asm
    bic r0,sp,#7
    ldrd r0,[r0]
  end;
  in_edsp_test:=false;
  if cpu_has_edsp then
    moveproc:=@move_pld
  else
    moveproc:=@move_blended;
{$endif FPC_SYSTEM_FPC_MOVE}
end;
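
{ The EDSP probe above simply executes an ldrd (an ARMv5TE/EDSP
  instruction) on an 8-byte aligned stack address. On cores without EDSP
  this raises an undefined-instruction trap; the OS-specific handler is
  expected to check in_edsp_test and clear cpu_has_edsp before resuming
  (an assumption about code living outside this file). }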

{$define FPC_SYSTEM_HAS_SWAPENDIAN}

{ SwapEndian(<16 Bit>) being inlined is faster than using assembler }
function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { the extra Word type cast is necessary because the "AValue shr 8" }
  { is turned into "longint(AValue) shr 8", so if AValue < 0 then    }
  { the sign bits from the upper 16 bits are shifted in rather than  }
  { zeroes.                                                          }
  Result := SmallInt((Word(AValue) shr 8) or (Word(AValue) shl 8));
end;

function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result := Word((AValue shr 8) or (AValue shl 8));
end;
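
{ Worked example for the 16 bit swap: AValue = $1234 gives
  ($1234 shr 8) or ($1234 shl 8) = $0012 or $3400 = $3412. }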

(*
  This is kept for reference. That's what the compiler COULD generate in
  these cases. But FPC currently does not support inlining of asm-functions,
  so the whole call-overhead is bigger than the gain of the optimized function.

function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
asm
  // We're starting with 4321
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov r0, r0, lsl #16     // Shift to make that 2100
  mov r0, r0, ror #24     // Rotate to 1002
  orr r0, r0, r0, lsr #16 // Shift and combine into 0012
{$else}
  rev r0, r0          // Reverse byte order, r0 = 1234
  mov r0, r0, lsr #16 // Shift down to 16 bits, r0 = 0012
{$endif}
end;
*)

function SwapEndian(const AValue: LongInt): LongInt;assembler;nostackframe;
asm
  // We're starting with r0 = 4321
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov r2, r0, lsr #24     // r2 = 0004
  and r1, r0, #16711680   // r1 = 0300
  orr r2, r2, r0, lsl #24 // r2 = 1004
  orr r2, r2, r1, lsr #8  // r2 = 1034
  and r0, r0, #65280      // r0 = 0020
  orr r0, r2, r0, lsl #8  // r0 = 1234
{$else}
  rev r0, r0
{$endif}
end;

function SwapEndian(const AValue: DWord): DWord;assembler;nostackframe;
asm
  // We're starting with r0 = 4321
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov r2, r0, lsr #24     // r2 = 0004
  and r1, r0, #16711680   // r1 = 0300
  orr r2, r2, r0, lsl #24 // r2 = 1004
  orr r2, r2, r1, lsr #8  // r2 = 1034
  and r0, r0, #65280      // r0 = 0020
  orr r0, r2, r0, lsl #8  // r0 = 1234
{$else}
  rev r0, r0
{$endif}
end;
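
{ In the pre-ARMv6 path above, 16711680 is $00FF0000 and 65280 is $0000FF00,
  so the six instructions move each byte of AValue to its mirrored position;
  for example $11223344 becomes $44332211. }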

function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
asm
  // We're starting with r0 = 4321 r1 = 8765
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov ip, r1
  mov r2, r0, lsr #24     // r2 = 0004
  and r3, r0, #16711680   // r3 = 0300
  orr r2, r2, r0, lsl #24 // r2 = 1004
  orr r2, r2, r3, lsr #8  // r2 = 1034
  and r0, r0, #65280      // r0 = 0020
  orr r1, r2, r0, lsl #8  // r1 = 1234
  mov r2, ip, lsr #24     // r2 = 0008
  and r3, ip, #16711680   // r3 = 0700
  orr r2, r2, ip, lsl #24 // r2 = 5008
  orr r2, r2, r3, lsr #8  // r2 = 5078
  and ip, ip, #65280      // ip = 0060
  orr r0, r2, ip, lsl #8  // r0 = 5678
  bx lr
{$else}
  rev r2, r0
  rev r0, r1
  mov r1, r2
{$endif}
end;

function SwapEndian(const AValue: QWord): QWord; assembler; nostackframe;
asm
  // We're starting with r0 = 4321 r1 = 8765
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov ip, r1
  mov r2, r0, lsr #24     // r2 = 0004
  and r3, r0, #16711680   // r3 = 0300
  orr r2, r2, r0, lsl #24 // r2 = 1004
  orr r2, r2, r3, lsr #8  // r2 = 1034
  and r0, r0, #65280      // r0 = 0020
  orr r1, r2, r0, lsl #8  // r1 = 1234
  mov r2, ip, lsr #24     // r2 = 0008
  and r3, ip, #16711680   // r3 = 0700
  orr r2, r2, ip, lsl #24 // r2 = 5008
  orr r2, r2, r3, lsr #8  // r2 = 5078
  and ip, ip, #65280      // ip = 0060
  orr r0, r2, ip, lsl #8  // r0 = 5678
  bx lr
{$else}
  rev r2, r0
  rev r0, r1
  mov r1, r2
{$endif}
end;
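
{ For the 64 bit swaps, the value arrives as a register pair with the low
  word in r0 and the high word in r1 (the little-endian EABI convention),
  so reversing the whole quantity means byte-swapping each word and then
  exchanging the two words, which is what both paths above do. }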

{include hand-optimized assembler division code}
{$i divide.inc}