arm.inc

{
    This file is part of the Free Pascal run time library.
    Copyright (c) 2003 by the Free Pascal development team.

    Processor dependent implementation for the system unit for
    ARM

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}

{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_FPC_MOVE}
{$endif FPC_SYSTEM_HAS_MOVE}

{$ifdef FPC_SYSTEM_FPC_MOVE}
const
  cpu_has_edsp : boolean = false;
  in_edsp_test : boolean = false;
{$endif FPC_SYSTEM_FPC_MOVE}

{$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
{$define FPC_SYSTEM_HAS_SYSINITFPU}
{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV3_D16)}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    rfs r0
    and r0,r0,#0xffe0ffff
    orr r0,r0,#0x00070000
    wfs r0
  end;
end;
{$else}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    fmrx r0,fpscr
    // set "round to nearest" mode
    and r0,r0,#0xff3fffff
    // mask "exception happened" and overflow flags
    and r0,r0,#0xffffff20
    // mask exception flags
    and r0,r0,#0xffff40ff
{$ifndef darwin}
    // Floating point exceptions cause kernel panics on iPhoneOS 2.2.1...
    // disable flush-to-zero mode (IEEE math compliant)
    and r0,r0,#0xfeffffff
    // enable invalid operation, div-by-zero and overflow exceptions
    orr r0,r0,#0x00000700
{$endif}
    fmxr fpscr,r0
  end;
end;
{$endif}
{$endif}
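
{ Illustrative sketch (not part of the RTL): the same FPSCR update as the
  non-Darwin VFP path above, written out in Pascal. It assumes the standard
  VFP FPSCR layout (rounding mode in bits 23:22, flush-to-zero in bit 24,
  exception trap enables in bits 8..12/15, cumulative exception flags in the
  low byte); the function name is invented for clarity. }
(*
function NewFpscrValue(old: LongWord): LongWord;
begin
  Result:=old and $FF3FFFFF;     // rounding mode := 00 (round to nearest)
  Result:=Result and $FFFFFF20;  // clear the cumulative "exception happened" flags
  Result:=Result and $FFFF40FF;  // clear the exception trap enable bits
  Result:=Result and $FEFFFFFF;  // disable flush-to-zero (IEEE-compliant denormals)
  Result:=Result or $00000700;   // enable invalid-op, div-by-zero and overflow traps
end;
*)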

procedure fpc_cpuinit;
begin
  { don't let libraries influence the FPU cw set by the host program }
  if not IsLibrary then
    SysInitFPU;
end;

{$ifdef wince}
function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';

{$define FPC_SYSTEM_HAS_SYSRESETFPU}
Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
end;

{$define FPC_SYSTEM_HAS_SYSINITFPU}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  { FPU precision 64 bit, rounding to nearest, affine infinity }
  _controlfp($000C0003, $030F031F);
end;
{$endif wince}

{****************************************************************************
                       stack frame related stuff
****************************************************************************}

{$IFNDEF INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_FRAME}
function get_frame:pointer;assembler;nostackframe;
asm
{$ifndef darwin}
  mov r0,r11
{$else}
  mov r0,r7
{$endif}
end;
{$ENDIF not INTERNAL_BACKTRACE}

{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
asm
  cmp r0,#0
{$ifndef darwin}
  ldrne r0,[r0,#-4]
{$else}
  ldrne r0,[r0,#4]
{$endif}
end;

{$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
asm
  cmp r0,#0
{$ifndef darwin}
  ldrne r0,[r0,#-12]
{$else}
  ldrne r0,[r0]
{$endif}
end;
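
{ Usage sketch (illustrative only, not part of the RTL): the three primitives
  above are what generic backtrace code builds on. A minimal frame walk could
  look like this, relying only on what get_caller_addr/get_caller_frame
  already encode about the frame layout: }
(*
procedure SimpleBacktrace(maxdepth: longint);
var
  bp, addr: pointer;
  i: longint;
begin
  bp:=get_frame;
  i:=0;
  while (bp<>nil) and (i<maxdepth) do
    begin
      addr:=get_caller_addr(bp);   // saved return address of this frame
      writeln(hexstr(ptruint(addr),8));
      bp:=get_caller_frame(bp);    // follow the saved frame pointer
      inc(i);
    end;
end;
*)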

{$define FPC_SYSTEM_HAS_SPTR}
Function Sptr : pointer;assembler;nostackframe;
asm
  mov r0,sp
end;

{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm
  // less than 0?
  cmp r1,#0
{$ifdef CPUARM_HAS_BX}
  bxle lr
{$else}
  movle pc,lr
{$endif}
  mov r3,r0
  orr r2,r2,r2,lsl #8
  orr r2,r2,r2,lsl #16
  tst r3, #3  // Aligned?
  bne .LFillchar_do_align
.LFillchar_is_aligned:
  subs r1,r1,#8
  bmi .LFillchar_less_than_8bytes
  mov ip,r2
.LFillchar_at_least_8bytes:
  // Do 16 bytes per loop
  // More unrolling is unnecessary, as we'll just stall on the write buffers
  stmia r3!,{r2,ip}
  subs r1,r1,#8
  stmplia r3!,{r2,ip}
  subpls r1,r1,#8
  bpl .LFillchar_at_least_8bytes
.LFillchar_less_than_8bytes:
  // Do the rest
  adds r1, r1, #8
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  tst r1, #4
  strne r2,[r3],#4
{$ifdef CPUARM_HAS_ALL_MEM}
  tst r1, #2
  strneh r2,[r3],#2
{$else CPUARM_HAS_ALL_MEM}
  tst r1, #2
  strneb r2,[r3],#1
  strneb r2,[r3],#1
{$endif CPUARM_HAS_ALL_MEM}
  tst r1, #1
  strneb r2,[r3],#1
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
  // Special case for unaligned start
  // We make a maximum of 3 loops here
.LFillchar_do_align:
  strb r2,[r3],#1
  subs r1, r1, #1
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  tst r3,#3
  bne .LFillchar_do_align
  b .LFillchar_is_aligned
end;
{$endif FPC_SYSTEM_HAS_FILLCHAR}
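
{ Reference sketch in plain Pascal of the algorithm used above (illustrative
  only, not part of the RTL): replicate the byte into a 32-bit pattern, align
  the destination, store word-wise, then finish the tail byte-wise. The
  assembler version additionally stores two registers per iteration. }
(*
procedure FillCharRef(var x; count: longint; value: byte);
var
  p: pbyte;
  pattern: longword;
begin
  if count<=0 then
    exit;
  p:=@x;
  pattern:=value or (longword(value) shl 8);
  pattern:=pattern or (pattern shl 16);
  { align the destination on 4 bytes }
  while (count>0) and ((ptruint(p) and 3)<>0) do
    begin
      p^:=value;
      inc(p);
      dec(count);
    end;
  { word loop }
  while count>=4 do
    begin
      plongword(p)^:=pattern;
      inc(p,4);
      dec(count,4);
    end;
  { remaining bytes }
  while count>0 do
    begin
      p^:=value;
      inc(p);
      dec(count);
    end;
end;
*)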

{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE}
{$ifdef CPUARM_HAS_EDSP}
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
{$else CPUARM_HAS_EDSP}
procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
{$endif CPUARM_HAS_EDSP}
asm
  // pld [r0]
  // encode this using .long so the RTL also assembles with instruction sets not supporting pld
  .long 0xf5d0f000
  // count <=0 ?
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxle lr
{$else}
  movle pc,lr
{$endif}
  // overlap?
  subs r3, r1, r0    // if (dest > source) and
  cmphi r2, r3       //    (count > dest - src) then
  bhi .Loverlapped   //   DoReverseByteCopy;
  cmp r2,#8          // if (count < 8) then
  blt .Lbyteloop     //   DoForwardByteCopy;
  // Any way to avoid the above jump and fuse the next two instructions?
  tst r0, #3         // if (source and 3) <> 0 or
  tsteq r1, #3       //    (dest and 3) <> 0 then
  bne .Lbyteloop     //   DoForwardByteCopy;
  // pld [r0,#32]
  // encode this using .long so the RTL also assembles with instruction sets not supporting pld
  .long 0xf5d0f020
.Ldwordloop:
  ldmia r0!, {r3, ip}
  // preload
  // pld [r0,#64]
  // encode this using .long so the RTL also assembles with instruction sets not supporting pld
  .long 0xf5d0f040
  sub r2,r2,#8
  cmp r2, #8
  stmia r1!, {r3, ip}
  bge .Ldwordloop
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
end;
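
{ Illustrative Pascal sketch (not part of the RTL) of the dispatch logic used
  above: a reverse byte copy is only needed when dest lies inside
  [source,source+count). The single unsigned comparison "count > dest-source"
  covers that case, because for dest <= source the difference wraps around to
  a huge unsigned value. }
(*
procedure MoveRef(const source; var dest; count: longint);
var
  s, d: pbyte;
  i: longint;
begin
  if count<=0 then
    exit;
  s:=@source;
  d:=@dest;
  if ptruint(d)-ptruint(s)<ptruint(count) then
    begin
      { dest overlaps the tail of source: copy backwards }
      inc(s,count);
      inc(d,count);
      for i:=1 to count do
        begin
          dec(s);
          dec(d);
          d^:=s^;
        end;
    end
  else
    { forward copy; the assembler version switches to ldm/stm pairs once
      both pointers are 4-byte aligned and at least 8 bytes remain }
    for i:=1 to count do
      begin
        d^:=s^;
        inc(d);
        inc(s);
      end;
end;
*)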

{$ifndef CPUARM_HAS_EDSP}
procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
asm
  // count <=0 ?
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxle lr
{$else}
  movle pc,lr
{$endif}
  // overlap?
  subs r3, r1, r0    // if (dest > source) and
  cmphi r2, r3       //    (count > dest - src) then
  bhi .Loverlapped   //   DoReverseByteCopy;
  cmp r2,#8          // if (count < 8) then
  blt .Lbyteloop     //   DoForwardByteCopy;
  // Any way to avoid the above jump and fuse the next two instructions?
  tst r0, #3         // if (source and 3) <> 0 or
  tsteq r1, #3       //    (dest and 3) <> 0 then
  bne .Lbyteloop     //   DoForwardByteCopy;
.Ldwordloop:
  ldmia r0!, {r3, ip}
  sub r2,r2,#8
  cmp r2, #8
  stmia r1!, {r3, ip}
  bge .Ldwordloop
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
end;

const
  moveproc : pointer = @move_blended;

procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
asm
  ldr ip,.Lmoveproc
  ldr pc,[ip]
.Lmoveproc:
  .long moveproc
end;
{$endif CPUARM_HAS_EDSP}
{$endif FPC_SYSTEM_HAS_MOVE}
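
{ The FPC_MOVE dispatcher above is just an indirect jump through the moveproc
  variable, selected at startup by fpc_cpucodeinit. A Pascal-level sketch of
  the same idea (illustrative only, names invented): }
(*
type
  TMoveProc = procedure(const source; var dest; count: longint);
var
  CurrentMove: TMoveProc;   // set once during initialization

procedure DispatchMove(const source; var dest; count: longint);
begin
  CurrentMove(source,dest,count);
end;
*)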

{****************************************************************************
                                 String
****************************************************************************}

{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{r0: __RESULT
 r1: len
 r2: sstr}
asm
  ldrb r12,[r2],#1
  cmp r12,r1
  movgt r12,r1
  strb r12,[r0],#1
  cmp r12,#6   (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32 bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early. *)
  rsb r3,r0,#0
  ands r3,r3,#3
  sub r12,r12,r3
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned;
     if this is the case, do a byte-per-byte copy. *)
  tst r2,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bits at a time. *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case. *)
  ldrne r1,[r2],#4
  strne r1,[r0],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r1,[r2],#1
  strb r1,[r0],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;

procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
{r0: len
 r1: sstr
 r2: dstr}
asm
  ldrb r12,[r1],#1
  cmp r12,r0
  movgt r12,r0
  strb r12,[r2],#1
  cmp r12,#6   (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32 bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early. *)
  rsb r3,r2,#0
  ands r3,r3,#3
  sub r12,r12,r3
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned;
     if this is the case, do a byte-per-byte copy. *)
  tst r1,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bits at a time. *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case. *)
  ldrne r0,[r1],#4
  strne r0,[r2],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r0,[r1],#1
  strb r0,[r2],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}

{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
asm
  cmp r0,#0
  mov r1,r0
  beq .Ldone
.Lnextchar:
  (* Are we aligned? *)
  tst r1,#3
  bne .Ltest_unaligned   (* No, do byte per byte. *)
  ldr r3,.L01010101
.Ltest_aligned:
  (* Aligned, load 4 bytes at a time. *)
  ldr r12,[r1],#4
  (* Check whether r12 contains a 0 byte. *)
  sub r2,r12,r3
  mvn r12,r12
  and r2,r2,r12
  ands r2,r2,r3,lsl #7   (* r3 lsl 7 = $80808080 *)
  beq .Ltest_aligned     (* No 0 byte, repeat. *)
  sub r1,r1,#4
.Ltest_unaligned:
  ldrb r12,[r1],#1
  cmp r12,#1   (* r12<1 is the same as r12=0, but sets the carry flag *)
  bcs .Lnextchar
  (* Dirty trick: we need to subtract 1 extra because we have counted the
     terminating 0; due to the known carry flag, sbc can do this. *)
  sbc r0,r1,r0
.Ldone:
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.L01010101:
  .long 0x01010101
end;
{$endif}
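
{ The aligned loop above relies on the classic "is there a zero byte in this
  word" bit trick: (x-$01010101) and (not x) and $80808080 is non-zero exactly
  when at least one byte of x is zero (the assembler builds $80808080 as
  $01010101 shl 7). Illustrative Pascal sketch of the same word-at-a-time
  scan (not part of the RTL): }
(*
function WordHasZeroByte(x: longword): boolean;
begin
  Result:=((x-$01010101) and (not x) and $80808080)<>0;
end;

function PCharLengthRef(p: pchar): sizeint;
var
  start: pchar;
begin
  if p=nil then
    exit(0);
  start:=p;
  { byte-wise until the pointer is 4-byte aligned }
  while (ptruint(p) and 3)<>0 do
    if p^=#0 then
      exit(p-start)
    else
      inc(p);
  { word-wise until a word containing a zero byte is found }
  while not WordHasZeroByte(plongword(p)^) do
    inc(p,4);
  { locate the terminator within that word }
  while p^<>#0 do
    inc(p);
  Result:=p-start;
end;
*)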

{$ifndef darwin}
{$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
Procedure fpc_ansistr_decr_ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF'];assembler;nostackframe; compilerproc;
asm
  ldr r1, [r0]
  // On return the pointer will always be set to zero, so utilize the delay slots
  mov r2, #0
  str r2, [r0]
  // Check for a zero string
  cmp r1, #0
  // Load reference counter
  ldrne r2, [r1, #-8]
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  // Check for a constant string
  cmp r2, #0
{$ifdef CPUARM_HAS_BX}
  bxlt lr
{$else}
  movlt pc,lr
{$endif}
  stmfd sp!, {r1, lr}
  sub r0, r1, #8
{$if defined(CPUARM_HAS_BLX_LABEL) and not(defined(WINCE))}
  blx InterLockedDecrement
{$else defined(CPUARM_HAS_BLX_LABEL) and not(defined(WINCE))}
  bl InterLockedDecrement
{$endif defined(CPUARM_HAS_BLX_LABEL) and not(defined(WINCE))}
  // InterLockedDecrement is a nice guy and sets the z flag for us
  // if the reference count dropped to 0
  ldmnefd sp!, {r1, pc}
  ldmfd sp!, {r0, lr}
  // We currently cannot use constant symbols in ARM assembly,
  // but we need to stay backward compatible with 2.6
  sub r0, r0, #12
  // Jump without a link, so freemem directly returns to our caller
  b FPC_FREEMEM
end;
{$endif not darwin}

var
  fpc_system_lock: longint; export name 'fpc_system_lock';

function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r1, [r0]
  sub r1, r1, #1
  strex r2, r1, [r0]
  cmp r2, #0
  bne .Lloop
  movs r0, r1
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
.Latomic_dec_loop:
  ldr r0, [r2]   // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cache line thrashing
  // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  sub r1, r0, #1   // Decrement value
{$ifdef CPUARM_HAS_BLX}
  blx r3   // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  // MOVS sets the Z flag when the result reaches zero, this can be used later on
  // The C-Flag will not be modified by this because we're not doing any shifting
  movcss r0, r1   // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_dec_loop   // kuser_cmpxchg failed (C flag clear), loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  sub r1, r1, #1
  str r1, [r0]
  movs r0, r1
  // unlock and return
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
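
{ The SYSTEM_HAS_KUSER_CMPXCHG path above is a compare-and-swap loop built on
  the Linux kernel user helper at $ffff0fc0 (__kernel_cmpxchg), which sets the
  C flag when the swap succeeded. The same loop expressed in Pascal, assuming
  a hypothetical TryCompareAndSwap wrapper around that helper (illustrative
  only): }
(*
function InterLockedDecrementRef(var Target: longint): longint;
var
  old, new: longint;
begin
  repeat
    old:=Target;   // reload on every retry, just like the ldr above
    new:=old-1;
  until TryCompareAndSwap(Target,old,new);   // hypothetical CAS wrapper
  Result:=new;
end;
*)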

{$ifndef darwin}
{$define FPC_SYSTEM_HAS_ANSISTR_INCR_REF}
Procedure fpc_ansistr_incr_ref (S : Pointer); [Public,Alias:'FPC_ANSISTR_INCR_REF'];assembler;nostackframe; compilerproc;
asm
  // Null string?
  cmp r0, #0
  // Load reference counter
  ldrne r1, [r0, #-8]
  // pointer to counter, calculate here for delay slot utilization
  subne r0, r0, #8
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  // Check for a constant string
  cmp r1, #0
  // Tailcall
  // Hopefully the linker will place InterLockedIncrement as laid out here
  bge InterLockedIncrement
  // Free Pascal will generate a proper return here, saving some cache space
end;
{$endif not darwin}
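
{ Illustrative sketch (not part of the RTL) of what the routine above does,
  assuming only what the assembler itself relies on: the reference count is
  stored 8 bytes before the first character and a negative count marks a
  constant (not reference counted) string. }
(*
procedure AnsistrIncrRefRef(s: pointer);
begin
  if s=nil then
    exit;
  if plongint(pbyte(s)-8)^<0 then
    exit;   { constant string }
  InterLockedIncrement(plongint(pbyte(s)-8)^);
end;
*)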

function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r1, [r0]
  add r1, r1, #1
  strex r2, r1, [r0]
  cmp r2, #0
  bne .Lloop
  mov r0, r1
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
.Latomic_inc_loop:
  ldr r0, [r2]   // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cache line thrashing
  // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, #1   // Increment value
{$ifdef CPUARM_HAS_BLX}
  blx r3   // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  movcs r0, r1   // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_inc_loop   // kuser_cmpxchg failed, loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  add r1, r1, #1
  str r1, [r0]
  mov r0, r1
  // unlock and return
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;

function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
  // swp is deprecated on ARMv6 and above
.Lloop:
  ldrex r2, [r0]
  strex r3, r1, [r0]
  cmp r3, #0
  bne .Lloop
  mov r0, r2
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {r4, lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 (and r1) by definition
.Latomic_add_loop:
  ldr r0, [r2]   // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cache line thrashing
  // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  mov r4, r0   // save the current value because kuser_cmpxchg clobbers r0
{$ifdef CPUARM_HAS_BLX}
  blx r3   // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  // restore the original value if needed
  movcs r0, r4
  ldmcsfd r13!, {r4, pc}
  b .Latomic_add_loop   // kuser_cmpxchg failed, loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r2, #1
.Lloop:
  swp r2, r2, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r2, [r0]
  str r1, [r0]
  mov r0, r2
  // unlock and return
  mov r2, #0
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;

function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r2, [r0]
  add r12, r1, r2
  strex r3, r12, [r0]
  cmp r3, #0
  bne .Lloop
  mov r0, r2
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {r4, lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
  mov r4, r1   // Save addend
.Latomic_add_loop:
  ldr r0, [r2]   // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cache line thrashing
  // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, r4   // Add to value
{$ifdef CPUARM_HAS_BLX}
  blx r3   // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  // r1 does not get clobbered, so just get back the original value
  // Otherwise we would have to allocate one more register and store the
  // temporary value
  subcs r0, r1, r4
  ldmcsfd r13!, {r4, pc}
  b .Latomic_add_loop   // kuser_cmpxchg failed, loop back
{$else}
  // lock
  ldr r3, .Lfpc_system_lock
  mov r2, #1
.Lloop:
  swp r2, r2, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r2, [r0]
  add r1, r1, r2
  str r1, [r0]
  mov r0, r2
  // unlock and return
  mov r2, #0
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;

function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r3, [r0]
  mov r12, #0
  cmp r3, r2
  strexeq r12, r1, [r0]
  cmp r12, #0
  bne .Lloop
  mov r0, r3
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {r4, lr}
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  mov r4, r2   // Swap parameters around
  mov r2, r0
  mov r0, r4   // Use r4 because we'll need the comperand again later
  // r1 and r2 will not be clobbered by kuser_cmpxchg
  // If we have to loop, r0 will be set to the original Comperand
.Linterlocked_compare_exchange_loop:
{$ifdef CPUARM_HAS_BLX}
  blx r3   // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  movcs r0, r4   // Return the previous value on success
  ldmcsfd r13!, {r4, pc}
  // The error case is a bit tricky: kuser_cmpxchg does not return the current value,
  // so we may need to loop to avoid race conditions
  // The loop case is HIGHLY unlikely, it would require that we got rescheduled between
  // calling kuser_cmpxchg and the ldr. While being rescheduled, another process/thread
  // would have had to set the value to our comperand
  ldr r0, [r2]   // Load the currently set value
  cmp r0, r4     // Return if Comperand != current value, otherwise loop again
  ldmnefd r13!, {r4, pc}
  // If we need to loop here, we have to
  b .Linterlocked_compare_exchange_loop
{$else}
  // lock
  ldr r12, .Lfpc_system_lock
  mov r3, #1
.Lloop:
  swp r3, r3, [r12]
  cmp r3, #0
  bne .Lloop
  // do the job
  ldr r3, [r0]
  cmp r3, r2
  streq r1, [r0]
  mov r0, r3
  // unlock and return
  mov r3, #0
  str r3, [r12]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif}
{$endif}
end;
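
{ Usage sketch (illustrative only): InterlockedCompareExchange returns the
  previous value of Target, so callers typically retry until that value equals
  the comperand they passed in. For example, an atomic add can be built on top
  of it: }
(*
function AtomicAddViaCAS(var Target: longint; Source: longint): longint;
var
  old: longint;
begin
  repeat
    old:=Target;
  until InterlockedCompareExchange(Target,old+Source,old)=old;
  Result:=old;   // value before the addition, like InterLockedExchangeAdd
end;
*)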

{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
function declocked(var l: longint) : boolean; inline;
begin
  Result:=InterLockedDecrement(l) = 0;
end;

{$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
procedure inclocked(var l: longint); inline;
begin
  InterLockedIncrement(l);
end;

procedure fpc_cpucodeinit;
begin
{$ifdef FPC_SYSTEM_FPC_MOVE}
{$ifndef CPUARM_HAS_EDSP}
  cpu_has_edsp:=true;
  in_edsp_test:=true;
  asm
    bic r0,sp,#7
    // ldrd r0,r1,[r0]
    // encode this using .long so the RTL also assembles with instruction sets not supporting ldrd
    .long 0xe1c000d0
  end;
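  { Note (assumption, not shown in this file): on cores without EDSP the ldrd
    above traps as an undefined instruction; the platform's illegal-instruction
    handler is expected to check in_edsp_test, reset cpu_has_edsp to false and
    skip the instruction, which is what makes this probe work. }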
  in_edsp_test:=false;
  if cpu_has_edsp then
    moveproc:=@move_pld
  else
    moveproc:=@move_blended;
{$else CPUARM_HAS_EDSP}
  cpu_has_edsp:=true;
{$endif CPUARM_HAS_EDSP}
{$endif FPC_SYSTEM_FPC_MOVE}
end;

{$define FPC_SYSTEM_HAS_SWAPENDIAN}

{ SwapEndian(<16 Bit>) being inlined is faster than using assembler }
function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { the extra Word type cast is necessary because the "AValue shr 8" }
  { is turned into "longint(AValue) shr 8", so if AValue < 0 then    }
  { the sign bits from the upper 16 bits are shifted in rather than  }
  { zeroes.                                                          }
  Result := SmallInt((Word(AValue) shr 8) or (Word(AValue) shl 8));
end;

function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result := Word((AValue shr 8) or (AValue shl 8));
end;

(*
  This is kept for reference. That's what the compiler COULD generate in these cases.
  But FPC currently does not support inlining of asm functions, so the whole call
  overhead is bigger than the gain of the optimized function.

function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
asm
  // We're starting with 4321
{$if defined(CPUARM_HAS_REV)}
  rev r0, r0               // Reverse byteorder      r0 = 1234
  mov r0, r0, lsr #16      // Shift down to 16 bits  r0 = 0012
{$else}
  mov r0, r0, lsl #16      // Shift to make that 2100
  mov r0, r0, ror #24      // Rotate to 1002
  orr r0, r0, r0, lsr #16  // Shift and combine into 0012
{$endif}
end;
*)

{
  These used to be assembler functions, but with newer improvements to the compiler this
  generates a perfect 4-cycle code sequence and can be inlined.
}
function SwapEndian(const AValue: LongWord): LongWord;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:= AValue xor rordword(AValue,16);
  Result:= Result and $FF00FFFF;
  Result:= (Result shr 8) xor rordword(AValue,8);
end;
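
{ Worked example of the three steps above for AValue = $11223344 (illustrative
  only):
    AValue xor rordword(AValue,16)  = $11223344 xor $33441122 = $22662266
    ... and $FF00FFFF               = $22002266
    shr 8, xor rordword(AValue,8)   = $00220022 xor $44112233 = $44332211
  i.e. all four bytes end up reversed using only eor/bic/ror-class operations. }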

function SwapEndian(const AValue: LongInt): LongInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=LongInt(SwapEndian(DWord(AValue)));
end;

{
  Currently Free Pascal will not generate a good assembler sequence for
    Result:=(SwapEndian(longword(lo(AValue))) shl 32) or
            (SwapEndian(longword(hi(AValue))));
  so we keep an assembly version for now.
}
function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
asm
  // fpc >2.6.0 adds the "rev" instruction in the internal assembler
{$if defined(CPUARM_HAS_REV)}
  rev r2, r0
  rev r0, r1
  mov r1, r2
{$else}
  mov ip, r1
  // We're starting with r0 = $87654321
  eor r1, r0, r0, ror #16   // r1 = $C444C444
  bic r1, r1, #16711680     // r1 = r1 and $ff00ffff = $C400C444
  mov r0, r0, ror #8        // r0 = $21876543
  eor r1, r0, r1, lsr #8    // r1 = $21436587
  eor r0, ip, ip, ror #16
  bic r0, r0, #16711680
  mov ip, ip, ror #8
  eor r0, ip, r0, lsr #8
{$endif}
end;

function SwapEndian(const AValue: QWord): QWord; {$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=QWord(SwapEndian(Int64(AValue)));
end;

{$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
{$define FPC_SYSTEM_HAS_MEM_BARRIER}
{ Generic read/readwrite barrier code. }
procedure barrier; assembler; nostackframe;
asm
  // manually encode the instructions to avoid bootstrap problems and external
  // assembler -march settings
{$ifdef CPUARM_HAS_DMB}
  .long 0xf57ff05f   // dmb sy
{$else}
{$ifdef CPUARMV6}
  mov r0, #0
  .long 0xee070fba   // mcr 15, 0, r0, cr7, cr10, {5}
{$endif}
{$endif}
end;

procedure ReadBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  barrier;
end;

procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { reads are implicitly ordered after earlier reads they depend on, so no
    barrier is required on ARM }
end;

procedure ReadWriteBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  barrier;
end;

procedure WriteBarrier; assembler; nostackframe;
asm
  // specialize the write barrier because according to ARM, implementations of
  // "dmb st" may be more optimal than the more generic "dmb sy"
{$ifdef CPUARM_HAS_DMB}
  .long 0xf57ff05e   // dmb st
{$else}
{$ifdef CPUARMV6}
  mov r0, #0
  .long 0xee070fba   // mcr 15, 0, r0, cr7, cr10, {5}
{$endif}
{$endif}
end;
{$endif}
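
{ Usage sketch (illustrative only): the publish/consume pattern these barriers
  are intended for. The producer makes the payload globally visible before the
  flag, the consumer orders its reads the other way around. }
(*
var
  Payload: longint;
  Ready: boolean;

procedure Publish(value: longint);
begin
  Payload:=value;
  WriteBarrier;    // payload must be visible before the flag
  Ready:=true;
end;

function TryConsume(out value: longint): boolean;
begin
  Result:=Ready;
  if Result then
    begin
      ReadBarrier;   // do not read the payload before having seen the flag
      value:=Payload;
    end;
end;
*)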

{ include hand-optimized assembler division code }
{$i divide.inc}