arm.inc 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. ARM
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. { IMPORTANT!
  13. Never use the "BLX label" instruction! Use "BL label" instead.
  14. The linker will always change BL to BLX if necessary, but not vice versa (linker version dependent).
  15. "BLX label" ALWAYS changes the instruction set. It changes a processor in ARM state to Thumb state,
  16. or a processor in Thumb state to ARM state.
  17. }
  18. {$ifndef FPC_SYSTEM_HAS_MOVE}
  19. {$define FPC_SYSTEM_FPC_MOVE}
  20. {$endif FPC_SYSTEM_HAS_MOVE}
  21. {$ifdef FPC_SYSTEM_FPC_MOVE}
  22. const
       { Two typed boolean constants, both initialised to false. Neither is referenced
         in the part of the file visible here; presumably they record whether the CPU
         offers the EDSP instructions and whether a probe for them is running -
         TODO confirm against the code that writes them. }
  23. cpu_has_edsp : boolean = false;
  24. in_edsp_test : boolean = false;
  25. {$endif FPC_SYSTEM_FPC_MOVE}
  26. {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
  27. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  28. {$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV4) and not defined(FPUVFPV3_D16)}
  29. Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
       { Variant compiled when none of darwin/FPUVFPV2/FPUVFPV3/FPUVFPV4/FPUVFPV3_D16
         is defined. Reads the FPU status word with rfs, rewrites bits 16..20 and
         stores it back with wfs. r0 is the only register touched. }
  30. begin
  31. { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  32. asm
  33. rfs r0
  34. and r0,r0,#0xffe0ffff // clear bits 16..20
  35. orr r0,r0,#0x00070000 // set bits 16..18 again, bits 19..20 stay cleared
  36. wfs r0
  37. end;
  38. end;
  39. {$else}
  40. const
       { Single-bit masks for bits 0..4. They are not used in the part of the file
         visible here; the names suggest inexact / underflow / overflow /
         divide-by-zero / invalid - TODO confirm against their users. }
  41. fpu_nx = 1 shl 0;
  42. fpu_uf = 1 shl 1;
  43. fpu_of = 1 shl 2;
  44. fpu_dz = 1 shl 3;
  45. fpu_nv = 1 shl 4;
  46. function getfpscr: sizeuint; nostackframe; assembler;
       { Returns the current contents of the FPSCR register. }
  47. asm
  48. fmrx r0,fpscr // r0 (function result) := FPSCR
  49. end;
  50. procedure setfpscr(flags : sizeuint); nostackframe; assembler;
       { Writes flags to the FPSCR register; the whole register is replaced. }
  51. asm
  52. fmxr fpscr,r0 // FPSCR := r0 (the flags argument)
  53. end;
  54. const
       { Bit masks that fpc_throwfpuexception tests against the value read from FPSCR.
         The exception each one is mapped to there is noted on the right. }
  55. FPSCR_IOC = 1; // -> exInvalidOp
  56. FPSCR_DZC = 1 shl 1; // -> exZeroDivide
  57. FPSCR_OFC = 1 shl 2; // -> exOverflow
  58. FPSCR_UFC = 1 shl 3; // -> exUnderflow
  59. FPSCR_IXC = 1 shl 4; // -> exPrecision
  60. FPSCR_IDC = 1 shl 7; // -> exDenormalized
  61. procedure fpc_throwfpuexception;[public,alias:'FPC_THROWFPUEXCEPTION'];
       { Turns the pending floating point exception flags into float_raise calls.
         FPSCR is read once via getfpscr; every FPSCR_* bit that is set is reported
         in the fixed order below, followed by every member of
         softfloat_exception_flags. }
  62. var
       { Same type as the result of getfpscr: FPSCR is an unsigned 32-bit pattern whose
         top bit may be set, so a signed longint would make this assignment a range
         error when range checking is enabled. }
  63. fpscr : sizeuint;
  64. f: TFPUException;
  65. begin
  66. { at this point, we know already, that an exception will be raised }
  67. fpscr:=getfpscr;
  68. if (fpscr and FPSCR_DZC) <> 0 then
  69. float_raise(exZeroDivide);
  70. if (fpscr and FPSCR_OFC) <> 0 then
  71. float_raise(exOverflow);
  72. if (fpscr and FPSCR_UFC) <> 0 then
  73. float_raise(exUnderflow);
  74. if (fpscr and FPSCR_IOC) <> 0 then
  75. float_raise(exInvalidOp);
  76. if (fpscr and FPSCR_IXC) <> 0 then
  77. float_raise(exPrecision);
  78. if (fpscr and FPSCR_IDC) <> 0 then
  79. float_raise(exDenormalized);
  80. { now the soft float exceptions }
  81. for f in softfloat_exception_flags do
  82. float_raise(f);
  83. end;
  84. Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
       { VFP variant: read-modify-write of FPSCR with r0 as the only scratch register.
         On darwin only the three clearing steps are applied; the flush-to-zero and
         exception-enable changes are compiled out. }
  85. begin
  86. { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  87. asm
  88. fmrx r0,fpscr
  89. // set "round to nearest" mode (clears bits 22..23)
  90. and r0,r0,#0xff3fffff
  91. // mask "exception happened" and overflow flags (clears bits 0..4, 6 and 7)
  92. and r0,r0,#0xffffff20
  93. // mask exception flags (clears bits 8..13 and 15)
  94. and r0,r0,#0xffff40ff
  95. {$ifndef darwin}
  96. // Floating point exceptions cause kernel panics on iPhoneOS 2.2.1...
  97. // disable flush-to-zero mode (IEEE math compliant) (clears bit 24)
  98. and r0,r0,#0xfeffffff
  99. // enable invalid operation, div-by-zero and overflow exceptions (sets bits 8..10)
  100. orr r0,r0,#0x00000700
  101. {$endif}
  102. fmxr fpscr,r0
  103. end;
  104. end;
  105. {$endif}
  106. {$endif}
  107. procedure fpc_cpuinit;
       { CPU specific start-up hook: initialises the FPU through SysInitFPU, except
         when the code is running as a library. }
  108. begin
  109. { a library has to leave the FPU control word chosen by its host program alone }
  110. if IsLibrary then exit;
  111. SysInitFPU;
  112. end;
  113. {$ifdef wince}
       { External cdecl routine imported from 'coredll'; SysInitFPU below passes it a
         new control value and a mask. Its result is not used in the visible code. }
  114. function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';
  115. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  116. Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
       { wince variant: intentionally empty, resetting the FPU is a no-op here. }
  117. begin
  118. end;
  119. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  120. Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
       { wince variant: sets up the FPU through a single _controlfp call instead of
         touching FPU registers directly. The result of the call is discarded. }
  121. begin
  122. { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  123. { FPU precision 64 bit, rounding to nearest, affine infinity }
  124. _controlfp($000C0003, $030F031F); // arguments are (new, mask), see the declaration
  125. end;
  126. {$endif wince}
  127. {****************************************************************************
  128. stack frame related stuff
  129. ****************************************************************************}
  130. {$IFNDEF INTERNAL_BACKTRACE}
  131. {$define FPC_SYSTEM_HAS_GET_FRAME}
  132. function get_frame:pointer;assembler;nostackframe;
       { Returns the value the frame register holds on entry: r11, or r7 when darwin
         is defined. No stack frame is set up, so the register is read unmodified. }
  133. asm
  134. {$ifndef darwin}
  135. mov r0,r11
  136. {$else}
  137. mov r0,r7
  138. {$endif}
  139. end;
  140. {$ENDIF not INTERNAL_BACKTRACE}
  141. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  142. function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
       { Returns the word stored at framebp-4 (framebp+4 on darwin). A nil framebp is
         returned unchanged, i.e. the result is nil. The addr parameter is not used. }
  143. asm
  144. cmp r0,#0 // nil frame? then skip the load and return nil
  145. {$ifndef darwin}
  146. ldrne r0,[r0,#-4]
  147. {$else}
  148. ldrne r0,[r0,#4]
  149. {$endif}
  150. end;
  151. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  152. function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
       { Returns the word stored at framebp-12 (at framebp itself on darwin). A nil
         framebp is returned unchanged, i.e. the result is nil. addr is not used. }
  153. asm
  154. cmp r0,#0 // nil frame? then skip the load and return nil
  155. {$ifndef darwin}
  156. ldrne r0,[r0,#-12]
  157. {$else}
  158. ldrne r0,[r0]
  159. {$endif}
  160. end;
  161. {$define FPC_SYSTEM_HAS_SPTR}
  162. Function Sptr : pointer;assembler;nostackframe;
       { Returns the current stack pointer (sp as it is on entry, no frame is built). }
  163. asm
  164. mov r0,sp
  165. end;
  166. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  167. {$define FPC_SYSTEM_HAS_FILLCHAR}
  168. Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
       { Stores value into count consecutive bytes starting at x; count <= 0 does nothing.
         Register use: r0 = address of x, r1 = bytes left, r2 = value replicated into
         all four bytes, r3 = write pointer, ip = second copy of the pattern.
         Unaligned leading bytes are written one at a time, then 8 bytes per stmia,
         then a 4/2/1 byte tail selected by the low bits of the remaining count. }
  169. asm
  170. // count <= 0? then there is nothing to do
  171. cmp r1,#0
  172. {$ifdef CPUARM_HAS_BX}
  173. bxle lr
  174. {$else}
  175. movle pc,lr
  176. {$endif}
  177. mov r3,r0
  // replicate the byte into all four bytes of r2
  178. orr r2,r2,r2,lsl #8
  179. orr r2,r2,r2,lsl #16
  180. tst r3, #3 // Aligned?
  181. bne .LFillchar_do_align
  182. .LFillchar_is_aligned:
  // r1 is biased by -8 from here on; it goes negative once fewer than 8 bytes remain
  183. subs r1,r1,#8
  184. bmi .LFillchar_less_than_8bytes
  185. mov ip,r2
  186. .LFillchar_at_least_8bytes:
  187. // Do 16 bytes per loop
  188. // More unrolling is unnecessary, as we'll just stall on the write buffers
  189. stmia r3!,{r2,ip}
  190. subs r1,r1,#8
  // second 8-byte store only if at least 8 more bytes were left
  191. stmplia r3!,{r2,ip}
  192. subpls r1,r1,#8
  193. bpl .LFillchar_at_least_8bytes
  194. .LFillchar_less_than_8bytes:
  195. // Do the rest
  // undo the -8 bias: r1 = 0..7 bytes still to write
  196. adds r1, r1, #8
  197. {$ifdef CPUARM_HAS_BX}
  198. bxeq lr
  199. {$else}
  200. moveq pc,lr
  201. {$endif}
  202. tst r1, #4
  203. strne r2,[r3],#4
  204. {$ifdef CPUARM_HAS_ALL_MEM}
  205. tst r1, #2
  206. strneh r2,[r3],#2
  207. {$else CPUARM_HAS_ALL_MEM}
  // no halfword store available: write the two bytes separately
  208. tst r1, #2
  209. strneb r2,[r3],#1
  210. strneb r2,[r3],#1
  211. {$endif CPUARM_HAS_ALL_MEM}
  212. tst r1, #1
  213. strneb r2,[r3],#1
  214. {$ifdef CPUARM_HAS_BX}
  215. bx lr
  216. {$else}
  217. mov pc,lr
  218. {$endif}
  219. // Special case for unaligned start
  220. // We make a maximum of 3 loops here
  221. .LFillchar_do_align:
  222. strb r2,[r3],#1
  223. subs r1, r1, #1
  // return as soon as count is used up, even if r3 is still unaligned
  224. {$ifdef CPUARM_HAS_BX}
  225. bxeq lr
  226. {$else}
  227. moveq pc,lr
  228. {$endif}
  229. tst r3,#3
  230. bne .LFillchar_do_align
  231. b .LFillchar_is_aligned
  232. end;
  233. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  234. {$ifndef FPC_SYSTEM_HAS_MOVE}
  235. {$define FPC_SYSTEM_HAS_MOVE}
  236. {$ifdef CPUARM_HAS_EDSP}
  237. procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
  238. {$else CPUARM_HAS_EDSP}
  239. procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
  240. {$endif CPUARM_HAS_EDSP}
       { Copies count bytes from source to dest, with preload hints; count <= 0 does
         nothing. With CPUARM_HAS_EDSP this body is Move itself, otherwise it is the
         Move_pld candidate.
         Register use: r0 = source address, r1 = dest address, r2 = bytes left,
         r3/ip = data in flight.
         If dest lies above source and the ranges overlap, bytes are copied from the
         end backwards. Otherwise the copy runs forward: 8 bytes per iteration when
         count >= 8 and both pointers are 4-byte aligned, byte by byte in every other
         case and for the tail. }
  241. asm
  242. // pld [r0]
  243. // encode this using .long so the rtl assembles also with instructions sets not supporting pld
  244. .long 0xf5d0f000
  245. // count <=0 ?
  246. cmp r2,#0
  247. {$ifdef CPUARM_HAS_BX}
  248. bxle lr
  249. {$else}
  250. movle pc,lr
  251. {$endif}
  252. // overlap?
  253. subs r3, r1, r0 // if (dest > source) and
  254. cmphi r2, r3 // (count > dest - src) then
  255. bhi .Loverlapped // DoReverseByteCopy;
  256. cmp r2,#8 // if (count < 8) then
  257. blt .Lbyteloop // DoForwardByteCopy;
  258. // Any way to avoid the above jump and fuse the next two instructions?
  259. tst r0, #3 // if (source and 3) <> 0 or
  260. tsteq r1, #3 // (dest and 3) <> 0 then
  261. bne .Lbyteloop // DoForwardByteCopy;
  262. // pld [r0,#32]
  263. // encode this using .long so the rtl assembles also with instructions sets not supporting pld
  264. .long 0xf5d0f020
  265. .Ldwordloop:
  266. ldmia r0!, {r3, ip}
  267. // preload
  268. // pld [r0,#64]
  269. // encode this using .long so the rtl assembles also with instructions sets not supporting pld
  270. .long 0xf5d0f040
  271. sub r2,r2,#8
  272. cmp r2, #8
  // stmia leaves the flags alone, so bge still sees the compare above
  273. stmia r1!, {r3, ip}
  274. bge .Ldwordloop
  // fewer than 8 bytes left: done if none, else finish byte by byte
  275. cmp r2,#0
  276. {$ifdef CPUARM_HAS_BX}
  277. bxeq lr
  278. {$else}
  279. moveq pc,lr
  280. {$endif}
  281. .Lbyteloop:
  282. subs r2,r2,#1
  283. ldrb r3,[r0],#1
  284. strb r3,[r1],#1
  285. bne .Lbyteloop
  286. {$ifdef CPUARM_HAS_BX}
  287. bx lr
  288. {$else}
  289. mov pc,lr
  290. {$endif}
  291. .Loverlapped:
  // backward copy: r2 is both the remaining count and the index of the byte moved
  292. subs r2,r2,#1
  293. ldrb r3,[r0,r2]
  294. strb r3,[r1,r2]
  295. bne .Loverlapped
  296. end;
  297. {$ifndef CPUARM_HAS_EDSP}
  298. procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
       { Copies count bytes from source to dest without any preload hints; apart from
         the missing pld encodings the code is the same as Move_pld. count <= 0 does
         nothing.
         Register use: r0 = source address, r1 = dest address, r2 = bytes left,
         r3/ip = data in flight. }
  299. asm
  300. // count <=0 ?
  301. cmp r2,#0
  302. {$ifdef CPUARM_HAS_BX}
  303. bxle lr
  304. {$else}
  305. movle pc,lr
  306. {$endif}
  307. // overlap?
  308. subs r3, r1, r0 // if (dest > source) and
  309. cmphi r2, r3 // (count > dest - src) then
  310. bhi .Loverlapped // DoReverseByteCopy;
  311. cmp r2,#8 // if (count < 8) then
  312. blt .Lbyteloop // DoForwardByteCopy;
  313. // Any way to avoid the above jump and fuse the next two instructions?
  314. tst r0, #3 // if (source and 3) <> 0 or
  315. tsteq r1, #3 // (dest and 3) <> 0 then
  316. bne .Lbyteloop // DoForwardByteCopy;
  317. .Ldwordloop:
  318. ldmia r0!, {r3, ip}
  319. sub r2,r2,#8
  320. cmp r2, #8
  // stmia leaves the flags alone, so bge still sees the compare above
  321. stmia r1!, {r3, ip}
  322. bge .Ldwordloop
  // fewer than 8 bytes left: done if none, else finish byte by byte
  323. cmp r2,#0
  324. {$ifdef CPUARM_HAS_BX}
  325. bxeq lr
  326. {$else}
  327. moveq pc,lr
  328. {$endif}
  329. .Lbyteloop:
  330. subs r2,r2,#1
  331. ldrb r3,[r0],#1
  332. strb r3,[r1],#1
  333. bne .Lbyteloop
  334. {$ifdef CPUARM_HAS_BX}
  335. bx lr
  336. {$else}
  337. mov pc,lr
  338. {$endif}
  339. .Loverlapped:
  // backward copy: r2 is both the remaining count and the index of the byte moved
  340. subs r2,r2,#1
  341. ldrb r3,[r0,r2]
  342. strb r3,[r1,r2]
  343. bne .Loverlapped
  344. end;
  345. const
       { Procedure pointer that Move dispatches through; it starts out pointing at
         Move_blended. Nothing in the visible part of the file reassigns it -
         presumably it is switched to Move_pld elsewhere once the CPU has been
         probed; TODO confirm. }
  346. moveproc : procedure(const source;var dest;count:longint) = @move_blended;
  347. procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE']; {$ifndef FPC_PIC} assembler;nostackframe; {$endif FPC_PIC}
       { Public Move used when CPUARM_HAS_EDSP is not defined: forwards the call to
         whatever routine moveproc currently points to. With FPC_PIC this is an
         ordinary Pascal call; otherwise it is a two-instruction jump that leaves
         r0..r2 and lr untouched, so the target returns straight to Move's caller. }
  348. {$ifdef FPC_PIC}
  349. begin
  350. moveproc(source,dest,count);
  351. end;
  352. {$else FPC_PIC}
  353. asm
  354. ldr ip,.Lmoveproc // ip := address of the moveproc variable (literal below)
  355. ldr pc,[ip] // jump to the routine stored in moveproc
  356. .Lmoveproc:
  357. .long moveproc
  358. end;
  359. {$endif FPC_PIC}
  360. {$endif CPUARM_HAS_EDSP}
  361. {$endif FPC_SYSTEM_HAS_MOVE}
  362. {****************************************************************************
  363. String
  364. ****************************************************************************}
  365. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  366. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  367. procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
       { Copies sstr into res. The length byte of sstr is clamped to the value passed
         in r1 before it is stored, then that many characters are copied.
         r12 = characters left, r3 = alignment / word counter, r1 is reused as the
         data register once the length has been clamped. }
  368. {r0: __RESULT
  369. r1: len
  370. r2: sstr}
  371. asm
  372. ldrb r12,[r2],#1
  373. cmp r12,r1
  374. movgt r12,r1
  375. strb r12,[r0],#1
  376. cmp r12,#6 (* 6 seems to be the break even point. *)
  377. blt .LStartTailCopy
  378. (* Align destination on 32bits. This is the only place where unrolling
  379. really seems to help, since in the common case, sstr is aligned on
  380. 32 bits, therefore in the common case we need to copy 3 bytes to
  381. align, i.e. in the case of a loop, you wouldn't branch out early.*)
  // r3 = (-dest) and 3 = bytes needed to reach a 4-byte boundary (0..3)
  382. rsb r3,r0,#0
  383. ands r3,r3,#3
  384. sub r12,r12,r3
  // up to three conditional byte copies; each subnes ends the chain when r3 hits 0
  385. ldrneb r1,[r2],#1
  386. strneb r1,[r0],#1
  387. subnes r3,r3,#1
  388. ldrneb r1,[r2],#1
  389. strneb r1,[r0],#1
  390. subnes r3,r3,#1
  391. ldrneb r1,[r2],#1
  392. strneb r1,[r0],#1
  393. subnes r3,r3,#1
  394. .LDoneAlign:
  395. (* Destination should be aligned now, but source might not be aligned,
  396. if this is the case, do a byte-per-byte copy. *)
  397. tst r2,#3
  398. bne .LStartTailCopy
  399. (* Start the main copy, 32 bit at a time. *)
  // r3 = number of whole words, r12 = leftover bytes; the and leaves the flags of movs intact
  400. movs r3,r12,lsr #2
  401. and r12,r12,#3
  402. beq .LStartTailCopy
  403. .LNext4bytes:
  404. (* Unrolling this loop would save a little bit of time for long strings
  405. (>20 chars), but alas, it hurts for short strings and they are the
  406. common case.*)
  407. ldrne r1,[r2],#4
  408. strne r1,[r0],#4
  409. subnes r3,r3,#1
  410. bne .LNext4bytes
  411. .LStartTailCopy:
  412. (* Do remaining bytes. *)
  413. cmp r12,#0
  414. beq .LDoneTail
  415. .LNextChar3:
  416. ldrb r1,[r2],#1
  417. strb r1,[r0],#1
  418. subs r12,r12,#1
  419. bne .LNextChar3
  420. .LDoneTail:
  421. end;
  422. procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
       { Copies the short string at sstr to dstr. The length byte is clamped to len
         before it is stored, then that many characters are copied. Same algorithm as
         fpc_shortstr_to_shortstr with the argument registers permuted.
         r12 = characters left, r3 = alignment / word counter, r0 is reused as the
         data register once the length has been clamped. }
  423. {r0: len
  424. r1: sstr
  425. r2: dstr}
  426. asm
  427. ldrb r12,[r1],#1
  428. cmp r12,r0
  429. movgt r12,r0
  430. strb r12,[r2],#1
  431. cmp r12,#6 (* 6 seems to be the break even point. *)
  432. blt .LStartTailCopy
  433. (* Align destination on 32bits. This is the only place where unrolling
  434. really seems to help, since in the common case, sstr is aligned on
  435. 32 bits, therefore in the common case we need to copy 3 bytes to
  436. align, i.e. in the case of a loop, you wouldn't branch out early.*)
  // r3 = (-dest) and 3 = bytes needed to reach a 4-byte boundary (0..3)
  437. rsb r3,r2,#0
  438. ands r3,r3,#3
  439. sub r12,r12,r3
  // up to three conditional byte copies; each subnes ends the chain when r3 hits 0
  440. ldrneb r0,[r1],#1
  441. strneb r0,[r2],#1
  442. subnes r3,r3,#1
  443. ldrneb r0,[r1],#1
  444. strneb r0,[r2],#1
  445. subnes r3,r3,#1
  446. ldrneb r0,[r1],#1
  447. strneb r0,[r2],#1
  448. subnes r3,r3,#1
  449. .LDoneAlign:
  450. (* Destination should be aligned now, but source might not be aligned,
  451. if this is the case, do a byte-per-byte copy. *)
  452. tst r1,#3
  453. bne .LStartTailCopy
  454. (* Start the main copy, 32 bit at a time. *)
  // r3 = number of whole words, r12 = leftover bytes; the and leaves the flags of movs intact
  455. movs r3,r12,lsr #2
  456. and r12,r12,#3
  457. beq .LStartTailCopy
  458. .LNext4bytes:
  459. (* Unrolling this loop would save a little bit of time for long strings
  460. (>20 chars), but alas, it hurts for short strings and they are the
  461. common case.*)
  462. ldrne r0,[r1],#4
  463. strne r0,[r2],#4
  464. subnes r3,r3,#1
  465. bne .LNext4bytes
  466. .LStartTailCopy:
  467. (* Do remaining bytes. *)
  468. cmp r12,#0
  469. beq .LDoneTail
  470. .LNextChar3:
  471. ldrb r0,[r1],#1
  472. strb r0,[r2],#1
  473. subs r12,r12,#1
  474. bne .LNextChar3
  475. .LDoneTail:
  476. end;
  477. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  478. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  479. {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  480. function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
       { Returns the number of bytes before the terminating 0 of p; a nil p yields 0.
         r0 keeps the start address, r1 is the scan pointer, r3 holds $01010101.
         While r1 is 4-byte aligned whole words are tested for a zero byte; once a
         word contains one (or while r1 is unaligned) the scan goes byte by byte. }
  481. asm
  482. cmp r0,#0
  483. mov r1,r0
  484. beq .Ldone
  485. .Lnextchar:
  486. (*Are we aligned?*)
  487. tst r1,#3
  488. bne .Ltest_unaligned (*No, do byte per byte.*)
  489. ldr r3,.L01010101
  490. .Ltest_aligned:
  491. (*Aligned, load 4 bytes at a time.*)
  492. ldr r12,[r1],#4
  493. (*Check whether r12 contains a 0 byte.*)
  // (w - $01010101) and (not w) and $80808080 is non-zero iff some byte of w is 0
  494. sub r2,r12,r3
  495. mvn r12,r12
  496. and r2,r2,r12
  497. ands r2,r2,r3,lsl #7 (*r3 lsl 7 = $80808080*)
  498. beq .Ltest_aligned (*No 0 byte, repeat.*)
  // step back to the start of the word that holds the 0 and locate it bytewise
  499. sub r1,r1,#4
  500. .Ltest_unaligned:
  501. ldrb r12,[r1],#1
  502. cmp r12,#1 (*r12<1 same as r12=0, but result in carry flag*)
  503. bcs .Lnextchar
  504. (*Dirty trick: we need to subtract 1 extra because we have counted the
  505. terminating 0, due to the known carry flag sbc can do this.*)
  506. sbc r0,r1,r0
  507. .Ldone:
  508. {$ifdef CPUARM_HAS_BX}
  509. bx lr
  510. {$else}
  511. mov pc,lr
  512. {$endif}
  513. .L01010101:
  514. .long 0x01010101
  515. end;
  516. {$endif}
  517. {$ifndef darwin}
  518. {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
  519. Procedure fpc_ansistr_decr_ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF'];assembler;nostackframe; compilerproc;
       { Drops one reference to the ansistring S and always sets S to nil.
         Nothing else happens for a nil string or when the counter at [S-8] is
         negative (constant string). Otherwise the counter is decremented through
         InterLockedDecrement, and when it reaches zero the block starting at S-12
         is handed to FPC_FREEMEM.
         This relies on InterLockedDecrement returning with the Z flag set exactly
         when the new counter value is 0. }
  520. asm
  521. ldr r1, [r0]
  522. // On return the pointer will always be set to zero, so utilize the delay slots
  523. mov r2, #0
  524. str r2, [r0]
  525. // Check for a zero string
  526. cmp r1, #0
  527. // Load reference counter
  528. ldrne r2, [r1, #-8]
  529. {$ifdef CPUARM_HAS_BX}
  530. bxeq lr
  531. {$else}
  532. moveq pc,lr
  533. {$endif}
  534. // Check for a constant string
  535. cmp r2, #0
  536. {$ifdef CPUARM_HAS_BX}
  537. bxlt lr
  538. {$else}
  539. movlt pc,lr
  540. {$endif}
  // keep the string pointer and the return address across the call
  541. stmfd sp!, {r1, lr}
  542. sub r0, r1, #8
  543. bl InterLockedDecrement
  544. // InterLockedDecrement is a nice guy and sets the z flag for us
  545. // if the reference count dropped to 0
  // still referenced: pop and return directly
  546. ldmnefd sp!, {r1, pc}
  // last reference gone: r0 := string pointer, lr := our caller
  547. ldmfd sp!, {r0, lr}
  548. // We currently can not use constant symbols in ARM-Assembly
  549. // but we need to stay backward compatible with 2.6
  550. sub r0, r0, #12
  551. // Jump without a link, so freemem directly returns to our caller
  552. b FPC_FREEMEM
  553. end;
  554. {$define FPC_SYSTEM_HAS_ANSISTR_INCR_REF}
  555. Procedure fpc_ansistr_incr_ref (S : Pointer); [Public,Alias:'FPC_ANSISTR_INCR_REF'];assembler;nostackframe; compilerproc;
       { Adds one reference to the ansistring S. Nothing happens for a nil string or
         when the counter at [S-8] is negative (constant string); otherwise control
         is passed to InterLockedIncrement with r0 = address of the counter, as a
         tail call (a branch without link, so it returns to our caller). }
  556. asm
  557. // Null string?
  558. cmp r0, #0
  559. // Load reference counter
  560. ldrne r1, [r0, #-8]
  561. // pointer to counter, calculate here for delay slot utilization
  562. subne r0, r0, #8
  563. {$ifdef CPUARM_HAS_BX}
  564. bxeq lr
  565. {$else}
  566. moveq pc,lr
  567. {$endif}
  568. // Check for a constant string
  569. cmp r1, #0
  570. // Tailcall
  571. // Hopefully the linker will place InterLockedIncrement as laid out here
  572. bge InterLockedIncrement
  573. // Freepascal will generate a proper return here, save some cachespace
  574. end;
  575. {$endif not darwin}
  576. // --- InterLocked functions begin
  577. {$if not defined(CPUARM_HAS_LDREX) and not defined(SYSTEM_HAS_KUSER_CMPXCHG) }
  578. // Use generic interlock implementation
  579. var
       { Lock word shared by the generic InterLocked* implementations below (the code
         paths without LDREX and without kuser_cmpxchg). They swap 1 into it with
         swp until the old value was 0, and store 0 into it again when done. }
  580. fpc_system_lock: longint;
  581. {$ifdef FPC_PIC}
  582. // Use generic interlock implementation with PIC
  583. // A helper function to get a pointer to fpc_system_lock in the PIC compatible way.
  584. function get_fpc_system_lock_ptr: pointer;
       { Returns the address of fpc_system_lock. The PIC variants of the InterLocked*
         routines call this instead of loading the address from a literal. }
  585. begin
  586. get_fpc_system_lock_ptr:=@fpc_system_lock;
  587. end;
  588. {$endif FPC_PIC}
  589. {$endif}
  590. function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
       { Decrements Target by one and returns the new value. Every variant ends with a
         flag-setting move of the result, so Z is set on return exactly when the new
         value is 0; fpc_ansistr_decr_ref depends on that.
         Three implementations, chosen at compile time:
         - CPUARM_HAS_LDREX: ldrex/strex retry loop;
         - SYSTEM_HAS_KUSER_CMPXCHG: compare-and-swap through the helper at 0xffff0fc0;
         - otherwise: the update is done while holding fpc_system_lock. }
  591. asm
  592. {$ifdef CPUARM_HAS_LDREX}
  593. .Lloop:
  594. ldrex r1, [r0]
  595. sub r1, r1, #1
  596. strex r2, r1, [r0]
  // r2 <> 0 means the exclusive store did not happen: retry
  597. cmp r2, #0
  598. bne .Lloop
  599. movs r0, r1
  600. bx lr
  601. {$else}
  602. {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  603. stmfd r13!, {lr}
  604. mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
  605. .Latomic_dec_loop:
  606. ldr r0, [r2] // Load the current value
  607. // We expect this to work without looping most of the time
  608. // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  609. // loop here again, we have to reload the value. Normally this just fills the
  610. // load stall-cycles from the above ldr so in reality we'll not get any additional
  611. // delays because of this
  612. // Don't use ldr to load r3 to avoid cacheline thrashing
  613. // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  614. // the kuser_cmpxchg entry point
  615. mvn r3, #0x0000f000
  616. sub r3, r3, #0x3F
  617. sub r1, r0, #1 // Decrement value
  618. {$ifdef CPUARM_HAS_BLX}
  619. blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  620. {$else}
  621. mov lr, pc
  622. {$ifdef CPUARM_HAS_BX}
  623. bx r3
  624. {$else}
  625. mov pc, r3
  626. {$endif}
  627. {$endif}
  628. // MOVS sets the Z flag when the result reaches zero, this can be used later on
  629. // The C-Flag will not be modified by this because we're not doing any shifting
  630. movcss r0, r1 // We expect that to work most of the time so keep it pipeline friendly
  631. ldmcsfd r13!, {pc}
  632. b .Latomic_dec_loop // C flag clear: kuser_cmpxchg did not store, try again
  633. {$else}
  634. // lock
  635. {$ifdef FPC_PIC}
  636. push {r0,lr}
  637. bl get_fpc_system_lock_ptr
  638. mov r3,r0
  639. pop {r0,lr}
  640. {$else FPC_PIC}
  641. ldr r3, .Lfpc_system_lock
  642. {$endif FPC_PIC}
  643. mov r1, #1
  644. .Lloop:
  // swap 1 into the lock word; an old value of 0 means we now own the lock
  645. swp r2, r1, [r3]
  646. cmp r2, #0
  647. bne .Lloop
  648. // do the job
  649. ldr r1, [r0]
  650. sub r1, r1, #1
  651. str r1, [r0]
  652. movs r0, r1
  653. // unlock and return
  // r2 is still 0 from the successful swp; str does not touch the flags
  654. str r2, [r3]
  655. {$ifdef CPUARM_HAS_BX}
  656. bx lr
  657. {$else}
  658. mov pc,lr
  659. {$endif}
  660. {$ifndef FPC_PIC}
  661. .Lfpc_system_lock:
  662. .long fpc_system_lock
  663. {$endif FPC_PIC}
  664. {$endif}
  665. {$endif}
  666. end;
  667. function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
       { Increments Target by one and returns the new value. Unlike
         InterLockedDecrement the final move does not set the flags.
         Three implementations, chosen at compile time:
         - CPUARM_HAS_LDREX: ldrex/strex retry loop;
         - SYSTEM_HAS_KUSER_CMPXCHG: compare-and-swap through the helper at 0xffff0fc0;
         - otherwise: the update is done while holding fpc_system_lock. }
  668. asm
  669. {$ifdef CPUARM_HAS_LDREX}
  670. .Lloop:
  671. ldrex r1, [r0]
  672. add r1, r1, #1
  673. strex r2, r1, [r0]
  // r2 <> 0 means the exclusive store did not happen: retry
  674. cmp r2, #0
  675. bne .Lloop
  676. mov r0, r1
  677. bx lr
  678. {$else}
  679. {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  680. stmfd r13!, {lr}
  681. mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
  682. .Latomic_inc_loop:
  683. ldr r0, [r2] // Load the current value
  684. // We expect this to work without looping most of the time
  685. // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  686. // loop here again, we have to reload the value. Normally this just fills the
  687. // load stall-cycles from the above ldr so in reality we'll not get any additional
  688. // delays because of this
  689. // Don't use ldr to load r3 to avoid cacheline thrashing
  690. // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  691. // the kuser_cmpxchg entry point
  692. mvn r3, #0x0000f000
  693. sub r3, r3, #0x3F
  694. add r1, r0, #1 // Increment value
  695. {$ifdef CPUARM_HAS_BLX}
  696. blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  697. {$else}
  698. mov lr, pc
  699. {$ifdef CPUARM_HAS_BX}
  700. bx r3
  701. {$else}
  702. mov pc, r3
  703. {$endif}
  704. {$endif}
  705. movcs r0, r1 // We expect that to work most of the time so keep it pipeline friendly
  706. ldmcsfd r13!, {pc}
  707. b .Latomic_inc_loop // C flag clear: kuser_cmpxchg did not store, try again
  708. {$else}
  709. // lock
  710. {$ifdef FPC_PIC}
  711. push {r0,lr}
  712. bl get_fpc_system_lock_ptr
  713. mov r3,r0
  714. pop {r0,lr}
  715. {$else FPC_PIC}
  716. ldr r3, .Lfpc_system_lock
  717. {$endif FPC_PIC}
  718. mov r1, #1
  719. .Lloop:
  // swap 1 into the lock word; an old value of 0 means we now own the lock
  720. swp r2, r1, [r3]
  721. cmp r2, #0
  722. bne .Lloop
  723. // do the job
  724. ldr r1, [r0]
  725. add r1, r1, #1
  726. str r1, [r0]
  727. mov r0, r1
  728. // unlock and return
  // r2 is still 0 from the successful swp
  729. str r2, [r3]
  730. {$ifdef CPUARM_HAS_BX}
  731. bx lr
  732. {$else}
  733. mov pc,lr
  734. {$endif}
  735. {$ifndef FPC_PIC}
  736. .Lfpc_system_lock:
  737. .long fpc_system_lock
  738. {$endif FPC_PIC}
  739. {$endif}
  740. {$endif}
  741. end;
  742. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
       { Stores Source into Target and returns the value Target held before.
         r0 = address of Target, r1 = Source on entry.
         Three implementations, chosen at compile time:
         - CPUARM_HAS_LDREX: ldrex/strex retry loop;
         - SYSTEM_HAS_KUSER_CMPXCHG: compare-and-swap through the helper at 0xffff0fc0
           (the loop label is called .Latomic_add_loop although nothing is added);
         - otherwise: the update is done while holding fpc_system_lock. }
  743. asm
  744. {$ifdef CPUARM_HAS_LDREX}
  745. // swp is deprecated on ARMv6 and above
  746. .Lloop:
  747. ldrex r2, [r0]
  748. strex r3, r1, [r0]
  // r3 <> 0 means the exclusive store did not happen: retry
  749. cmp r3, #0
  750. bne .Lloop
  751. mov r0, r2
  752. bx lr
  753. {$else}
  754. {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  755. stmfd r13!, {r4, lr}
  756. mov r2, r0 // kuser_cmpxchg does not clobber r2 (and r1) by definition
  757. .Latomic_add_loop:
  758. ldr r0, [r2] // Load the current value
  759. // We expect this to work without looping most of the time
  760. // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  761. // loop here again, we have to reload the value. Normally this just fills the
  762. // load stall-cycles from the above ldr so in reality we'll not get any additional
  763. // delays because of this
  764. // Don't use ldr to load r3 to avoid cacheline thrashing
  765. // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  766. // the kuser_cmpxchg entry point
  767. mvn r3, #0x0000f000
  768. sub r3, r3, #0x3F
  769. mov r4, r0 // save the current value because kuser_cmpxchg clobbers r0
  770. {$ifdef CPUARM_HAS_BLX}
  771. blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  772. {$else}
  773. mov lr, pc
  774. {$ifdef CPUARM_HAS_BX}
  775. bx r3
  776. {$else}
  777. mov pc, r3
  778. {$endif}
  779. {$endif}
  780. // restore the original value if needed
  781. movcs r0, r4
  782. ldmcsfd r13!, {r4, pc}
  783. b .Latomic_add_loop // kuser_cmpxchg failed, loop back
  784. {$else}
  785. // lock
  786. {$ifdef FPC_PIC}
  787. push {r0,r1,lr}
  788. bl get_fpc_system_lock_ptr
  789. mov r3,r0
  790. pop {r0,r1,lr}
  791. {$else FPC_PIC}
  792. ldr r3, .Lfpc_system_lock
  793. {$endif FPC_PIC}
  794. mov r2, #1
  795. .Lloop:
  // swap r2 with the lock word; r2 = 0 afterwards means the lock was free and is now ours.
  // While it is taken r2 comes back non-zero and is swapped in again on the next round.
  796. swp r2, r2, [r3]
  797. cmp r2, #0
  798. bne .Lloop
  799. // do the job
  800. ldr r2, [r0]
  801. str r1, [r0]
  802. mov r0, r2
  803. // unlock and return
  804. mov r2, #0
  805. str r2, [r3]
  806. {$ifdef CPUARM_HAS_BX}
  807. bx lr
  808. {$else}
  809. mov pc,lr
  810. {$endif}
  811. {$ifndef FPC_PIC}
  812. .Lfpc_system_lock:
  813. .long fpc_system_lock
  814. {$endif FPC_PIC}
  815. {$endif}
  816. {$endif}
  817. end;
  818. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
       { Adds Source to Target and returns the value Target held before the addition.
         r0 = address of Target, r1 = Source on entry.
         Three implementations, chosen at compile time:
         - CPUARM_HAS_LDREX: ldrex/strex retry loop;
         - SYSTEM_HAS_KUSER_CMPXCHG: compare-and-swap through the helper at 0xffff0fc0;
         - otherwise: the update is done while holding fpc_system_lock. }
  819. asm
  820. {$ifdef CPUARM_HAS_LDREX}
  821. .Lloop:
  822. ldrex r2, [r0]
  823. add r12, r1, r2
  824. strex r3, r12, [r0]
  // r3 <> 0 means the exclusive store did not happen: retry
  825. cmp r3, #0
  826. bne .Lloop
  827. mov r0, r2
  828. bx lr
  829. {$else}
  830. {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  831. stmfd r13!, {r4, lr}
  832. mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
  833. mov r4, r1 // Save addend
  834. .Latomic_add_loop:
  835. ldr r0, [r2] // Load the current value
  836. // We expect this to work without looping most of the time
  837. // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  838. // loop here again, we have to reload the value. Normally this just fills the
  839. // load stall-cycles from the above ldr so in reality we'll not get any additional
  840. // delays because of this
  841. // Don't use ldr to load r3 to avoid cacheline thrashing
  842. // Load 0xffff0fff into r3 and subtract to 0xffff0fc0,
  843. // the kuser_cmpxchg entry point
  844. mvn r3, #0x0000f000
  845. sub r3, r3, #0x3F
  846. add r1, r0, r4 // Add to value
  847. {$ifdef CPUARM_HAS_BLX}
  848. blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  849. {$else}
  850. mov lr, pc
  851. {$ifdef CPUARM_HAS_BX}
  852. bx r3
  853. {$else}
  854. mov pc, r3
  855. {$endif}
  856. {$endif}
  857. // r1 does not get clobbered, so just get back the original value
  858. // Otherwise we would have to allocate one more register and store the
  859. // temporary value
  860. subcs r0, r1, r4
  861. ldmcsfd r13!, {r4, pc}
  862. b .Latomic_add_loop // kuser_cmpxchg failed, loop back
  863. {$else}
  864. // lock
  865. {$ifdef FPC_PIC}
  866. push {r0,r1,lr}
  867. bl get_fpc_system_lock_ptr
  868. mov r3,r0
  869. pop {r0,r1,lr}
  870. {$else FPC_PIC}
  871. ldr r3, .Lfpc_system_lock
  872. {$endif FPC_PIC}
  873. mov r2, #1
  874. .Lloop:
  // swap r2 with the lock word; r2 = 0 afterwards means the lock was free and is now ours.
  // While it is taken r2 comes back non-zero and is swapped in again on the next round.
  875. swp r2, r2, [r3]
  876. cmp r2, #0
  877. bne .Lloop
  878. // do the job
  879. ldr r2, [r0]
  880. add r1, r1, r2
  881. str r1, [r0]
  882. mov r0, r2
  883. // unlock and return
  884. mov r2, #0
  885. str r2, [r3]
  886. {$ifdef CPUARM_HAS_BX}
  887. bx lr
  888. {$else}
  889. mov pc,lr
  890. {$endif}
  891. {$ifndef FPC_PIC}
  892. .Lfpc_system_lock:
  893. .long fpc_system_lock
  894. {$endif FPC_PIC}
  895. {$endif}
  896. {$endif}
  897. end;
  898. function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
  899. asm
  900. {$ifdef CPUARM_HAS_LDREX}
  901. .Lloop:
  902. ldrex r3, [r0]
  903. mov r12, #0
  904. cmp r3, r2
  905. strexeq r12, r1, [r0]
  906. cmp r12, #0
  907. bne .Lloop
  908. mov r0, r3
  909. bx lr
  910. {$else}
  911. {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  912. stmfd r13!, {r4, lr}
  913. mov r4, r2 // Swap parameters around
  914. mov r2, r0
  915. mov r0, r4 // Use r4 because we'll need the new value for later
  916. // r1 and r2 will not be clobbered by kuser_cmpxchg
  917. // If we have to loop, r0 will be set to the original Comperand
  918. // kuser_cmpxchg is documented to destroy r3, therefore setting
  919. // r3 must be in the loop
  920. .Linterlocked_compare_exchange_loop:
  921. mvn r3, #0x0000f000
  922. sub r3, r3, #0x3F
  923. {$ifdef CPUARM_HAS_BLX}
  924. blx r3 // Call kuser_cmpxchg, sets C-Flag on success
  925. {$else}
  926. mov lr, pc
  927. {$ifdef CPUARM_HAS_BX}
  928. bx r3
  929. {$else}
  930. mov pc, r3
  931. {$endif}
  932. {$endif}
  933. movcs r0, r4 // Return the previous value on success
  934. ldmcsfd r13!, {r4, pc}
  935. // The error case is a bit tricky, kuser_cmpxchg does not return the current value
  936. // So we may need to loop to avoid race conditions
  937. // The loop case is HIGHLY unlikely, it would require that we got rescheduled between
  938. // calling kuser_cmpxchg and the ldr. While beeing rescheduled another process/thread
  939. // would have the set the value to our comperand
  940. ldr r0, [r2] // Load the currently set value
  941. cmp r0, r4 // Return if Comperand != current value, otherwise loop again
  942. ldmnefd r13!, {r4, pc}
  943. // If we need to loop here, we have to
  944. b .Linterlocked_compare_exchange_loop
  945. {$else}
  946. // lock
  947. {$ifdef FPC_PIC}
  948. push {r0,r1,r2,lr}
  949. bl get_fpc_system_lock_ptr
  950. mov r12,r0
  951. pop {r0,r1,r2,lr}
  952. {$else FPC_PIC}
  953. ldr r12, .Lfpc_system_lock
  954. {$endif FPC_PIC}
  955. mov r3, #1
  956. .Lloop:
  957. swp r3, r3, [r12]
  958. cmp r3, #0
  959. bne .Lloop
  960. // do the job
  961. ldr r3, [r0]
  962. cmp r3, r2
  963. streq r1, [r0]
  964. mov r0, r3
  965. // unlock and return
  966. mov r3, #0
  967. str r3, [r12]
  968. {$ifdef CPUARM_HAS_BX}
  969. bx lr
  970. {$else}
  971. mov pc,lr
  972. {$endif}
  973. {$ifndef FPC_PIC}
  974. .Lfpc_system_lock:
  975. .long fpc_system_lock
  976. {$endif FPC_PIC}
  977. {$endif}
  978. {$endif}
  979. end;
  980. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  981. function declocked(var l: longint) : boolean; inline;
  982. begin
  983. Result:=InterLockedDecrement(l) = 0;
  984. end;
  985. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  986. procedure inclocked(var l: longint); inline;
  987. begin
  988. InterLockedIncrement(l);
  989. end;
  990. // --- InterLocked functions end
  991. procedure fpc_cpucodeinit;
  992. begin
  993. {$ifdef FPC_SYSTEM_FPC_MOVE}
  994. {$ifndef CPUARM_HAS_EDSP}
  995. cpu_has_edsp:=true;
  996. in_edsp_test:=true;
  997. asm
  998. bic r0,sp,#7
  999. // ldrd r0,r1,[r0]
  1000. // encode this using .long so the rtl assembles also with instructions sets not supporting pld
  1001. .long 0xe1c000d0
  1002. end;
  1003. in_edsp_test:=false;
  1004. if cpu_has_edsp then
  1005. moveproc:=@move_pld
  1006. else
  1007. moveproc:=@move_blended;
  1008. {$else CPUARM_HAS_EDSP}
  1009. cpu_has_edsp:=true;
  1010. {$endif CPUARM_HAS_EDSP}
  1011. {$endif FPC_SYSTEM_FPC_MOVE}
  1012. end;
  1013. {$define FPC_SYSTEM_HAS_SWAPENDIAN}
  1014. { SwapEndian(<16 Bit>) being inlined is faster than using assembler }
  1015. function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1016. begin
  1017. { the extra Word type cast is necessary because the "AValue shr 8" }
  1018. { is turned into "longint(AValue) shr 8", so if AValue < 0 then }
  1019. { the sign bits from the upper 16 bits are shifted in rather than }
  1020. { zeroes. }
  1021. Result := SmallInt(((Word(AValue) shr 8) or (Word(AValue) shl 8)) and $ffff);
  1022. end;
  1023. function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
  1024. begin
  1025. Result := ((AValue shr 8) or (AValue shl 8)) and $ffff;
  1026. end;
  1027. (*
This is kept for reference. That's what the compiler COULD generate in these cases.
  1029. But FPC currently does not support inlining of asm-functions, so the whole call-overhead
  1030. is bigger than the gain of the optimized function.
  1031. function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
  1032. asm
  1033. // We're starting with 4321
  1034. {$if defined(CPUARM_HAS_REV)}
  1035. rev r0, r0 // Reverse byteorder r0 = 1234
  1036. mov r0, r0, shr #16 // Shift down to 16bits r0 = 0012
  1037. {$else}
  1038. mov r0, r0, shl #16 // Shift to make that 2100
  1039. mov r0, r0, ror #24 // Rotate to 1002
  1040. orr r0, r0, r0 shr #16 // Shift and combine into 0012
  1041. {$endif}
  1042. end;
  1043. *)
  1044. {
  1045. These used to be an assembler-function, but with newer improvements to the compiler this
  1046. generates a perfect 4 cycle code sequence and can be inlined.
  1047. }
  1048. function SwapEndian(const AValue: LongWord): LongWord;{$ifdef SYSTEMINLINE}inline;{$endif}
  1049. var
  1050. Temp: LongWord;
  1051. begin
  1052. Temp := AValue xor rordword(AValue,16);
  1053. Temp := Temp and $FF00FFFF;
  1054. Result:= (Temp shr 8) xor rordword(AValue,8);
  1055. end;
  1056. function SwapEndian(const AValue: LongInt): LongInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  1057. begin
  1058. Result:=LongInt(SwapEndian(DWord(AValue)));
  1059. end;
  1060. {
  1061. Currently freepascal will not generate a good assembler sequence for
  1062. Result:=(SwapEndian(longword(lo(AValue))) shl 32) or
  1063. (SwapEndian(longword(hi(AValue))));
  1064. So we keep an assembly version for now
  1065. }
  1066. function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
  1067. asm
  1068. // fpc >2.6.0 adds the "rev" instruction in the internal assembler
  1069. {$if defined(CPUARM_HAS_REV)}
  1070. rev r2, r0
  1071. rev r0, r1
  1072. mov r1, r2
  1073. {$else}
  1074. mov ip, r1
  1075. // We're starting with r0 = $87654321
  1076. eor r1, r0, r0, ror #16 // r1 = $C444C444
  1077. bic r1, r1, #16711680 // r1 = r1 and $ff00ffff = $C400C444
  1078. mov r0, r0, ror #8 // r0 = $21876543
  1079. eor r1, r0, r1, lsr #8 // r1 = $21436587
  1080. eor r0, ip, ip, ror #16
  1081. bic r0, r0, #16711680
  1082. mov ip, ip, ror #8
  1083. eor r0, ip, r0, lsr #8
  1084. {$endif}
  1085. end;
  1086. function SwapEndian(const AValue: QWord): QWord; {$ifdef SYSTEMINLINE}inline;{$endif}
  1087. begin
  1088. Result:=QWord(SwapEndian(Int64(AValue)));
  1089. end;
  1090. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  1091. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  1092. { Generic read/readwrite barrier code. }
  1093. procedure barrier; assembler; nostackframe;
  1094. asm
  1095. // manually encode the instructions to avoid bootstrap and -march external
  1096. // assembler settings
  1097. {$ifdef CPUARM_HAS_DMB}
  1098. .long 0xf57ff05f // dmb sy
  1099. {$else CPUARM_HAS_DMB}
  1100. {$ifdef CPUARMV6}
  1101. mov r0, #0
  1102. .long 0xee070fba // mcr 15, 0, r0, cr7, cr10, {5}
  1103. {$else CPUARMV6}
  1104. {$ifdef SYSTEM_HAS_KUSER_MEMORY_BARRIER}
  1105. stmfd r13!, {lr}
  1106. mvn r0, #0x0000f000
  1107. sub r0, r0, #0x5F
  1108. {$ifdef CPUARM_HAS_BLX}
  1109. blx r0 // Call kuser_memory_barrier at address 0xffff0fa0
  1110. {$else CPUARM_HAS_BLX}
  1111. mov lr, pc
  1112. {$ifdef CPUARM_HAS_BX}
  1113. bx r0
  1114. {$else CPUARM_HAS_BX}
  1115. mov pc, r0
  1116. {$endif CPUARM_HAS_BX}
  1117. {$endif CPUARM_HAS_BLX}
  1118. ldmfd r13!, {pc}
  1119. {$endif SYSTEM_HAS_KUSER_MEMORY_BARRIER}
  1120. {$endif CPUARMV6}
  1121. {$endif CPUARM_HAS_DMB}
  1122. end;
  1123. procedure ReadBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
  1124. begin
  1125. barrier;
  1126. end;
  1127. procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
  1128. begin
  1129. { reads imply barrier on earlier reads depended on; not required on ARM }
  1130. end;
  1131. procedure ReadWriteBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
  1132. begin
  1133. barrier;
  1134. end;
  1135. procedure WriteBarrier; assembler; nostackframe;
  1136. asm
  1137. // specialize the write barrier because according to ARM, implementations for
  1138. // "dmb st" may be more optimal than the more generic "dmb sy"
  1139. {$ifdef CPUARM_HAS_DMB}
  1140. .long 0xf57ff05e // dmb st
  1141. {$else CPUARM_HAS_DMB}
  1142. {$ifdef CPUARMV6}
  1143. mov r0, #0
  1144. .long 0xee070fba // mcr 15, 0, r0, cr7, cr10, {5}
  1145. {$else CPUARMV6}
  1146. {$ifdef SYSTEM_HAS_KUSER_MEMORY_BARRIER}
  1147. stmfd r13!, {lr}
  1148. mvn r0, #0x0000f000
  1149. sub r0, r0, #0x5F
  1150. {$ifdef CPUARM_HAS_BLX}
  1151. blx r0 // Call kuser_memory_barrier at address 0xffff0fa0
  1152. {$else CPUARM_HAS_BLX}
  1153. mov lr, pc
  1154. {$ifdef CPUARM_HAS_BX}
  1155. bx r0
  1156. {$else CPUARM_HAS_BX}
  1157. mov pc, r0
  1158. {$endif CPUARM_HAS_BX}
  1159. {$endif CPUARM_HAS_BLX}
  1160. ldmfd r13!, {pc}
  1161. {$endif SYSTEM_HAS_KUSER_MEMORY_BARRIER}
  1162. {$endif CPUARMV6}
  1163. {$endif CPUARM_HAS_DMB}
  1164. end;
  1165. {$endif}
  1166. {include hand-optimized assembler division code}
  1167. {$i divide.inc}