{
  This file is part of the Free Pascal run time library.
  Copyright (c) 2003 by the Free Pascal development team.

  Processor dependent implementation for the system unit for
  ARM

  See the file COPYING.FPC, included in this distribution,
  for details about the copyright.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}

{ IMPORTANT!
  Never use the "BLX label" instruction! Use "BL label" instead.
  The linker will always change BL to BLX if necessary, but not vice versa (linker version dependent).
  "BLX label" ALWAYS changes the instruction set. It changes a processor in ARM state to Thumb state,
  or a processor in Thumb state to ARM state.
}

{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_FPC_MOVE}
{$endif FPC_SYSTEM_HAS_MOVE}

{$ifdef FPC_SYSTEM_FPC_MOVE}
const
  cpu_has_edsp : boolean = false;
  in_edsp_test : boolean = false;
{$endif FPC_SYSTEM_FPC_MOVE}
{$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
{$define FPC_SYSTEM_HAS_SYSINITFPU}
{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV4) and not defined(FPUVFPV3_D16)}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    rfs r0
    and r0,r0,#0xffe0ffff
    orr r0,r0,#0x00070000
    wfs r0
  end;
end;
{$else}
const
  fpu_nx = 1 shl 0;
  fpu_uf = 1 shl 1;
  fpu_of = 1 shl 2;
  fpu_dz = 1 shl 3;
  fpu_nv = 1 shl 4;

function getfpscr: sizeuint; nostackframe; assembler;
asm
  fmrx r0,fpscr
end;

procedure setfpscr(flags : sizeuint); nostackframe; assembler;
asm
  fmxr fpscr,r0
end;

const
  FPSCR_IOC = 1;
  FPSCR_DZC = 1 shl 1;
  FPSCR_OFC = 1 shl 2;
  FPSCR_UFC = 1 shl 3;
  FPSCR_IXC = 1 shl 4;
  FPSCR_IDC = 1 shl 7;
procedure fpc_throwfpuexception;[public,alias:'FPC_THROWFPUEXCEPTION'];
var
  fpscr : longint;
  f: TFPUException;
begin
  { at this point we already know that an exception will be raised }
  fpscr:=getfpscr;
  if (fpscr and FPSCR_DZC) <> 0 then
    float_raise(exZeroDivide);
  if (fpscr and FPSCR_OFC) <> 0 then
    float_raise(exOverflow);
  if (fpscr and FPSCR_UFC) <> 0 then
    float_raise(exUnderflow);
  if (fpscr and FPSCR_IOC) <> 0 then
    float_raise(exInvalidOp);
  if (fpscr and FPSCR_IXC) <> 0 then
    float_raise(exPrecision);
  if (fpscr and FPSCR_IDC) <> 0 then
    float_raise(exDenormalized);
  { now the soft float exceptions }
  for f in softfloat_exception_flags do
    float_raise(f);
end;
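
{ Illustration (added, not part of the original source): the cumulative FPSCR
  exception flags tested by fpc_throwfpuexception above could be cleared with
  the getfpscr/setfpscr helpers, e.g. before re-checking a guarded operation.
  A minimal sketch, assuming a caller in this same unit:

    setfpscr(getfpscr and not (FPSCR_IOC or FPSCR_DZC or FPSCR_OFC or
                               FPSCR_UFC or FPSCR_IXC or FPSCR_IDC));
}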
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  asm
    fmrx r0,fpscr
    // set "round to nearest" mode
    and r0,r0,#0xff3fffff
    // mask "exception happened" and overflow flags
    and r0,r0,#0xffffff20
    // mask exception flags
    and r0,r0,#0xffff40ff
{$ifndef darwin}
    // Floating point exceptions cause kernel panics on iPhoneOS 2.2.1...
    // disable flush-to-zero mode (IEEE math compliant)
    and r0,r0,#0xfeffffff
    // enable invalid operation, div-by-zero and overflow exceptions
    orr r0,r0,#0x00000700
{$endif}
    fmxr fpscr,r0
  end;
end;
{$endif}
{$endif}
procedure fpc_cpuinit;
begin
  { don't let libraries influence the FPU cw set by the host program }
  if not IsLibrary then
    SysInitFPU;
end;

{$ifdef wince}
function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';

{$define FPC_SYSTEM_HAS_SYSRESETFPU}
Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
end;

{$define FPC_SYSTEM_HAS_SYSINITFPU}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
  { FPU precision 64 bit, rounding to nearest, affine infinity }
  _controlfp($000C0003, $030F031F);
end;
{$endif wince}
{$ifdef linux}
function fpc_read_tp : pointer; [public, alias: 'fpc_read_tp'];assembler; nostackframe;
asm
  // Helper is located at 0xffff0fe0
  mvn r0,#0x0000f000  // mov r0, #0xffff0fff
  sub pc,r0,#0x1f     // Jump to helper
end;
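
{ Note (added for clarity): mvn r0,#0x0000f000 yields r0 = not $0000F000 = $FFFF0FFF,
  and $FFFF0FFF - $1F = $FFFF0FE0, the documented address of the Linux kuser helper
  __kuser_get_tls, so the sub writes that address directly into pc. }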
{$endif linux}

{****************************************************************************
                         stack frame related stuff
 ****************************************************************************}

{$IFNDEF INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_FRAME}
function get_frame:pointer;assembler;nostackframe;
asm
{$ifndef darwin}
  mov r0,r11
{$else}
  mov r0,r7
{$endif}
end;
{$ENDIF not INTERNAL_BACKTRACE}

{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
asm
  cmp r0,#0
{$ifndef darwin}
  ldrne r0,[r0,#-4]
{$else}
  ldrne r0,[r0,#4]
{$endif}
end;

{$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
asm
  cmp r0,#0
{$ifndef darwin}
  ldrne r0,[r0,#-12]
{$else}
  ldrne r0,[r0]
{$endif}
end;

{$define FPC_SYSTEM_HAS_SPTR}
Function Sptr : pointer;assembler;nostackframe;
asm
  mov r0,sp
end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm
  // less than 0?
  cmp r1,#0
{$ifdef CPUARM_HAS_BX}
  bxle lr
{$else}
  movle pc,lr
{$endif}
  mov r3,r0
  orr r2,r2,r2,lsl #8
  orr r2,r2,r2,lsl #16
  tst r3, #3  // Aligned?
  bne .LFillchar_do_align
.LFillchar_is_aligned:
  subs r1,r1,#8
  bmi .LFillchar_less_than_8bytes
  mov ip,r2
.LFillchar_at_least_8bytes:
  // Do 16 bytes per loop
  // More unrolling is unnecessary, as we'll just stall on the write buffers
  stmia r3!,{r2,ip}
  subs r1,r1,#8
  stmplia r3!,{r2,ip}
  subpls r1,r1,#8
  bpl .LFillchar_at_least_8bytes
.LFillchar_less_than_8bytes:
  // Do the rest
  adds r1, r1, #8
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  tst r1, #4
  strne r2,[r3],#4
{$ifdef CPUARM_HAS_ALL_MEM}
  tst r1, #2
  strneh r2,[r3],#2
{$else CPUARM_HAS_ALL_MEM}
  tst r1, #2
  strneb r2,[r3],#1
  strneb r2,[r3],#1
{$endif CPUARM_HAS_ALL_MEM}
  tst r1, #1
  strneb r2,[r3],#1
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
  // Special case for unaligned start
  // We make a maximum of 3 loops here
.LFillchar_do_align:
  strb r2,[r3],#1
  subs r1, r1, #1
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  tst r3,#3
  bne .LFillchar_do_align
  b .LFillchar_is_aligned
end;
{$endif FPC_SYSTEM_HAS_FILLCHAR}
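
{ Worked note (added for illustration): the two orr instructions at the top of
  FillChar replicate the fill byte across a full word before the stm stores, e.g.
  value = $AB gives r2 = $000000AB -> $0000ABAB -> $ABABABAB. }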
{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE}
{$ifdef CPUARM_HAS_EDSP}
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
{$else CPUARM_HAS_EDSP}
procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
{$endif CPUARM_HAS_EDSP}
asm
  // pld [r0]
  // encode this using .long so the rtl also assembles with instruction sets not supporting pld
  .long 0xf5d0f000
  // count <= 0 ?
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxle lr
{$else}
  movle pc,lr
{$endif}
  // overlap?
  subs r3, r1, r0  // if (dest > source) and
  cmphi r2, r3     //    (count > dest - src) then
  bhi .Loverlapped //   DoReverseByteCopy;
  cmp r2,#8        // if (count < 8) then
  blt .Lbyteloop   //   DoForwardByteCopy;
  // Any way to avoid the above jump and fuse the next two instructions?
  tst r0, #3       // if (source and 3) <> 0 or
  tsteq r1, #3     //    (dest and 3) <> 0 then
  bne .Lbyteloop   //   DoForwardByteCopy;
  // pld [r0,#32]
  // encode this using .long so the rtl also assembles with instruction sets not supporting pld
  .long 0xf5d0f020
.Ldwordloop:
  ldmia r0!, {r3, ip}
  // preload
  // pld [r0,#64]
  // encode this using .long so the rtl also assembles with instruction sets not supporting pld
  .long 0xf5d0f040
  sub r2,r2,#8
  cmp r2, #8
  stmia r1!, {r3, ip}
  bge .Ldwordloop
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
end;
{$ifndef CPUARM_HAS_EDSP}
procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
asm
  // count <= 0 ?
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxle lr
{$else}
  movle pc,lr
{$endif}
  // overlap?
  subs r3, r1, r0  // if (dest > source) and
  cmphi r2, r3     //    (count > dest - src) then
  bhi .Loverlapped //   DoReverseByteCopy;
  cmp r2,#8        // if (count < 8) then
  blt .Lbyteloop   //   DoForwardByteCopy;
  // Any way to avoid the above jump and fuse the next two instructions?
  tst r0, #3       // if (source and 3) <> 0 or
  tsteq r1, #3     //    (dest and 3) <> 0 then
  bne .Lbyteloop   //   DoForwardByteCopy;
.Ldwordloop:
  ldmia r0!, {r3, ip}
  sub r2,r2,#8
  cmp r2, #8
  stmia r1!, {r3, ip}
  bge .Ldwordloop
  cmp r2,#0
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
.Lbyteloop:
  subs r2,r2,#1
  ldrb r3,[r0],#1
  strb r3,[r1],#1
  bne .Lbyteloop
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.Loverlapped:
  subs r2,r2,#1
  ldrb r3,[r0,r2]
  strb r3,[r1,r2]
  bne .Loverlapped
end;

const
  moveproc : procedure(const source;var dest;count:longint) = @move_blended;

procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE']; {$ifndef FPC_PIC} assembler;nostackframe; {$endif FPC_PIC}
{$ifdef FPC_PIC}
begin
  moveproc(source,dest,count);
end;
{$else FPC_PIC}
asm
  ldr ip,.Lmoveproc
  ldr pc,[ip]
.Lmoveproc:
  .long moveproc
end;
{$endif FPC_PIC}
{$endif CPUARM_HAS_EDSP}
{$endif FPC_SYSTEM_HAS_MOVE}
{****************************************************************************
                                 String
 ****************************************************************************}

{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{r0: __RESULT
 r1: len
 r2: sstr}
asm
  ldrb r12,[r2],#1
  cmp r12,r1
  movgt r12,r1
  strb r12,[r0],#1
  cmp r12,#6  (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32 bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early. *)
  rsb r3,r0,#0
  ands r3,r3,#3
  sub r12,r12,r3
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r2,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case. *)
  ldrne r1,[r2],#4
  strne r1,[r0],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r1,[r2],#1
  strb r1,[r0],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
{r0: len
 r1: sstr
 r2: dstr}
asm
  ldrb r12,[r1],#1
  cmp r12,r0
  movgt r12,r0
  strb r12,[r2],#1
  cmp r12,#6  (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32 bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early. *)
  rsb r3,r2,#0
  ands r3,r3,#3
  sub r12,r12,r3
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r1,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2
  and r12,r12,#3
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case. *)
  ldrne r0,[r1],#4
  strne r0,[r2],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r0,[r1],#1
  strb r0,[r2],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
asm
  cmp r0,#0
  mov r1,r0
  beq .Ldone
.Lnextchar:
  (* Are we aligned? *)
  tst r1,#3
  bne .Ltest_unaligned  (* No, do byte per byte. *)
  ldr r3,.L01010101
.Ltest_aligned:
  (* Aligned, load 4 bytes at a time. *)
  ldr r12,[r1],#4
  (* Check whether r12 contains a 0 byte. *)
  sub r2,r12,r3
  mvn r12,r12
  and r2,r2,r12
  ands r2,r2,r3,lsl #7  (* r3 lsl 7 = $80808080 *)
  beq .Ltest_aligned    (* No 0 byte, repeat. *)
  sub r1,r1,#4
.Ltest_unaligned:
  ldrb r12,[r1],#1
  cmp r12,#1  (* r12<1 same as r12=0, but result in carry flag *)
  bcs .Lnextchar
  (* Dirty trick: we need to subtract 1 extra because we have counted the
     terminating 0; since the carry flag is known to be clear here (the bcs
     above did not branch), sbc subtracts that extra 1 for us. *)
  sbc r0,r1,r0
.Ldone:
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
.L01010101:
  .long 0x01010101
end;
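
{ Explanatory note (added): the aligned scan above uses the classic zero-byte test.
  In Pascal terms, for a 32-bit word x,

    ((x - $01010101) and (not x) and $80808080) <> 0

  holds exactly when at least one byte of x is zero; r3 holds $01010101 and
  "r3,lsl #7" forms the $80808080 mask. }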
{$endif}

{$ifndef darwin}
{$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
Procedure fpc_ansistr_decr_ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF'];assembler;nostackframe; compilerproc;
asm
  ldr r1, [r0]
  // On return the pointer will always be set to zero, so do it early to hide the load latency
  mov r2, #0
  str r2, [r0]
  // Check for a zero string
  cmp r1, #0
  // Load reference counter
  ldrne r2, [r1, #-8]
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  // Check for a constant string
  cmp r2, #0
{$ifdef CPUARM_HAS_BX}
  bxlt lr
{$else}
  movlt pc,lr
{$endif}
  stmfd sp!, {r1, lr}
  sub r0, r1, #8
  bl InterLockedDecrement
  // InterLockedDecrement is a nice guy and sets the z flag for us
  // if the reference count dropped to 0
  ldmnefd sp!, {r1, pc}
  ldmfd sp!, {r0, lr}
  // We currently cannot use constant symbols in ARM assembly,
  // but we need to stay backward compatible with 2.6
  sub r0, r0, #12
  // Jump without a link, so freemem directly returns to our caller
  b FPC_FREEMEM
end;

{$define FPC_SYSTEM_HAS_ANSISTR_INCR_REF}
Procedure fpc_ansistr_incr_ref (S : Pointer); [Public,Alias:'FPC_ANSISTR_INCR_REF'];assembler;nostackframe; compilerproc;
asm
  // Null string?
  cmp r0, #0
  // Load reference counter
  ldrne r1, [r0, #-8]
  // pointer to counter, calculate it here to hide the load latency
  subne r0, r0, #8
{$ifdef CPUARM_HAS_BX}
  bxeq lr
{$else}
  moveq pc,lr
{$endif}
  // Check for a constant string
  cmp r1, #0
  // Tailcall
  // Hopefully the linker will place InterLockedIncrement as laid out here
  bge InterLockedIncrement
  // Freepascal will generate a proper return here, saving some cache space
end;
{$endif not darwin}
// --- InterLocked functions begin

{$if not defined(CPUARM_HAS_LDREX) and not defined(SYSTEM_HAS_KUSER_CMPXCHG) }
// Use generic interlock implementation
var
  fpc_system_lock: longint;

{$ifdef FPC_PIC}
// Use generic interlock implementation with PIC
// A helper function to get a pointer to fpc_system_lock in the PIC compatible way.
function get_fpc_system_lock_ptr: pointer;
begin
  get_fpc_system_lock_ptr:=@fpc_system_lock;
end;
{$endif FPC_PIC}
{$endif}
function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r1, [r0]
  sub r1, r1, #1
  strex r2, r1, [r0]
  cmp r2, #0
  bne .Lloop
  movs r0, r1
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
.Latomic_dec_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  sub r1, r0, #1 // Decrement value
{$ifdef CPUARM_HAS_BLX}
  blx r3         // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  // MOVS sets the Z flag when the result reaches zero, this can be used later on
  // The C-Flag will not be modified by this because we're not doing any shifting
  movcss r0, r1  // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_dec_loop // kuser_cmpxchg clears the C flag on failure, so loop back
{$else}
  // lock
{$ifdef FPC_PIC}
  push {r0,lr}
  bl get_fpc_system_lock_ptr
  mov r3,r0
  pop {r0,lr}
{$else FPC_PIC}
  ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  sub r1, r1, #1
  str r1, [r0]
  movs r0, r1
  // unlock and return
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r1, [r0]
  add r1, r1, #1
  strex r2, r1, [r0]
  cmp r2, #0
  bne .Lloop
  mov r0, r1
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
.Latomic_inc_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, #1 // Increment value
{$ifdef CPUARM_HAS_BLX}
  blx r3         // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  movcs r0, r1   // We expect that to work most of the time so keep it pipeline friendly
  ldmcsfd r13!, {pc}
  b .Latomic_inc_loop // kuser_cmpxchg clears the C flag on failure, so loop back
{$else}
  // lock
{$ifdef FPC_PIC}
  push {r0,lr}
  bl get_fpc_system_lock_ptr
  mov r3,r0
  pop {r0,lr}
{$else FPC_PIC}
  ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
  mov r1, #1
.Lloop:
  swp r2, r1, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r1, [r0]
  add r1, r1, #1
  str r1, [r0]
  mov r0, r1
  // unlock and return
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
  // swp is deprecated on ARMv6 and above
.Lloop:
  ldrex r2, [r0]
  strex r3, r1, [r0]
  cmp r3, #0
  bne .Lloop
  mov r0, r2
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {r4, lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 (and r1) by definition
.Latomic_add_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  mov r4, r0   // save the current value because kuser_cmpxchg clobbers r0
{$ifdef CPUARM_HAS_BLX}
  blx r3       // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  // restore the original value if needed
  movcs r0, r4
  ldmcsfd r13!, {r4, pc}
  b .Latomic_add_loop // kuser_cmpxchg failed, loop back
{$else}
  // lock
{$ifdef FPC_PIC}
  push {r0,r1,lr}
  bl get_fpc_system_lock_ptr
  mov r3,r0
  pop {r0,r1,lr}
{$else FPC_PIC}
  ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
  mov r2, #1
.Lloop:
  swp r2, r2, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r2, [r0]
  str r1, [r0]
  mov r0, r2
  // unlock and return
  mov r2, #0
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r2, [r0]
  add r12, r1, r2
  strex r3, r12, [r0]
  cmp r3, #0
  bne .Lloop
  mov r0, r2
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {r4, lr}
  mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
  mov r4, r1   // Save addend
.Latomic_add_loop:
  ldr r0, [r2] // Load the current value
  // We expect this to work without looping most of the time
  // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
  // loop here again, we have to reload the value. Normally this just fills the
  // load stall-cycles from the above ldr so in reality we'll not get any additional
  // delays because of this
  // Don't use ldr to load r3 to avoid cacheline trashing
  // Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
  // the kuser_cmpxchg entry point
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
  add r1, r0, r4 // Add to value
{$ifdef CPUARM_HAS_BLX}
  blx r3         // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  // r1 does not get clobbered, so just get back the original value
  // Otherwise we would have to allocate one more register and store the
  // temporary value
  subcs r0, r1, r4
  ldmcsfd r13!, {r4, pc}
  b .Latomic_add_loop // kuser_cmpxchg failed, loop back
{$else}
  // lock
{$ifdef FPC_PIC}
  push {r0,r1,lr}
  bl get_fpc_system_lock_ptr
  mov r3,r0
  pop {r0,r1,lr}
{$else FPC_PIC}
  ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
  mov r2, #1
.Lloop:
  swp r2, r2, [r3]
  cmp r2, #0
  bne .Lloop
  // do the job
  ldr r2, [r0]
  add r1, r1, r2
  str r1, [r0]
  mov r0, r2
  // unlock and return
  mov r2, #0
  str r2, [r3]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
  ldrex r3, [r0]
  mov r12, #0
  cmp r3, r2
  strexeq r12, r1, [r0]
  cmp r12, #0
  bne .Lloop
  mov r0, r3
  bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
  stmfd r13!, {r4, lr}
  mov r4, r2   // Swap parameters around
  mov r2, r0
  mov r0, r4   // Use r4 because we'll need the new value for later
  // r1 and r2 will not be clobbered by kuser_cmpxchg
  // If we have to loop, r0 will be set to the original Comperand
  // kuser_cmpxchg is documented to destroy r3, therefore setting
  // r3 must be in the loop
.Linterlocked_compare_exchange_loop:
  mvn r3, #0x0000f000
  sub r3, r3, #0x3F
{$ifdef CPUARM_HAS_BLX}
  blx r3       // Call kuser_cmpxchg, sets C-Flag on success
{$else}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r3
{$else}
  mov pc, r3
{$endif}
{$endif}
  movcs r0, r4 // Return the previous value on success
  ldmcsfd r13!, {r4, pc}
  // The error case is a bit tricky, kuser_cmpxchg does not return the current value
  // So we may need to loop to avoid race conditions
  // The loop case is HIGHLY unlikely, it would require that we got rescheduled between
  // calling kuser_cmpxchg and the ldr. While being rescheduled, another process/thread
  // would have had to set the value to our comperand
  ldr r0, [r2] // Load the currently set value
  cmp r0, r4   // Return if Comperand != current value, otherwise loop again
  ldmnefd r13!, {r4, pc}
  // If we get here, we have to try again
  b .Linterlocked_compare_exchange_loop
{$else}
  // lock
{$ifdef FPC_PIC}
  push {r0,r1,r2,lr}
  bl get_fpc_system_lock_ptr
  mov r12,r0
  pop {r0,r1,r2,lr}
{$else FPC_PIC}
  ldr r12, .Lfpc_system_lock
{$endif FPC_PIC}
  mov r3, #1
.Lloop:
  swp r3, r3, [r12]
  cmp r3, #0
  bne .Lloop
  // do the job
  ldr r3, [r0]
  cmp r3, r2
  streq r1, [r0]
  mov r0, r3
  // unlock and return
  mov r3, #0
  str r3, [r12]
{$ifdef CPUARM_HAS_BX}
  bx lr
{$else}
  mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
  .long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
function declocked(var l: longint) : boolean; inline;
begin
  Result:=InterLockedDecrement(l) = 0;
end;

{$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
procedure inclocked(var l: longint); inline;
begin
  InterLockedIncrement(l);
end;

// --- InterLocked functions end
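
{ Usage illustration (added, not part of the RTL): InterlockedCompareExchange
  returns the previous value of Target, so lock-free update loops can be built
  on top of it. A minimal sketch of an atomic maximum, using the hypothetical
  helper name AtomicMax:

    procedure AtomicMax(var Target: longint; Value: longint);
    var
      old: longint;
    begin
      repeat
        old:=Target;
        if Value<=old then
          exit;
      until InterlockedCompareExchange(Target,Value,old)=old;
    end;
}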
procedure fpc_cpucodeinit;
begin
{$ifdef FPC_SYSTEM_FPC_MOVE}
{$ifndef CPUARM_HAS_EDSP}
  cpu_has_edsp:=true;
  in_edsp_test:=true;
  asm
    bic r0,sp,#7
    // ldrd r0,r1,[r0]
    // encode this using .long so the rtl also assembles with instruction sets not supporting ldrd
    .long 0xe1c000d0
  end;
  in_edsp_test:=false;
  if cpu_has_edsp then
    moveproc:=@move_pld
  else
    moveproc:=@move_blended;
{$else CPUARM_HAS_EDSP}
  cpu_has_edsp:=true;
{$endif CPUARM_HAS_EDSP}
{$endif FPC_SYSTEM_FPC_MOVE}
end;
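
{ Note (added; an assumption about the surrounding RTL rather than something stated
  here): the probe above presumably relies on the RTL's illegal-instruction handler
  clearing cpu_has_edsp when the encoded ldrd faults while in_edsp_test is true;
  on EDSP-capable cores the instruction executes normally, cpu_has_edsp stays true,
  and the pld-based Move is selected. }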
{$define FPC_SYSTEM_HAS_SWAPENDIAN}

{ SwapEndian(<16 Bit>) being inlined is faster than using assembler }
function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { the extra Word type cast is necessary because the "AValue shr 8" }
  { is turned into "longint(AValue) shr 8", so if AValue < 0 then    }
  { the sign bits from the upper 16 bits are shifted in rather than  }
  { zeroes.                                                          }
  Result := SmallInt((Word(AValue) shr 8) or (Word(AValue) shl 8));
end;

function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result := Word((AValue shr 8) or (AValue shl 8));
end;
(*
  This is kept for reference. That's what the compiler COULD generate in these cases.
  But FPC currently does not support inlining of asm-functions, so the whole call-overhead
  is bigger than the gain of the optimized function.

function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
asm
  // We're starting with 4321
{$if defined(CPUARM_HAS_REV)}
  rev r0, r0              // Reverse byte order     r0 = 1234
  mov r0, r0, lsr #16     // Shift down to 16 bits  r0 = 0012
{$else}
  mov r0, r0, lsl #16     // Shift to make that 2100
  mov r0, r0, ror #24     // Rotate to 1002
  orr r0, r0, r0, lsr #16 // Shift and combine into 0012
{$endif}
end;
*)
{
  These used to be assembler functions, but with newer improvements to the compiler this
  generates a perfect 4 cycle code sequence and can be inlined.
}
function SwapEndian(const AValue: LongWord): LongWord;{$ifdef SYSTEMINLINE}inline;{$endif}
var
  Temp: LongWord;
begin
  Temp := AValue xor rordword(AValue,16);
  Temp := Temp and $FF00FFFF;
  Result:= (Temp shr 8) xor rordword(AValue,8);
end;
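
{ Worked example (added for illustration): with AValue = $44332211 the steps above are
    AValue xor rordword(AValue,16) = $44332211 xor $22114433 -> Temp = $66226622
    Temp and $FF00FFFF                                        -> Temp = $66006622
    (Temp shr 8) xor rordword(AValue,8) = $00660066 xor $11443322
                                                              -> Result = $11223344 }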
function SwapEndian(const AValue: LongInt): LongInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=LongInt(SwapEndian(DWord(AValue)));
end;

{
  Currently freepascal will not generate a good assembler sequence for
    Result:=(SwapEndian(longword(lo(AValue))) shl 32) or
            (SwapEndian(longword(hi(AValue))));
  So we keep an assembly version for now
}
function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
asm
  // fpc >2.6.0 adds the "rev" instruction in the internal assembler
{$if defined(CPUARM_HAS_REV)}
  rev r2, r0
  rev r0, r1
  mov r1, r2
{$else}
  mov ip, r1
  // We're starting with r0 = $87654321
  eor r1, r0, r0, ror #16 // r1 = $C444C444
  bic r1, r1, #16711680   // r1 = r1 and $ff00ffff = $C400C444
  mov r0, r0, ror #8      // r0 = $21876543
  eor r1, r0, r1, lsr #8  // r1 = $21436587
  eor r0, ip, ip, ror #16
  bic r0, r0, #16711680
  mov ip, ip, ror #8
  eor r0, ip, r0, lsr #8
{$endif}
end;

function SwapEndian(const AValue: QWord): QWord; {$ifdef SYSTEMINLINE}inline;{$endif}
begin
  Result:=QWord(SwapEndian(Int64(AValue)));
end;
{$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
{$define FPC_SYSTEM_HAS_MEM_BARRIER}

{ Generic read/readwrite barrier code. }
procedure barrier; assembler; nostackframe;
asm
  // manually encode the instructions to avoid bootstrap and -march external
  // assembler settings
{$ifdef CPUARM_HAS_DMB}
  .long 0xf57ff05f  // dmb sy
{$else CPUARM_HAS_DMB}
{$ifdef CPUARMV6}
  mov r0, #0
  .long 0xee070fba  // mcr 15, 0, r0, cr7, cr10, {5}
{$else CPUARMV6}
{$ifdef SYSTEM_HAS_KUSER_MEMORY_BARRIER}
  stmfd r13!, {lr}
  mvn r0, #0x0000f000
  sub r0, r0, #0x5F
{$ifdef CPUARM_HAS_BLX}
  blx r0  // Call kuser_memory_barrier at address 0xffff0fa0
{$else CPUARM_HAS_BLX}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r0
{$else CPUARM_HAS_BX}
  mov pc, r0
{$endif CPUARM_HAS_BX}
{$endif CPUARM_HAS_BLX}
  ldmfd r13!, {pc}
{$endif SYSTEM_HAS_KUSER_MEMORY_BARRIER}
{$endif CPUARMV6}
{$endif CPUARM_HAS_DMB}
end;

procedure ReadBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  barrier;
end;

procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { reads imply barrier on earlier reads depended on; not required on ARM }
end;

procedure ReadWriteBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  barrier;
end;

procedure WriteBarrier; assembler; nostackframe;
asm
  // specialize the write barrier because according to ARM, implementations for
  // "dmb st" may be more optimal than the more generic "dmb sy"
{$ifdef CPUARM_HAS_DMB}
  .long 0xf57ff05e  // dmb st
{$else CPUARM_HAS_DMB}
{$ifdef CPUARMV6}
  mov r0, #0
  .long 0xee070fba  // mcr 15, 0, r0, cr7, cr10, {5}
{$else CPUARMV6}
{$ifdef SYSTEM_HAS_KUSER_MEMORY_BARRIER}
  stmfd r13!, {lr}
  mvn r0, #0x0000f000
  sub r0, r0, #0x5F
{$ifdef CPUARM_HAS_BLX}
  blx r0  // Call kuser_memory_barrier at address 0xffff0fa0
{$else CPUARM_HAS_BLX}
  mov lr, pc
{$ifdef CPUARM_HAS_BX}
  bx r0
{$else CPUARM_HAS_BX}
  mov pc, r0
{$endif CPUARM_HAS_BX}
{$endif CPUARM_HAS_BLX}
  ldmfd r13!, {pc}
{$endif SYSTEM_HAS_KUSER_MEMORY_BARRIER}
{$endif CPUARMV6}
{$endif CPUARM_HAS_DMB}
end;
{$endif}

{ include hand-optimized assembler division code }
{$i divide.inc}