{
This file is part of the Free Pascal run time library.
Copyright (c) 2003 by the Free Pascal development team.
Processor dependent implementation for the system unit for
ARM
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{ IMPORTANT!
Never use the "BLX label" instruction! Use "BL label" instead.
The linker will always change BL to BLX if necessary, but not vice versa (linker version dependent).
"BLX label" ALWAYS changes the instruction set. It changes a processor in ARM state to Thumb state,
or a processor in Thumb state to ARM state.
}
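{ Illustration of the rule above, assuming a hypothetical external routine
  ThumbHelper that might be assembled as Thumb code:

      bl ThumbHelper     // safe: the linker rewrites BL to BLX when the target is Thumb
      blx ThumbHelper    // wrong: always switches instruction set, breaks for ARM targets

  Note that the register form "BLX rN", which is used further down in this file,
  selects the instruction set from bit 0 of the register and is not affected by
  this rule. }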
{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_FPC_MOVE}
{$endif FPC_SYSTEM_HAS_MOVE}
{$ifdef FPC_SYSTEM_FPC_MOVE}
const
cpu_has_edsp : boolean = false;
in_edsp_test : boolean = false;
{$endif FPC_SYSTEM_FPC_MOVE}
{$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
{$define FPC_SYSTEM_HAS_SYSINITFPU}
{ for bootstrapping with 3.0.x/3.2.x }
{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV4) and not defined(FPUVFPV3_D16) and not defined(FPUARM_HAS_VFP_EXTENSION)}
{$define FPUARM_HAS_FPA}
{$else}
{$define FPUARM_HAS_VFP_EXTENSION}
{$endif}
{$if defined(FPUARM_HAS_FPA)}
{$define FPC_SYSTEM_HAS_GETSETNATIVEFPUCONTROLWORD}
function GetNativeFPUControlWord: TNativeFPUControlWord; assembler;
asm
rfs r0
end;
procedure SetNativeFPUControlWord(const cw: TNativeFPUControlWord);
begin
DefaultFPUControlWord:=cw;
asm
ldr r0, cw
wfs r0
end;
end;
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
{ Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
SetNativeFPUControlWord((GetNativeFPUControlWord and $ffe0ffff) or $00070000);
softfloat_exception_mask:=[float_flag_underflow,float_flag_inexact,float_flag_denormal];
softfloat_exception_flags:=[];
end;
{$elseif defined(FPUARM_HAS_VFP_EXTENSION)}
const
fpu_nx = 1 shl 0;
fpu_uf = 1 shl 1;
fpu_of = 1 shl 2;
fpu_dz = 1 shl 3;
fpu_nv = 1 shl 4;
FPSCR_IOC = 1;
FPSCR_DZC = 1 shl 1;
FPSCR_OFC = 1 shl 2;
FPSCR_UFC = 1 shl 3;
FPSCR_IXC = 1 shl 4;
FPSCR_IDC = 1 shl 7;
FPSCR_EXCEPTIONS = FPSCR_IOC or FPSCR_DZC or FPSCR_OFC or FPSCR_UFC or FPSCR_IXC or FPSCR_IDC;
function getfpscr: sizeuint; assembler; nostackframe;
asm
fmrx r0,fpscr
end;
procedure setfpscr(flags : sizeuint);
begin
DefaultFPUControlWord:=flags and not(FPSCR_EXCEPTIONS);
asm
ldr r0, flags
fmxr fpscr,r0
end;
end;
{$define FPC_SYSTEM_HAS_GETSETNATIVEFPUCONTROLWORD}
function GetNativeFPUControlWord: TNativeFPUControlWord; {$if defined(SYSTEMINLINE)}inline;{$endif}
begin
result:=getfpscr;
end;
procedure SetNativeFPUControlWord(const cw: TNativeFPUControlWord); {$if defined(SYSTEMINLINE)}inline;{$endif}
begin
setfpscr(cw);
end;
procedure RaisePendingExceptions;
var
fpscr : longint;
f: TFPUException;
begin
{ at this point, we already know that an exception will be raised }
fpscr:=getfpscr;
if (fpscr and FPSCR_DZC) <> 0 then
float_raise(exZeroDivide);
if (fpscr and FPSCR_OFC) <> 0 then
float_raise(exOverflow);
if (fpscr and FPSCR_UFC) <> 0 then
float_raise(exUnderflow);
if (fpscr and FPSCR_IOC) <> 0 then
float_raise(exInvalidOp);
if (fpscr and FPSCR_IXC) <> 0 then
float_raise(exPrecision);
if (fpscr and FPSCR_IDC) <> 0 then
float_raise(exDenormalized);
{ now the soft float exceptions }
for f in softfloat_exception_flags do
float_raise(f);
end;
procedure fpc_throwfpuexception;[public,alias:'FPC_THROWFPUEXCEPTION'];
var
fpscr : dword;
f: TFPUException;
begin
{ at this point, we already know that an exception will be raised }
fpscr:=getfpscr;
{ check if the exception is masked; as ARM without hardware exceptions has no masking functionality,
we use the software mask }
if ((fpscr and FPSCR_DZC) <> 0) and (exZeroDivide in softfloat_exception_mask) then
fpscr:=fpscr and not(FPSCR_DZC);
if ((fpscr and FPSCR_OFC) <> 0) and (exOverflow in softfloat_exception_mask) then
fpscr:=fpscr and not(FPSCR_OFC);
if ((fpscr and FPSCR_UFC) <> 0) and (exUnderflow in softfloat_exception_mask) then
fpscr:=fpscr and not(FPSCR_UFC);
if ((fpscr and FPSCR_IOC) <> 0) and (exInvalidOp in softfloat_exception_mask) then
fpscr:=fpscr and not(FPSCR_IOC);
if ((fpscr and FPSCR_IXC) <> 0) and (exPrecision in softfloat_exception_mask) then
fpscr:=fpscr and not(FPSCR_IXC);
if ((fpscr and FPSCR_IDC) <> 0) and (exDenormalized in softfloat_exception_mask) then
fpscr:=fpscr and not(FPSCR_IDC);
setfpscr(fpscr);
if (fpscr and FPSCR_EXCEPTIONS)<>0 then
RaisePendingExceptions;
end;
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
{ Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
asm
fmrx r0,fpscr
// set "round to nearest" mode
and r0,r0,#0xff3fffff
// mask "exception happened" and overflow flags
and r0,r0,#0xffffff20
// mask exception flags
and r0,r0,#0xffff40ff
{$ifndef darwin}
// Floating point exceptions cause kernel panics on iPhoneOS 2.2.1...
// disable flush-to-zero mode (IEEE math compliant)
and r0,r0,#0xfeffffff
// enable invalid operation, div-by-zero and overflow exceptions
orr r0,r0,#0x00000700
{$endif}
fmxr fpscr,r0
end;
softfloat_exception_mask:=[float_flag_underflow,float_flag_inexact,float_flag_denormal];
softfloat_exception_flags:=[];
end;
{$endif defined(FPUARM_HAS_VFP_EXTENSION)}
{$endif not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
{$ifdef wince}
function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';
{$define FPC_SYSTEM_HAS_SYSRESETFPU}
Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
softfloat_exception_flags:=[];
end;
{$define FPC_SYSTEM_HAS_SYSINITFPU}
Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
{ Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
{ FPU precision 64 bit, rounding to nearest, affine infinity }
_controlfp($000C0003, $030F031F);
softfloat_exception_mask:=[float_flag_underflow,float_flag_inexact,float_flag_denormal];
softfloat_exception_flags:=[];
end;
{$define FPC_SYSTEM_HAS_GETSETNATIVEFPUCONTROLWORD}
function GetNativeFPUControlWord: TNativeFPUControlWord;
begin
result:=_controlfp(0,0);
end;
procedure SetNativeFPUControlWord(const cw: TNativeFPUControlWord);
begin
_controlfp(cw,$ffffffff);
end;
{$endif wince}
{$ifndef FPC_SYSTEM_HAS_GETSETNATIVEFPUCONTROLWORD}
{$define FPC_SYSTEM_HAS_GETSETNATIVEFPUCONTROLWORD}
function GetNativeFPUControlWord: TNativeFPUControlWord; {$if defined(SYSTEMINLINE)}inline;{$endif}
begin
result:=0;
end;
procedure SetNativeFPUControlWord(const cw: TNativeFPUControlWord); {$if defined(SYSTEMINLINE)}inline;{$endif}
begin
end;
{$endif}
{$ifdef linux}
function fpc_read_tp : pointer; [public, alias: 'fpc_read_tp'];assembler; nostackframe;
asm
// Helper is located at 0xffff0fe0
mvn r0,#0x0000f000 // mov r0, #0xffff0fff
sub pc,r0,#0x1f // Jump to helper
end;
{$endif linux}
{****************************************************************************
stack frame related stuff
****************************************************************************}
{$IFNDEF INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_FRAME}
function get_frame:pointer;assembler;nostackframe;
asm
{$ifndef darwin}
mov r0,r11
{$else}
mov r0,r7
{$endif}
end;
{$ENDIF not INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
asm
cmp r0,#0
{$ifndef darwin}
ldrne r0,[r0,#-4]
{$else}
ldrne r0,[r0,#4]
{$endif}
end;
{$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
asm
cmp r0,#0
{$ifndef darwin}
ldrne r0,[r0,#-12]
{$else}
ldrne r0,[r0]
{$endif}
end;
{$define FPC_SYSTEM_HAS_SPTR}
Function Sptr : pointer;assembler;nostackframe;
asm
mov r0,sp
end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm
// count <= 0?
cmp r1,#0
{$ifdef CPUARM_HAS_BX}
bxle lr
{$else}
movle pc,lr
{$endif}
mov r3,r0
orr r2,r2,r2,lsl #8
orr r2,r2,r2,lsl #16
tst r3, #3 // Aligned?
bne .LFillchar_do_align
.LFillchar_is_aligned:
subs r1,r1,#8
bmi .LFillchar_less_than_8bytes
mov ip,r2
.LFillchar_at_least_8bytes:
// Do 16 bytes per loop
// More unrolling is unnecessary, as we'll just stall on the write buffers
stmia r3!,{r2,ip}
subs r1,r1,#8
stmplia r3!,{r2,ip}
subpls r1,r1,#8
bpl .LFillchar_at_least_8bytes
.LFillchar_less_than_8bytes:
// Do the rest
adds r1, r1, #8
{$ifdef CPUARM_HAS_BX}
bxeq lr
{$else}
moveq pc,lr
{$endif}
tst r1, #4
strne r2,[r3],#4
{$ifdef CPUARM_HAS_ALL_MEM}
tst r1, #2
strneh r2,[r3],#2
{$else CPUARM_HAS_ALL_MEM}
tst r1, #2
strneb r2,[r3],#1
strneb r2,[r3],#1
{$endif CPUARM_HAS_ALL_MEM}
tst r1, #1
strneb r2,[r3],#1
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
// Special case for unaligned start
// We do at most 3 iterations here
.LFillchar_do_align:
strb r2,[r3],#1
subs r1, r1, #1
{$ifdef CPUARM_HAS_BX}
bxeq lr
{$else}
moveq pc,lr
{$endif}
tst r3,#3
bne .LFillchar_do_align
b .LFillchar_is_aligned
end;
{$endif FPC_SYSTEM_HAS_FILLCHAR}
{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE}
{$ifdef CPUARM_HAS_EDSP}
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
{$else CPUARM_HAS_EDSP}
procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
{$endif CPUARM_HAS_EDSP}
asm
// pld [r0]
// encode this using .long so the RTL also assembles with instruction sets not supporting pld
.long 0xf5d0f000
// count <= 0?
cmp r2,#0
{$ifdef CPUARM_HAS_BX}
bxle lr
{$else}
movle pc,lr
{$endif}
// overlap? (a Pascal sketch of this test follows the Move routines)
subs r3, r1, r0 // if (dest > source) and
cmphi r2, r3 // (count > dest - src) then
bhi .Loverlapped // DoReverseByteCopy;
cmp r2,#8 // if (count < 8) then
blt .Lbyteloop // DoForwardByteCopy;
// Any way to avoid the above jump and fuse the next two instructions?
tst r0, #3 // if (source and 3) <> 0 or
tsteq r1, #3 // (dest and 3) <> 0 then
bne .Lbyteloop // DoForwardByteCopy;
// pld [r0,#32]
// encode this using .long so the RTL also assembles with instruction sets not supporting pld
.long 0xf5d0f020
.Ldwordloop:
ldmia r0!, {r3, ip}
// preload
// pld [r0,#64]
// encode this using .long so the RTL also assembles with instruction sets not supporting pld
.long 0xf5d0f040
sub r2,r2,#8
cmp r2, #8
stmia r1!, {r3, ip}
bge .Ldwordloop
cmp r2,#0
{$ifdef CPUARM_HAS_BX}
bxeq lr
{$else}
moveq pc,lr
{$endif}
.Lbyteloop:
subs r2,r2,#1
ldrb r3,[r0],#1
strb r3,[r1],#1
bne .Lbyteloop
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
.Loverlapped:
subs r2,r2,#1
ldrb r3,[r0,r2]
strb r3,[r1,r2]
bne .Loverlapped
end;
{$ifndef CPUARM_HAS_EDSP}
procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
asm
// count <= 0?
cmp r2,#0
{$ifdef CPUARM_HAS_BX}
bxle lr
{$else}
movle pc,lr
{$endif}
// overlap?
subs r3, r1, r0 // if (dest > source) and
cmphi r2, r3 // (count > dest - src) then
bhi .Loverlapped // DoReverseByteCopy;
cmp r2,#8 // if (count < 8) then
blt .Lbyteloop // DoForwardByteCopy;
// Any way to avoid the above jump and fuse the next two instructions?
tst r0, #3 // if (source and 3) <> 0 or
tsteq r1, #3 // (dest and 3) <> 0 then
bne .Lbyteloop // DoForwardByteCopy;
.Ldwordloop:
ldmia r0!, {r3, ip}
sub r2,r2,#8
cmp r2, #8
stmia r1!, {r3, ip}
bge .Ldwordloop
cmp r2,#0
{$ifdef CPUARM_HAS_BX}
bxeq lr
{$else}
moveq pc,lr
{$endif}
.Lbyteloop:
subs r2,r2,#1
ldrb r3,[r0],#1
strb r3,[r1],#1
bne .Lbyteloop
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
.Loverlapped:
subs r2,r2,#1
ldrb r3,[r0,r2]
strb r3,[r1,r2]
bne .Loverlapped
end;
const
moveproc : procedure(const source;var dest;count:longint) = @move_blended;
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE']; {$ifndef FPC_PIC} assembler;nostackframe; {$endif FPC_PIC}
{$ifdef FPC_PIC}
begin
moveproc(source,dest,count);
end;
{$else FPC_PIC}
asm
ldr ip,.Lmoveproc
ldr pc,[ip]
.Lmoveproc:
.long moveproc
end;
{$endif FPC_PIC}
{$endif CPUARM_HAS_EDSP}
{$endif FPC_SYSTEM_HAS_MOVE}
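(* For reference: the "overlap?" test in the Move routines above corresponds to
   the following Pascal. This is only an illustrative sketch with a hypothetical
   helper name, not something the RTL calls:

function NeedsReverseCopy(source,dest: pointer; count: PtrUInt): boolean; inline;
begin
  { copy backwards only when dest lies inside [source..source+count) }
  NeedsReverseCopy:=(PtrUInt(dest)>PtrUInt(source)) and
                    (count>PtrUInt(dest)-PtrUInt(source));
end;
*)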
{****************************************************************************
String
****************************************************************************}
{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{r0: __RESULT
r1: len
r2: sstr}
asm
ldrb r12,[r2],#1
cmp r12,r1
movgt r12,r1
strb r12,[r0],#1
cmp r12,#6 (* 6 seems to be the break-even point. *)
blt .LStartTailCopy
(* Align destination on 32 bits. This is the only place where unrolling
really seems to help, since in the common case, sstr is aligned on
32 bits; therefore in the common case we need to copy 3 bytes to
align, i.e. with a loop you wouldn't branch out early. *)
rsb r3,r0,#0
ands r3,r3,#3
sub r12,r12,r3
ldrneb r1,[r2],#1
strneb r1,[r0],#1
subnes r3,r3,#1
ldrneb r1,[r2],#1
strneb r1,[r0],#1
subnes r3,r3,#1
ldrneb r1,[r2],#1
strneb r1,[r0],#1
subnes r3,r3,#1
.LDoneAlign:
(* Destination should be aligned now, but source might not be aligned;
if this is the case, do a byte-per-byte copy. *)
tst r2,#3
bne .LStartTailCopy
(* Start the main copy, 32 bits at a time. *)
movs r3,r12,lsr #2
and r12,r12,#3
beq .LStartTailCopy
.LNext4bytes:
(* Unrolling this loop would save a little bit of time for long strings
(>20 chars), but alas, it hurts for short strings and they are the
common case. *)
ldrne r1,[r2],#4
strne r1,[r0],#4
subnes r3,r3,#1
bne .LNext4bytes
.LStartTailCopy:
(* Do remaining bytes. *)
cmp r12,#0
beq .LDoneTail
.LNextChar3:
ldrb r1,[r2],#1
strb r1,[r0],#1
subs r12,r12,#1
bne .LNextChar3
.LDoneTail:
end;
procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
{r0: len
r1: sstr
r2: dstr}
asm
ldrb r12,[r1],#1
cmp r12,r0
movgt r12,r0
strb r12,[r2],#1
cmp r12,#6 (* 6 seems to be the break-even point. *)
blt .LStartTailCopy
(* Align destination on 32 bits. This is the only place where unrolling
really seems to help, since in the common case, sstr is aligned on
32 bits; therefore in the common case we need to copy 3 bytes to
align, i.e. with a loop you wouldn't branch out early. *)
rsb r3,r2,#0
ands r3,r3,#3
sub r12,r12,r3
ldrneb r0,[r1],#1
strneb r0,[r2],#1
subnes r3,r3,#1
ldrneb r0,[r1],#1
strneb r0,[r2],#1
subnes r3,r3,#1
ldrneb r0,[r1],#1
strneb r0,[r2],#1
subnes r3,r3,#1
.LDoneAlign:
(* Destination should be aligned now, but source might not be aligned;
if this is the case, do a byte-per-byte copy. *)
tst r1,#3
bne .LStartTailCopy
(* Start the main copy, 32 bits at a time. *)
movs r3,r12,lsr #2
and r12,r12,#3
beq .LStartTailCopy
.LNext4bytes:
(* Unrolling this loop would save a little bit of time for long strings
(>20 chars), but alas, it hurts for short strings and they are the
common case. *)
ldrne r0,[r1],#4
strne r0,[r2],#4
subnes r3,r3,#1
bne .LNext4bytes
.LStartTailCopy:
(* Do remaining bytes. *)
cmp r12,#0
beq .LDoneTail
.LNextChar3:
ldrb r0,[r1],#1
strb r0,[r2],#1
subs r12,r12,#1
bne .LNextChar3
.LDoneTail:
end;
{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
function fpc_Pchar_length(p:PAnsiChar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
asm
cmp r0,#0
mov r1,r0
beq .Ldone
.Lnextchar:
(* Are we aligned? *)
tst r1,#3
bne .Ltest_unaligned (* No, do byte per byte. *)
ldr r3,.L01010101
.Ltest_aligned:
(* Aligned, load 4 bytes at a time. *)
ldr r12,[r1],#4
(* Check whether r12 contains a 0 byte. *)
sub r2,r12,r3
mvn r12,r12
and r2,r2,r12
ands r2,r2,r3,lsl #7 (* r3 lsl 7 = $80808080 *)
beq .Ltest_aligned (* No 0 byte, repeat. *)
sub r1,r1,#4
.Ltest_unaligned:
ldrb r12,[r1],#1
cmp r12,#1 (* r12<1 is the same as r12=0, but leaves the result in the carry flag *)
bcs .Lnextchar
(* Dirty trick: we need to subtract 1 extra because we have counted the
terminating 0; thanks to the known carry flag, sbc can do this. *)
sbc r0,r1,r0
.Ldone:
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
.L01010101:
.long 0x01010101
end;
{$endif}
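(* For reference: the aligned loop above uses the classic zero-byte-in-a-word
   test. A Pascal sketch of the same trick, with a hypothetical helper name
   (the RTL does not use this function):

function HasZeroByte(x: longword): boolean; inline;
begin
  { a byte of x is zero exactly when the corresponding $80 bit survives below }
  HasZeroByte:=((x-$01010101) and (not x) and $80808080)<>0;
end;
*)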
{$ifndef darwin}
{$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
Procedure fpc_ansistr_decr_ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF'];assembler;nostackframe; compilerproc;
asm
ldr r1, [r0]
// On return the pointer will always be set to zero, so do it early to hide the load latency
mov r2, #0
str r2, [r0]
// Check for a nil string
cmp r1, #0
// Load reference counter
ldrne r2, [r1, #-8]
{$ifdef CPUARM_HAS_BX}
bxeq lr
{$else}
moveq pc,lr
{$endif}
// Check for a constant string
cmp r2, #0
{$ifdef CPUARM_HAS_BX}
bxlt lr
{$else}
movlt pc,lr
{$endif}
stmfd sp!, {r1, lr}
sub r0, r1, #8
bl InterLockedDecrement
// InterLockedDecrement is a nice guy and sets the z flag for us
// if the reference count dropped to 0
ldmnefd sp!, {r1, pc}
ldmfd sp!, {r0, lr}
// We currently cannot use constant symbols in ARM assembly,
// but we need to stay backward compatible with 2.6
sub r0, r0, #12
// Jump without a link, so freemem directly returns to our caller
b FPC_FREEMEM
end;
{$define FPC_SYSTEM_HAS_ANSISTR_INCR_REF}
Procedure fpc_ansistr_incr_ref (S : Pointer); [Public,Alias:'FPC_ANSISTR_INCR_REF'];assembler;nostackframe; compilerproc;
asm
// Nil string?
cmp r0, #0
// Load reference counter
ldrne r1, [r0, #-8]
// pointer to counter, calculated here to hide the load latency
subne r0, r0, #8
{$ifdef CPUARM_HAS_BX}
bxeq lr
{$else}
moveq pc,lr
{$endif}
// Check for a constant string
cmp r1, #0
// Tailcall
// Hopefully the linker will place InterLockedIncrement as laid out here
bge InterLockedIncrement
// Free Pascal will generate a proper return here, saving some cache space
end;
{$endif not darwin}
// --- InterLocked functions begin
{$if not defined(CPUARM_HAS_LDREX) and not defined(SYSTEM_HAS_KUSER_CMPXCHG) }
// Use generic interlock implementation
var
fpc_system_lock: longint;
{$ifdef FPC_PIC}
// Use generic interlock implementation with PIC
// A helper function to get a pointer to fpc_system_lock in a PIC-compatible way.
function get_fpc_system_lock_ptr: pointer;
begin
get_fpc_system_lock_ptr:=@fpc_system_lock;
end;
{$endif FPC_PIC}
{$endif}
{$ifdef VER3_2}
function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
{$else VER3_2}
{$define FPC_SYSTEM_HAS_ATOMIC_DEC_32}
function fpc_atomic_dec_32 (var Target: longint) : longint; assembler; nostackframe;
{$endif VER3_2}
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
ldrex r1, [r0]
sub r1, r1, #1
strex r2, r1, [r0]
cmp r2, #0
bne .Lloop
movs r0, r1
bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
stmfd r13!, {lr}
mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
.Latomic_dec_loop:
ldr r0, [r2] // Load the current value
// We expect this to work without looping most of the time
// R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
// loop here again, we have to reload the value. Normally this just fills the
// load stall-cycles from the above ldr so in reality we'll not get any additional
// delays because of this
// Don't use ldr to load r3 to avoid cacheline thrashing
// Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
// the kuser_cmpxchg entry point
mvn r3, #0x0000f000
sub r3, r3, #0x3F
sub r1, r0, #1 // Decrement value
{$ifdef CPUARM_HAS_BLX}
blx r3 // Call kuser_cmpxchg, sets C-Flag on success
{$else}
mov lr, pc
{$ifdef CPUARM_HAS_BX}
bx r3
{$else}
mov pc, r3
{$endif}
{$endif}
// MOVS sets the Z flag when the result reaches zero; this can be used later on
// The C-Flag will not be modified by this because we're not doing any shifting
movcss r0, r1 // We expect that to work most of the time so keep it pipeline friendly
ldmcsfd r13!, {pc}
b .Latomic_dec_loop // kuser_cmpxchg failed (C clear), loop back
{$else}
// lock
{$ifdef FPC_PIC}
push {r0,lr}
bl get_fpc_system_lock_ptr
mov r3,r0
pop {r0,lr}
{$else FPC_PIC}
ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
mov r1, #1
.Lloop:
swp r2, r1, [r3]
cmp r2, #0
bne .Lloop
// do the job
ldr r1, [r0]
sub r1, r1, #1
str r1, [r0]
movs r0, r1
// unlock and return
str r2, [r3]
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
.long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
{$ifdef VER3_2}
function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
{$else VER3_2}
{$define FPC_SYSTEM_HAS_ATOMIC_INC_32}
function fpc_atomic_inc_32 (var Target: longint) : longint; assembler; nostackframe;
{$endif VER3_2}
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
ldrex r1, [r0]
add r1, r1, #1
strex r2, r1, [r0]
cmp r2, #0
bne .Lloop
mov r0, r1
bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
stmfd r13!, {lr}
mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
.Latomic_inc_loop:
ldr r0, [r2] // Load the current value
// We expect this to work without looping most of the time
// R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
// loop here again, we have to reload the value. Normally this just fills the
// load stall-cycles from the above ldr so in reality we'll not get any additional
// delays because of this
// Don't use ldr to load r3 to avoid cacheline thrashing
// Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
// the kuser_cmpxchg entry point
mvn r3, #0x0000f000
sub r3, r3, #0x3F
add r1, r0, #1 // Increment value
{$ifdef CPUARM_HAS_BLX}
blx r3 // Call kuser_cmpxchg, sets C-Flag on success
{$else}
mov lr, pc
{$ifdef CPUARM_HAS_BX}
bx r3
{$else}
mov pc, r3
{$endif}
{$endif}
movcs r0, r1 // We expect that to work most of the time so keep it pipeline friendly
ldmcsfd r13!, {pc}
b .Latomic_inc_loop // kuser_cmpxchg failed (C clear), loop back
{$else}
// lock
{$ifdef FPC_PIC}
push {r0,lr}
bl get_fpc_system_lock_ptr
mov r3,r0
pop {r0,lr}
{$else FPC_PIC}
ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
mov r1, #1
.Lloop:
swp r2, r1, [r3]
cmp r2, #0
bne .Lloop
// do the job
ldr r1, [r0]
add r1, r1, #1
str r1, [r0]
mov r0, r1
// unlock and return
str r2, [r3]
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
.long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
{$ifdef VER3_2}
function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
{$else VER3_2}
{$define FPC_SYSTEM_HAS_ATOMIC_XCHG_32}
function fpc_atomic_xchg_32 (var Target: longint;Source : longint) : longint; assembler; nostackframe;
{$endif VER3_2}
asm
{$ifdef CPUARM_HAS_LDREX}
// swp is deprecated on ARMv6 and above
.Lloop:
ldrex r2, [r0]
strex r3, r1, [r0]
cmp r3, #0
bne .Lloop
mov r0, r2
bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
stmfd r13!, {r4, lr}
mov r2, r0 // kuser_cmpxchg does not clobber r2 (and r1) by definition
.Latomic_xchg_loop:
ldr r0, [r2] // Load the current value
// We expect this to work without looping most of the time
// R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
// loop here again, we have to reload the value. Normally this just fills the
// load stall-cycles from the above ldr so in reality we'll not get any additional
// delays because of this
// Don't use ldr to load r3 to avoid cacheline thrashing
// Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
// the kuser_cmpxchg entry point
mvn r3, #0x0000f000
sub r3, r3, #0x3F
mov r4, r0 // save the current value because kuser_cmpxchg clobbers r0
{$ifdef CPUARM_HAS_BLX}
blx r3 // Call kuser_cmpxchg, sets C-Flag on success
{$else}
mov lr, pc
{$ifdef CPUARM_HAS_BX}
bx r3
{$else}
mov pc, r3
{$endif}
{$endif}
// restore the original value if needed
movcs r0, r4
ldmcsfd r13!, {r4, pc}
b .Latomic_xchg_loop // kuser_cmpxchg failed, loop back
{$else}
// lock
{$ifdef FPC_PIC}
push {r0,r1,lr}
bl get_fpc_system_lock_ptr
mov r3,r0
pop {r0,r1,lr}
{$else FPC_PIC}
ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
mov r2, #1
.Lloop:
swp r2, r2, [r3]
cmp r2, #0
bne .Lloop
// do the job
ldr r2, [r0]
str r1, [r0]
mov r0, r2
// unlock and return
mov r2, #0
str r2, [r3]
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
.long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
{$ifdef VER3_2}
function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
{$else VER3_2}
{$define FPC_SYSTEM_HAS_ATOMIC_ADD_32}
function fpc_atomic_add_32 (var Target: longint;Value : longint) : longint; assembler; nostackframe;
{$endif VER3_2}
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
ldrex r2, [r0]
add r12, r1, r2
strex r3, r12, [r0]
cmp r3, #0
bne .Lloop
mov r0, r2
bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
stmfd r13!, {r4, lr}
mov r2, r0 // kuser_cmpxchg does not clobber r2 by definition
mov r4, r1 // Save addend
.Latomic_add_loop:
ldr r0, [r2] // Load the current value
// We expect this to work without looping most of the time
// R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
// loop here again, we have to reload the value. Normally this just fills the
// load stall-cycles from the above ldr so in reality we'll not get any additional
// delays because of this
// Don't use ldr to load r3 to avoid cacheline thrashing
// Load 0xffff0fff into r3 and subtract to get 0xffff0fc0,
// the kuser_cmpxchg entry point
mvn r3, #0x0000f000
sub r3, r3, #0x3F
add r1, r0, r4 // Add to value
{$ifdef CPUARM_HAS_BLX}
blx r3 // Call kuser_cmpxchg, sets C-Flag on success
{$else}
mov lr, pc
{$ifdef CPUARM_HAS_BX}
bx r3
{$else}
mov pc, r3
{$endif}
{$endif}
// r1 does not get clobbered, so just get back the original value
// Otherwise we would have to allocate one more register and store the
// temporary value
subcs r0, r1, r4
ldmcsfd r13!, {r4, pc}
b .Latomic_add_loop // kuser_cmpxchg failed, loop back
{$else}
// lock
{$ifdef FPC_PIC}
push {r0,r1,lr}
bl get_fpc_system_lock_ptr
mov r3,r0
pop {r0,r1,lr}
{$else FPC_PIC}
ldr r3, .Lfpc_system_lock
{$endif FPC_PIC}
mov r2, #1
.Lloop:
swp r2, r2, [r3]
cmp r2, #0
bne .Lloop
// do the job
ldr r2, [r0]
add r1, r1, r2
str r1, [r0]
mov r0, r2
// unlock and return
mov r2, #0
str r2, [r3]
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
.long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
{$ifdef VER3_2}
function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
{$else VER3_2}
{$define FPC_SYSTEM_HAS_ATOMIC_CMP_XCHG_32}
function fpc_atomic_cmp_xchg_32 (var Target: longint; NewValue: longint; Comparand: longint) : longint; [public,alias:'FPC_ATOMIC_CMP_XCHG_32']; assembler; nostackframe;
{$endif VER3_2}
asm
{$ifdef CPUARM_HAS_LDREX}
.Lloop:
ldrex r3, [r0]
mov r12, #0
cmp r3, r2
strexeq r12, r1, [r0]
cmp r12, #0
bne .Lloop
mov r0, r3
bx lr
{$else}
{$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
stmfd r13!, {r4, lr}
mov r4, r2 // Swap parameters around
mov r2, r0
mov r0, r4 // Use r4 because we'll need the value for later
// r1 and r2 will not be clobbered by kuser_cmpxchg
// If we have to loop, r0 will be set to the original Comperand
// kuser_cmpxchg is documented to destroy r3, therefore setting
// r3 must be in the loop
.Linterlocked_compare_exchange_loop:
mvn r3, #0x0000f000
sub r3, r3, #0x3F
{$ifdef CPUARM_HAS_BLX}
blx r3 // Call kuser_cmpxchg, sets C-Flag on success
{$else}
mov lr, pc
{$ifdef CPUARM_HAS_BX}
bx r3
{$else}
mov pc, r3
{$endif}
{$endif}
movcs r0, r4 // Return the previous value on success
ldmcsfd r13!, {r4, pc}
// The error case is a bit tricky: kuser_cmpxchg does not return the current value,
// so we may need to loop to avoid race conditions
// The loop case is HIGHLY unlikely; it would require that we got rescheduled between
// calling kuser_cmpxchg and the ldr. While being rescheduled, another process/thread
// would have had to set the value to our Comperand
ldr r0, [r2] // Load the currently set value
cmp r0, r4 // Return if Comperand != current value, otherwise loop again
ldmnefd r13!, {r4, pc}
// If we need to loop here, we have to
b .Linterlocked_compare_exchange_loop
{$else}
// lock
{$ifdef FPC_PIC}
push {r0,r1,r2,lr}
bl get_fpc_system_lock_ptr
mov r12,r0
pop {r0,r1,r2,lr}
{$else FPC_PIC}
ldr r12, .Lfpc_system_lock
{$endif FPC_PIC}
mov r3, #1
.Lloop:
swp r3, r3, [r12]
cmp r3, #0
bne .Lloop
// do the job
ldr r3, [r0]
cmp r3, r2
streq r1, [r0]
mov r0, r3
// unlock and return
mov r3, #0
str r3, [r12]
{$ifdef CPUARM_HAS_BX}
bx lr
{$else}
mov pc,lr
{$endif}
{$ifndef FPC_PIC}
.Lfpc_system_lock:
.long fpc_system_lock
{$endif FPC_PIC}
{$endif}
{$endif}
end;
{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
function declocked(var l: longint) : boolean; inline;
begin
Result:=InterLockedDecrement(l) = 0;
end;
{$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
procedure inclocked(var l: longint); inline;
begin
InterLockedIncrement(l);
end;
// --- InterLocked functions end
procedure fpc_cpucodeinit;
begin
{$ifdef FPC_SYSTEM_FPC_MOVE}
{$ifndef CPUARM_HAS_EDSP}
cpu_has_edsp:=true;
in_edsp_test:=true;
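{ Probe for EDSP support by executing an ldrd below: on CPUs without it the
  instruction is expected to trap, and the RTL's illegal-instruction handler
  (outside this file) presumably clears cpu_has_edsp while in_edsp_test is set. }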
asm
bic r0,sp,#7
// ldrd r0,r1,[r0]
// encode this using .long so the RTL also assembles with instruction sets not supporting ldrd
.long 0xe1c000d0
end;
in_edsp_test:=false;
if cpu_has_edsp then
moveproc:=@move_pld
else
moveproc:=@move_blended;
{$else CPUARM_HAS_EDSP}
cpu_has_edsp:=true;
{$endif CPUARM_HAS_EDSP}
{$endif FPC_SYSTEM_FPC_MOVE}
end;
{$define FPC_SYSTEM_HAS_SWAPENDIAN}
{ SwapEndian(<16 Bit>) being inlined is faster than using assembler }
function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
{ the extra Word type cast is necessary because the "AValue shr 8" }
{ is turned into "longint(AValue) shr 8", so if AValue < 0 then }
{ the sign bits from the upper 16 bits are shifted in rather than }
{ zeroes. }
Result := SmallInt(((Word(AValue) shr 8) or (Word(AValue) shl 8)) and $ffff);
end;
function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result := ((AValue shr 8) or (AValue shl 8)) and $ffff;
end;
(*
This is kept for reference. That's what the compiler COULD generate in these cases.
But FPC currently does not support inlining of asm functions, so the whole call overhead
is bigger than the gain of the optimized function.
function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
asm
// We're starting with 4321
{$if defined(CPUARM_HAS_REV)}
rev r0, r0 // Reverse byteorder r0 = 1234
mov r0, r0, lsr #16 // Shift down to 16 bits r0 = 0012
{$else}
mov r0, r0, lsl #16 // Shift to make that 2100
mov r0, r0, ror #24 // Rotate to 1002
orr r0, r0, r0, lsr #16 // Shift and combine into 0012
{$endif}
end;
*)
{
These used to be assembler functions, but with newer improvements to the compiler this
generates a perfect 4 cycle code sequence and can be inlined.
}
function SwapEndian(const AValue: LongWord): LongWord;{$ifdef SYSTEMINLINE}inline;{$endif}
var
Temp: LongWord;
begin
Temp := AValue xor rordword(AValue,16);
Temp := Temp and $FF00FFFF;
Result:= (Temp shr 8) xor rordword(AValue,8);
end;
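{ Worked example of the sequence above (the same values appear as register
  comments in the Int64 version below): for AValue = $87654321,
  AValue xor ror(AValue,16) = $C444C444; masked with $FF00FFFF this is $C400C444;
  shifted right by 8 it is $00C400C4; and $00C400C4 xor ror(AValue,8)
  = $00C400C4 xor $21876543 = $21436587, the byte-reversed value. }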
function SwapEndian(const AValue: LongInt): LongInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=LongInt(SwapEndian(DWord(AValue)));
end;
{
Currently Free Pascal will not generate a good assembler sequence for
Result:=(SwapEndian(longword(lo(AValue))) shl 32) or
(SwapEndian(longword(hi(AValue))));
So we keep an assembly version for now
}
function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
asm
// fpc >2.6.0 adds the "rev" instruction in the internal assembler
{$if defined(CPUARM_HAS_REV)}
rev r2, r0
rev r0, r1
mov r1, r2
{$else}
mov ip, r1
// We're starting with r0 = $87654321
eor r1, r0, r0, ror #16 // r1 = $C444C444
bic r1, r1, #16711680 // r1 = r1 and $ff00ffff = $C400C444
mov r0, r0, ror #8 // r0 = $21876543
eor r1, r0, r1, lsr #8 // r1 = $21436587
eor r0, ip, ip, ror #16
bic r0, r0, #16711680
mov ip, ip, ror #8
eor r0, ip, r0, lsr #8
{$endif}
end;
function SwapEndian(const AValue: QWord): QWord; {$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=QWord(SwapEndian(Int64(AValue)));
end;
{$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
{$define FPC_SYSTEM_HAS_MEM_BARRIER}
{ Generic read/readwrite barrier code. }
procedure barrier; assembler; nostackframe;
asm
// manually encode the instructions to avoid bootstrap problems and external
// assembler -march settings
{$ifdef CPUARM_HAS_DMB}
.long 0xf57ff05f // dmb sy
{$else CPUARM_HAS_DMB}
{$ifdef CPUARMV6}
mov r0, #0
.long 0xee070fba // mcr 15, 0, r0, cr7, cr10, {5}
{$else CPUARMV6}
{$ifdef SYSTEM_HAS_KUSER_MEMORY_BARRIER}
stmfd r13!, {lr}
mvn r0, #0x0000f000
sub r0, r0, #0x5F
{$ifdef CPUARM_HAS_BLX}
blx r0 // Call kuser_memory_barrier at address 0xffff0fa0
{$else CPUARM_HAS_BLX}
mov lr, pc
{$ifdef CPUARM_HAS_BX}
bx r0
{$else CPUARM_HAS_BX}
mov pc, r0
{$endif CPUARM_HAS_BX}
{$endif CPUARM_HAS_BLX}
ldmfd r13!, {pc}
{$endif SYSTEM_HAS_KUSER_MEMORY_BARRIER}
{$endif CPUARMV6}
{$endif CPUARM_HAS_DMB}
end;
procedure ReadBarrier;
begin
barrier;
end;
procedure ReadDependencyBarrier;
begin
{ reads imply a barrier on earlier reads they depend on; not required on ARM }
end;
procedure ReadWriteBarrier;
begin
barrier;
end;
procedure WriteBarrier; assembler; nostackframe;
asm
// specialize the write barrier because according to ARM, implementations of
// "dmb st" may be more efficient than the more generic "dmb sy"
{$ifdef CPUARM_HAS_DMB}
.long 0xf57ff05e // dmb st
{$else CPUARM_HAS_DMB}
{$ifdef CPUARMV6}
mov r0, #0
.long 0xee070fba // mcr 15, 0, r0, cr7, cr10, {5}
{$else CPUARMV6}
{$ifdef SYSTEM_HAS_KUSER_MEMORY_BARRIER}
stmfd r13!, {lr}
mvn r0, #0x0000f000
sub r0, r0, #0x5F
{$ifdef CPUARM_HAS_BLX}
blx r0 // Call kuser_memory_barrier at address 0xffff0fa0
{$else CPUARM_HAS_BLX}
mov lr, pc
{$ifdef CPUARM_HAS_BX}
bx r0
{$else CPUARM_HAS_BX}
mov pc, r0
{$endif CPUARM_HAS_BX}
{$endif CPUARM_HAS_BLX}
ldmfd r13!, {pc}
{$endif SYSTEM_HAS_KUSER_MEMORY_BARRIER}
{$endif CPUARMV6}
{$endif CPUARM_HAS_DMB}
end;
{$endif}
{include hand-optimized assembler division code}
{$i divide.inc}