powerpc.inc 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2000-2001 by the Free Pascal development team.
  5. Portions Copyright (c) 2000 by Casey Duncan ([email protected])
  6. Processor dependent implementation for the system unit for
  7. PowerPC
  8. See the file COPYING.FPC, included in this distribution,
  9. for details about the copyright.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. **********************************************************************}
  14. {****************************************************************************
  15. PowerPC specific stuff
  16. ****************************************************************************}
  17. { This function is never called directly, it's a dummy to hold the register save/
  18. load subroutines
  19. }
  20. {$ifndef MACOS}
  procedure saverestorereg;assembler;
  { Never called as a procedure: it only provides a home for the exported
    floating point register restore entry points below.

    Every entry point _restfpr_N_* reloads fN..f31 from fixed slots below the
    address in r11 (f31 at -8, each lower register 8 bytes further down) by
    falling through into the entries that follow it. The tail of each chain
    loads the word at 4(r11) into LR, copies r11 into r1 and returns.

    In this file the _x and _l chains consist of exactly the same instructions. }
  asm
    { ---- restore and exit: _restfpr_N_x ---- }
    .global _restfpr_14_x
  _restfpr_14_x:
    lfd     f14, -144(r11)
    .global _restfpr_15_x
  _restfpr_15_x:
    lfd     f15, -136(r11)
    .global _restfpr_16_x
  _restfpr_16_x:
    lfd     f16, -128(r11)
    .global _restfpr_17_x
  _restfpr_17_x:
    lfd     f17, -120(r11)
    .global _restfpr_18_x
  _restfpr_18_x:
    lfd     f18, -112(r11)
    .global _restfpr_19_x
  _restfpr_19_x:
    lfd     f19, -104(r11)
    .global _restfpr_20_x
  _restfpr_20_x:
    lfd     f20, -96(r11)
    .global _restfpr_21_x
  _restfpr_21_x:
    lfd     f21, -88(r11)
    .global _restfpr_22_x
  _restfpr_22_x:
    lfd     f22, -80(r11)
    .global _restfpr_23_x
  _restfpr_23_x:
    lfd     f23, -72(r11)
    .global _restfpr_24_x
  _restfpr_24_x:
    lfd     f24, -64(r11)
    .global _restfpr_25_x
  _restfpr_25_x:
    lfd     f25, -56(r11)
    .global _restfpr_26_x
  _restfpr_26_x:
    lfd     f26, -48(r11)
    .global _restfpr_27_x
  _restfpr_27_x:
    lfd     f27, -40(r11)
    .global _restfpr_28_x
  _restfpr_28_x:
    lfd     f28, -32(r11)
    .global _restfpr_29_x
  _restfpr_29_x:
    lfd     f29, -24(r11)
    .global _restfpr_30_x
  _restfpr_30_x:
    lfd     f30, -16(r11)
    .global _restfpr_31_x
  _restfpr_31_x:
    { fetch the return address first so the load overlaps the last lfd }
    lwz     r0, 4(r11)
    lfd     f31, -8(r11)
    mtlr    r0
    { r1 := r11 }
    ori     r1, r11, 0
    blr

    { ---- restore and exit, reloading lr: _restfpr_N_l ---- }
    .global _restfpr_14_l
  _restfpr_14_l:
    lfd     f14, -144(r11)
    .global _restfpr_15_l
  _restfpr_15_l:
    lfd     f15, -136(r11)
    .global _restfpr_16_l
  _restfpr_16_l:
    lfd     f16, -128(r11)
    .global _restfpr_17_l
  _restfpr_17_l:
    lfd     f17, -120(r11)
    .global _restfpr_18_l
  _restfpr_18_l:
    lfd     f18, -112(r11)
    .global _restfpr_19_l
  _restfpr_19_l:
    lfd     f19, -104(r11)
    .global _restfpr_20_l
  _restfpr_20_l:
    lfd     f20, -96(r11)
    .global _restfpr_21_l
  _restfpr_21_l:
    lfd     f21, -88(r11)
    .global _restfpr_22_l
  _restfpr_22_l:
    lfd     f22, -80(r11)
    .global _restfpr_23_l
  _restfpr_23_l:
    lfd     f23, -72(r11)
    .global _restfpr_24_l
  _restfpr_24_l:
    lfd     f24, -64(r11)
    .global _restfpr_25_l
  _restfpr_25_l:
    lfd     f25, -56(r11)
    .global _restfpr_26_l
  _restfpr_26_l:
    lfd     f26, -48(r11)
    .global _restfpr_27_l
  _restfpr_27_l:
    lfd     f27, -40(r11)
    .global _restfpr_28_l
  _restfpr_28_l:
    lfd     f28, -32(r11)
    .global _restfpr_29_l
  _restfpr_29_l:
    lfd     f29, -24(r11)
    .global _restfpr_30_l
  _restfpr_30_l:
    lfd     f30, -16(r11)
    .global _restfpr_31_l
  _restfpr_31_l:
    lwz     r0, 4(r11)
    lfd     f31, -8(r11)
    mtlr    r0
    { r1 := r11 }
    ori     r1, r11, 0
    blr
  end;
  106. {$endif MACOS}
  107. {****************************************************************************
  108. Move / Fill
  109. ****************************************************************************}
  110. {$define FPC_SYSTEM_HAS_MOVE}
  procedure Move(const source;var dest;count:longint);assembler;
  { Copies count bytes from source to dest. Nothing is done for count <= 0.

    Registers on entry: r3 = address of source, r4 = address of dest,
    r5 = count.

    Direction: when (dest - source), taken as an unsigned value, is smaller
    than count, the copy runs backwards from the end of both buffers;
    otherwise it runs forwards.

    Register roles inside the routine:
      r10 = signed step added by the update-form loads/stores (+/-1, 4, 8)
      r6  = pointer bias: -step when copying forwards, 0 when copying
            backwards, so that pointer + r10 is always the next address
      r5  = number of bytes still to copy

    Strategy: fewer than 15 bytes are copied bytewise. Otherwise dest is
    brought to 4 byte alignment bytewise, then (count >= 64) to 32 byte
    alignment with words, then whole blocks are copied (32 bytes through the
    FPU with dcbz, or 16 bytes with integer registers under ppc603), and the
    remainder is finished with words and/or bytes.

    Fixes relative to the previous revision:
      * the "copy the aligned byte again" correction of the count is now only
        applied to forward copies; applying it to backward copies made the
        routine write one byte in front of dest
      * the block-copy threshold is 64 instead of 63, because a backward copy
        can spend 32 bytes on alignment and the block loop must never be
        entered with a count of 0 in ctr }
  asm
    { count <= 0 ? (branch is taken further down) }
    cmpwi   cr0,r5,0
    { check if we have to do the move backwards because of overlap }
    sub     r10,r4,r3
    { carry := boolean(dest-source >= count) = boolean(no overlap) }
    subc    r10,r10,r5
    { count < 15 ? (to decide whether we will move dwords or bytes) }
    cmpwi   cr1,r5,15
    { if overlap, then r10 := -1 else r10 := 0 }
    subfe   r10,r10,r10
    { count < 64 ? (one full block + max. alignment: 31 bytes when copying }
    { forwards, 4 + 28 = 32 bytes when copying backwards)                  }
    cmpwi   cr7,r5,64
    { if count <= 0, stop }
    ble     cr0,LMoveDone
    { load the begin of the source in the data cache }
    dcbt    0,r3
    { and the dest as well }
    dcbtst  0,r4
    { if overlap, then r0 := count else r0 := 0 }
    and     r0,r5,r10
    { if overlap, then point source and dest to the end }
    add     r3,r3,r0
    add     r4,r4,r0
    { if overlap, then r6 := 0, else r6 := -1 }
    not     r6,r10
    { if overlap, then r10 := -2, else r10 := 0 }
    slwi    r10,r10,1
    { if overlap, then r10 := -1, else r10 := 1 }
    addi    r10,r10,1
    { if count < 15, copy everything byte by byte }
    blt     cr1,LMoveBytes
    { if no overlap, then source/dest += -1, otherwise they stay }
    { After the next instruction, r3/r4 + r10 = next position to }
    { load/store from/to }
    add     r3,r3,r6
    add     r4,r4,r6
    { otherwise, guarantee 4 byte alignment for dest for starters }
  LMove4ByteAlignLoop:
    lbzux   r0,r3,r10
    stbux   r0,r4,r10
    { is dest now 4 aligned? }
    andi.   r0,r4,3
    subi    r5,r5,1
    { while not aligned, continue }
    bne     cr0,LMove4ByteAlignLoop
  {$ifndef ppc603}
    { check for 32 byte alignment; cr0 is consumed by the beq in front of }
    { L32BytesAlignMoveLoop, nothing in between may change it             }
    andi.   r7,r4,31
  {$endif non ppc603}
    { Forward copy (r6 = -1): the byte at the newly aligned address is   }
    { copied again as part of the first dword, so count it once more.    }
    { Backward copy (r6 = 0): the first dword lies completely below the  }
    { aligned address, so the count is already exact.                    }
    sub     r5,r5,r6
    { count div 4 for number of dwords to copy }
    srwi    r0,r5,2
    { if 15 <= count < 64, copy using dwords }
    blt     cr7,LMoveDWords
  {$ifndef ppc603}
    { # of bytes to copy (as dwords) to reach 32 byte alignment }
    { (depends on forward/backward copy) }
    { if forward copy, r6 = -1 -> r8 := 32 }
    { if backward copy, r6 = 0 -> r8 := 0 }
    rlwinm  r8,r6,0,31-6+1,31-6+1
    { if forward copy, we have to copy 32 - unaligned count bytes }
    { if backward copy unaligned count bytes }
    sub     r7,r8,r7
    { if backward copy, the calculated value is now negative -> }
    { make it positive again }
    not     r8, r6
    add     r7, r7, r8
    xor     r7, r7, r8
  {$endif not ppc603}
    { multiply the update count with 4 }
    slwi    r10,r10,2
    slwi    r6,r6,2
    { and adapt the source and dest }
    add     r3,r3,r6
    add     r4,r4,r6
  {$ifndef ppc603}
    { dest already 32 byte aligned (result of the andi. above)? }
    beq     cr0,LMove32BytesAligned
  L32BytesAlignMoveLoop:
    { dest is 4 byte aligned here, so move dwords until it is 32 byte }
    { aligned; r7 is a multiple of 4 and counts down to exactly 0     }
    subic.  r7,r7,4
    lwzux   r0,r3,r10
    subi    r5,r5,4
    stwux   r0,r4,r10
    bne     L32BytesAlignMoveLoop
  LMove32BytesAligned:
    { count div 32 ( >= 1, since count was >= 64 ) }
    srwi    r0,r5,5
    { remainder }
    andi.   r5,r5,31
    { to decide if we will do some dword stores (instead of only }
    { byte stores) afterwards or not }
  {$else not ppc603}
    { count div 16 and remainder }
    srwi    r0,r5,4
    andi.   r5,r5,15
  {$endif not ppc603}
    cmpwi   cr1,r5,11
    mtctr   r0
    { r0 := count div 4, will be moved to ctr when copying dwords }
    srwi    r0,r5,2
  {$ifndef ppc603}
    { adjust the update count: it will now be 8 or -8 depending on overlap }
    slwi    r10,r10,1
    { adjust source and dest pointers: because of the above loop, dest is now }
    { aligned to 8 bytes. So if we add r6 we will still have an 8 bytes }
    { aligned address) }
    add     r3,r3,r6
    add     r4,r4,r6
    slwi    r6,r6,1
    { the dcbz offset must give a 32 byte aligned address when added }
    { to the current dest address and its address must point to the }
    { bytes that will be overwritten in the current iteration. In case }
    { of a forward loop, the dest address has currently an offset of }
    { -8 compared to the bytes that will be overwritten (and r6 = -8). }
    { In case of a backward loop, the dest address currently has }
    { an offset of +32 compared to the bytes that will be overwritten }
    { (and r6 = 0). So the forward dcbz offset must become +8 and the }
    { backward -32 -> (-r6 * 5) - 32 gives the correct offset }
    slwi    r7,r6,2
    add     r7,r7,r6
    neg     r7,r7
    subi    r7,r7,32
  LMove32ByteDcbz:
    lfdux   f0,r3,r10
    lfdux   f1,r3,r10
    lfdux   f2,r3,r10
    lfdux   f3,r3,r10
    { must be done only now, in case source and dest are less than }
    { 32 bytes apart! }
    dcbz    r4,r7
    stfdux  f0,r4,r10
    stfdux  f1,r4,r10
    stfdux  f2,r4,r10
    stfdux  f3,r4,r10
    bdnz    LMove32ByteDcbz
  LMove32ByteLoopDone:
  {$else not ppc603}
  LMove16ByteLoop:
    lwzux   r11,r3,r10
    lwzux   r7,r3,r10
    lwzux   r8,r3,r10
    lwzux   r9,r3,r10
    stwux   r11,r4,r10
    stwux   r7,r4,r10
    stwux   r8,r4,r10
    stwux   r9,r4,r10
    bdnz    LMove16ByteLoop
  {$endif not ppc603}
    { cr0*4+eq is true if the remainder computed above is 0 }
    beq     cr0,LMoveDone
    { make r10 again -1 or 1, but first adjust source/dest pointers }
    sub     r3,r3,r6
    sub     r4,r4,r6
  {$ifndef ppc603}
    srawi   r10,r10,3
    srawi   r6,r6,3
  {$else not ppc603}
    srawi   r10,r10,2
    srawi   r6,r6,2
  {$endif not ppc603}
    { cr1 contains whether count <= 11 }
    ble     cr1,LMoveBytes
  LMoveDWords:
    { r0 = number of dwords, r5 := number of bytes left after them }
    mtctr   r0
    andi.   r5,r5,3
    { r10 * 4 }
    slwi    r10,r10,2
    slwi    r6,r6,2
    add     r3,r3,r6
    add     r4,r4,r6
  LMoveDWordsLoop:
    lwzux   r0,r3,r10
    stwux   r0,r4,r10
    bdnz    LMoveDWordsLoop
    { no bytes left (result of the andi. above)? }
    beq     cr0,LMoveDone
    { make r10 again -1 or 1 }
    sub     r3,r3,r6
    sub     r4,r4,r6
    srawi   r10,r10,2
    srawi   r6,r6,2
  LMoveBytes:
    add     r3,r3,r6
    add     r4,r4,r6
    mtctr   r5
  LMoveBytesLoop:
    lbzux   r0,r3,r10
    stbux   r0,r4,r10
    bdnz    LMoveBytesLoop
  LMoveDone:
  end;
  305. {$define FPC_SYSTEM_HAS_FILLCHAR}
  Procedure FillChar(var x;count:longint;value:byte);assembler;
  { Fills count bytes starting at x with value. Nothing is done for count <= 0.

    input: x in r3, count in r4, value in r5

    Strategy:
      count < 15       : byte stores only
      15 <= count < 64 : one (possibly unaligned) word store to reach 4 byte
                         alignment, then word stores, then byte stores
      count >= 64      : additionally align to 32 bytes with word stores and
                         fill whole 32 byte blocks: with dcbz alone when the
                         value is 0, otherwise with dcbz + four double stores

    Fix relative to the previous revision: the block path is only taken for
    count >= 64 (was 63). Reaching 32 byte alignment can consume 4 + 28 = 32
    bytes, so with 63 bytes only 31 could be left, ctr was loaded with 0 and
    the block loop ran 2^32 times. }
  {$ifndef ABI_AIX}
  { in the AIX ABI, we can use the red zone for temp storage, otherwise we have }
  { to explicitly allocate room }
  var
    temp : packed record
      case byte of
        0: (l1,l2: longint);
        1: (d: double);
    end;
  {$endif ABI_AIX}
  asm
    { no bytes? }
    cmpwi   cr6,r4,0
    { less than 15 bytes? }
    cmpwi   cr7,r4,15
    { less than 64 bytes? (32 byte block + up to 32 bytes of alignment) }
    cmpwi   cr1,r4,64
    { fill r5 with ValueValueValueValue }
    rlwimi  r5,r5,8,16,23
    { setup for aligning x to multiple of 4 }
    rlwinm  r10,r3,0,31-2+1,31
    rlwimi  r5,r5,16,0,15
    ble     cr6,LFillCharDone
    { get the start of the data in the cache (and mark it as "will be }
    { modified") }
    dcbtst  0,r3
    { r10 := 4 - (x and 3) = bytes up to the next multiple of 4 (1..4) }
    subfic  r10,r10,4
    blt     cr7,LFillCharVerySmall
    { just store 4 bytes instead of using a loop to align (there are }
    { plenty of other instructions now to keep the processor busy }
    { while it handles the (possibly unaligned) store) }
    stw     r5,0(r3)
    { r3 := align(r3,4) }
    add     r3,r3,r10
    { decrease count with number of bytes already stored }
    sub     r4,r4,r10
    blt     cr1,LFillCharSmall
    { if we have to fill with 0 (which happens a lot), we can simply use }
    { dcbz for the most part, which is very fast, so make a special case }
    { for that }
    cmplwi  cr1,r5,0
    { align to a multiple of 32 (and immediately check whether we aren't }
    { already 32 byte aligned) }
    rlwinm. r10,r3,0,31-5+1,31
    { setup r3 for using update forms of store instructions }
    subi    r3,r3,4
    { get number of bytes to store }
    subfic  r10,r10,32
    { if already 32 byte aligned, skip align loop }
    beq     L32ByteAlignLoopDone
    { subtract from the total count }
    sub     r4,r4,r10
  L32ByteAlignLoop:
    { we were already aligned to 4 bytes, so this will count down to }
    { exactly 0 }
    subic.  r10,r10,4
    stwu    r5,4(r3)
    bne     L32ByteAlignLoop
  L32ByteAlignLoopDone:
    { get the amount of 32 byte blocks ( >= 1, since count was >= 64 ) }
    srwi    r10,r4,5
    { and keep the rest in r4 (recording whether there is any rest) }
    rlwinm. r4,r4,0,31-5+1,31
    { move to ctr }
    mtctr   r10
    { check how much rest there is (meant to select between FillCharSmall }
    { and FillCharVerySmall; both block loops below currently always      }
    { continue with FillCharVerySmall, so cr7 is not read afterwards)     }
    cmplwi  cr7,r4,11
    { if filling with zero, only use dcbz }
    bne     cr1, LFillCharNoZero
    { make r3 point again to the actual store position }
    addi    r3,r3,4
  LFillCharDCBZLoop:
    dcbz    0,r3
    addi    r3,r3,32
    bdnz    LFillCharDCBZLoop
    { if there was no rest, we're finished }
    beq     LFillCharDone
    b       LFillCharVerySmall
  LFillCharNoZero:
    { build a double consisting of 8 copies of the fill byte in f0 }
  {$ifdef ABI_AIX}
    stw     r5,0(sp)
    stw     r5,4(sp)
    lfd     f0,0(sp)
  {$else ABI_AIX}
    stw     r5,temp
    stw     r5,4+temp
    lfd     f0,temp
  {$endif ABI_AIX}
    { make r3 point to address-8, so we're able to use fp double stores }
    { with update (it's already -4 now) }
    subi    r3,r3,4
    { load r10 with 8, so that dcbz uses the correct address }
    li      r10, 8
  LFillChar32ByteLoop:
    dcbz    r3,r10
    stfdu   f0,8(r3)
    stfdu   f0,8(r3)
    stfdu   f0,8(r3)
    stfdu   f0,8(r3)
    bdnz    LFillChar32ByteLoop
    { if there was no rest, we're finished }
    beq     LFillCharDone
    { make r3 point again to the actual next byte that must be written }
    addi    r3,r3,8
    b       LFillCharVerySmall
  LFillCharSmall:
    { when we arrive here, we're already 4 byte aligned }
    { get count div 4 to store dwords }
    srwi    r10,r4,2
    { get ready for use of update stores }
    subi    r3,r3,4
    mtctr   r10
    { r4 := count mod 4, cr0 records whether anything is left }
    rlwinm. r4,r4,0,31-2+1,31
  LFillCharSmallLoop:
    stwu    r5,4(r3)
    bdnz    LFillCharSmallLoop
    { if nothing left, stop }
    beq     LFillCharDone
    { get ready to store bytes }
    addi    r3,r3,4
  LFillCharVerySmall:
    mtctr   r4
    subi    r3,r3,1
  LFillCharVerySmallLoop:
    stbu    r5,1(r3)
    bdnz    LFillCharVerySmallLoop
  LFillCharDone:
  end;
  437. {$define FPC_SYSTEM_HAS_FILLDWORD}
  procedure filldword(var x;count : longint;value : dword);
  assembler;
  { Stores value into count consecutive 32 bit words starting at x.
    Registers: r3 = address of x, r4 = count, r5 = value.
    Nothing is written when count <= 0. }
  asm
    { signed test of the count; cr0 is consumed by the ble below }
    cmpwi   cr0,r4,0
    { the count doubles as loop counter }
    mtctr   r4
    { bias the pointer by one word so the store-with-update form can be used }
    subi    r3,r3,4
    { count <= 0: leave without storing anything }
    ble     LFillDWordEnd
  LFillDWordLoop:
    stwu    r5,4(r3)
    bdnz    LFillDWordLoop
  LFillDWordEnd:
  end;
  455. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  function IndexByte(const buf;len:longint;b:byte):longint; assembler;
  { Looks for the first byte equal to b in the len bytes starting at buf.
    input:  r3 = buf, r4 = len, r5 = b
    output: r3 = zero based offset of the first match, -1 if there is none
    len is compared as an unsigned value, so only len = 0 takes the early
    exit. }
  asm
    { pull the start of the buffer into the data cache }
    dcbt    0,r3
    { len = 0 ? (unsigned compare, used by the ble below) }
    cmplwi  r4,0
    mtctr   r4
    { r10 walks the buffer; biased by -1 for the load-with-update }
    subi    r10,r3,1
    { r0 remembers where the buffer starts }
    mr      r0,r3
    { result if nothing is found }
    li      r3,-1
    ble     LIndexByteDone
  LIndexByteLoop:
    lbzu    r9,1(r10)
    cmplw   r9,r5
    { go on while bytes are left and the current one differs }
    bdnzf   cr0*4+eq,LIndexByteLoop
    { ran out of bytes without a match: r3 is still -1 }
    bne     LIndexByteDone
    { match: offset = current address - start of buffer }
    sub     r3,r10,r0
  LIndexByteDone:
  end;
  478. {$define FPC_SYSTEM_HAS_INDEXWORD}
  function IndexWord(const buf;len:longint;b:word):longint; assembler;
  { Looks for the first 16 bit word equal to b in the len words starting at
    buf.
    input:  r3 = buf, r4 = len (number of words), r5 = b
    output: r3 = zero based index (in words) of the first match, -1 if there
            is none
    len is compared as an unsigned value, so only len = 0 takes the early
    exit.

    Fix relative to the previous revision: the byte distance between the match
    and the start of the buffer is now converted to a word index; before, a
    match in element n returned 2*n. }
  asm
    { load the begin of the buffer in the data cache }
    dcbt    0,r3
    cmplwi  r4,0
    mtctr   r4
    { r10 walks the buffer; biased by -2 for the load-with-update }
    subi    r10,r3,2
    mr      r0,r3
    { assume not found }
    li      r3,-1
    ble     LIndexWordDone
  LIndexWordLoop:
    lhzu    r9,2(r10)
    cmplw   r9,r5
    bdnzf   cr0*4+eq,LIndexWordLoop
    { r3 still contains -1 here }
    bne     LIndexWordDone
    { byte offset of the match ... }
    sub     r3,r10,r0
    { ... divided by the element size gives the index }
    srwi    r3,r3,1
  LIndexWordDone:
  end;
  501. {$define FPC_SYSTEM_HAS_INDEXDWORD}
  function IndexDWord(const buf;len:longint;b:DWord):longint; assembler;
  { Looks for the first 32 bit word equal to b in the len dwords starting at
    buf.
    input:  r3 = buf, r4 = len (number of dwords), r5 = b
    output: r3 = zero based index (in dwords) of the first match, -1 if there
            is none
    len is compared as an unsigned value, so only len = 0 takes the early
    exit.

    Fix relative to the previous revision: the byte distance between the match
    and the start of the buffer is now converted to a dword index; before, a
    match in element n returned 4*n. }
  asm
    { load the begin of the buffer in the data cache }
    dcbt    0,r3
    cmplwi  r4,0
    mtctr   r4
    { r10 walks the buffer; biased by -4 for the load-with-update }
    subi    r10,r3,4
    mr      r0,r3
    { assume not found }
    li      r3,-1
    ble     LIndexDWordDone
  LIndexDWordLoop:
    lwzu    r9,4(r10)
    cmplw   r9,r5
    bdnzf   cr0*4+eq, LIndexDWordLoop
    { r3 still contains -1 here }
    bne     LIndexDWordDone
    { byte offset of the match ... }
    sub     r3,r10,r0
    { ... divided by the element size gives the index }
    srwi    r3,r3,2
  LIndexDWordDone:
  end;
  524. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  function CompareByte(const buf1,buf2;len:longint):longint; assembler;
  { Compares the first len bytes of buf1 and buf2.
    input:  r3 = buf1, r4 = buf2, r5 = len
    output: r3 = 0 if all compared bytes are equal, otherwise the difference
            (byte of buf1 - byte of buf2) at the first position that differs
    len is compared as an unsigned value, so only len = 0 takes the early
    exit (result 0).
    note: almost direct copy of strlcomp() from strings.inc }
  asm
    { pull the start of the first buffer into the data cache }
    dcbt    0,r3
    { len = 0 ? (used by the ble below) }
    cmplwi  r5,0
    mtctr   r5
    { r3 has to hold the result, so buf1 is walked through r11; both }
    { pointers are biased by -1 for the load-with-update             }
    subi    r11,r3,1
    subi    r4,r4,1
    { result for len = 0 }
    li      r3,0
    ble     LCompByteDone
  LCompByteLoop:
    { fetch the next byte of each buffer }
    lbzu    r9,1(r11)
    lbzu    r10,1(r4)
    { r3 := difference, cr0 := whether it is 0 }
    sub.    r3,r9,r10
    { continue while bytes are left and the last pair was equal }
    bdnzt   cr0*4+eq, LCompByteLoop
  LCompByteDone:
  end;
  549. {$define FPC_SYSTEM_HAS_COMPAREWORD}
  function CompareWord(const buf1,buf2;len:longint):longint; assembler;
  { Compares the first len 16 bit words of buf1 and buf2.
    input:  r3 = buf1, r4 = buf2, r5 = len (number of words)
    output: r3 = 0 if all compared words are equal, otherwise the difference
            (word of buf1 - word of buf2) at the first position that differs
    len is compared as an unsigned value, so only len = 0 takes the early
    exit (result 0).
    note: almost direct copy of strlcomp() from strings.inc }
  asm
    { pull the start of the first buffer into the data cache }
    dcbt    0,r3
    { len = 0 ? (used by the ble below) }
    cmplwi  r5,0
    mtctr   r5
    { r3 has to hold the result, so buf1 is walked through r11; both }
    { pointers are biased by -2 for the load-with-update             }
    subi    r11,r3,2
    subi    r4,r4,2
    { result for len = 0 }
    li      r3,0
    ble     LCompWordDone
  LCompWordLoop:
    { fetch the next word of each buffer (zero extended) }
    lhzu    r9,2(r11)
    lhzu    r10,2(r4)
    { r3 := difference, cr0 := whether it is 0 }
    sub.    r3,r9,r10
    { continue while words are left and the last pair was equal }
    bdnzt   cr0*4+eq, LCompWordLoop
  LCompWordDone:
  end;
  574. {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  function CompareDWord(const buf1,buf2;len:longint):longint; assembler;
  { Compares the first len 32 bit words of buf1 and buf2 as unsigned values.
    input:  r3 = buf1, r4 = buf2, r5 = len (number of dwords)
    output: r3 = 0 if all compared dwords are equal, 1 if the first differing
            dword of buf1 is larger than that of buf2, -1 if it is smaller
    len is compared as an unsigned value, so only len = 0 takes the early
    exit (result 0).

    Fix relative to the previous revision: the result used to be the 32 bit
    difference of the two dwords, whose sign is wrong whenever the subtraction
    overflows (e.g. 0 compared with $80000001 gave a positive result). The
    sign is now derived from an unsigned compare; callers that only test for
    < 0, = 0 or > 0 are unaffected otherwise.
    note: derived from strlcomp() from strings.inc }
  asm
    { load the begin of the first buffer in the data cache }
    dcbt    0,r3
    cmplwi  r5,0
    mtctr   r5
    { r3 has to hold the result, so buf1 is walked through r11; both }
    { pointers are biased by -4 for the load-with-update             }
    subi    r11,r3,4
    subi    r4,r4,4
    li      r3,0
    ble     LCompDWordDone
  LCompDWordLoop:
    { load next dwords }
    lwzu    r9,4(r11)
    lwzu    r10,4(r4)
    { r3 := 0 and cr0.eq set exactly when both dwords are equal }
    sub.    r3,r9,r10
    { if dwords not equal or at the end, we're ready }
    bdnzt   cr0*4+eq, LCompDWordLoop
    { last pair equal -> everything was equal, r3 = 0 }
    beq     LCompDWordDone
    { otherwise decide the sign with an unsigned compare }
    cmplw   cr1,r9,r10
    li      r3,1
    bgt     cr1,LCompDWordDone
    li      r3,-1
  LCompDWordDone:
  end;
  599. {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  function IndexChar0(const buf;len:longint;b:Char):longint; assembler;
  { Looks for the first occurrence of b in buf, stopping at the first #0
    character or after len characters, whichever comes first.
    input:  r3 = buf, r4 = len, r5 = b
    output: r3 = zero based offset of the first match, -1 if b was not found
            (a #0 that is reached before a match also yields -1, even when b
            itself is #0)
    len is compared as an unsigned value, so only len = 0 takes the early
    exit.

    Fix relative to the previous revision: the start address is now saved
    unbiased, so a match returns its zero based offset like IndexByte does;
    before, offset + 1 was returned. }
  asm
    { load the begin of the buffer in the data cache }
    dcbt    0,r3
    { length = 0? }
    cmplwi  r4,0
    mtctr   r4
    { r9 walks the buffer; biased by -1 for the load-with-update }
    subi    r9,r3,1
    { r0 := start of the buffer, needed to compute the offset of a match }
    mr      r0,r3
    { assume not found }
    li      r3,-1
    { if yes, do nothing }
    ble     LIndexChar0Done
  LIndexChar0Loop:
    lbzu    r10,1(r9)
    { cr1 := current char = #0 ?, cr0 := current char = b ? }
    cmplwi  cr1,r10,0
    cmplw   r10,r5
    { terminator reached: not found }
    beq     cr1,LIndexChar0Done
    bdnzf   cr0*4+eq, LIndexChar0Loop
    { ran out of characters without a match: r3 is still -1 }
    bne     LIndexChar0Done
    sub     r3,r9,r0
  LIndexChar0Done:
  end;
  625. {****************************************************************************
  626. String
  627. ****************************************************************************}
  628. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COPY}
  function fpc_shortstr_to_shortstr(len:longint; const sstr: shortstring): shortstring; [public,alias: 'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
  assembler;
  { Copies sstr into the result, truncated to at most len characters.
    input: r3 = pointer to result, r4 = len, r5 = pointer to sstr
    The length byte written to the result is min(length(sstr), len), the
    two values being compared as unsigned numbers. }
  asm
    { r10 := length(sstr) }
    lbz     r10,0(r5)
    { announce the upcoming stores to the start of the result }
    dcbtst  0,r3
    { branch free r4 := min(r10, r4): }
    {   r7 := r4 - r10, carry set when r4 >= r10 }
    subfc   r7,r10,r4
    {   r4 := 0 when r4 >= r10, -1 otherwise }
    subfe   r4,r4,r4
    {   r7 := 0 when r4 >= r10, r4 - r10 otherwise }
    and     r7,r7,r4
    {   r4 := r10 when r4 >= r10, r4 otherwise }
    add     r4,r10,r7
    { nothing to copy? (used by the beq below) }
    cmplwi  r4,0
    { number of characters to copy }
    mtctr   r4
    { length byte of the result }
    stb     r4,0(r3)
    beq     LShortStrCopyDone
  LShortStrCopyLoop:
    lbzu    r0,1(r5)
    stbu    r0,1(r3)
    bdnz    LShortStrCopyLoop
  LShortStrCopyDone:
  end;
  653. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$ifdef interncopy}
  procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN'];
  {$else}
  procedure fpc_shortstr_copy(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_COPY'];
  {$endif}
  assembler;
  { Copies the short string at sstr to dstr, truncated to at most len
    characters.
    input: r3 = len, r4 = sstr, r5 = dstr
    The length byte written to dstr is min(length of sstr, len), the two
    values being compared as unsigned numbers. }
  asm
    { r10 := length byte of the source string }
    lbz     r10,0(r4)
    { announce the upcoming stores to the start of the destination }
    dcbtst  0,r5
    { branch free r3 := min(r10, r3): }
    {   r0 := r3 - r10, carry set when r3 >= r10 }
    subc    r0,r3,r10
    {   r3 := 0 when r3 >= r10, -1 otherwise }
    subfe   r3,r3,r3
    {   r3 := 0 when r3 >= r10, r3 - r10 otherwise }
    and     r3,r0,r3
    {   r3 := r10 when r3 >= r10, r3 otherwise }
    add     r3,r3,r10
    { nothing to copy? (used by the beq below) }
    cmplwi  r3,0
    { number of characters to copy }
    mtctr   r3
    { length byte of the destination }
    stb     r3,0(r5)
    beq     LShortStrCopyDone2
  LShortStrCopyLoop2:
    lbzu    r0,1(r4)
    stbu    r0,1(r5)
    bdnz    LShortStrCopyLoop2
  LShortStrCopyDone2:
  end;
  682. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_APPEND_SHORTSTR}
  procedure fpc_shortstr_append_shortstr(var s1: shortstring; const s2: shortstring); compilerproc;
  { Appends s2 to s1, truncating so that s1 never becomes longer than
    high(s1).
    expects that r3 contains a pointer to the current string s1, r4 high(s1)
    and r5 a pointer to the string that has to be concatenated

    Fix relative to the previous revision: the "nothing to do" exit now tests
    the number of characters that will really be copied. It used to test
    length(s2) only, so appending a non-empty s2 to an s1 that was already
    full entered the copy loop with 0 in ctr, which makes bdnz run 2^32
    iterations. }
  assembler;
  asm
    { load length s1 }
    lbz     r6, 0(r3)
    { load length s2 }
    lbz     r10, 0(r5)
    { calculate min(length(s2),high(s1)-length(s1)) }
    sub     r9,r4,r6
    subc    r8,r9,r10    { r8 := r9 - r10, carry set when r9 >= r10 }
    subfe   r9,r9,r9     { if r9 >= r10 then r9' := 0 else r9' := -1 }
    and     r9,r8,r9     { if r9 >= r10 then r9' := 0 else r9' := r9-r10 }
    add     r9,r9,r10    { if r9 >= r10 then r9' := r10 else r9' := r9 }
    { nothing to copy (s2 empty or no room left in s1)? The result stays }
    { in cr0 until the beq below; none of the instructions in between    }
    { change it                                                          }
    cmplwi  r9,0
    { calculate new length }
    add     r10,r6,r9
    { load value to copy in ctr }
    mtctr   r9
    { store new length }
    stb     r10,0(r3)
    { go to last current character of s1 }
    add     r3,r6,r3
    { if nothing to do, exit }
    beq     LShortStrAppendDone
    { and concatenate }
  LShortStrAppendLoop:
    lbzu    r10,1(r5)
    stbu    r10,1(r3)
    bdnz    LShortStrAppendLoop
  LShortStrAppendDone:
  end;
  717. (*
  718. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  719. function fpc_shortstr_compare(const dstr,sstr:shortstring): longint; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
  720. assembler;
  721. asm
  722. { load length sstr }
  723. lbz r9,0(r4)
  724. { load length dstr }
  725. lbz r10,0(r3)
  726. { save their difference for later and }
  727. { calculate min(length(sstr),length(dstr)) }
  728. subfc r7,r10,r9 { r0 := r9 - r10 }
  729. subfe r9,r9,r9 { if r9 >= r10 then r9' := 0 else r9' := -1 }
  730. and r7,r7,r9 { if r9 >= r10 then r9' := 0 else r9' := r9-r8 }
  731. add r9,r10,r7 { if r9 >= r10 then r9' := r10 else r9' := r9 }
  732. { first compare dwords (length/4) }
  733. srwi. r5,r9,2
  734. { keep length mod 4 for the ends }
  735. rlwinm r9,r9,0,30,31
  736. { already check whether length mod 4 = 0 }
  737. cmplwi cr1,r9,0
  738. { so we can load r3 with 0, in case the strings both have length 0 }
  739. mr r8,r3
  740. li r3, 0
  741. { length div 4 in ctr for loop }
  742. mtctr r5
  743. { if length < 3, goto byte comparing }
  744. beq LShortStrCompare1
  745. { setup for use of update forms of load/store with dwords }
  746. subi r4,r4,3
  747. subi r8,r8,3
  748. LShortStrCompare4Loop:
  749. lwzu r3,4(r4)
  750. lwzu r10,4(r8)
  751. sub. r3,r3,r10
  752. bdnzt cr0+eq,LShortStrCompare4Loop
  753. { r3 contains result if we stopped because of "ne" flag }
  754. bne LShortStrCompareDone
  755. { setup for use of update forms of load/store with bytes }
  756. addi r4,r4,3
  757. addi r8,r8,3
  758. LShortStrCompare1:
  759. { if comparelen mod 4 = 0, skip this and return the difference in }
  760. { lengths }
  761. beq cr1,LShortStrCompareLen
  762. mtctr r9
  763. LShortStrCompare1Loop:
  764. lbzu r3,1(r4)
  765. lbzu r10,1(r8)
  766. sub. r3,r3,r10
  767. bdnzt cr0+eq,LShortStrCompare1Loop
  768. bne LShortStrCompareDone
  769. LShortStrCompareLen:
  770. { also return result in flags, maybe we can use this in the CG }
  771. mr. r3,r3
  772. LShortStrCompareDone:
  773. end;
  774. *)
  775. {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  function fpc_pchar_to_shortstr(p:pchar):shortstring;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
  assembler;
  { the body of this routine is not written here: it is whatever the }
  { include file strpas.inc contains                                  }
  {$include strpas.inc}
  779. {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  function fpc_pchar_length(p:pchar):longint;assembler;[public,alias:'FPC_PCHAR_LENGTH']; {$ifdef hascompilerproc} compilerproc; {$endif}
  { the body of this routine is not written here: it is whatever the }
  { include file strlen.inc contains                                  }
  {$include strlen.inc}
  782. {$define FPC_SYSTEM_HAS_GET_FRAME}
  function get_frame:pointer;assembler;
  { Returns the current value of r1, which is used as stack pointer by all
    ABIs the original author knew of. }
  asm
    mr      r3, r1
  end;
  788. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  function get_caller_addr(framebp:pointer):pointer;assembler;
  { Returns the saved return address belonging to the frame framebp.
    input:  r3 = framebp
    output: r3 = word read from the link register save slot of the frame that
            the first word of framebp (the back chain) points to; nil when
            framebp or that back chain word is nil

    The offset of the link register save slot depends on the ABI: 4 is used
    by default, 8 when ABI_AIX is defined (the same define FillChar in this
    file uses to select AIX ABI behaviour). Before, 4 was used
    unconditionally. }
  asm
    cmplwi  r3,0
    beq     Lcaller_addr_frame_null
    { follow the back chain to the next frame }
    lwz     r3,0(r3)
    cmplwi  r3,0
    beq     Lcaller_addr_frame_null
    { fetch the saved link register from that frame }
  {$ifdef ABI_AIX}
    lwz     r3,8(r3)
  {$else ABI_AIX}
    lwz     r3,4(r3)
  {$endif ABI_AIX}
  Lcaller_addr_frame_null:
  end;
  800. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  function get_caller_frame(framebp:pointer):pointer;assembler;
  { Returns the word stored at framebp (the back chain to the next frame),
    or nil when framebp itself is nil.
    input: r3 = framebp, output: r3 }
  asm
    { a nil frame is handed back unchanged }
    cmplwi  r3,0
    beq     Lcaller_frame_null
    { otherwise load the back chain word }
    lwz     r3,0(r3)
  Lcaller_frame_null:
  end;
  808. {$define FPC_SYSTEM_HAS_ABS_LONGINT}
  function abs(l:longint):longint; assembler;[internconst:in_const_abs];
  { Branch free absolute value of l (r3 in, r3 out).
    With m = l shifted right arithmetically by 31 (0 for l >= 0, -1 for
    l < 0), the result is (l xor m) - m. The most negative longint is
    returned unchanged. }
  asm
    { r0 := sign mask of l }
    srawi   r0,r3,31
    { flip all bits when l is negative ... }
    xor     r3,r3,r0
    { ... and add 1 in that case (subtracting -1) }
    sub     r3,r3,r0
  end;
  815. {****************************************************************************
  816. Math
  817. ****************************************************************************}
  818. {$define FPC_SYSTEM_HAS_ODD_LONGINT}
  function odd(l:longint):boolean;assembler;[internconst:in_const_odd];
  { Returns whether l is odd: the result is the least significant bit of l
    (r3 in, r3 out). }
  asm
    { keep only bit 31 (the lowest bit), clear everything else }
    rlwinm  r3,r3,0,31,31
  end;
  823. {$define FPC_SYSTEM_HAS_SQR_LONGINT}
  function sqr(l:longint):longint;assembler;[internconst:in_const_sqr];
  { Returns the low 32 bits of l * l (r3 in, r3 out). }
  asm
    mullw   r3,r3,r3
  end;
  828. {$define FPC_SYSTEM_HAS_SPTR}
  Function Sptr : Longint;assembler;
  { Returns the current contents of r1 (the stack pointer register) as a
    longint. }
  asm
    mr      r3,r1
  end;
  833. {****************************************************************************
  834. Str()
  835. ****************************************************************************}
  836. { int_str: generic implementation is used for now }
  837. {****************************************************************************
  838. Multithreading
  839. ****************************************************************************}
  840. { do a thread-safe inc/dec }
  841. {$define FPC_SYSTEM_HAS_DECLOCKED}
  842. function declocked(var l : longint) : boolean;assembler;
  843. { input: address of l in r3 }
  844. { output: boolean indicating whether l is zero after decrementing }
  845. asm
  846. LDecLockedLoop:
  847. lwarx r10,0,r3
  848. subi r10,r10,1
  849. stwcx. r10,0,r3
  850. bne- LDecLockedLoop
  851. cntlzw r3,r10
  852. srwi r3,r3,5
  853. end;
  854. {$define FPC_SYSTEM_HAS_INCLOCKED}
  855. procedure inclocked(var l : longint);assembler;
  856. asm
  857. LIncLockedLoop:
  858. lwarx r10,0,r3
  859. addi r10,r10,1
  860. stwcx. r10,0,r3
  861. bne- LIncLockedLoop
  862. end;
  863. {
  864. $Log$
  865. Revision 1.51 2003-06-14 12:41:08 jonas
  866. * fixed compilation problems (removed unnecessary modified registers
  867. lists from procedures)
  868. Revision 1.50 2003/06/01 14:50:17 jonas
  869. * fpc_shortstr_append_shortstr has to use high(s1) instead of 255 as
  870. maxlen
  871. + ppc version of fpc_shortstr_append_shortstr
  872. Revision 1.49 2003/05/29 21:17:27 jonas
  873. * compile with -dppc603 to not use unaligned float loads in move() and
  874. g_concatcopy, because the 603 and 604 take an exception for those
  875. (and netbsd doesn't even handle those in the kernel). There are
  876. still some of those left that could cause problems though (e.g.
  877. in the set helpers)
  878. Revision 1.48 2003/05/29 14:32:54 jonas
  879. * changed dcbst to dcbtst (former means "flush cache block to memory",
  880. the latter means "I will soon store something to that cache block")
  881. Revision 1.47 2003/05/29 12:14:02 jonas
  882. * move() now uses dcbz if possible
  883. Revision 1.46 2003/05/17 00:19:51 jonas
  884. * fixed inclocked
  885. Revision 1.45 2003/05/14 19:47:35 jonas
  886. * fixed stupid bug in filldword
  887. Revision 1.44 2003/05/13 20:39:26 florian
  888. * uncommented shortstring compare, buggy
  889. Revision 1.43 2003/05/12 19:39:33 jonas
  890. * fixed final fillchar error (tfillchr passes now)
  891. Revision 1.42 2003/05/12 19:00:50 jonas
  892. * fixed bug in fillchar
  893. Revision 1.41 2003/05/10 20:33:39 jonas
  894. * fixed get_caller_frame and get_caller_addr
  895. Revision 1.40 2003/05/10 17:33:06 jonas
  896. * final (? :) fix to move, passes new tests/test/tmove test
  897. Revision 1.39 2003/05/02 19:03:25 jonas
  898. * fixed some bugs in move()
  899. Revision 1.38 2003/04/27 16:24:44 jonas
  900. - disabled fpc_shortstr_concat because it's called differently than that
  901. routine is declared
  902. Revision 1.37 2003/04/26 20:37:17 jonas
  903. * fixed and re-enabled routines commented out by Florian :)
  904. Revision 1.36 2003/04/26 17:46:49 florian
  905. * commented out not working routines
  906. * reactivated assembler fillchar
  907. Revision 1.35 2003/04/26 17:35:15 jonas
  908. * fixed FillChar
  909. Revision 1.34 2003/04/26 12:05:10 florian
  910. * removed object/class helpers, the compiler uses the generic ones
  911. Revision 1.33 2003/04/26 11:55:52 florian
  912. * fixed newlines
  913. Revision 1.32 2003/04/23 21:04:48 florian
  914. * fixed fpc_shortstr_to_shortstr
  915. Revision 1.31 2003/03/17 14:30:11 peter
  916. * changed address parameter/return values to pointer instead
  917. of longint
  918. Revision 1.30 2003/03/12 19:21:29 jonas
  919. + implemented get_frame()
  920. * fixed bug in IndexDWord()
  921. Revision 1.29 2003/01/09 20:14:35 florian
  922. * fixed helper declarations
  923. Revision 1.28 2003/01/09 13:38:56 florian
  924. * dec/inclocked got defines
  925. Revision 1.27 2002/11/07 15:23:13 jonas
  926. * always use code that was between 'ifdef mt', since that define is
  927. deprecated now
  928. Revision 1.26 2002/11/01 13:27:55 jonas
  929. * changed "dcbtst r0,x" to "dcbtst 0,x"
  930. Revision 1.25 2002/10/23 15:26:00 olle
  931. * excluded saverestorereg for target macos
  932. Revision 1.24 2002/10/20 13:40:55 jonas
  933. * move/fill*/index*/comp* routines immediately exit if length is negative
  934. Revision 1.23 2002/10/17 10:12:50 jonas
  935. * fixed return value of declocked()
  936. Revision 1.22 2002/10/05 14:20:16 peter
  937. * fpc_pchar_length compilerproc and strlen alias
  938. Revision 1.21 2002/10/02 18:21:52 peter
  939. * Copy() changed to internal function calling compilerprocs
  940. * FPC_SHORTSTR_COPY renamed to FPC_SHORTSTR_ASSIGN because of the
  941. new copy functions
  942. Revision 1.20 2002/09/10 21:30:34 jonas
  943. * disabled powerpc-specific fpc_shortstr_concat for now, it was
  944. completely wrong
  945. Revision 1.19 2002/09/10 17:47:20 jonas
  946. * fixed bug with concatting 0-length shortstrings
  947. Revision 1.18 2002/09/07 16:01:26 peter
  948. * old logs removed and tabs fixed
  949. Revision 1.17 2002/08/31 21:29:57 florian
  950. * several PC related fixes
  951. Revision 1.16 2002/08/31 16:08:36 florian
  952. * fixed undefined labels
  953. Revision 1.15 2002/08/31 13:11:11 florian
  954. * several fixes for Linux/PPC compilation
  955. Revision 1.14 2002/08/18 22:11:10 florian
  956. * fixed remaining assembler errors
  957. Revision 1.13 2002/08/18 21:37:48 florian
  958. * several errors in inline assembler fixed
  959. Revision 1.12 2002/08/10 17:14:36 jonas
  960. * various fixes, mostly changing the names of the modifies registers to
  961. upper case since that seems to be required by the compiler
  962. Revision 1.11 2002/07/30 17:29:53 florian
  963. + dummy setjmp and longjmp added
  964. + dummy implementation of the destructor helper
  965. Revision 1.10 2002/07/28 21:39:29 florian
  966. * made abs a compiler proc if it is generic
  967. Revision 1.9 2002/07/28 20:43:49 florian
  968. * several fixes for linux/powerpc
  969. * several fixes to MT
  970. Revision 1.8 2002/07/26 15:45:56 florian
  971. * changed multi threading define: it's MT instead of MTRTL
  972. }