powerpc.inc 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2000-2001 by the Free Pascal development team.
  5. Portions Copyright (c) 2000 by Casey Duncan ([email protected])
  6. Processor dependent implementation for the system unit for
  7. PowerPC
  8. See the file COPYING.FPC, included in this distribution,
  9. for details about the copyright.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. **********************************************************************}
  14. {****************************************************************************
  15. PowerPC specific stuff
  16. ****************************************************************************}
  { saverestorereg is never called by name. It only serves as a container for
    the globally visible floating point register restore entry points below.

    Entering at _restfpr_N_x or _restfpr_N_l reloads fN..f31 from the area
    directly below the address in r11 (fK lives at -8*(32-K)(r11)), then
    loads the word at 4(r11) into lr, copies r11 into r1 and returns through
    lr. In this file the "_x" and "_l" families are instruction for
    instruction identical. }
  {$ifndef MACOS}
  label
    _restfpr_14_x, _restfpr_15_x, _restfpr_16_x, _restfpr_17_x,
    _restfpr_18_x, _restfpr_19_x, _restfpr_20_x, _restfpr_21_x,
    _restfpr_22_x, _restfpr_23_x, _restfpr_24_x, _restfpr_25_x,
    _restfpr_26_x, _restfpr_27_x, _restfpr_28_x, _restfpr_29_x,
    _restfpr_30_x, _restfpr_31_x,
    _restfpr_14_l, _restfpr_15_l, _restfpr_16_l, _restfpr_17_l,
    _restfpr_18_l, _restfpr_19_l, _restfpr_20_l, _restfpr_21_l,
    _restfpr_22_l, _restfpr_23_l, _restfpr_24_l, _restfpr_25_l,
    _restfpr_26_l, _restfpr_27_l, _restfpr_28_l, _restfpr_29_l,
    _restfpr_30_l, _restfpr_31_l;

  procedure saverestorereg;assembler;
  asm
          { ---- "_x" family: each entry falls through into the next ---- }
          .globl  _restfpr_14_x
  _restfpr_14_x:
          lfd     f14,-144(r11)
          .globl  _restfpr_15_x
  _restfpr_15_x:
          lfd     f15,-136(r11)
          .globl  _restfpr_16_x
  _restfpr_16_x:
          lfd     f16,-128(r11)
          .globl  _restfpr_17_x
  _restfpr_17_x:
          lfd     f17,-120(r11)
          .globl  _restfpr_18_x
  _restfpr_18_x:
          lfd     f18,-112(r11)
          .globl  _restfpr_19_x
  _restfpr_19_x:
          lfd     f19,-104(r11)
          .globl  _restfpr_20_x
  _restfpr_20_x:
          lfd     f20,-96(r11)
          .globl  _restfpr_21_x
  _restfpr_21_x:
          lfd     f21,-88(r11)
          .globl  _restfpr_22_x
  _restfpr_22_x:
          lfd     f22,-80(r11)
          .globl  _restfpr_23_x
  _restfpr_23_x:
          lfd     f23,-72(r11)
          .globl  _restfpr_24_x
  _restfpr_24_x:
          lfd     f24,-64(r11)
          .globl  _restfpr_25_x
  _restfpr_25_x:
          lfd     f25,-56(r11)
          .globl  _restfpr_26_x
  _restfpr_26_x:
          lfd     f26,-48(r11)
          .globl  _restfpr_27_x
  _restfpr_27_x:
          lfd     f27,-40(r11)
          .globl  _restfpr_28_x
  _restfpr_28_x:
          lfd     f28,-32(r11)
          .globl  _restfpr_29_x
  _restfpr_29_x:
          lfd     f29,-24(r11)
          .globl  _restfpr_30_x
  _restfpr_30_x:
          lfd     f30,-16(r11)
          .globl  _restfpr_31_x
  _restfpr_31_x:
          lfd     f31,-8(r11)
          { return address is taken from 4(r11) }
          lwz     r0,4(r11)
          mtlr    r0
          { r1 := r11 }
          mr      r1,r11
          blr

          { ---- "_l" family: same sequence under the "_l" names ---- }
          .globl  _restfpr_14_l
  _restfpr_14_l:
          lfd     f14,-144(r11)
          .globl  _restfpr_15_l
  _restfpr_15_l:
          lfd     f15,-136(r11)
          .globl  _restfpr_16_l
  _restfpr_16_l:
          lfd     f16,-128(r11)
          .globl  _restfpr_17_l
  _restfpr_17_l:
          lfd     f17,-120(r11)
          .globl  _restfpr_18_l
  _restfpr_18_l:
          lfd     f18,-112(r11)
          .globl  _restfpr_19_l
  _restfpr_19_l:
          lfd     f19,-104(r11)
          .globl  _restfpr_20_l
  _restfpr_20_l:
          lfd     f20,-96(r11)
          .globl  _restfpr_21_l
  _restfpr_21_l:
          lfd     f21,-88(r11)
          .globl  _restfpr_22_l
  _restfpr_22_l:
          lfd     f22,-80(r11)
          .globl  _restfpr_23_l
  _restfpr_23_l:
          lfd     f23,-72(r11)
          .globl  _restfpr_24_l
  _restfpr_24_l:
          lfd     f24,-64(r11)
          .globl  _restfpr_25_l
  _restfpr_25_l:
          lfd     f25,-56(r11)
          .globl  _restfpr_26_l
  _restfpr_26_l:
          lfd     f26,-48(r11)
          .globl  _restfpr_27_l
  _restfpr_27_l:
          lfd     f27,-40(r11)
          .globl  _restfpr_28_l
  _restfpr_28_l:
          lfd     f28,-32(r11)
          .globl  _restfpr_29_l
  _restfpr_29_l:
          lfd     f29,-24(r11)
          .globl  _restfpr_30_l
  _restfpr_30_l:
          lfd     f30,-16(r11)
          .globl  _restfpr_31_l
  _restfpr_31_l:
          lfd     f31,-8(r11)
          { return address is taken from 4(r11) }
          lwz     r0,4(r11)
          mtlr    r0
          { r1 := r11 }
          mr      r1,r11
          blr
  end;
  {$endif MACOS}
  144. {****************************************************************************
  145. Move / Fill
  146. ****************************************************************************}
  147. {$define FPC_SYSTEM_HAS_MOVE}
  148. procedure Move(const source;var dest;count:longint);assembler;
  { Copies count bytes from source to dest.                                   }
  { input: r3 = address of source, r4 = address of dest, r5 = count           }
  { Nothing is copied when count <= 0. When dest-source < count (unsigned)    }
  { the regions overlap and the copy runs from the end towards the start.     }
  { Strategy by size: count < 15 is copied byte by byte; otherwise dest is    }
  { first brought to 4 byte alignment; count < 63 then continues with dwords; }
  { larger counts use 32 byte blocks through f0-f3 with dcbz (or 16 byte      }
  { blocks through integer registers when ppc603 is defined), followed by a   }
  { dword and/or byte tail.                                                   }
  { r10 holds the signed step for the update-form loads/stores, r6 the        }
  { pointer bias that goes with it.                                           }
  149. asm
  150. { count <= 0 ? }
  151. cmpwi cr0,r5,0
  152. { check if we have to do the move backwards because of overlap }
  153. sub r10,r4,r3
  154. { carry := boolean(dest-source >= count) = not boolean(overlap) }
  155. subc r10,r10,r5
  156. { count < 15 ? (to decide whether we will move dwords or bytes) }
  157. cmpwi cr1,r5,15
  158. { if overlap, then r10 := -1 else r10 := 0 }
  159. subfe r10,r10,r10
  160. { count < 63 ? (32 + max. alignment (31)) }
  161. cmpwi cr7,r5,63
  162. { if count <= 0, stop }
  163. ble cr0,.LMoveDone
  164. { load the begin of the source in the data cache }
  165. dcbt 0,r3
  166. { and the dest as well }
  167. dcbtst 0,r4
  168. { if overlap, then r0 := count else r0 := 0 }
  169. and r0,r5,r10
  170. { if overlap, then point source and dest to the end }
  171. add r3,r3,r0
  172. add r4,r4,r0
  173. { if overlap, then r6 := 0, else r6 := -1 }
  174. not r6,r10
  175. { if overlap, then r10 := -2, else r10 := 0 }
  176. slwi r10,r10,1
  177. { if overlap, then r10 := -1, else r10 := 1 }
  178. addi r10,r10,1
  179. { if count < 15, copy everything byte by byte }
  180. blt cr1,.LMoveBytes
  181. { if no overlap, then source/dest += -1, otherwise they stay }
  182. { After the next instruction, r3/r4 + r10 = next position to }
  183. { load/store from/to }
  184. add r3,r3,r6
  185. add r4,r4,r6
  186. { otherwise, guarantee 4 byte alignment for dest for starters }
  187. .LMove4ByteAlignLoop:
  188. lbzux r0,r3,r10
  189. stbux r0,r4,r10
  190. { is dest now 4 aligned? }
  191. andi. r0,r4,3
  192. subi r5,r5,1
  193. { while not aligned, continue }
  194. bne cr0,.LMove4ByteAlignLoop
  195. {$ifndef ppc603}
  196. { check for 32 byte alignment }
  197. andi. r7,r4,31
  198. {$endif non ppc603}
  199. { we are going to copy one byte again (the one at the newly }
  200. { aligned address), so increase count by 1 }
  201. addi r5,r5,1
  202. { count div 4 for number of dwords to copy }
  203. srwi r0,r5,2
  204. { if 15 <= count < 63, copy using dwords }
  205. blt cr7,.LMoveDWords
  206. {$ifndef ppc603}
  207. { # of dwords to copy to reach 32 byte alignment (*4) }
  208. { (depends on forward/backward copy) }
  209. { if forward copy, r6 = -1 -> r8 := 32 }
  210. { if backward copy, r6 = 0 -> r8 := 0 }
  211. rlwinm r8,r6,0,31-6+1,31-6+1
  212. { if forward copy, we have to copy 32 - unaligned count bytes }
  213. { if backward copy unaligned count bytes }
  214. sub r7,r8,r7
  215. { if backward copy, the calculated value is now negated -> }
  216. { make it positive again }
  217. not r8, r6
  218. add r7, r7, r8
  219. xor r7, r7, r8
  220. {$endif not ppc603}
  221. { multiply the update count with 4 }
  222. slwi r10,r10,2
  223. slwi r6,r6,2
  224. { and adapt the source and dest }
  225. add r3,r3,r6
  226. add r4,r4,r6
  227. {$ifndef ppc603}
  228. beq cr0,.LMove32BytesAligned
  229. .L32BytesAlignMoveLoop:
  230. { copy dwords until dest is 32 byte aligned (r7 = bytes still to go); }
  231. { since we're already at 4 byte alignment, use dword store }
  232. subic. r7,r7,4
  233. lwzux r0,r3,r10
  234. subi r5,r5,4
  235. stwux r0,r4,r10
  236. bne .L32BytesAlignMoveLoop
  237. .LMove32BytesAligned:
  238. { count div 32 ( >= 1, since count was >=63 ) }
  239. srwi r0,r5,5
  240. { remainder }
  241. andi. r5,r5,31
  242. { to decide if we will do some dword stores (instead of only }
  243. { byte stores) afterwards or not }
  244. {$else not ppc603}
  245. srwi r0,r5,4
  246. andi. r5,r5,15
  247. {$endif not ppc603}
  248. cmpwi cr1,r5,11
  249. mtctr r0
  250. { r0 := count div 4, will be moved to ctr when copying dwords }
  251. srwi r0,r5,2
  252. {$ifndef ppc603}
  253. { adjust the update count: it will now be 8 or -8 depending on overlap }
  254. slwi r10,r10,1
  255. { adjust source and dest pointers: because of the above loop, dest is now }
  256. { aligned to 8 bytes. So if we add r6 we will still have an 8 bytes }
  257. { aligned address }
  258. add r3,r3,r6
  259. add r4,r4,r6
  260. slwi r6,r6,1
  261. { the dcbz offset must give a 32 byte aligned address when added }
  262. { to the current dest address and its address must point to the }
  263. { bytes that will be overwritten in the current iteration. In case }
  264. { of a forward loop, the dest address has currently an offset of }
  265. { -8 compared to the bytes that will be overwritten (and r6 = -8). }
  266. { In case of a backward loop, the dest address currently has }
  267. { an offset of +32 compared to the bytes that will be overwritten }
  268. { (and r6 = 0). So the forward dcbz offset must become +8 and the }
  269. { backward -32 -> (-r6 * 5) - 32 gives the correct offset }
  270. slwi r7,r6,2
  271. add r7,r7,r6
  272. neg r7,r7
  273. subi r7,r7,32
  274. .LMove32ByteDcbz:
  275. lfdux f0,r3,r10
  276. lfdux f1,r3,r10
  277. lfdux f2,r3,r10
  278. lfdux f3,r3,r10
  279. { must be done only now, in case source and dest are less than }
  280. { 32 bytes apart! }
  281. dcbz r4,r7
  282. stfdux f0,r4,r10
  283. stfdux f1,r4,r10
  284. stfdux f2,r4,r10
  285. stfdux f3,r4,r10
  286. bdnz .LMove32ByteDcbz
  287. .LMove32ByteLoopDone:
  288. {$else not ppc603}
  289. .LMove16ByteLoop:
  290. lwzux r11,r3,r10
  291. lwzux r7,r3,r10
  292. lwzux r8,r3,r10
  293. lwzux r9,r3,r10
  294. stwux r11,r4,r10
  295. stwux r7,r4,r10
  296. stwux r8,r4,r10
  297. stwux r9,r4,r10
  298. bdnz .LMove16ByteLoop
  299. {$endif not ppc603}
  300. { cr0*4+eq is true if the remainder ("count and 31", or "count and 15" }
  { for ppc603) = 0 }
  301. beq cr0,.LMoveDone
  302. { make r10 again -1 or 1, but first adjust source/dest pointers }
  303. sub r3,r3,r6
  304. sub r4,r4,r6
  305. {$ifndef ppc603}
  306. srawi r10,r10,3
  307. srawi r6,r6,3
  308. {$else not ppc603}
  309. srawi r10,r10,2
  310. srawi r6,r6,2
  311. {$endif not ppc603}
  312. { cr1 contains whether count <= 11 }
  313. ble cr1,.LMoveBytes
  314. .LMoveDWords:
  315. mtctr r0
  316. andi. r5,r5,3
  317. { r10 * 4 }
  318. slwi r10,r10,2
  319. slwi r6,r6,2
  320. add r3,r3,r6
  321. add r4,r4,r6
  322. .LMoveDWordsLoop:
  323. lwzux r0,r3,r10
  324. stwux r0,r4,r10
  325. bdnz .LMoveDWordsLoop
  326. beq cr0,.LMoveDone
  327. { make r10 again -1 or 1 }
  328. sub r3,r3,r6
  329. sub r4,r4,r6
  330. srawi r10,r10,2
  331. srawi r6,r6,2
  332. .LMoveBytes:
  333. add r3,r3,r6
  334. add r4,r4,r6
  335. mtctr r5
  336. .LMoveBytesLoop:
  337. lbzux r0,r3,r10
  338. stbux r0,r4,r10
  339. bdnz .LMoveBytesLoop
  340. .LMoveDone:
  341. end;
  {$define FPC_SYSTEM_HAS_FILLCHAR}
  Procedure FillChar(var x;count:longint;value:byte);assembler;
  { Fills count bytes starting at x with value.                               }
  { input: x in r3, count in r4, value in r5                                  }
  { Nothing is written when count <= 0.                                       }
  { Strategy by size:                                                         }
  {   count < 15 : plain byte loop                                            }
  {   count < 64 : one (possibly unaligned) word store to reach 4 byte        }
  {                alignment, then a word loop, then a byte tail              }
  {   otherwise  : as above, then word stores up to a 32 byte boundary, then  }
  {                32 byte blocks (dcbz only when the fill value is 0, dcbz   }
  {                plus four double stores otherwise), then a byte tail       }
  {$ifndef FPC_ABI_AIX}
  { in the AIX ABI, we can use the red zone for temp storage, otherwise we    }
  { have to explicitly allocate room                                          }
  var
    temp : packed record
      case byte of
        0: (l1,l2: longint);
        1: (d: double);
    end;
  {$endif FPC_ABI_AIX}
  asm
          { no bytes? }
          cmpwi     cr6,r4,0
          { less than 15 bytes? }
          cmpwi     cr7,r4,15
          { less than 64 bytes? The block path below can use up to 4 bytes }
          { for the initial word store and up to 28 more to reach a 32     }
          { byte boundary. Only with count >= 64 is at least one complete  }
          { 32 byte block guaranteed to remain; with less, ctr would be    }
          { loaded with 0 and the bdnz loop would wrap around.             }
          cmpwi     cr1,r4,64
          { fill r5 with ValueValueValueValue }
          rlwimi    r5,r5,8,16,23
          { setup for aligning x to multiple of 4 }
          rlwinm    r10,r3,0,31-2+1,31
          rlwimi    r5,r5,16,0,15
          ble       cr6,.LFillCharDone
          { get the start of the data in the cache (and mark it as "will be }
          { modified") }
          dcbtst    0,r3
          { r10 := number of bytes up to the next 4 byte boundary (1..4) }
          subfic    r10,r10,4
          blt       cr7,.LFillCharVerySmall
          { just store 4 bytes instead of using a loop to align (there are }
          { plenty of other instructions now to keep the processor busy    }
          { while it handles the (possibly unaligned) store) }
          stw       r5,0(r3)
          { r3 := align(r3,4) }
          add       r3,r3,r10
          { decrease count with number of bytes already stored }
          sub       r4,r4,r10
          blt       cr1,.LFillCharSmall
          { if we have to fill with 0 (which happens a lot), we can simply use }
          { dcbz for the most part, which is very fast, so make a special case }
          { for that }
          cmplwi    cr1,r5,0
          { align to a multiple of 32 (and immediately check whether we aren't }
          { already 32 byte aligned) }
          rlwinm.   r10,r3,0,31-5+1,31
          { setup r3 for using update forms of store instructions }
          subi      r3,r3,4
          { get number of bytes to store }
          subfic    r10,r10,32
          { if already 32byte aligned, skip align loop }
          beq       .L32ByteAlignLoopDone
          { subtract from the total count }
          sub       r4,r4,r10
  .L32ByteAlignLoop:
          { we were already aligned to 4 bytes, so this will count down to }
          { exactly 0 }
          subic.    r10,r10,4
          stwu      r5,4(r3)
          bne       .L32ByteAlignLoop
  .L32ByteAlignLoopDone:
          { get the amount of 32 byte blocks }
          srwi      r10,r4,5
          { and keep the rest in r4 (recording whether there is any rest) }
          rlwinm.   r4,r4,0,31-5+1,31
          { move to ctr }
          mtctr     r10
          { compare the rest with the immediate 11 (cmplwi, not the        }
          { register form cmpl). Both block loops below currently finish   }
          { through FillCharVerySmall without looking at cr7.              }
          cmplwi    cr7,r4,11
          { if filling with zero, only use dcbz }
          bne       cr1, .LFillCharNoZero
          { make r3 point again to the actual store position }
          addi      r3,r3,4
  .LFillCharDCBZLoop:
          dcbz      0,r3
          addi      r3,r3,32
          bdnz      .LFillCharDCBZLoop
          { if there was no rest, we're finished }
          beq       .LFillCharDone
          b         .LFillCharVerySmall
  .LFillCharNoZero:
          { build a double consisting of 8 copies of the fill byte in f0 }
  {$ifdef FPC_ABI_AIX}
          { use the red zone, i.e. negative offsets from r1: 0(r1) and 4(r1) }
          { belong to the stack frame header and must not be overwritten     }
          stw       r5,-8(r1)
          stw       r5,-4(r1)
          lfd       f0,-8(r1)
  {$else FPC_ABI_AIX}
          stw       r5,temp
          stw       r5,4+temp
          lfd       f0,temp
  {$endif FPC_ABI_AIX}
          { make r3 point to address-8, so we're able to use fp double stores }
          { with update (it's already -4 now) }
          subi      r3,r3,4
          { load r10 with 8, so that dcbz uses the correct address }
          li        r10, 8
  .LFillChar32ByteLoop:
          dcbz      r3,r10
          stfdu     f0,8(r3)
          stfdu     f0,8(r3)
          stfdu     f0,8(r3)
          stfdu     f0,8(r3)
          bdnz      .LFillChar32ByteLoop
          { if there was no rest, we're finished }
          beq       .LFillCharDone
          { make r3 point again to the actual next byte that must be written }
          addi      r3,r3,8
          b         .LFillCharVerySmall
  .LFillCharSmall:
          { when we arrive here, we're already 4 byte aligned }
          { get count div 4 to store dwords }
          srwi      r10,r4,2
          { get ready for use of update stores }
          subi      r3,r3,4
          mtctr     r10
          { r4 := count mod 4, cr0 records whether that is 0 }
          rlwinm.   r4,r4,0,31-2+1,31
  .LFillCharSmallLoop:
          stwu      r5,4(r3)
          bdnz      .LFillCharSmallLoop
          { if nothing left, stop }
          beq       .LFillCharDone
          { get ready to store bytes }
          addi      r3,r3,4
  .LFillCharVerySmall:
          mtctr     r4
          subi      r3,r3,1
  .LFillCharVerySmallLoop:
          stbu      r5,1(r3)
          bdnz      .LFillCharVerySmallLoop
  .LFillCharDone:
  end;
  {$define FPC_SYSTEM_HAS_FILLDWORD}
  procedure filldword(var x;count : longint;value : dword);
  assembler;
  { Stores value into count consecutive dwords starting at x. }
  {   in: r3 = x, r4 = count, r5 = value                      }
  { Nothing is stored when count <= 0.                        }
  asm
          { the dword count drives the loop through ctr }
          mtctr   r4
          cmpwi   r4,0
          { bias the pointer by one dword so stwu can pre-increment }
          addi    r3,r3,-4
          { count <= 0: nothing to do }
          ble     .LFillDWordExit
  .LFillDWordStore:
          stwu    r5,4(r3)
          bdnz    .LFillDWordStore
  .LFillDWordExit:
  end;
  {$define FPC_SYSTEM_HAS_INDEXBYTE}
  function IndexByte(const buf;len:longint;b:byte):longint; assembler;
  { Scans at most len bytes of buf for the byte b.               }
  {   in : r3 = buf, r4 = len, r5 = b                            }
  {   out: r3 = offset of the first match, -1 when there is none }
  asm
          { prefetch the start of the buffer }
          dcbt    0,r3
          { r0 remembers the buffer start for the final offset calculation }
          mr      r0,r3
          { r10 trails the first byte by one so lbzu can pre-increment }
          subi    r10,r3,1
          { the byte count drives the loop through ctr }
          mtctr   r4
          cmplwi  r4,0
          { result when nothing is found }
          li      r3,-1
          { unsigned compare against 0: only "equal" can be true here }
          beq     .LIndexByteExit
  .LIndexByteScan:
          lbzu    r9,1(r10)
          cmplw   r9,r5
          { continue while ctr has not run out and no match was seen }
          bdnzf   cr0*4+eq,.LIndexByteScan
          { last byte examined did not match: r3 is still -1 }
          bne     .LIndexByteExit
          { match: offset = address of the match - buffer start }
          subf    r3,r0,r10
  .LIndexByteExit:
  end;
  {$define FPC_SYSTEM_HAS_INDEXWORD}
  function IndexWord(const buf;len:longint;b:word):longint; assembler;
  { Scans at most len words (16 bit) of buf for the value b.           }
  { input: r3 = buf, r4 = len, r5 = b                                  }
  { output: r3 = index (counted in words) of the first match in buf,   }
  {         -1 if not found                                            }
  asm
          { load the begin of the buffer in the data cache }
          dcbt    0,r3
          cmplwi  r4,0
          mtctr   r4
          { r10 trails the first word by one element so lhzu can pre-increment }
          subi    r10,r3,2
          { r0 := start of buf, needed to turn the match address into an index }
          mr      r0,r3
          { assume not found }
          li      r3,-1
          ble     .LIndexWordDone
  .LIndexWordLoop:
          lhzu    r9,2(r10)
          cmplw   r9,r5
          bdnzf   cr0*4+eq,.LIndexWordLoop
          { r3 still contains -1 here }
          bne     .LIndexWordDone
          { byte distance between the match and the start of buf ... }
          sub     r3,r10,r0
          { ... divided by the element size gives the word index }
          srwi    r3,r3,1
  .LIndexWordDone:
  end;
  {$define FPC_SYSTEM_HAS_INDEXDWORD}
  function IndexDWord(const buf;len:longint;b:DWord):longint; assembler;
  { Scans at most len dwords (32 bit) of buf for the value b.          }
  { input: r3 = buf, r4 = len, r5 = b                                  }
  { output: r3 = index (counted in dwords) of the first match in buf,  }
  {         -1 if not found                                            }
  asm
          { load the begin of the buffer in the data cache }
          dcbt    0,r3
          cmplwi  r4,0
          mtctr   r4
          { r10 trails the first dword by one element so lwzu can pre-increment }
          subi    r10,r3,4
          { r0 := start of buf, needed to turn the match address into an index }
          mr      r0,r3
          { assume not found }
          li      r3,-1
          ble     .LIndexDWordDone
  .LIndexDWordLoop:
          lwzu    r9,4(r10)
          cmplw   r9,r5
          bdnzf   cr0*4+eq, .LIndexDWordLoop
          { r3 still contains -1 here }
          bne     .LIndexDWordDone
          { byte distance between the match and the start of buf ... }
          sub     r3,r10,r0
          { ... divided by the element size gives the dword index }
          srwi    r3,r3,2
  .LIndexDWordDone:
  end;
  {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  function CompareByte(const buf1,buf2;len:longint):longint; assembler;
  { Compares up to len bytes of buf1 and buf2.                            }
  {   in : r3 = buf1, r4 = buf2, r5 = len                                 }
  {   out: r3 = 0 if all compared bytes are equal, otherwise the          }
  {        difference (buf1 byte - buf2 byte) at the first mismatch       }
  asm
          { prefetch the start of the first buffer }
          dcbt    0,r3
          { the byte count drives the loop through ctr }
          mtctr   r5
          cmplwi  r5,0
          { r3 will carry the result, so buf1 is walked through r11;     }
          { both pointers trail by one byte so lbzu can pre-increment    }
          subi    r11,r3,1
          subi    r4,r4,1
          { result for len = 0 }
          li      r3,0
          { unsigned compare against 0: only "equal" can be true here }
          beq     .LCompByteExit
  .LCompByteNext:
          lbzu    r9,1(r11)
          lbzu    r10,1(r4)
          { r3 := buf1 byte - buf2 byte, cr0 records whether that is 0 }
          subf.   r3,r10,r9
          { continue while the bytes were equal and ctr has not run out }
          bdnzt   cr0*4+eq, .LCompByteNext
  .LCompByteExit:
  end;
  {$define FPC_SYSTEM_HAS_COMPAREWORD}
  function CompareWord(const buf1,buf2;len:longint):longint; assembler;
  { Compares up to len words (16 bit) of buf1 and buf2.                   }
  {   in : r3 = buf1, r4 = buf2, r5 = len                                 }
  {   out: r3 = 0 if all compared words are equal, otherwise the          }
  {        difference (buf1 word - buf2 word) at the first mismatch       }
  asm
          { prefetch the start of the first buffer }
          dcbt    0,r3
          { the word count drives the loop through ctr }
          mtctr   r5
          cmplwi  r5,0
          { r3 will carry the result, so buf1 is walked through r11;     }
          { both pointers trail by one word so lhzu can pre-increment    }
          subi    r11,r3,2
          subi    r4,r4,2
          { result for len = 0 }
          li      r3,0
          { unsigned compare against 0: only "equal" can be true here }
          beq     .LCompWordExit
  .LCompWordNext:
          lhzu    r9,2(r11)
          lhzu    r10,2(r4)
          { r3 := buf1 word - buf2 word, cr0 records whether that is 0 }
          subf.   r3,r10,r9
          { continue while the words were equal and ctr has not run out }
          bdnzt   cr0*4+eq, .LCompWordNext
  .LCompWordExit:
  end;
  {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  function CompareDWord(const buf1,buf2;len:longint):longint; assembler;
  { Compares up to len dwords (32 bit, unsigned) of buf1 and buf2.          }
  { input: r3 = buf1, r4 = buf2, r5 = len                                   }
  { output: r3 = 0 if equal, -1 if buf1 < buf2, 1 if buf1 > buf2            }
  { The result is derived from an unsigned compare rather than from the     }
  { difference of the two dwords: a 32 bit subtraction can overflow and     }
  { then has the wrong sign (e.g. $80000001 - 0 is negative as a longint).  }
  asm
          { load the begin of the first buffer in the data cache }
          dcbt    0,r3
          cmplwi  r5,0
          mtctr   r5
          { use r11 instead of r3 for buf1 since r3 contains the result; }
          { both pointers trail by one dword so lwzu can pre-increment   }
          subi    r11,r3,4
          subi    r4,r4,4
          { result when everything compared is equal (or len = 0) }
          li      r3,0
          ble     .LCompDWordDone
  .LCompDWordLoop:
          { load next dwords }
          lwzu    r9,4(r11)
          lwzu    r10,4(r4)
          { unsigned compare, result in cr0 }
          cmplw   r9,r10
          { if dwords not equal or at the end, we're ready }
          bdnzt   cr0*4+eq, .LCompDWordLoop
          { cr0 still holds the last compare (li does not change it) }
          beq     .LCompDWordDone
          li      r3,1
          bgt     .LCompDWordDone
          li      r3,-1
  .LCompDWordDone:
  end;
  {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  function IndexChar0(const buf;len:longint;b:Char):longint; assembler;
  { Scans at most len characters of buf for b, stopping early at the first  }
  { #0 character.                                                           }
  { input: r3 = buf, r4 = len, r5 = b                                       }
  { output: r3 = zero based offset of the first match (same convention as   }
  {         IndexByte), -1 if not found or if a #0 was reached first        }
  asm
          { load the begin of the buffer in the data cache }
          dcbt    0,r3
          { length = 0? }
          cmplwi  r4,0
          mtctr   r4
          { r9 trails the first char by one so lbzu can pre-increment }
          subi    r9,r3,1
          { r0 := start of buf itself (not buf-1), so that the difference }
          { computed below is the zero based offset of the match          }
          mr      r0,r3
          { assume not found }
          li      r3,-1
          { if yes, do nothing }
          ble     .LIndexChar0Done
  .LIndexChar0Loop:
          lbzu    r10,1(r9)
          { cr1: end of string reached?  cr0: is this the wanted char? }
          cmplwi  cr1,r10,0
          cmplw   r10,r5
          beq     cr1,.LIndexChar0Done
          bdnzf   cr0*4+eq, .LIndexChar0Loop
          { r3 still contains -1 here }
          bne     .LIndexChar0Done
          sub     r3,r9,r0
  .LIndexChar0Done:
  end;
  662. {****************************************************************************
  663. String
  664. ****************************************************************************}
  {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COPY}
  function fpc_shortstr_to_shortstr(len:longint; const sstr: shortstring): shortstring; [public,alias: 'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
  assembler;
  { Copies sstr into the result, truncated to at most len characters. }
  {   in: r3 = pointer to result, r4 = len, r5 = sstr                 }
  asm
          { touch the start of the destination, it is about to be written }
          dcbtst  0,r3
          { r10 := length(sstr) }
          lbz     r10,0(r5)
          { r4 := min(length(sstr),len), computed without a branch:   }
          {   r7 := len - length(sstr), carry set when no borrow      }
          subc    r7,r4,r10
          {   r4 := 0 when len >= length(sstr), -1 otherwise          }
          subfe   r4,r4,r4
          {   r7 := 0 when len >= length(sstr), len-length(sstr) else }
          and     r7,r4,r7
          {   r4 := length(sstr) when len >= length(sstr), len else   }
          add     r4,r7,r10
          { number of characters to copy goes into ctr }
          mtctr   r4
          cmplwi  r4,0
          { store the length byte of the result }
          stb     r4,0(r3)
          { empty result: no characters to copy }
          beq     .LSStrToSStrDone
  .LSStrToSStrLoop:
          lbzu    r0,1(r5)
          stbu    r0,1(r3)
          bdnz    .LSStrToSStrLoop
  .LSStrToSStrDone:
  end;
  {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$ifdef interncopy}
  procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN'];
  {$else}
  procedure fpc_shortstr_copy(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_COPY'];
  {$endif}
  assembler;
  { Copies the short string at sstr to dstr, truncated to at most len }
  { characters.                                                       }
  {   in: r3 = len, r4 = sstr, r5 = dstr                              }
  asm
          { touch the start of the destination, it is about to be written }
          dcbtst  0,r5
          { r10 := length of the source string }
          lbz     r10,0(r4)
          { r3 := min(source length,len), computed without a branch:  }
          {   r0 := len - source length, carry set when no borrow     }
          subfc   r0,r10,r3
          {   r3 := 0 when len >= source length, -1 otherwise         }
          subfe   r3,r3,r3
          {   r3 := 0 when len >= source length, len-source length else }
          and     r3,r3,r0
          {   r3 := source length when len >= source length, len else }
          add     r3,r10,r3
          { number of characters to copy goes into ctr }
          mtctr   r3
          cmplwi  r3,0
          { store the length byte of the destination }
          stb     r3,0(r5)
          { empty string: no characters to copy }
          beq     .LSStrAssignDone
  .LSStrAssignLoop:
          lbzu    r0,1(r4)
          stbu    r0,1(r5)
          bdnz    .LSStrAssignLoop
  .LSStrAssignDone:
  end;
  {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_APPEND_SHORTSTR}
  procedure fpc_shortstr_append_shortstr(var s1: shortstring; const s2: shortstring); compilerproc;
  { Appends s2 to s1, copying only as many characters as still fit in s1.   }
  { expects that (r3) contains a pointer to the current string s1, r4       }
  { high(s1) and (r5) a pointer to the one that has to be concatenated      }
  assembler;
  asm
          { load length s1 }
          lbz     r6, 0(r3)
          { load length s2 }
          lbz     r10, 0(r5)
          { calculate min(length(s2),high(s1)-length(s1)) }
          sub     r9,r4,r6
          subc    r8,r9,r10    { r8 := r9 - r10                                }
          subfe   r9,r9,r9     { if r9 >= r10 then r9' := 0 else r9' := -1     }
          and     r9,r8,r9     { if r9 >= r10 then r9' := 0 else r9' := r9-r10 }
          add     r9,r9,r10    { if r9 >= r10 then r9' := r10 else r9' := r9   }
          { nothing to copy? This has to test the number of characters that }
          { will really be copied, not length(s2): if s1 is already full    }
          { and s2 is not empty, ctr would be loaded with 0 and the bdnz    }
          { loop below would wrap around.                                   }
          cmplwi  r9,0
          { calculate new length }
          add     r10,r6,r9
          { load value to copy in ctr }
          mtctr   r9
          { store new length }
          stb     r10,0(r3)
          { go to last current character of s1 }
          add     r3,r6,r3
          { if nothing to do, exit }
          beq     .LShortStrAppendDone
          { and concatenate }
  .LShortStrAppendLoop:
          lbzu    r10,1(r5)
          stbu    r10,1(r3)
          bdnz    .LShortStrAppendLoop
  .LShortStrAppendDone:
  end;
  754. (*
  755. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  756. function fpc_shortstr_compare(const dstr,sstr:shortstring): longint; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
  757. assembler;
  758. asm
  759. { load length sstr }
  760. lbz r9,0(r4)
  761. { load length dstr }
  762. lbz r10,0(r3)
  763. { save their difference for later and }
  764. { calculate min(length(sstr),length(dstr)) }
  765. subfc r7,r10,r9 { r0 := r9 - r10 }
  766. subfe r9,r9,r9 { if r9 >= r10 then r9' := 0 else r9' := -1 }
  767. and r7,r7,r9 { if r9 >= r10 then r9' := 0 else r9' := r9-r8 }
  768. add r9,r10,r7 { if r9 >= r10 then r9' := r10 else r9' := r9 }
  769. { first compare dwords (length/4) }
  770. srwi. r5,r9,2
  771. { keep length mod 4 for the ends }
  772. rlwinm r9,r9,0,30,31
  773. { already check whether length mod 4 = 0 }
  774. cmplwi cr1,r9,0
  775. { so we can load r3 with 0, in case the strings both have length 0 }
  776. mr r8,r3
  777. li r3, 0
  778. { length div 4 in ctr for loop }
  779. mtctr r5
  780. { if length < 3, goto byte comparing }
  781. beq LShortStrCompare1
  782. { setup for use of update forms of load/store with dwords }
  783. subi r4,r4,3
  784. subi r8,r8,3
  785. LShortStrCompare4Loop:
  786. lwzu r3,4(r4)
  787. lwzu r10,4(r8)
  788. sub. r3,r3,r10
  789. bdnzt cr0+eq,LShortStrCompare4Loop
  790. { r3 contains result if we stopped because of "ne" flag }
  791. bne LShortStrCompareDone
  792. { setup for use of update forms of load/store with bytes }
  793. addi r4,r4,3
  794. addi r8,r8,3
  795. LShortStrCompare1:
  796. { if comparelen mod 4 = 0, skip this and return the difference in }
  797. { lengths }
  798. beq cr1,LShortStrCompareLen
  799. mtctr r9
  800. LShortStrCompare1Loop:
  801. lbzu r3,1(r4)
  802. lbzu r10,1(r8)
  803. sub. r3,r3,r10
  804. bdnzt cr0+eq,LShortStrCompare1Loop
  805. bne LShortStrCompareDone
  806. LShortStrCompareLen:
  807. { also return result in flags, maybe we can use this in the CG }
  808. mr. r3,r3
  809. LShortStrCompareDone:
  810. end;
  811. *)
  812. {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  813. function fpc_pchar_to_shortstr(p:pchar):shortstring;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
  814. assembler;
  { the assembler body of this function is not written out here: it is }
  { supplied by the include file named in the directive below          }
  815. {$include strpas.inc}
  816. {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  817. function fpc_pchar_length(p:pchar):longint;assembler;[public,alias:'FPC_PCHAR_LENGTH']; {$ifdef hascompilerproc} compilerproc; {$endif}
  { the assembler body of this function is not written out here: it is }
  { supplied by the include file named in the directive below          }
  818. {$include strlen.inc}
  {$define FPC_SYSTEM_HAS_GET_FRAME}
  function get_frame:pointer;assembler;
  { Returns the current value of r1 (the stack pointer) in r3. }
  asm
          { r3 := r1 }
          ori     r3,r1,0
  end;
  {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  function get_caller_addr(framebp:pointer):pointer;assembler;
  { Follows the link stored at 0(framebp) once and returns the word saved  }
  { in the frame found there: at offset 8 with FPC_ABI_AIX, at offset 4    }
  { otherwise.                                                             }
  {   in : r3 = framebp                                                    }
  {   out: r3 = loaded word, or nil if framebp or the link is nil          }
  asm
          { nil in -> nil out (r3 already holds the result) }
          cmpwi   r3,0
          beq     .LCallerAddrExit
          { step to the frame the link points at }
          lwz     r3,0(r3)
          { nil link -> nil out }
          cmpwi   r3,0
          beq     .LCallerAddrExit
  {$ifdef FPC_ABI_AIX}
          lwz     r3,8(r3)
  {$else FPC_ABI_AIX}
          lwz     r3,4(r3)
  {$endif FPC_ABI_AIX}
  .LCallerAddrExit:
  end;
  {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  function get_caller_frame(framebp:pointer):pointer;assembler;
  { Returns the link word stored at 0(framebp).                }
  {   in : r3 = framebp                                        }
  {   out: r3 = word at 0(framebp), or nil when framebp is nil }
  asm
          { nil in -> nil out (r3 already holds the result) }
          cmpwi   r3,0
          beq     .LCallerFrameExit
          lwz     r3,0(r3)
  .LCallerFrameExit:
  end;
  {$define FPC_SYSTEM_HAS_ABS_LONGINT}
  { Branch-free absolute value of l (passed and returned in r3): builds a mask
    from the sign bit, adds it to l and then xors the sum with the same mask. }
  function abs(l:longint):longint; assembler;[internconst:in_const_abs];
  asm
    { r0 := l shifted right arithmetically by 31 bits, i.e. 0 when l >= 0 and
      -1 (all bits set) when l < 0 }
    srawi r0,r3,31
    { r3 := l + r0, which is l for non-negative l and l - 1 for negative l }
    add r3,r0,r3
    { xor with 0 keeps l; xor with all ones inverts l - 1, which yields -l }
    xor r3,r3,r0
  end;
  855. {****************************************************************************
  856. Math
  857. ****************************************************************************}
  {$define FPC_SYSTEM_HAS_ODD_LONGINT}
  { Returns whether l is odd: the result in r3 is 1 when the lowest bit of l is
    set and 0 otherwise. }
  function odd(l:longint):boolean;assembler;[internconst:in_const_odd];
  asm
    { rotate by 0 and mask with bits 31..31, i.e. keep only the least
      significant bit (bit 31 in PowerPC bit numbering) }
    rlwinm r3,r3,0,31,31
  end;
  {$define FPC_SYSTEM_HAS_SQR_LONGINT}
  { Returns l multiplied by itself. Only the low 32 bits of the product are
    kept, and no overflow check is performed here. }
  function sqr(l:longint):longint;assembler;[internconst:in_const_sqr];
  asm
    { r3 := low word of r3 * r3 }
    mullw r3,r3,r3
  end;
  {$define FPC_SYSTEM_HAS_SPTR}
  { Returns the current contents of r1 (used as the stack pointer in this
    file) in r3. }
  Function Sptr : Pointer;assembler;
  asm
    { r3 := r1 }
    mr r3,r1
  end;
  873. {****************************************************************************
  874. Str()
  875. ****************************************************************************}
  876. { int_str: generic implementation is used for now }
  877. {****************************************************************************
  878. Multithreading
  879. ****************************************************************************}
  { do a thread-safe inc/dec }
  {$define FPC_SYSTEM_HAS_DECLOCKED}
  { declocked: decrements l by one using a load-reserve / store-conditional
    retry loop.
      in : r3 = address of l
      out: r3 = 1 (true) if l is zero after the decrement, 0 (false) otherwise
      r10 is used as scratch register. }
  function declocked(var l : longint) : boolean;assembler;
  asm
  .LDecLockedRetry:
          { load l and place a reservation on its address }
          lwarx   r10, 0, r3
          subi    r10, r10, 1
          { the store only succeeds while the reservation is still held }
          stwcx.  r10, 0, r3
          { reservation lost: start over (hinted as unlikely) }
          bne-    .LDecLockedRetry
          { cntlzw yields 32 only for a zero value, so shifting the count right
            by 5 gives 1 for zero and 0 for anything else }
          cntlzw  r3, r10
          srwi    r3, r3, 5
  end;
  {$define FPC_SYSTEM_HAS_INCLOCKED}
  { inclocked: increments l by one using a load-reserve / store-conditional
    retry loop.
      in : r3 = address of l
      r10 is used as scratch register; no result is returned. }
  procedure inclocked(var l : longint);assembler;
  asm
  .LIncLockedRetry:
          { load l and place a reservation on its address }
          lwarx   r10, 0, r3
          addi    r10, r10, 1
          { the store only succeeds while the reservation is still held }
          stwcx.  r10, 0, r3
          { reservation lost: start over (hinted as unlikely) }
          bne-    .LIncLockedRetry
  end;
  903. {
  904. $Log$
  905. Revision 1.56 2003-11-23 17:34:27 jonas
  906. * fixed some label names
  907. Revision 1.55 2003/11/15 19:01:27 florian
  908. * fixed rtl to work with the integrated fpc ppc assembler reader
  909. Revision 1.54 2003/09/14 20:33:28 jonas
  910. * renamed sp to r1, gnu as doesn't understand sp
  911. Revision 1.53 2003/09/06 10:44:41 olle
  912. + Used macros ABI_AIX and ABI_SYSV exchanged to FPC_ABI_AIX and FPC_ABI_SYSV.
  913. Revision 1.52 2003/08/24 20:47:49 olle
  914. + added support for ABI_AIX in get_caller_addr
  915. Revision 1.51 2003/06/14 12:41:08 jonas
  916. * fixed compilation problems (removed unnecessary modified registers
  917. lists from procedures)
  918. Revision 1.50 2003/06/01 14:50:17 jonas
  919. * fpc_shortstr_append_shortstr has to use high(s1) instead of 255 as
  920. maxlen
  921. + ppc version of fpc_shortstr_append_shortstr
  922. Revision 1.49 2003/05/29 21:17:27 jonas
  923. * compile with -dppc603 to not use unaligned float loads in move() and
  924. g_concatcopy, because the 603 and 604 take an exception for those
  925. (and netbsd doesn't even handle those in the kernel). There are
  926. still some of those left that could cause problems though (e.g.
  927. in the set helpers)
  928. Revision 1.48 2003/05/29 14:32:54 jonas
  929. * changed dcbst to dcbtst (former means "flush cache block to memory,
  930. the latter means "I will soon store something to that cache block")
  931. Revision 1.47 2003/05/29 12:14:02 jonas
  932. * move() now uses dcbz if possible
  933. Revision 1.46 2003/05/17 00:19:51 jonas
  934. * fixed inclocked
  935. Revision 1.45 2003/05/14 19:47:35 jonas
  936. * fixed stupid bug in filldword
  937. Revision 1.44 2003/05/13 20:39:26 florian
  * commented out shortstring compare, buggy
  939. Revision 1.43 2003/05/12 19:39:33 jonas
  940. * fixed final fillchar error (tfillchr passes now)
  941. Revision 1.42 2003/05/12 19:00:50 jonas
  942. * fixed bug in fillchar
  943. Revision 1.41 2003/05/10 20:33:39 jonas
  944. * fixed get_caller_frame and get_caller_addr
  945. Revision 1.40 2003/05/10 17:33:06 jonas
  946. * final (? :) fix to move, passes new tests/test/tmove test
  947. Revision 1.39 2003/05/02 19:03:25 jonas
  948. * fixed some bugs in move()
  949. Revision 1.38 2003/04/27 16:24:44 jonas
  950. - disabled fpc_shortstr_concat because it's called differently than that
  951. routine is declared
  952. Revision 1.37 2003/04/26 20:37:17 jonas
  953. * fixed and re-enabled routines commented out by Florian :)
  954. Revision 1.36 2003/04/26 17:46:49 florian
  955. * commented out not working routines
  956. * reactivated assembler fillchar
  957. Revision 1.35 2003/04/26 17:35:15 jonas
  958. * fixed FillChar
  959. Revision 1.34 2003/04/26 12:05:10 florian
  960. * removed object/class helpers, the compiler uses the generic ones
  961. Revision 1.33 2003/04/26 11:55:52 florian
  962. * fixed newlines
  963. Revision 1.32 2003/04/23 21:04:48 florian
  964. * fixed fpc_shortstr_to_shortstr
  965. Revision 1.31 2003/03/17 14:30:11 peter
  966. * changed address parameter/return values to pointer instead
  967. of longint
  968. Revision 1.30 2003/03/12 19:21:29 jonas
  969. + implemented get_frame()
  970. * fixed bug in IndexDWord()
  971. Revision 1.29 2003/01/09 20:14:35 florian
  972. * fixed helper declarations
  973. Revision 1.28 2003/01/09 13:38:56 florian
  974. * dec/inclocked got defines
  975. Revision 1.27 2002/11/07 15:23:13 jonas
  976. * always use code that was between 'ifdef mt', since that define is
  977. deprecated now
  978. Revision 1.26 2002/11/01 13:27:55 jonas
  979. * changed "dcbtst r0,x" to "dcbtst 0,x"
  980. Revision 1.25 2002/10/23 15:26:00 olle
  981. * excluded saverestorereg for target macos
  982. Revision 1.24 2002/10/20 13:40:55 jonas
  983. * move/fill*/index*/comp* routines immediately exit if length is negative
  984. Revision 1.23 2002/10/17 10:12:50 jonas
  985. * fixed return value of declocked()
  986. Revision 1.22 2002/10/05 14:20:16 peter
  987. * fpc_pchar_length compilerproc and strlen alias
  988. Revision 1.21 2002/10/02 18:21:52 peter
  989. * Copy() changed to internal function calling compilerprocs
  990. * FPC_SHORTSTR_COPY renamed to FPC_SHORTSTR_ASSIGN because of the
  991. new copy functions
  992. Revision 1.20 2002/09/10 21:30:34 jonas
  993. * disabled powerpc-specific fpc_shortstr_concat for now, it was
  994. completely wrong
  995. Revision 1.19 2002/09/10 17:47:20 jonas
  996. * fixed bug with concatting 0-length shortstrings
  997. Revision 1.18 2002/09/07 16:01:26 peter
  998. * old logs removed and tabs fixed
  999. Revision 1.17 2002/08/31 21:29:57 florian
  1000. * several PC related fixes
  1001. Revision 1.16 2002/08/31 16:08:36 florian
  1002. * fixed undefined labels
  1003. Revision 1.15 2002/08/31 13:11:11 florian
  1004. * several fixes for Linux/PPC compilation
  1005. Revision 1.14 2002/08/18 22:11:10 florian
  1006. * fixed remaining assembler errors
  1007. Revision 1.13 2002/08/18 21:37:48 florian
  1008. * several errors in inline assembler fixed
  1009. Revision 1.12 2002/08/10 17:14:36 jonas
  1010. * various fixes, mostly changing the names of the modifies registers to
  1011. upper case since that seems to be required by the compiler
  1012. Revision 1.11 2002/07/30 17:29:53 florian
  1013. + dummy setjmp and longjmp added
  + dummy implementation of the destructor helper
  1015. Revision 1.10 2002/07/28 21:39:29 florian
  1016. * made abs a compiler proc if it is generic
  1017. Revision 1.9 2002/07/28 20:43:49 florian
  1018. * several fixes for linux/powerpc
  1019. * several fixes to MT
  1020. Revision 1.8 2002/07/26 15:45:56 florian
  1021. * changed multi threading define: it's MT instead of MTRTL
  1022. }