powerpc.inc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 2000-2001 by the Free Pascal development team.
  5. Portions Copyright (c) 2000 by Casey Duncan ([email protected])
  6. Processor dependent implementation for the system unit for
  7. PowerPC
  8. See the file COPYING.FPC, included in this distribution,
  9. for details about the copyright.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. **********************************************************************}
  14. {****************************************************************************
  15. Move / Fill
  16. ****************************************************************************}
  17. {$define FPC_SYSTEM_HAS_MOVE}
  procedure Move(var source;var dest;count:longint);assembler;
  { Copies count bytes from source to dest; the regions may overlap.         }
  { input: r3 = source, r4 = dest, r5 = count                                }
  {                                                                          }
  { register usage:                                                          }
  {   r30  step between two accesses (1/-1 for bytes, 4 for dwords,          }
  {        8 for doubles)                                                    }
  {   r29  scratch / number of dwords to copy                                }
  {   f28..f31  data registers of the 32 byte loop                           }
  {                                                                          }
  { All loops use update-form loads/stores, so r3/r4 always point one step   }
  { *before* the next location to access.                                    }
  {                                                                          }
  { If dest lies inside [source,source+count) the copy has to run from the   }
  { end to the start. That case is handled completely by the byte loop: the  }
  { dword/FPU paths below rely on copying the byte at the aligned address a  }
  { second time, which is only correct when moving forward.                  }
  asm
            { load the begin of the source in the data cache }
            dcbt    r0,r3
            { count <= 0 ? }
            cmpwi   cr0,r5,0
            { check if we have to do the move backwards because of overlap }
            sub     r30,r4,r3
            { carry := boolean(unsigned(dest-source) >= count) = boolean(no overlap) }
            subc    r30,r30,r5
            { count < 11 ? (to decide whether we will move dwords or bytes) }
            cmpwi   cr1,r5,11
            { if overlap, then r30 := -1 else r30 := 0 }
            subfe   r30,r30,r30
            { count < 39 ? (32 + max. alignment (7)) }
            cmpwi   cr7,r5,39
            { if count <= 0, stop }
            ble     cr0,LMoveDone
            { if overlap, then r29 := count else r29 := 0 }
            and     r29,r5,r30
            { if overlap, then point source and dest to the end }
            add     r3,r3,r29
            add     r4,r4,r29
            { if overlap, then r29 := 0, else r29 := -1 }
            not     r29,r30
            { if overlap, then r30 := -2, else r30 := 0 }
            slwi    r30,r30,1
            { if overlap, then r30 := -1, else r30 := 1 }
            addi    r30,r30,1
            { if overlap, then source/dest stay, otherwise they are decreased }
            { by one. After the next instructions, r3/r4 + r30 = next         }
            { position to load/store from/to                                  }
            add     r3,r3,r29
            add     r4,r4,r29
            { if count < 11, copy everything byte by byte }
            blt     cr1,LMoveBytes
            { a backward move (r30 = -1) is also done byte by byte, see the }
            { remark in the header. cr0 is free again at this point         }
            cmpwi   cr0,r30,0
            blt     cr0,LMoveBytes
            { otherwise, guarantee 4 byte alignment for dest for starters }
  LMove4ByteAlignLoop:
            lbzux   r29,r3,r30
            stbux   r29,r4,r30
            { is dest now 4 aligned? }
            andi.   r29,r4,3
            subi    r5,r5,1
            { while not aligned, continue }
            bne     cr0,LMove4ByteAlignLoop
            { check for 8 byte alignment }
            andi.   r29,r4,7
            { we are going to copy one byte again (the one at the newly }
            { aligned address), so increase count again                 }
            addi    r5,r5,1
            { count div 4 for number of dwords to copy }
            srwi    r29,r5,2
            { if 11 <= count < 39, copy using dwords }
            blt     cr7,LMoveDWords
            { multiply the update count with 4 }
            slwi    r30,r30,2
            beq     cr0,L8BytesAligned
            { count >= 39 -> align to 8 byte boundary and then use the FPU }
            { since we're already at 4 byte alignment, use dword store     }
            lwz     r29,0(r3)
            add     r3,r3,r30
            { these 4 bytes are done: without this adjustment the loops }
            { below would run 4 bytes past the end of both buffers      }
            subi    r5,r5,4
            stw     r29,0(r4)
            add     r4,r4,r30
  L8BytesAligned:
            { count div 32 ( >= 1, since count was >= 39) }
            srwi    r29,r5,5
            { remainder; cr0 is tested after the 32 byte loop }
            andi.   r5,r5,31
            { to decide if we will do some dword stores afterwards or not }
            cmpwi   cr1,r5,11
            mtctr   r29
            { r29 := remainder div 4, will be moved to ctr when copying dwords }
            srwi    r29,r5,2
            { adjust the update count: it is 8 now }
            slwi    r30,r30,1
            { adjust source and dest pointers: because of the above, dest is   }
            { now aligned to 8 bytes. So if we subtract r30 we will still have }
            { an 8 bytes aligned address                                       }
            sub     r3,r3,r30
            sub     r4,r4,r30
  LMove32ByteLoop:
            lfdux   f31,r3,r30
            lfdux   f30,r3,r30
            lfdux   f29,r3,r30
            lfdux   f28,r3,r30
            stfdux  f31,r4,r30
            stfdux  f30,r4,r30
            stfdux  f29,r4,r30
            stfdux  f28,r4,r30
            bdnz    LMove32ByteLoop
            { cr0*4+eq is true if "count and 31" = 0 }
            beq     cr0,LMoveDone
            { make r30 again 1, but first adjust source/dest pointers }
            add     r3,r3,r30
            add     r4,r4,r30
            srawi   r30,r30,3
            sub     r3,r3,r30
            sub     r4,r4,r30
            { cr1 contains whether the remainder is <= 11 }
            ble     cr1,LMoveBytes
            add     r3,r3,r30
            add     r4,r4,r30
  LMoveDWords:
            { r3/r4 point to the next dword here, r29 = number of dwords }
            mtctr   r29
            { bytes left after the dword loop; cr0 is tested after the loop }
            andi.   r5,r5,3
            { r30 * 4 }
            slwi    r30,r30,2
            sub     r3,r3,r30
            sub     r4,r4,r30
  LMoveDWordsLoop:
            lwzux   r29,r3,r30
            stwux   r29,r4,r30
            bdnz    LMoveDWordsLoop
            beq     cr0,LMoveDone
            { make r30 again 1 }
            add     r3,r3,r30
            add     r4,r4,r30
            srawi   r30,r30,2
            sub     r3,r3,r30
            sub     r4,r4,r30
  LMoveBytes:
            { r5 = number of bytes left (> 0), r30 = 1 or -1 }
            mtctr   r5
  LMoveBytesLoop:
            lbzux   r29,r3,r30
            stbux   r29,r4,r30
            bdnz    LMoveBytesLoop
  LMoveDone:
  end ['R3','R4','R5','R29','R30','F28','F29','F30','F31','CTR','CR0','CR1','CR7'];
  146. {$define FPC_SYSTEM_HAS_FILLCHAR}
  Procedure FillChar(var x;count:longint;value:byte);assembler;
  { Fills count bytes starting at x with value. Does nothing if count <= 0.  }
  { input: r3 = x, r4 = count, r5 = value                                    }
  {                                                                          }
  { register usage:                                                          }
  {   r5   value, replicated into all four bytes for the word loop           }
  {   r30  address of the last location written (update-form stores)         }
  {   r29  scratch: number of head bytes / number of words                   }
  {   r4   number of bytes that still have to be written                     }
  asm
            { count <= 0 ? }
            cmpwi   cr0,r4,0
            { count < 12 ? (then it is not worth aligning, fill bytewise) }
            cmpwi   cr1,r4,12
            { the update-form stores pre-increment, so start one byte early }
            subi    r30,r3,1
            ble     cr0,LFillCharDone
            blt     cr1,LFillCharBytes
            { replicate value into all four bytes of r5 }
            insrwi  r5,r5,8,16
            insrwi  r5,r5,16,0
            { r29 := number of bytes up to the next 4 byte boundary (0..3) }
            neg     r29,r3
            andi.   r29,r29,3
            { those bytes are handled by the head loop }
            sub     r4,r4,r29
            beq     cr0,LFillCharAligned
            mtctr   r29
  LFillCharHeadLoop:
            stbu    r5,1(r30)
            bdnz    LFillCharHeadLoop
  LFillCharAligned:
            { r30 + 1 is 4 byte aligned now and r4 >= 9, so at least 2 words }
            srwi    r29,r4,2
            { trailing bytes; cr0 is tested after the word loop }
            andi.   r4,r4,3
            mtctr   r29
            { r30 := aligned address - 4 }
            subi    r30,r30,3
  LFillCharWordLoop:
            stwu    r5,4(r30)
            bdnz    LFillCharWordLoop
            { r30 := address of the last byte written }
            addi    r30,r30,3
            { no trailing bytes, so exit }
            beq     cr0,LFillCharDone
  LFillCharBytes:
            { r4 = number of bytes left (> 0) }
            mtctr   r4
  LFillCharByteLoop:
            stbu    r5,1(r30)
            bdnz    LFillCharByteLoop
  LFillCharDone:
  end ['r4','r5','r29','r30','cr0','cr1','ctr'];
  193. {$define FPC_SYSTEM_HAS_FILLWORD}
  procedure fillword(var x;count : longint;value : word);assembler;
  { Stores value into count consecutive 16 bit words starting at x.          }
  { Does nothing if count <= 0.                                              }
  { input: r3 = x, r4 = count (number of words), r5 = value                  }
  {                                                                          }
  { register usage:                                                          }
  {   r30  address of the last word written (update-form store)              }
  asm
            { count <= 0 ? }
            cmpwi   cr0,r4,0
            { one loop iteration per word }
            mtctr   r4
            { sthu pre-increments, so start one word before x }
            subi    r30,r3,2
            ble     cr0,LFillWordDone
  LFillWordLoop:
            sthu    r5,2(r30)
            bdnz    LFillWordLoop
  LFillWordDone:
  end ['r30','cr0','ctr'];
  221. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  function IndexByte(var buf;len:longint;b:byte):longint; assembler;
  { Scans the first len bytes of buf for the value b.                        }
  { input:  r3 = buf, r4 = len, r5 = b                                       }
  { output: r3 = zero based index of the first match, -1 if there is none    }
  {         (also when len = 0)                                              }
  asm
            { touch the start of the buffer so it gets into the data cache }
            dcbt    r0,r3
            { r28 keeps the start address for the final index calculation }
            mr      r28,r3
            { lbzu pre-increments, so the scan pointer starts at buf - 1 }
            addi    r30,r3,-1
            { one loop iteration per byte }
            mtctr   r4
            { nothing to scan? }
            cmpli   r4,0
            { default result: not found }
            li      r3,-1
            beq     LIndexByteExit
  LIndexByteScan:
            lbzu    r29,1(r30)
            cmpl    r29,r5
            { go on while bytes are left and the current one does not match }
            bdnzf   cr0*4+eq,LIndexByteScan
            { no match on the last byte examined: keep the -1 }
            bne     LIndexByteExit
            { r30 points to the matching byte }
            subf    r3,r28,r30
  LIndexByteExit:
  end ['r3','r28','r29','r30','cr0','ctr'];
  244. {$define FPC_SYSTEM_HAS_INDEXWORD}
  function IndexWord(var buf;len:longint;b:word):longint; assembler;
  { Scans the first len 16 bit words of buf for the value b.                 }
  { input:  r3 = buf, r4 = len (number of words), r5 = b                     }
  { output: r3 = zero based index (counted in words) of the first match,     }
  {         -1 if there is none (also when len = 0)                          }
  asm
            { load the begin of the buffer in the data cache }
            dcbt    r0,r3
            cmpli   r4,0
            mtctr   r4
            { lhzu pre-increments, so start one word before buf }
            subi    r30,r3,2
            { remember the start address for the index calculation }
            mr      r28,r3
            { assume not found }
            li      r3,-1
            beq     LIndexWordDone
  LIndexWordLoop:
            lhzu    r29,2(r30)
            cmpl    r29,r5
            { continue while words are left and the current one differs }
            bdnzf   cr0*4+eq,LIndexWordLoop
            { r3 still contains -1 here }
            bne     LIndexWordDone
            { byte distance from the start of the buffer ... }
            sub     r3,r30,r28
            { ... converted to a word index }
            srawi   r3,r3,1
  LIndexWordDone:
  end ['r3','r28','r29','r30','cr0','ctr'];
  267. {$define FPC_SYSTEM_HAS_INDEXDWORD}
  function IndexDWord(var buf;len:longint;b:DWord):longint; assembler;
  { Scans the first len 32 bit words of buf for the value b.                 }
  { input:  r3 = buf, r4 = len (number of dwords), r5 = b                    }
  { output: r3 = zero based index (counted in dwords) of the first match,    }
  {         -1 if there is none (also when len = 0)                          }
  asm
            { load the begin of the buffer in the data cache }
            dcbt    r0,r3
            cmpli   r4,0
            mtctr   r4
            { lwzu pre-increments, so start one dword before buf }
            subi    r30,r3,4
            { remember the start address for the index calculation }
            mr      r28,r3
            { assume not found }
            li      r3,-1
            beq     LIndexDWordDone
  LIndexDWordLoop:
            lwzu    r29,4(r30)
            cmpl    r29,r5
            { continue while dwords are left and the current one differs }
            bdnzf   cr0*4+eq,LIndexDWordLoop
            { r3 still contains -1 here }
            bne     LIndexDWordDone
            { byte distance from the start of the buffer ... }
            sub     r3,r30,r28
            { ... converted to a dword index }
            srawi   r3,r3,2
  LIndexDWordDone:
  end ['r3','r28','r29','r30','cr0','ctr'];
  290. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  function CompareByte(var buf1,buf2;len:longint):longint; assembler;
  { Compares the first len bytes of buf1 and buf2.                           }
  { input:  r3 = buf1, r4 = buf2, r5 = len                                   }
  { output: r3 = 0 if equal (or len = 0), < 0 if buf1 < buf2,                }
  {         > 0 if buf1 > buf2; the value is the difference of the first     }
  {         pair of bytes that differ                                        }
  { note: almost direct copy of strlcomp() from strings.inc                  }
  asm
            { load the begin of the first buffer in the data cache }
            dcbt    r0,r3
            { len = 0 ? This has to be the immediate form of the compare: }
            { "cmpl r5,0" would compare against the contents of r0        }
            cmpli   r5,0
            mtctr   r5
            { use r28 instead of r3 for buf1 since r3 contains result; }
            { both pointers start one byte early because lbzu          }
            { pre-increments                                           }
            subi    r28,r3,1
            subi    r4,r4,1
            { result for len = 0 }
            li      r3,0
            beq     LCompByteDone
  LCompByteLoop:
            { load next bytes }
            lbzu    r29,1(r28)
            lbzu    r30,1(r4)
            { calculate difference }
            sub.    r3,r29,r30
            { if bytes not equal or at the end, we're ready }
            bdnzt   cr0*4+eq,LCompByteLoop
  LCompByteDone:
  end ['r3','r4','r28','r29','r30','cr0','ctr'];
  315. {$define FPC_SYSTEM_HAS_COMPAREWORD}
  function CompareWord(var buf1,buf2;len:longint):longint; assembler;
  { Compares the first len 16 bit words of buf1 and buf2.                    }
  { input:  r3 = buf1, r4 = buf2, r5 = len (number of words)                 }
  { output: r3 = 0 if equal (or len = 0), < 0 if buf1 < buf2,                }
  {         > 0 if buf1 > buf2; the value is the difference of the first     }
  {         pair of words that differ                                        }
  { note: almost direct copy of strlcomp() from strings.inc                  }
  asm
            { load the begin of the first buffer in the data cache }
            dcbt    r0,r3
            { len = 0 ? This has to be the immediate form of the compare: }
            { "cmpl r5,0" would compare against the contents of r0        }
            cmpli   r5,0
            mtctr   r5
            { use r28 instead of r3 for buf1 since r3 contains result; }
            { both pointers start one word early because lhzu          }
            { pre-increments                                           }
            subi    r28,r3,2
            subi    r4,r4,2
            { result for len = 0 }
            li      r3,0
            beq     LCompWordDone
  LCompWordLoop:
            { load next words (zero extended) }
            lhzu    r29,2(r28)
            lhzu    r30,2(r4)
            { calculate difference }
            sub.    r3,r29,r30
            { if words not equal or at the end, we're ready }
            bdnzt   cr0*4+eq,LCompWordLoop
  LCompWordDone:
  end ['r3','r4','r28','r29','r30','cr0','ctr'];
  340. {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  function CompareDWord(var buf1,buf2;len:longint):longint; assembler;
  { Compares the first len 32 bit words of buf1 and buf2 as unsigned values. }
  { input:  r3 = buf1, r4 = buf2, r5 = len (number of dwords)                }
  { output: r3 = 0 if equal (or len = 0), -1 if buf1 < buf2,                 }
  {         1 if buf1 > buf2                                                 }
  { The result is derived from an unsigned compare and not from the          }
  { difference of the two dwords: the sign of a 32 bit difference is wrong   }
  { when the operands are more than 2^31 apart.                              }
  asm
            { load the begin of the first buffer in the data cache }
            dcbt    r0,r3
            { len = 0 ? This has to be the immediate form of the compare: }
            { "cmpl r5,0" would compare against the contents of r0        }
            cmpli   r5,0
            mtctr   r5
            { use r28 instead of r3 for buf1 since r3 contains result; }
            { both pointers start one dword early because lwzu         }
            { pre-increments                                           }
            subi    r28,r3,4
            subi    r4,r4,4
            { result for len = 0 and for equal buffers }
            li      r3,0
            beq     LCompDWordDone
  LCompDWordLoop:
            { load next dwords }
            lwzu    r29,4(r28)
            lwzu    r30,4(r4)
            { unsigned compare }
            cmpl    r29,r30
            { if dwords not equal or at the end, we're ready }
            bdnzt   cr0*4+eq,LCompDWordLoop
            { last pair equal -> everything was equal, r3 is still 0 }
            beq     LCompDWordDone
            { li does not touch cr0, so the compare result is still valid }
            li      r3,1
            bgt     LCompDWordDone
            li      r3,-1
  LCompDWordDone:
  end ['r3','r4','r28','r29','r30','cr0','ctr'];
  365. {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
  { Scans buf for the character b. The scan stops after len characters or    }
  { at the first #0 character, whichever comes first.                        }
  { input:  r3 = buf, r4 = len, r5 = b                                       }
  { output: r3 = zero based index of the first match, -1 if b was not found  }
  {         before the end of the scan (also when len = 0 or b = #0)         }
  asm
            { load the begin of the buffer in the data cache }
            dcbt    r0,r3
            { length = 0? }
            cmpli   r4,0
            mtctr   r4
            { lbzu pre-increments, so the scan pointer starts at buf - 1 }
            subi    r29,r3,1
            { remember the start address for the index calculation }
            mr      r28,r3
            { assume not found }
            li      r3,-1
            { if yes, do nothing }
            beq     LIndexChar0Done
  LIndexChar0Loop:
            lbzu    r30,1(r29)
            { cr1: end of string reached?  cr0: character found? }
            cmpli   cr1,r30,0
            cmpl    r30,r5
            { a #0 terminates the scan, r3 still contains -1 }
            beq     cr1,LIndexChar0Done
            { continue while characters are left and the current one differs }
            bdnzf   cr0*4+eq,LIndexChar0Loop
            { r3 still contains -1 here }
            bne     LIndexChar0Done
            { r29 points to the matching character }
            sub     r3,r29,r28
  LIndexChar0Done:
  end ['r3','r28','r29','r30','cr0','cr1','ctr'];
  392. { all FPC_HELP_* are still missing (JM) }
  393. {****************************************************************************
  394. String
  395. ****************************************************************************}
  396. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COPY}
  procedure int_strcopy(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_COPY'];
  assembler;
  { Copies the shortstring at sstr to dstr, truncated to at most len         }
  { characters. The length byte of the destination is set to                 }
  { min(length(sstr),len).                                                   }
  { input: r3 = len, r4 = sstr, r5 = dstr                                    }
  asm
            { load length source }
            lbz     r30,0(r4)
            { load the begin of the dest buffer in the data cache }
            dcbtst  r0,r5
            { put min(length(sstr),len) in r3 }
            { r29 := len - r30, carry set if len >= r30 (unsigned) }
            subc    r29,r3,r30
            { if len >= r30 then r3 := 0 else r3 := -1 }
            subfe   r3,r3,r3
            { if len >= r30 then r3 := 0 else r3 := len - r30 }
            and     r3,r29,r3
            { if len >= r30 then r3 := r30 else r3 := len }
            add     r3,r3,r30
            cmpli   r3,0
            { put length in ctr }
            mtctr   r3
            { store the length byte of the destination }
            stb     r3,0(r5)
            { nothing to copy for an empty string }
            beq     LShortStrCopyDone
  LShortStrCopyLoop:
            lbzu    r29,1(r4)
            stbu    r29,1(r5)
            bdnz    LShortStrCopyLoop
  LShortStrCopyDone:
  end ['r3','r4','r5','r29','r30','cr0','ctr'];
  420. {
  421. $Log$
  422. Revision 1.5 2001-07-07 12:46:12 jonas
  423. * some small bugfixes and cache optimizations
  424. Revision 1.4 2001/03/03 13:53:36 jonas
  425. * fixed small bug in move
  426. Revision 1.3 2001/03/02 13:24:10 jonas
  427. + new, complete implementation of move procedure (including support for
  428. overlapping regions)
  429. Revision 1.2 2001/02/11 17:59:46 jonas
  430. * implemented several more procedures
  431. Revision 1.1 2000/07/27 07:32:12 jonas
  432. + initial version by Casey Duncan (not yet thoroughly debugged or complete)
  433. }