powerpc.inc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 1999 by the Free Pascal development team.
  5. Portions Copyright (c) 2000 by Casey Duncan ([email protected])
  6. Processor dependent implementation for the system unit for
  7. PowerPC
  8. See the file COPYING.FPC, included in this distribution,
  9. for details about the copyright.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. **********************************************************************}
  14. {****************************************************************************
  15. Move / Fill
  16. ****************************************************************************}
  {$define FPC_SYSTEM_HAS_MOVE}
  { Move: copy count bytes from source to dest. The two regions may overlap. }
  {                                                                          }
  { input: r3 = source, r4 = dest, r5 = count                                }
  {                                                                          }
  { Strategy                                                                 }
  {   count <= 0  : nothing is copied                                        }
  {   count <  11 : byte loop only                                           }
  {   count <  39 : align dest to 4 bytes, dword loop, byte tail             }
  {   otherwise   : align dest to 8 bytes, 32 byte blocks through the FPU,   }
  {                 then dword loop and byte tail for the remainder          }
  { The copy runs backwards (from the last byte down) when dest lies inside  }
  { source .. source+count-1, and forwards otherwise.                        }
  {                                                                          }
  { Register roles                                                           }
  {   r30 = step: 1/4/8 when copying forwards, -1/-4/-8 when backwards       }
  {   r28 = -1 when copying forwards, 0 when copying backwards               }
  {   r29 = scratch, r0 = data being moved                                   }
  { Pointer invariant: the next unit to move is always at r3/r4 + r30.       }
  {   byte pointer  forwards : address of next byte - 1                      }
  {   byte pointer  backwards: address one past the next byte                }
  {   unit pointer = byte pointer - (r28 and (unit size - 1))                }
  {                                                                          }
  { Only dest is aligned; source may stay misaligned for the dword and       }
  { double loads.                                                            }
  procedure Move(var source;var dest;count:longint);assembler;
  asm
    { count <= 0 ? }
    cmpwi cr0,r5,0
    { check if we have to do the move backwards because of overlap }
    sub r30,r4,r3
    { carry := boolean(dest-source >= count), unsigned = boolean(no overlap) }
    subc r30,r30,r5
    { count < 11 ? (to decide whether we will move dwords or bytes) }
    cmpwi cr1,r5,11
    { r30 := carry - 1: if overlap, then r30 := -1 else r30 := 0 }
    subfe r30,r30,r30
    { count < 39 ? (32 + max. alignment (7)) }
    cmpwi cr7,r5,39
    { if count <= 0, stop }
    ble cr0,LMoveDone
    { if overlap, then r29 := count else r29 := 0 }
    and r29,r5,r30
    { if overlap, then point source and dest one past the end }
    add r3,r3,r29
    add r4,r4,r29
    { if overlap, then r28 := 0, else r28 := -1 }
    not r28,r30
    { if overlap, then r30 := -2, else r30 := 0 }
    slwi r30,r30,1
    { if overlap, then r30 := -1, else r30 := 1 }
    addi r30,r30,1
    { if no overlap, then source/dest += -1, otherwise they stay. }
    { From here on r3/r4 + r30 = next position to load/store.     }
    add r3,r3,r28
    add r4,r4,r28
    { if count < 11, copy everything byte by byte }
    blt cr1,LMoveBytes
    { otherwise, guarantee 4 byte alignment for dest for starters. }
    { r4 - r28 is the next dest address when going forwards and    }
    { one past it when going backwards; in both cases the next     }
    { dword is aligned exactly when this value is.                 }
  LMove4ByteAlignLoop:
    sub r29,r4,r28
    andi. r29,r29,3
    beq cr0,LMove4ByteAligned
    lbzux r0,r3,r30
    stbux r0,r4,r30
    subi r5,r5,1
    b LMove4ByteAlignLoop
  LMove4ByteAligned:
    { at most 3 bytes were copied, so r5 >= 8 here }
    { if 11 <= count < 39, copy using dwords }
    blt cr7,LMoveDWords
    { count >= 39 -> align to 8 byte boundary and then use the FPU }
    sub r29,r4,r28
    andi. r29,r29,7
    beq cr0,L8BytesAligned
    { dest is 4 but not 8 byte aligned: move one dword }
    andi. r29,r28,3
    { byte pointers -> dword pointers }
    sub r3,r3,r29
    sub r4,r4,r29
    slwi r30,r30,2
    lwzux r0,r3,r30
    stwux r0,r4,r30
    { account for the dword that was just moved }
    subi r5,r5,4
    { dword pointers -> byte pointers }
    srawi r30,r30,2
    add r3,r3,r29
    add r4,r4,r29
  L8BytesAligned:
    { count div 32 ( >= 1, since count was >= 39 and at most 7 bytes }
    { were used for alignment )                                       }
    srwi r29,r5,5
    mtctr r29
    { byte pointers -> 8 byte pointers, step := 8 or -8 }
    andi. r29,r28,7
    sub r3,r3,r29
    sub r4,r4,r29
    slwi r30,r30,3
  LMove32ByteLoop:
    { load the whole block before storing it, so that an overlapping }
    { dest can never overwrite source bytes that are still needed    }
    lfdux f31,r3,r30
    lfdux f30,r3,r30
    lfdux f29,r3,r30
    lfdux f28,r3,r30
    stfdux f31,r4,r30
    stfdux f30,r4,r30
    stfdux f29,r4,r30
    stfdux f28,r4,r30
    bdnz LMove32ByteLoop
    { 8 byte pointers -> byte pointers, step := 1 or -1 }
    srawi r30,r30,3
    add r3,r3,r29
    add r4,r4,r29
    { remainder; cr0*4+eq is true if "count and 31" = 0 }
    andi. r5,r5,31
    beq cr0,LMoveDone
    { fewer than 4 bytes left -> no dwords to move }
    cmpwi cr1,r5,4
    blt cr1,LMoveBytes
  LMoveDWords:
    { r5 >= 4 here, so there is at least one dword to move }
    srwi r29,r5,2
    mtctr r29
    { byte pointers -> dword pointers, step := 4 or -4 }
    andi. r29,r28,3
    sub r3,r3,r29
    sub r4,r4,r29
    slwi r30,r30,2
  LMoveDWordsLoop:
    lwzux r0,r3,r30
    stwux r0,r4,r30
    bdnz LMoveDWordsLoop
    { dword pointers -> byte pointers, step := 1 or -1 }
    srawi r30,r30,2
    add r3,r3,r29
    add r4,r4,r29
    { remaining bytes; stop if there are none }
    andi. r5,r5,3
    beq cr0,LMoveDone
  LMoveBytes:
    { r5 > 0 on every path that gets here }
    mtctr r5
  LMoveBytesLoop:
    lbzux r0,r3,r30
    stbux r0,r4,r30
    bdnz LMoveBytesLoop
  LMoveDone:
  end ['R0','R3','R4','R5','R28','R29','R30','F28','F29','F30','F31','CTR','CR0','CR1','CR7'];
  {$define FPC_SYSTEM_HAS_FILLCHAR}
  { FillChar: store the byte value into count consecutive bytes starting at x. }
  { Nothing is written when count <= 0.                                        }
  Procedure FillChar(var x;count:longint;value:byte);
  begin
    asm
      { Register Usage:
        r3  x
        r4  count
        r5  value
        r13 value.value.value.value
        r14 running pointer (address of the next unit minus the unit size)
        r16 bytes still to fill
        r17 scratch
        NOTE(review): like the original, this assumes the parameters are
        still in r3..r5 when the asm statement starts - confirm against
        the code generator.
      }
      cmpwi cr0,r4,0
      cmpwi cr1,r4,12
      { r14 + 1 = address of the next byte to fill }
      subi r14,r3,1
      mr r16,r4
      { if count <= 0 then exit }
      ble cr0,.FillCharDone
      { replicate value into all four bytes of r13; every bit of r13 is  }
      { overwritten by the two inserts, so the upper bits of r5 are moot }
      mr r13,r5
      insrwi r13,r5,8,16
      insrwi r13,r13,16,0
      { if count < 12 then fill byte by byte }
      blt cr1,.FillCharBytes
      { r17 := number of bytes up to the next 4 byte boundary (0..3) }
      neg r17,r3
      andi. r17,r17,3
      beq cr0,.FillCharWords
      mtctr r17
      sub r16,r16,r17
    .FillCharHeadLoop:
      stbu r13,1(r14)
      bdnz .FillCharHeadLoop
    .FillCharWords:
      { r16 >= 9 here, so there are at least two whole words }
      srwi r17,r16,2
      { trailing bytes; cr0*4+eq is true if there are none }
      andi. r16,r16,3
      mtctr r17
      { r14 + 4 = address of the next word to fill }
      subi r14,r14,3
    .FillCharWordLoop:
      stwu r13,4(r14)
      bdnz .FillCharWordLoop
      { no trailing bytes, so exit }
      beq cr0,.FillCharDone
      { r14 + 1 = address of the next byte to fill }
      addi r14,r14,3
    .FillCharBytes:
      { r16 > 0 on every path that gets here }
      mtctr r16
    .FillCharByteLoop:
      stbu r13,1(r14)
      bdnz .FillCharByteLoop
    .FillCharDone:
    end ['r13','r14','r16','r17','ctr','cr0','cr1'];
  end;
  {$define FPC_SYSTEM_HAS_FILLWORD}
  { fillword: store the 16 bit value into count consecutive words starting }
  { at x. Nothing is written when count <= 0.                              }
  procedure fillword(var x;count : longint;value : word);
  begin
    { registers:
      r3  x
      r4  count (number of words)
      r5  value
      r13 running pointer (address of the next word minus 2)
      NOTE(review): like the original, this assumes the parameters are
      still in r3..r5 when the asm statement starts - confirm against
      the code generator.
    }
    asm
      cmpwi cr0,r4,0
      { r13 + 2 = address of the first word }
      subi r13,r3,2
      { one loop iteration per word }
      mtctr r4
      { if count <= 0 then exit }
      ble cr0,.FillWordDone
    .FillWordStoreLoop:
      { sthu stores the low 16 bits of r5 and advances r13 by 2 }
      sthu r5,2(r13)
      bdnz .FillWordStoreLoop
    .FillWordDone:
    end ['r13','ctr','cr0'];
  end;
  {$define FPC_SYSTEM_HAS_INDEXBYTE}
  function IndexByte(var buf;len:longint;b:byte):longint; assembler;
  { input: r3 = buf, r4 = len, r5 = b }
  { output: r3 = position of b in buf (-1 if not found) }
  { len is compared unsigned: 0 searches nothing, a negative len is }
  { treated as a very large count.                                   }
  { assumes the upper 24 bits of r5 are zero - TODO confirm with ABI }
  asm
    cmplwi cr0,r4,0
    mtctr r4
    { r30 + 1 = address of the next byte to test }
    subi r30,r3,1
    { remember the start of the buffer, r3 is about to become the result }
    mr r28,r3
    { assume not found }
    li r3,-1
    beq cr0,LIndexByteDone
  LIndexByteLoop:
    lbzu r29,1(r30)
    cmplw cr0,r29,r5
    { continue while ctr <> 0 after the decrement and no match yet }
    bdnzf cr0*4+eq,LIndexByteLoop
    { r3 still contains -1 here }
    bne cr0,LIndexByteDone
    { r30 points at the matching byte }
    sub r3,r30,r28
  LIndexByteDone:
  end ['r3','r28','r29','r30','cr0','ctr'];
  {$define FPC_SYSTEM_HAS_INDEXWORD}
  function Indexword(var buf;len:longint;b:word):longint; assembler;
  { input: r3 = buf, r4 = len (in words), r5 = b }
  { output: r3 = position (in words) of b in buf (-1 if not found) }
  { len is compared unsigned: 0 searches nothing, a negative len is }
  { treated as a very large count.                                   }
  { assumes the upper 16 bits of r5 are zero - TODO confirm with ABI }
  asm
    cmplwi cr0,r4,0
    mtctr r4
    { r30 + 2 = address of the next word to test }
    subi r30,r3,2
    { remember the start of the buffer, r3 is about to become the result }
    mr r28,r3
    { assume not found }
    li r3,-1
    beq cr0,LIndexWordDone
  LIndexWordLoop:
    lhzu r29,2(r30)
    cmplw cr0,r29,r5
    { continue while ctr <> 0 after the decrement and no match yet }
    bdnzf cr0*4+eq,LIndexWordLoop
    { r3 still contains -1 here }
    bne cr0,LIndexWordDone
    { byte offset of the match ... }
    sub r3,r30,r28
    { ... converted to a word index }
    srwi r3,r3,1
  LIndexWordDone:
  end ['r3','r28','r29','r30','cr0','ctr'];
  {$define FPC_SYSTEM_HAS_INDEXDWORD}
  function IndexDWord(var buf;len:longint;b:DWord):longint; assembler;
  { input: r3 = buf, r4 = len (in dwords), r5 = b }
  { output: r3 = position (in dwords) of b in buf (-1 if not found) }
  { len is compared unsigned: 0 searches nothing, a negative len is }
  { treated as a very large count.                                   }
  asm
    cmplwi cr0,r4,0
    mtctr r4
    { r30 + 4 = address of the next dword to test }
    subi r30,r3,4
    { remember the start of the buffer, r3 is about to become the result }
    mr r28,r3
    { assume not found }
    li r3,-1
    beq cr0,LIndexDWordDone
  LIndexDWordLoop:
    lwzu r29,4(r30)
    cmplw cr0,r29,r5
    { continue while ctr <> 0 after the decrement and no match yet }
    bdnzf cr0*4+eq,LIndexDWordLoop
    { r3 still contains -1 here }
    bne cr0,LIndexDWordDone
    { byte offset of the match ... }
    sub r3,r30,r28
    { ... converted to a dword index }
    srwi r3,r3,2
  LIndexDWordDone:
  end ['r3','r28','r29','r30','cr0','ctr'];
  {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  function CompareByte(var buf1,buf2;len:longint):longint; assembler;
  { input: r3 = buf1, r4 = buf2, r5 = len }
  { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
  { The result is the difference of the first pair of bytes that differ. }
  { len is compared unsigned; len = 0 yields 0.                          }
  { note: almost direct copy of strlcomp() from strings.inc }
  asm
    { use r28 instead of r3 for buf1 since r3 contains result }
    cmplwi cr0,r5,0
    { r28 + 1 = address of the next byte of buf1 }
    subi r28,r3,1
    li r3,0
    beq cr0,LCompByteDone
    mtctr r5
    { r4 + 1 = address of the next byte of buf2 }
    subi r4,r4,1
  LCompByteLoop:
    { load next bytes }
    lbzu r29,1(r28)
    lbzu r30,1(r4)
    { calculate difference }
    sub. r3,r29,r30
    { loop while the bytes are equal and ctr <> 0 after the decrement }
    bdnzt cr0*4+eq,LCompByteLoop
  LCompByteDone:
  end ['r3','r4','r28','r29','r30','cr0','ctr'];
  {$define FPC_SYSTEM_HAS_COMPAREWORD}
  function CompareWord(var buf1,buf2;len:longint):longint; assembler;
  { input: r3 = buf1, r4 = buf2, r5 = len (in words) }
  { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
  { The result is the difference of the first pair of words that differ. }
  { len is compared unsigned; len = 0 yields 0.                          }
  { note: almost direct copy of strlcomp() from strings.inc }
  asm
    { use r28 instead of r3 for buf1 since r3 contains result }
    cmplwi cr0,r5,0
    { r28 + 2 = address of the next word of buf1 }
    subi r28,r3,2
    li r3,0
    beq cr0,LCompWordDone
    mtctr r5
    { r4 + 2 = address of the next word of buf2 }
    subi r4,r4,2
  LCompWordLoop:
    { load next words, zero extended }
    lhzu r29,2(r28)
    lhzu r30,2(r4)
    { calculate difference }
    sub. r3,r29,r30
    { loop while the words are equal and ctr <> 0 after the decrement }
    bdnzt cr0*4+eq,LCompWordLoop
  LCompWordDone:
  end ['r3','r4','r28','r29','r30','cr0','ctr'];
  {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  function CompareDWord(var buf1,buf2;len:longint):longint; assembler;
  { input: r3 = buf1, r4 = buf2, r5 = len (in dwords) }
  { output: r3 = 0 if equal, -1 if buf1 < buf2, 1 if buf1 > buf2 }
  { The dwords are compared unsigned. A plain subtraction is not used   }
  { because the difference of two 32 bit values can overflow and carry  }
  { the wrong sign.                                                     }
  { len is compared unsigned; len = 0 yields 0.                         }
  asm
    { use r28 instead of r3 for buf1 since r3 contains result }
    cmplwi cr0,r5,0
    { r28 + 4 = address of the next dword of buf1 }
    subi r28,r3,4
    { assume equal }
    li r3,0
    beq cr0,LCompDWordDone
    mtctr r5
    { r4 + 4 = address of the next dword of buf2 }
    subi r4,r4,4
  LCompDWordLoop:
    { load next dwords }
    lwzu r29,4(r28)
    lwzu r30,4(r4)
    { unsigned comparison }
    cmplw cr0,r29,r30
    { loop while the dwords are equal and ctr <> 0 after the decrement }
    bdnzt cr0*4+eq,LCompDWordLoop
    { everything compared equal: r3 still contains 0 }
    beq cr0,LCompDWordDone
    li r3,1
    bgt cr0,LCompDWordDone
    li r3,-1
  LCompDWordDone:
  end ['r3','r4','r28','r29','r30','cr0','ctr'];
  {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
  { input: r3 = buf, r4 = len, r5 = b }
  { output: r3 = position of b in buf (-1 if not found) }
  { The search stops at the first zero byte or after len bytes, }
  { whichever comes first. The zero test is done before the     }
  { match test, so searching for #0 always yields -1.           }
  { len is compared unsigned; len = 0 yields -1.                }
  { assumes the upper 24 bits of r5 are zero - TODO confirm with ABI }
  asm
    { length = 0? }
    cmplwi cr0,r4,0
    { r29 + 1 = address of the next byte to test }
    subi r29,r3,1
    { remember the start of the buffer, r3 is about to become the result }
    mr r28,r3
    { assume not found }
    li r3,-1
    mtctr r4
    { if yes, do nothing }
    beq cr0,LIndexChar0Done
  LIndexChar0Loop:
    lbzu r30,1(r29)
    { cr1: terminating zero?  cr0: match? }
    cmplwi cr1,r30,0
    cmplw cr0,r30,r5
    beq cr1,LIndexChar0Done
    { continue while ctr <> 0 after the decrement and no match yet }
    bdnzf cr0*4+eq,LIndexChar0Loop
    { r3 still contains -1 here }
    bne cr0,LIndexChar0Done
    { r29 points at the matching byte }
    sub r3,r29,r28
  LIndexChar0Done:
  end ['r3','r28','r29','r30','cr0','cr1','ctr'];
  372. { all FPC_HELP_* are still missing (JM) }
  373. {****************************************************************************
  374. String
  375. ****************************************************************************}
  {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COPY}
  procedure int_strcopy(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_COPY'];
  assembler;
  { input: r3 = len, r4 = sstr, r5 = dstr }
  { Copies the string at sstr (length byte followed by the characters) to }
  { dstr, truncated to at most len characters. The length byte written to }
  { dstr is min(length(sstr), len), with len compared unsigned.           }
  asm
    { load length source }
    lbz r30,0(r4)
    { put min(length(sstr),len) in r3 }
    subc r29,r3,r30 { r29 := r3 - r30, carry := boolean(r3 >= r30) }
    subfe r3,r3,r3 { if r3 >= r30 then r3' := 0 else r3' := -1 }
    and r3,r29,r3 { if r3 >= r30 then r3' := 0 else r3' := r3-r30 }
    add r3,r3,r30 { if r3 >= r30 then r3' := r30 else r3' := r3 }
    cmplwi cr0,r3,0
    { put length in ctr }
    mtctr r3
    { store the length byte of the destination }
    stb r3,0(r5)
    { nothing to copy for an empty result }
    beq cr0,LShortStrCopyDone
  LShortStrCopyLoop:
    lbzu r29,1(r4)
    stbu r29,1(r5)
    bdnz LShortStrCopyLoop
  LShortStrCopyDone:
  end ['r3','r4','r5','r29','r30','cr0','ctr'];
  398. {
  399. $Log$
  400. Revision 1.4 2001-03-03 13:53:36 jonas
  401. * fixed small bug in move
  402. Revision 1.3 2001/03/02 13:24:10 jonas
  403. + new, complete implementation of move procedure (including support for
  404. overlapping regions)
  405. Revision 1.2 2001/02/11 17:59:46 jonas
  406. * implemented several more procedures
  407. Revision 1.1 2000/07/27 07:32:12 jonas
  408. + initial version by Casey Duncan (not yet thoroughly debugged or complete)
  409. }