123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494 |
- {
- $Id$
- This file is part of the Free Pascal run time library.
- Copyright (c) 2000-2001 by the Free Pascal development team.
- Portions Copyright (c) 2000 by Casey Duncan ([email protected])
- Processor dependent implementation for the system unit for
- PowerPC
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {****************************************************************************
- Move / Fill
- ****************************************************************************}
- {$define FPC_SYSTEM_HAS_MOVE}
- procedure Move(var source;var dest;count:longint);assembler;
- { input: r3 = source, r4 = dest, r5 = count (in bytes)                      }
- { Copies count bytes from source to dest; nothing is copied if count <= 0. }
- { If dest starts inside the source region (dest-source < count, unsigned), }
- { the copy runs backwards, byte by byte, so that no source byte is         }
- { overwritten before it has been read. Forward copies of 11 bytes or more  }
- { first align dest to 4 bytes and then move dwords; forward copies of 39   }
- { bytes or more align dest to 8 bytes and move 32 byte blocks through      }
- { f28..f31.                                                                }
- { r29 is a temporary; r30 first holds the overlap mask and afterwards the  }
- { signed distance between two consecutive loads/stores.                    }
- asm
-         { load the begin of the source in the data cache }
-         dcbt    r0,r3
-         { count <= 0 ? }
-         cmpwi   cr0,r5,0
-         { check if we have to do the move backwards because of overlap }
-         sub     r30,r4,r3
-         { carry := boolean(dest-source >= count) (unsigned) = not(overlap) }
-         subc    r30,r30,r5
-         { count < 11 ? (to decide whether we will move dwords or bytes) }
-         cmpwi   cr1,r5,11
-         { r30 := carry - 1: if overlap, then r30 := -1 else r30 := 0 }
-         subfe   r30,r30,r30
-         { count < 39 ? (32 + max. alignment (7)) }
-         cmpwi   cr7,r5,39
-         { if count <= 0, stop }
-         ble     cr0,LMoveDone
-         { if overlap, then r29 := count else r29 := 0 }
-         and     r29,r5,r30
-         { if overlap, then point source and dest to the end }
-         add     r3,r3,r29
-         add     r4,r4,r29
-         { if overlap, then r29 := 0, else r29 := -1 }
-         not     r29,r30
-         { if overlap, then r30 := -2, else r30 := 0 }
-         slwi    r30,r30,1
-         { if overlap, then r30 := -1, else r30 := 1 }
-         addi    r30,r30,1
-         { if overlap, source/dest stay, otherwise source/dest += -1 }
-         { After the next instructions, r3/r4 + r30 = next position  }
-         { to load/store from/to                                     }
-         add     r3,r3,r29
-         add     r4,r4,r29
-         { backward (overlapping) copies are always done byte by byte: the }
-         { alignment and block code below re-copies the byte at the newly  }
-         { aligned address and extends the copy upwards from there, which  }
-         { is only correct when the addresses increase (cr0 is free again  }
-         { here, the count <= 0 test has already been consumed)            }
-         cmpwi   cr0,r30,0
-         blt     cr0,LMoveBytes
-         { if count < 11, copy everything byte by byte }
-         blt     cr1,LMoveBytes
-         { otherwise, guarantee 4 byte alignment for dest for starters }
- LMove4ByteAlignLoop:
-         lbzux   r29,r3,r30
-         stbux   r29,r4,r30
-         { is dest now 4 aligned? }
-         andi.   r29,r4,3
-         subi    r5,r5,1
-         { while not aligned, continue }
-         bne     cr0,LMove4ByteAlignLoop
-         { check for 8 byte alignment }
-         andi.   r29,r4,7
-         { we are going to copy one byte again (the one at the newly }
-         { aligned address), so increase count again                 }
-         addi    r5,r5,1
-         { count div 4 for number of dwords to copy }
-         srwi    r29,r5,2
-         { if 11 <= count < 39, copy using dwords }
-         blt     cr7,LMoveDWords
-         { multiply the update count with 4 }
-         slwi    r30,r30,2
-         beq     cr0,L8BytesAligned
-         { count >= 39 -> align to 8 byte boundary and then use the FPU }
-         { since we're already at 4 byte alignment, use dword store     }
-         lwz     r29,0(r3)
-         add     r3,r3,r30
-         stw     r29,0(r4)
-         add     r4,r4,r30
-         { these 4 bytes are done, so they may not be counted again below }
-         subi    r5,r5,4
- L8BytesAligned:
-         { count div 32 ( >= 1, since count was >= 39 ) }
-         srwi    r29,r5,5
-         { remainder }
-         andi.   r5,r5,31
-         { to decide if we will do some dword stores afterwards or not }
-         cmpwi   cr1,r5,11
-         mtctr   r29
-         { r29 := count div 4, will be moved to ctr when copying dwords }
-         srwi    r29,r5,2
-         { adjust the update count: it is now 8 }
-         slwi    r30,r30,1
-         { adjust source and dest pointers: because of the above, dest is  }
-         { now aligned to 8 bytes. So if we subtract r30 we will still     }
-         { have an 8 bytes aligned address                                 }
-         sub     r3,r3,r30
-         sub     r4,r4,r30
- LMove32ByteLoop:
-         lfdux   f31,r3,r30
-         lfdux   f30,r3,r30
-         lfdux   f29,r3,r30
-         lfdux   f28,r3,r30
-         stfdux  f31,r4,r30
-         stfdux  f30,r4,r30
-         stfdux  f29,r4,r30
-         stfdux  f28,r4,r30
-         bdnz    LMove32ByteLoop
-         { cr0*4+eq is true if "count and 31" = 0 }
-         beq     cr0,LMoveDone
-         { make r30 1 again, but first adjust source/dest pointers }
-         add     r3,r3,r30
-         add     r4,r4,r30
-         srawi   r30,r30,3
-         sub     r3,r3,r30
-         sub     r4,r4,r30
-         { cr1 contains whether count <= 11 }
-         ble     cr1,LMoveBytes
-         add     r3,r3,r30
-         add     r4,r4,r30
- LMoveDWords:
-         mtctr   r29
-         { remaining bytes after the dwords; cr0*4+eq is true if there are none }
-         andi.   r5,r5,3
-         { r30 * 4 }
-         slwi    r30,r30,2
-         sub     r3,r3,r30
-         sub     r4,r4,r30
- LMoveDWordsLoop:
-         lwzux   r29,r3,r30
-         stwux   r29,r4,r30
-         bdnz    LMoveDWordsLoop
-         beq     cr0,LMoveDone
-         { make r30 1 again }
-         add     r3,r3,r30
-         add     r4,r4,r30
-         srawi   r30,r30,2
-         sub     r3,r3,r30
-         sub     r4,r4,r30
- LMoveBytes:
-         { r5 = number of bytes left (> 0), r30 = 1 or -1 }
-         mtctr   r5
- LMoveBytesLoop:
-         lbzux   r29,r3,r30
-         stbux   r29,r4,r30
-         bdnz    LMoveBytesLoop
- LMoveDone:
- end ['R3','R4','R5','R29','R30','F28','F29','F30','F31','CTR','CR0','CR1','CR7'];
- {$define FPC_SYSTEM_HAS_FILLCHAR}
- Procedure FillChar(var x;count:longint;value:byte);assembler;
- { input: r3 = x, r4 = count (in bytes), r5 = value                         }
- { Stores value in the count bytes starting at x; does nothing if           }
- { count <= 0. Fills of 12 bytes or more first align the destination to 4   }
- { bytes, then store whole words and finish with the 0..3 remaining bytes.  }
- { r29 = address of the last byte/word written (for the update form         }
- {       stores), r30 = temporary                                           }
- asm
-         { count <= 0 ? }
-         cmpwi   cr0,r4,0
-         { count < 12 ? (to decide whether we will store words or bytes) }
-         cmpwi   cr1,r4,12
-         { r29 + 1 = next byte to store }
-         subi    r29,r3,1
-         ble     cr0,LFillCharDone
-         blt     cr1,LFillCharBytes
-         { replicate value into all four bytes of r5 }
-         insrwi  r5,r5,8,16
-         insrwi  r5,r5,16,0
-         { r30 := number of bytes needed to reach 4 byte alignment (0..3) }
-         neg     r30,r3
-         andi.   r30,r30,3
-         { count := count - r30 ( >= 9, so at least two words follow ) }
-         sub     r4,r4,r30
-         beq     cr0,LFillCharAligned
-         mtctr   r30
- LFillCharAlignLoop:
-         stbu    r5,1(r29)
-         bdnz    LFillCharAlignLoop
- LFillCharAligned:
-         { number of whole words }
-         srwi    r30,r4,2
-         { number of trailing bytes; cr0*4+eq is true if there are none }
-         andi.   r4,r4,3
-         mtctr   r30
-         { r29 + 4 = next (aligned) word to store }
-         subi    r29,r29,3
- LFillCharWordLoop:
-         stwu    r5,4(r29)
-         bdnz    LFillCharWordLoop
-         beq     cr0,LFillCharDone
-         { r29 + 1 = next byte to store }
-         addi    r29,r29,3
- LFillCharBytes:
-         { r4 = number of bytes left (1..11); stbu stores the low byte of r5 }
-         mtctr   r4
- LFillCharByteLoop:
-         stbu    r5,1(r29)
-         bdnz    LFillCharByteLoop
- LFillCharDone:
- end ['r4','r5','r29','r30','cr0','cr1','ctr'];
- {$define FPC_SYSTEM_HAS_FILLWORD}
- procedure fillword(var x;count : longint;value : word);assembler;
- { input: r3 = x, r4 = count (in 16 bit words), r5 = value                 }
- { Stores the low 16 bits of value in the count words starting at x; does  }
- { nothing if count <= 0.                                                  }
- asm
-         { count <= 0 ? }
-         cmpwi   cr0,r4,0
-         mtctr   r4
-         { r3 + 2 = next word to store }
-         subi    r3,r3,2
-         ble     cr0,LFillWordDone
- LFillWordLoop:
-         sthu    r5,2(r3)
-         bdnz    LFillWordLoop
- LFillWordDone:
- end ['r3','cr0','ctr'];
- {$define FPC_SYSTEM_HAS_INDEXBYTE}
- function IndexByte(var buf;len:longint;b:byte):longint; assembler;
- { in:  r3 = address of buf, r4 = number of bytes to scan, r5 = byte to find }
- { out: r3 = offset of the first byte equal to b, or -1 if there is none     }
- asm
-         { start fetching the buffer into the data cache }
-         dcbt    r0,r3
-         { r28 keeps the start address, r30 + 1 is the next byte to read }
-         mr      r28,r3
-         subi    r30,r3,1
-         { nothing to scan if len = 0 }
-         cmpli   r4,0
-         mtctr   r4
-         { result when nothing is found }
-         li      r3,-1
-         beq     LIndexByteExit
- LIndexByteScan:
-         lbzu    r29,1(r30)
-         cmpl    r29,r5
-         { go on while bytes are left and the last one did not match }
-         bdnzf   cr0*4+eq,LIndexByteScan
-         { no match on the last byte read: keep the -1 in r3 }
-         bne     LIndexByteExit
-         { offset = address of the matching byte - start address }
-         sub     r3,r30,r28
- LIndexByteExit:
- end ['r3','r28','r29','r30','cr0','ctr'];
- {$define FPC_SYSTEM_HAS_INDEXWORD}
- function IndexWord(var buf;len:longint;b:word):longint; assembler;
- { input: r3 = buf, r4 = len (in 16 bit words), r5 = b                      }
- { output: r3 = index of the first word equal to b (-1 if not found)        }
- asm
-         { load the begin of the buffer in the data cache }
-         dcbt    r0,r3
-         { len = 0 ? }
-         cmpli   r4,0
-         mtctr   r4
-         { r30 + 2 = next word to load, r28 = start of the buffer }
-         subi    r30,r3,2
-         mr      r28,r3
-         { assume not found }
-         li      r3,-1
-         beq     LIndexWordDone
- LIndexWordLoop:
-         lhzu    r29,2(r30)
-         cmpl    r29,r5
-         { continue while words are left and the last one did not match }
-         bdnzf   cr0*4+eq,LIndexWordLoop
-         { r3 still contains -1 here }
-         bne     LIndexWordDone
-         { byte offset of the match ... }
-         sub     r3,r30,r28
-         { ... converted to a word index }
-         srwi    r3,r3,1
- LIndexWordDone:
- end ['r3','r28','r29','r30','cr0','ctr'];
- {$define FPC_SYSTEM_HAS_INDEXDWORD}
- function IndexDWord(var buf;len:longint;b:DWord):longint; assembler;
- { input: r3 = buf, r4 = len (in 32 bit dwords), r5 = b                     }
- { output: r3 = index of the first dword equal to b (-1 if not found)       }
- asm
-         { load the begin of the buffer in the data cache }
-         dcbt    r0,r3
-         { len = 0 ? }
-         cmpli   r4,0
-         mtctr   r4
-         { r30 + 4 = next dword to load, r28 = start of the buffer }
-         subi    r30,r3,4
-         mr      r28,r3
-         { assume not found }
-         li      r3,-1
-         beq     LIndexDWordDone
- LIndexDWordLoop:
-         lwzu    r29,4(r30)
-         cmpl    r29,r5
-         { continue while dwords are left and the last one did not match }
-         bdnzf   cr0*4+eq, LIndexDWordLoop
-         { r3 still contains -1 here }
-         bne     LIndexDWordDone
-         { byte offset of the match ... }
-         sub     r3,r30,r28
-         { ... converted to a dword index }
-         srwi    r3,r3,2
- LIndexDWordDone:
- end ['r3','r28','r29','r30','cr0','ctr'];
- {$define FPC_SYSTEM_HAS_COMPAREBYTE}
- function CompareByte(var buf1,buf2;len:longint):longint; assembler;
- { input: r3 = buf1, r4 = buf2, r5 = len (in bytes)                         }
- { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2          }
- { note: almost direct copy of strlcomp() from strings.inc                  }
- asm
-         { load the begin of the first buffer in the data cache }
-         dcbt    r0,r3
-         { len = 0 ? (compare with the immediate 0, not with a register) }
-         cmpli   r5,0
-         mtctr   r5
-         { use r28 instead of r3 for buf1 since r3 contains result }
-         subi    r28,r3,1
-         subi    r4,r4,1
-         { result if len = 0 }
-         li      r3,0
-         beq     LCompByteDone
- LCompByteLoop:
-         { load next bytes (zero extended) }
-         lbzu    r29,1(r28)
-         lbzu    r30,1(r4)
-         { calculate difference; it cannot overflow for 8 bit values }
-         sub.    r3,r29,r30
-         { if bytes not equal or at the end, we're ready }
-         bdnzt   cr0*4+eq, LCompByteLoop
- LCompByteDone:
- end ['r3','r4','r28','r29','r30','cr0','ctr'];
- {$define FPC_SYSTEM_HAS_COMPAREWORD}
- function CompareWord(var buf1,buf2;len:longint):longint; assembler;
- { input: r3 = buf1, r4 = buf2, r5 = len (in 16 bit words)                  }
- { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2          }
- { note: almost direct copy of strlcomp() from strings.inc                  }
- asm
-         { load the begin of the first buffer in the data cache }
-         dcbt    r0,r3
-         { len = 0 ? (compare with the immediate 0, not with a register) }
-         cmpli   r5,0
-         mtctr   r5
-         { use r28 instead of r3 for buf1 since r3 contains result }
-         subi    r28,r3,2
-         subi    r4,r4,2
-         { result if len = 0 }
-         li      r3,0
-         beq     LCompWordDone
- LCompWordLoop:
-         { load next words (zero extended) }
-         lhzu    r29,2(r28)
-         lhzu    r30,2(r4)
-         { calculate difference; it cannot overflow for 16 bit values }
-         sub.    r3,r29,r30
-         { if words not equal or at the end, we're ready }
-         bdnzt   cr0*4+eq, LCompWordLoop
- LCompWordDone:
- end ['r3','r4','r28','r29','r30','cr0','ctr'];
- {$define FPC_SYSTEM_HAS_COMPAREDWORD}
- function CompareDWord(var buf1,buf2;len:longint):longint; assembler;
- { input: r3 = buf1, r4 = buf2, r5 = len (in 32 bit dwords)                 }
- { output: r3 = 0 if equal, -1 if buf1 < buf2, 1 if buf1 > buf2, where the  }
- {         first differing dwords are compared as unsigned values           }
- asm
-         { load the begin of the first buffer in the data cache }
-         dcbt    r0,r3
-         { len = 0 ? (compare with the immediate 0, not with a register) }
-         cmpli   r5,0
-         mtctr   r5
-         { use r28 instead of r3 for buf1 since r3 contains result }
-         subi    r28,r3,4
-         subi    r4,r4,4
-         { result if len = 0 or if everything is equal }
-         li      r3,0
-         beq     LCompDWordDone
- LCompDWordLoop:
-         { load next dwords }
-         lwzu    r29,4(r28)
-         lwzu    r30,4(r4)
-         { unsigned compare: the difference of two 32 bit values can }
-         { overflow, so its sign cannot be used as result            }
-         cmpl    r29,r30
-         { if dwords not equal or at the end, we're ready }
-         bdnzt   cr0*4+eq, LCompDWordLoop
-         { r3 still contains 0 here }
-         beq     LCompDWordDone
-         li      r3,1
-         bgt     LCompDWordDone
-         li      r3,-1
- LCompDWordDone:
- end ['r3','r4','r28','r29','r30','cr0','ctr'];
- {$define FPC_SYSTEM_HAS_INDEXCHAR0}
- function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
- { input: r3 = buf, r4 = len, r5 = b                                        }
- { output: r3 = offset of the first char equal to b; -1 if b is not found   }
- {         within len chars or if a #0 char is read first                   }
- asm
-         { load the begin of the buffer in the data cache }
-         dcbt    r0,r3
-         { length = 0? }
-         cmpli   r4,0
-         mtctr   r4
-         { r29 + 1 = next char to load, r28 = start of the buffer }
-         subi    r29,r3,1
-         mr      r28,r3
-         { assume not found }
-         li      r3,-1
-         { if yes, do nothing }
-         beq     LIndexChar0Done
- LIndexChar0Loop:
-         lbzu    r30,1(r29)
-         { cr1: end of string reached?  cr0: char found? }
-         cmpli   cr1,r30,0
-         cmpl    r30,r5
-         { stop at a #0 char; r3 still contains -1 here }
-         beq     cr1,LIndexChar0Done
-         { continue while chars are left and the last one did not match }
-         bdnzf   cr0*4+eq, LIndexChar0Loop
-         { r3 still contains -1 here }
-         bne     LIndexChar0Done
-         sub     r3,r29,r28
- LIndexChar0Done:
- end ['r3','r28','r29','r30','cr0','cr1','ctr'];
- { all FPC_HELP_* are still missing (JM) }
- {****************************************************************************
- String
- ****************************************************************************}
- {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COPY}
- procedure int_strcopy(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_COPY'];
- assembler;
- { input: r3: len, sstr: r4, dstr: r5                                       }
- { Copies the shortstring at sstr to dstr, truncated to at most len chars:  }
- { the length byte min(length(sstr),len) is stored at dstr[0], followed by  }
- { that many chars.                                                         }
- asm
-         { load length source }
-         lbz     r30,0(r4)
-         { load the begin of the dest buffer in the data cache }
-         dcbtst  r0,r5
-         { put min(length(sstr),len) in r3 }
-         subc    r29,r3,r30 { r29 := r3 - r30, carry := (r3 >= r30) unsigned }
-         subfe   r3,r3,r3   { if r3 >= r30 then r3' := 0 else r3' := -1 }
-         and     r3,r29,r3  { if r3 >= r30 then r3' := 0 else r3' := r3-r30 }
-         add     r3,r3,r30  { if r3 >= r30 then r3' := r30 else r3' := r3 }
-         { nothing to copy besides the length byte ? }
-         cmpli   r3,0
-         { put length in ctr }
-         mtctr   r3
-         stb     r3,0(r5)
-         beq     LShortStrCopyDone
- LShortStrCopyLoop:
-         lbzu    r29,1(r4)
-         stbu    r29,1(r5)
-         bdnz    LShortStrCopyLoop
- LShortStrCopyDone:
- end ['r3','r4','r5','r29','r30','cr0','ctr'];
- {
- $Log$
- Revision 1.5 2001-07-07 12:46:12 jonas
- * some small bugfixes and cache optimizations
- Revision 1.4 2001/03/03 13:53:36 jonas
- * fixed small bug in move
- Revision 1.3 2001/03/02 13:24:10 jonas
- + new, complete implementation of move procedure (including support for
- overlapping regions)
- Revision 1.2 2001/02/11 17:59:46 jonas
- * implemented several more procedures
- Revision 1.1 2000/07/27 07:32:12 jonas
- + initial version by Casey Duncan (not yet thoroughly debugged or complete)
- }
|