|
@@ -152,21 +152,22 @@ asm
|
|
{ if overlap, then point source and dest to the end }
|
|
{ if overlap, then point source and dest to the end }
|
|
add r3,r3,r0
|
|
add r3,r3,r0
|
|
add r4,r4,r0
|
|
add r4,r4,r0
|
|
- { if overlap, then r0 := 0, else r0 := -1 }
|
|
|
|
- not r0,r10
|
|
|
|
|
|
+ { if overlap, then r6 := 0, else r6 := -1 }
|
|
|
|
+ not r6,r10
|
|
{ if overlap, then r10 := -2, else r10 := 0 }
|
|
{ if overlap, then r10 := -2, else r10 := 0 }
|
|
slwi r10,r10,1
|
|
slwi r10,r10,1
|
|
{ if overlap, then r10 := -1, else r10 := 1 }
|
|
{ if overlap, then r10 := -1, else r10 := 1 }
|
|
addi r10,r10,1
|
|
addi r10,r10,1
|
|
- { if overlap, then source/dest += -1, otherwise they stay }
|
|
|
|
- { After the next instruction, r3/r4 + r10 = next position }
|
|
|
|
- { to load/store from/to }
|
|
|
|
- add r3,r3,r0
|
|
|
|
- add r4,r4,r0
|
|
|
|
|
|
|
|
{ if count < 15, copy everything byte by byte }
|
|
{ if count < 15, copy everything byte by byte }
|
|
blt cr1,LMoveBytes
|
|
blt cr1,LMoveBytes
|
|
|
|
|
|
|
|
+ { if no overlap, then source/dest += -1, otherwise they stay }
|
|
|
|
+ { After the next instruction, r3/r4 + r10 = next position to }
|
|
|
|
+ { load/store from/to }
|
|
|
|
+ add r3,r3,r6
|
|
|
|
+ add r4,r4,r6
|
|
|
|
+
|
|
{ otherwise, guarantee 4 byte alignment for dest for starters }
|
|
{ otherwise, guarantee 4 byte alignment for dest for starters }
|
|
LMove4ByteAlignLoop:
|
|
LMove4ByteAlignLoop:
|
|
lbzux r0,r3,r10
|
|
lbzux r0,r3,r10
|
|
@@ -189,6 +190,10 @@ LMove4ByteAlignLoop:
|
|
|
|
|
|
{ multiply the update count with 4 }
|
|
{ multiply the update count with 4 }
|
|
slwi r10,r10,2
|
|
slwi r10,r10,2
|
|
|
|
+ slwi r6,r6,2
|
|
|
|
+ { and adapt the source and dest }
|
|
|
|
+ add r3,r3,r6
|
|
|
|
+ add r4,r4,r6
|
|
|
|
|
|
beq cr0,L8BytesAligned
|
|
beq cr0,L8BytesAligned
|
|
|
|
|
|
@@ -212,12 +217,13 @@ L8BytesAligned:
|
|
|
|
|
|
{ adjust the update count: it will now be 8 or -8 depending on overlap }
|
|
{ adjust the update count: it will now be 8 or -8 depending on overlap }
|
|
slwi r10,r10,1
|
|
slwi r10,r10,1
|
|
|
|
+ slwi r6,r6,1
|
|
|
|
|
|
{ adjust source and dest pointers: because of the above loop, dest is now }
|
|
{ adjust source and dest pointers: because of the above loop, dest is now }
|
|
- { aligned to 8 bytes. So if we substract r10 we will still have an 8 bytes }
|
|
|
|
|
|
+ { aligned to 8 bytes. So if we add r6 we will still have an 8 bytes }
|
|
{ aligned address) }
|
|
{ aligned address) }
|
|
- sub r3,r3,r10
|
|
|
|
- sub r4,r4,r10
|
|
|
|
|
|
+ add r3,r3,r6
|
|
|
|
+ add r4,r4,r6
|
|
|
|
|
|
LMove32ByteLoop:
|
|
LMove32ByteLoop:
|
|
lfdux f0,r3,r10
|
|
lfdux f0,r3,r10
|
|
@@ -234,24 +240,22 @@ LMove32ByteLoop:
|
|
beq cr0,LMoveDone
|
|
beq cr0,LMoveDone
|
|
|
|
|
|
{ make r10 again -1 or 1, but first adjust source/dest pointers }
|
|
{ make r10 again -1 or 1, but first adjust source/dest pointers }
|
|
- add r3,r3,r10
|
|
|
|
- add r4,r4,r10
|
|
|
|
|
|
+ sub r3,r3,r6
|
|
|
|
+ sub r4,r4,r6
|
|
srawi r10,r10,3
|
|
srawi r10,r10,3
|
|
- sub r3,r3,r10
|
|
|
|
- sub r4,r4,r10
|
|
|
|
|
|
+ srawi r6,r6,3
|
|
|
|
|
|
{ cr1 contains whether count <= 11 }
|
|
{ cr1 contains whether count <= 11 }
|
|
ble cr1,LMoveBytes
|
|
ble cr1,LMoveBytes
|
|
- add r3,r3,r10
|
|
|
|
- add r4,r4,r10
|
|
|
|
|
|
|
|
LMoveDWords:
|
|
LMoveDWords:
|
|
mtctr r0
|
|
mtctr r0
|
|
andi. r5,r5,3
|
|
andi. r5,r5,3
|
|
{ r10 * 4 }
|
|
{ r10 * 4 }
|
|
slwi r10,r10,2
|
|
slwi r10,r10,2
|
|
- sub r3,r3,r10
|
|
|
|
- sub r4,r4,r10
|
|
|
|
|
|
+ slwi r6,r6,2
|
|
|
|
+ add r3,r3,r6
|
|
|
|
+ add r4,r4,r6
|
|
|
|
|
|
LMoveDWordsLoop:
|
|
LMoveDWordsLoop:
|
|
lwzux r0,r3,r10
|
|
lwzux r0,r3,r10
|
|
@@ -260,12 +264,13 @@ LMoveDWordsLoop:
|
|
|
|
|
|
beq cr0,LMoveDone
|
|
beq cr0,LMoveDone
|
|
{ make r10 again -1 or 1 }
|
|
{ make r10 again -1 or 1 }
|
|
- add r3,r3,r10
|
|
|
|
- add r4,r4,r10
|
|
|
|
|
|
+ sub r3,r3,r6
|
|
|
|
+ sub r4,r4,r6
|
|
srawi r10,r10,2
|
|
srawi r10,r10,2
|
|
- sub r3,r3,r10
|
|
|
|
- sub r4,r4,r10
|
|
|
|
|
|
+ srawi r6,r6,2
|
|
LMoveBytes:
|
|
LMoveBytes:
|
|
|
|
+ add r3,r3,r6
|
|
|
|
+ add r4,r4,r6
|
|
mtctr r5
|
|
mtctr r5
|
|
LMoveBytesLoop:
|
|
LMoveBytesLoop:
|
|
lbzux r0,r3,r10
|
|
lbzux r0,r3,r10
|
|
@@ -881,7 +886,10 @@ end ['R3','R10'];
|
|
|
|
|
|
{
|
|
{
|
|
$Log$
|
|
$Log$
|
|
- Revision 1.38 2003-04-27 16:24:44 jonas
|
|
|
|
|
|
+ Revision 1.39 2003-05-02 19:03:25 jonas
|
|
|
|
+ * fixed some bugs in move()
|
|
|
|
+
|
|
|
|
+ Revision 1.38 2003/04/27 16:24:44 jonas
|
|
- disabled fpc_shortstr_concat because it's called differently than that
|
|
- disabled fpc_shortstr_concat because it's called differently than that
|
|
routine is declared
|
|
routine is declared
|
|
|
|
|