Selaa lähdekoodia

* some small bugfixes and cache optimizations

Jonas Maebe 24 vuotta sitten
vanhempi
commit
91a2f0d3f5
4 muutettua tiedostoa jossa 112 lisäystä ja 46 poistoa
  1. 56 31
      rtl/powerpc/powerpc.inc
  2. 18 1
      rtl/powerpc/set.inc
  3. 27 7
      rtl/powerpc/strings.inc
  4. 11 7
      rtl/powerpc/stringss.inc

+ 56 - 31
rtl/powerpc/powerpc.inc

@@ -1,7 +1,7 @@
 {
     $Id$
     This file is part of the Free Pascal run time library.
-    Copyright (c) 1999 by the Free Pascal development team.
+    Copyright (c) 2000-2001 by the Free Pascal development team.
 
     Portions Copyright (c) 2000 by Casey Duncan ([email protected])
 
@@ -26,6 +26,8 @@
 
 procedure Move(var source;var dest;count:longint);assembler;
 asm
+                {  load the begin of the source in the data cache }
+                dcbt    r0,r3
                 {  count <= 0 ?  }
                 cmpwi   cr0,r5,0
                 {  check if we have to do the move backwards because of overlap  }
@@ -93,9 +95,9 @@ LMove4ByteAlignLoop:
                 {  count >= 39 -> align to 8 byte boundary and then use the FPU  }
                 {  since we're already at 4 byte alignment, use dword store      }
                 lwz     r29,0(r3)
-                add     r3,r3,r30,
+                add     r3,r3,r30
                 stw     r29,0(r4)
-                add     r4,r4,r30,
+                add     r4,r4,r30
 L8BytesAligned:
                 { count div 32 ( >= 1, since count was >=39 ) }
                 srwi    r29,r5,5
@@ -260,43 +262,49 @@ function IndexByte(var buf;len:longint;b:byte):longint; assembler;
 { input: r3 = buf, r4 = len, r5 = b                   }
 { output: r3 = position of b in buf (-1 if not found) }
 asm
+                {  load the begin of the buffer in the data cache }
+                dcbt    r0,r3
                 cmpli   r4,0
                 mtctr   r4
                 subi    r30,r3,1
+                mr      r28,r3
                 { assume not found }
                 li      r3,-1
-                beq     LIndexByteNotFound
+                beq     LIndexByteDone
 LIndexByteLoop:
                 lbzu    r29,1(r30)
                 cmpl    r29,r5
-                bdnzne  LIndexByteLoop
+                bdnzf   cr0*4+eq,LIndexByteLoop
                 { r3 still contains -1 here }
                 bne     LIndexByteDone
-                sub     r3,r29,r3
+                sub     r3,r30,r28
 LIndexByteDone:
-end ['r3','r29','r30','cr0','ctr'];
+end ['r3','r28','r29','r30','cr0','ctr'];
 
 
 {$define FPC_SYSTEM_HAS_INDEXWORD}
-function Indexword(var buf;len:longint;b:word):longint; assembler;
+function IndexWord(var buf;len:longint;b:word):longint; assembler;
 { input: r3 = buf, r4 = len, r5 = b                   }
 { output: r3 = position of b in buf (-1 if not found) }
 asm
+                {  load the begin of the buffer in the data cache }
+                dcbt    r0,r3
                 cmpli   r4,0
                 mtctr   r4
                 subi    r30,r3,2
+                mr      r28,r3
                 { assume not found }
                 li      r3,-1
-                beq     LIndexWordNotFound
+                beq     LIndexWordDone
 LIndexWordLoop:
                 lhzu    r29,2(r30)
                 cmpl    r29,r5
-                bdnzne  LIndexWordLoop
+                bdnzf   cr0*4+eq,LIndexWordLoop
                 { r3 still contains -1 here }
                 bne     LIndexWordDone
-                sub     r3,r29,r3
+                sub     r3,r30,r28
 LIndexWordDone:
-end ['r3','r29','r30','cr0','ctr'];
+end ['r3','r28','r29','r30','cr0','ctr'];
 
 
 {$define FPC_SYSTEM_HAS_INDEXDWORD}
@@ -304,21 +312,24 @@ function IndexDWord(var buf;len:longint;b:DWord):longint; assembler;
 { input: r3 = buf, r4 = len, r5 = b                   }
 { output: r3 = position of b in buf (-1 if not found) }
 asm
+                {  load the begin of the buffer in the data cache }
+                dcbt    r0,r3
                 cmpli   r4,0
                 mtctr   r4
                 subi    r30,r3,4
+                mr      r28,r3
                 { assume not found }
                 li      r3,-1
-                beq     LIndexDWordNotFound
+                beq     LIndexDWordDone
 LIndexDWordLoop:
                 lwzu    r29,4(r30)
                 cmpl    r29,r5
-                bdnzne  LIndexDWordLoop
+                bdnzf   cr0*4+eq, LIndexDWordLoop
                 { r3 still contains -1 here }
                 bne     LIndexDWordDone
-                sub     r3,r29,r3
+                sub     r3,r30,r28
 LIndexDWordDone:
-end ['r3','r29','r30','cr0','ctr'];
+end ['r3','r28','r29','r30','cr0','ctr'];
 
 {$define FPC_SYSTEM_HAS_COMPAREBYTE}
 function CompareByte(var buf1,buf2;len:longint):longint; assembler;
@@ -326,13 +337,15 @@ function CompareByte(var buf1,buf2;len:longint):longint; assembler;
 { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
 { note: almost direct copy of strlcomp() from strings.inc         }
 asm
+        {  load the begin of the first buffer in the data cache }
+        dcbt    r0,r3
         { use r28 instead of r3 for buf1 since r3 contains result }
         cmpl    r5,0
+        mtctr   r5
         subi    r28,r3,1
+        subi    r4,r4,1
         li      r3,0
         beq     LCompByteDone
-        mtctr   r5
-        subi    r4,r4,1
 LCompByteLoop:
         { load next chars }
         lbzu    r29,1(r28)
@@ -340,7 +353,7 @@ LCompByteLoop:
         { calculate difference }
         sub.    r3,r29,r30
         { if chars not equal or at the end, we're ready }
-        bdnze     LCompByteDone
+        bdnzt   cr0*4+eq, LCompByteLoop
 LCompByteDone:
 end ['r3','r4','r28','r29','r30','cr0','ctr'];
 
@@ -350,13 +363,15 @@ function CompareWord(var buf1,buf2;len:longint):longint; assembler;
 { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
 { note: almost direct copy of strlcomp() from strings.inc         }
 asm
+        {  load the begin of the first buffer in the data cache }
+        dcbt    r0,r3
         { use r28 instead of r3 for buf1 since r3 contains result }
         cmpl    r5,0
+        mtctr   r5
         subi    r28,r3,2
+        subi    r4,r4,2
         li      r3,0
         beq     LCompWordDone
-        mtctr   r5
-        subi    r4,r4,2
 LCompWordLoop:
         { load next chars }
         lhzu    r29,2(r28)
@@ -364,7 +379,7 @@ LCompWordLoop:
         { calculate difference }
         sub.    r3,r29,r30
         { if chars not equal or at the end, we're ready }
-        bdnze     LCompWordDone
+        bdnzt   cr0*4+eq, LCompWordLoop
 LCompWordDone:
 end ['r3','r4','r28','r29','r30','cr0','ctr'];
 
@@ -375,13 +390,15 @@ function CompareDWord(var buf1,buf2;len:longint):longint; assembler;
 { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
 { note: almost direct copy of strlcomp() from strings.inc         }
 asm
+        {  load the begin of the first buffer in the data cache }
+        dcbt    r0,r3
         { use r28 instead of r3 for buf1 since r3 contains result }
         cmpl    r5,0
+        mtctr   r5
         subi    r28,r3,4
+        subi    r4,r4,4
         li      r3,0
         beq     LCompDWordDone
-        mtctr   r5
-        subi    r4,r4,4
 LCompDWordLoop:
         { load next chars }
         lwzu    r29,4(r28)
@@ -389,7 +406,7 @@ LCompDWordLoop:
         { calculate difference }
         sub.    r3,r29,r30
         { if chars not equal or at the end, we're ready }
-        bdnze     LCompDWordDone
+        bdnzt   cr0*4+eq, LCompDWordLoop
 LCompDWordDone:
 end ['r3','r4','r28','r29','r30','cr0','ctr'];
 
@@ -398,12 +415,15 @@ function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
 { input: r3 = buf, r4 = len, r5 = b                         }
 { output: r3 = position of found position (-1 if not found) }
 asm
+        {  load the begin of the buffer in the data cache }
+        dcbt    r0,r3
         { length = 0? }
         cmpli   r5,0
+        mtctr   r5
         subi    r29,r3,1
+        mr      r28,r29
         { assume not found }
         li      r3,-1
-        mtctr   r5
         { if yes, do nothing }
         beq     LIndexChar0Done
         subi    r3,r3,1
@@ -412,11 +432,11 @@ LIndexChar0Loop:
         cmpli   cr1,r30,0
         cmpl    r30,r4
         beq     cr1,LIndexChar0Done
-        bdnzne  LIndexChar0Loop
+        bdnzf   cr0*4+eq, LIndexChar0Loop
         bne     LIndexChar0Done
-        sub     r3,r29,r3
-LIndexCharDone:
-end ['r3','r4','r29','r30','cr0','ctr'];
+        sub     r3,r29,r28
+LIndexChar0Done:
+end ['r3','r4','r28','r29','r30','cr0','ctr'];
 
 { all FPC_HELP_* are still missing (JM) }
 
@@ -432,6 +452,8 @@ assembler;
 asm
         { load length source }
         lbz     r30,0(r4)
+        {  load the begin of the dest buffer in the data cache }
+        dcbtst  r0,r5
 
         { put min(length(sstr),len) in r3 }
         subc    r29,r3,r30    { r29 := r3 - r30                              }
@@ -453,7 +475,10 @@ end ['r3','r4','r5','r29','r30','cr0','ctr'];
 
 {
   $Log$
-  Revision 1.4  2001-03-03 13:53:36  jonas
+  Revision 1.5  2001-07-07 12:46:12  jonas
+    * some small bugfixes and cache optimizations
+
+  Revision 1.4  2001/03/03 13:53:36  jonas
     * fixed small bug in move
 
   Revision 1.3  2001/03/02 13:24:10  jonas

+ 18 - 1
rtl/powerpc/set.inc

@@ -140,6 +140,8 @@ asm
   stw    r7,saveR7
   stw    r8,saveR8
   rlwinm r6,r4,32-3,0,31-2    // divide by 8 to get starting and ending byte-
+  { load the set into the data cache }
+  dcbt   r3,r6
   rlwinm r7,r5,32-3,0,31-2    // address and clear two lowest bits to get
                               //  start/end longint address
   sub.   r7,r6,r7             // are bit lo and hi in the same longint?
@@ -215,6 +217,8 @@ procedure do_add_sets(set1,set2,dest : pointer);assembler;[public,alias:'FPC_SET
 var
   saveR6, saveR7, saveR8: longint;
 asm
+       {  load the begin of the first set in the data cache }
+       dcbt    r0,r3
        stw      r6,saveR6
        stw      r7,saveR7
        subi     r5,r5,4
@@ -244,6 +248,8 @@ procedure do_mul_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_M
 var
   saveR6, saveR7, saveR8: longint;
 asm
+       {  load the begin of the first set in the data cache }
+       dcbt    r0,r3
        stw      r6,saveR6
        stw      r7,saveR7
        subi     r5,r5,4
@@ -273,6 +279,8 @@ procedure do_sub_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_S
 var
   saveR6, saveR7, saveR8: longint;
 asm
+       {  load the begin of the first set in the data cache }
+       dcbt    r0,r3
        stw      r6,saveR6
        stw      r7,saveR7
        subi     r5,r5,4
@@ -302,6 +310,8 @@ procedure do_symdif_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SE
 var
   saveR6, saveR7, saveR8: longint;
 asm
+       {  load the begin of the first set in the data cache }
+       dcbt    r0,r3
        stw      r6,saveR6
        stw      r7,saveR7
        subi     r5,r5,4
@@ -331,6 +341,8 @@ procedure do_comp_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET_COM
 var
   saveR5, saveR6, saveR7: longint;
 asm
+       {  load the begin of the first set in the data cache }
+       dcbt    r0,r3
        stw      r5,saveR5
        mfctr    r5
        stw      r6,saveR6
@@ -359,6 +371,8 @@ procedure do_contains_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET
 var
   saveR5, saveR6, saveR7: longint;
 asm
+       {  load the begin of the first set in the data cache }
+       dcbt    r0,r3
        stw      r5,saveR5
        mfctr    r5
        stw      r6,saveR6
@@ -544,7 +558,10 @@ end;
 
 {
   $Log$
-  Revision 1.7  2001-03-03 13:54:26  jonas
+  Revision 1.8  2001-07-07 12:46:12  jonas
+    * some small bugfixes and cache optimizations
+
+  Revision 1.7  2001/03/03 13:54:26  jonas
     * changed 'bdnzeq  cr0' to 'bdnzt  cr0*4+eq'
 
   Revision 1.6  2000/10/07 14:42:16  jonas

+ 27 - 7
rtl/powerpc/strings.inc

@@ -24,6 +24,8 @@ function strcopy(dest,source : pchar) : pchar;assembler;
 asm
 {  in: dest in r3, source in r4  }
 {  out: result (dest) in r3      }
+        {  load the begin of the source string in the data cache }
+        dcbt    r0,r4
         {  get # of misaligned bytes  }
         rlwinm. r30,r4,0,31-2,31
         subfic  r30,r30,4
@@ -51,17 +53,20 @@ LStrCopyAligned:
         addis   r27,r27,0x0feff
         li      r26,0x08080
         addis    r26,r26,0x08081
-LStrCopyAlignedLoop:
 
-        {  load next 4 bytes  }
+        { load first 4 bytes  }
         lwzu    r28,4(r4)
 
+LStrCopyAlignedLoop:
         { test for zero byte }
         add     r30,r28,r27
         andc    r30,r30,r28
         and.    r30,r30,r26
         bne     LStrCopyEndFound
         stwu    r28,4(r29)
+        { load next 4 bytes (do it here so the load can begin while the }
+        { branch is processed)                                          }
+        lwzu    r28,4(r4)
         b       LStrCopyAlignedLoop
 LStrCopyEndFound:
         { result is either 0, 8, 16 or 24 depending on which byte is zero }
@@ -81,6 +86,8 @@ function strecopy(dest,source : pchar) : pchar;assembler;
 { in: dest in r3, source in r4        }
 { out: result (end of new dest) in r3 }
 asm
+        {  load the begin of the source string in the data cache }
+        dcbt    r0,r4
         {  get # of misaligned bytes  }
         rlwinm. r30,r4,0,31-2,31
         subfic  r30,r30,4
@@ -136,6 +143,8 @@ function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
 { in: dest in r3, source in r4, maxlen in r5 }
 { out: result (dest) in r3                   }
 asm
+        {  load the begin of the source string in the data cache }
+        dcbt    r0,r4
         mtctr   r5
         subi    r4,r4,1
         subi    r29,r3,1
@@ -143,7 +152,7 @@ LStrlCopyLoop:
         lbzu    r30,1(r4)
         cmpli   r30,0
         stbu    r30,1(r29)
-        bdnzne  LStrlCopyLoop
+        bdnzf   cr0*4+eq, LStrlCopyLoop
         { if we stopped because we copied a #0, we're done }
         beq     LStrlCopyDone
         { otherwise add the #0 }
@@ -157,6 +166,8 @@ function strlen(p : pchar) : longint;assembler;
 { in: p in r3                }
 { out: result (length) in r3 }
 asm
+        {  load the begin of the string in the data cache }
+        dcbt    r0,r3
         { empty/invalid string? }
         cmpli   r3,0
         { if yes, do nothing }
@@ -175,6 +186,8 @@ function strend(p : pchar) : pchar;assembler;
 { in: p in r3                  }
 { out: result (end of p) in r3 }
 asm
+        {  load the begin of the string in the data cache }
+        dcbt    r0,r3
         { empty/invalid string? }
         cmpli   r3,0
         { if yes, do nothing }
@@ -220,6 +233,8 @@ function strlcomp(str1,str2 : pchar;l : longint) : longint;assembler;
 { out: result (= 0 if strings equal, < 0 if str1 < str2, > 0 if str1 > str2 }
 {      in r3                                                                }
 asm
+        { load the begin of one of the strings in the data cache }
+        dcbt    r0,r3
         { use r28 instead of r3 for str1 since r3 contains result }
         cmpl    r5,0
         subi    r28,r3,1
@@ -240,7 +255,7 @@ LStrlCompLoop:
         { if they are equal and one is zero, then the other one is zero too }
         { and we're done as well (r3 also contains 0 then)                  }
         { otherwise loop (if ctr <> 0)                                      }
-        bdnzne  cr1,LStrlCompLoop
+        bdnzf  cr1*4+eq,LStrlCompLoop
 LStrlCompDone:
 end ['r3','r4','r28','r29','r30','cr0','cr1','ctr'];
 
@@ -313,11 +328,13 @@ function strlicomp(str1,str2 : pchar;l : longint) : longint;assembler;
 { in: str1 in r3, str2 in r4, l in r5                            }
 { out: result of case insensitive comparison (< 0, = 0, > 0)     }
 asm
+        {  load the begin of one of the strings in the data cache }
+        dcbt    r0,r3
         { use r28 instead of r3 for str1 since r3 contains result }
         cmpl    r5,0
         subi    r28,r3,1
         li      r3,0
-        beq     LStrlCompDone
+        beq-    LStrlCompDone
         mtctr   r5
         subi    r4,r4,1
 LStriCompLoop:
@@ -359,7 +376,7 @@ LStriCompEqual:
         { if they are equal and one is zero, then the other one is zero too }
         { and we're done as well (r3 also contains 0 then)                  }
         { otherwise loop (if ctr <> 0)                                      }
-        bdnzne  cr1,LStriCompLoop
+        bdnzf    cr1*4+eq,LStriCompLoop
 LStriCompDone:
 end ['r3','r4','r26','r27','r28','r29','r30','cr0','cr1','ctr'];
 
@@ -453,7 +470,10 @@ end ['r28','r29','r30','cr0','cr1'];
 
 {
   $Log$
-  Revision 1.6  2001-02-23 14:05:33  jonas
+  Revision 1.7  2001-07-07 12:46:12  jonas
+    * some small bugfixes and cache optimizations
+
+  Revision 1.6  2001/02/23 14:05:33  jonas
     * optimized strcopy/strecopy
 
   Revision 1.5  2001/02/11 17:59:14  jonas

+ 11 - 7
rtl/powerpc/stringss.inc

@@ -18,6 +18,8 @@
 
 function strpas(p : pchar) : string; assembler;
 asm
+        {  load the begin of the string in the data cache }
+        dcbt    r0,r3
         { load result address in r29 }
         li      r29,__RESULT@l
         addis   r29,__RESULT@ha
@@ -33,14 +35,13 @@ LStrPasLoop:
         lbzu    r30,1(r4)
         cmpli   r30,0
         stbu    r30,1(r29)
-        bdnzne  LStrPasLoop
+        bdnzf   cr0*4+eq, LStrPasLoop
 
         { get remaining count for length }
         mfctr   r30
-        subfic  r30,r30,255
-
         { if we stopped because of a terminating #0, decrease the length by 1 }
         mfcr    r4
+        subfic  r30,r30,255
         { put "equal" condition bit of cr0 in bit position 31 (= rightmost) }
         { and clear other bits                                              }
         rlwinm  r4,r4,cr0*4+eq+1,31,31
@@ -58,21 +59,24 @@ asm
         cmpli   r30,0
         mtctr   r30
         subi    r29,r3,1
-        beq     LStrCopyEmpty
+        beq     LStrPCopyEmpty
 LStrPCopyLoop:
         { copy everything }
         lbzu    r30,1(r4)
         stbu    r30,1(r29)
-        bdnz    LStrCopyLoop
+        bdnz    LStrPCopyLoop
         { add terminating #0 }
         li      r30,0
-LStrCopyEmpty:
+LStrPCopyEmpty:
         stb     r30,1(r29)
 end ['r4','r29','r30','cr0','ctr'];
 
 {
   $Log$
-  Revision 1.2  2001-02-11 12:15:03  jonas
+  Revision 1.3  2001-07-07 12:46:12  jonas
+    * some small bugfixes and cache optimizations
+
+  Revision 1.2  2001/02/11 12:15:03  jonas
     * some small optimizations and bugfixes
 
   Revision 1.1  2001/02/10 16:10:32  jonas