Răsfoiți Sursa

* conversion to compilerproc and to structure used by i386 rtl
* some bugfixes
* powerpc.inc is almost complete (only fillchar/word/dword, get_frame etc.
and the class helpers are still needed)
- removed unnecessary register saving in set.inc (thanks to compilerproc)
* use registers reserved for parameters as much as possible instead of
those reserved for local vars (since those have to be saved by the
caller anyway, while the ones for local vars have to be saved by the
callee)

Jonas Maebe 24 ani în urmă
părinte
comite
9baf1000ae
6 a modificat fișierele cu 1029 adăugiri și 690 ștergeri
  1. 501 207
      rtl/powerpc/powerpc.inc
  2. 249 289
      rtl/powerpc/set.inc
  3. 145 151
      rtl/powerpc/strings.inc
  4. 22 43
      rtl/powerpc/stringss.inc
  5. 48 0
      rtl/powerpc/strlen.inc
  6. 64 0
      rtl/powerpc/strpas.inc

+ 501 - 207
rtl/powerpc/powerpc.inc

@@ -26,153 +26,153 @@
 
 procedure Move(var source;var dest;count:longint);assembler;
 asm
-                {  load the begin of the source in the data cache }
-                dcbt    r0,r3
-                {  count <= 0 ?  }
-                cmpwi   cr0,r5,0
-                {  check if we have to do the move backwards because of overlap  }
-                sub     r30,r4,r3
-                {  carry := boolean(dest-source < count) = boolean(overlap) }
-                subc    r30,r30,r5
-
-                {  count < 11 ? (to decide whether we will move dwords or bytes  }
-                cmpwi   cr1,r5,11
-
-                {  if overlap, then r30 := -1 else r30 := 0  }
-                subfe   r30,r30,r30
-
-                {  count < 39 ? (32 + max. alignment (7) }
-                cmpwi   cr7,r5,39
-
-                {  if count <= 0, stop  }
-                ble     cr0,LMoveDone
-
-                {  if overlap, then r29 := count else r29 := 0  }
-                and     r29,r5,r30
-                {  if overlap, then point source and dest to the end  }
-                add     r3,r3,r29
-                add     r4,r4,r29
-                {  if overlap, then r29 := 0, else r29 := -1  }
-                not     r29,r30
-                {  if overlap, then r30 := -2, else r30 := 0  }
-                slwi    r30,r30,1
-                {  if overlap, then r30 := -1, else r30 := 1  }
-                addi    r30,r30,1
-                {  if overlap, then source/dest += -1, otherwise they stay }
-                {  After the next instruction, r3/r4 + r30 = next position }
-                {  to load/store from/to                                   }
-                add     r3,r3,r29
-                add     r4,r4,r29
-
-                {  if count < 11, copy everything byte by byte  }
-                blt     cr1,LMoveBytes
-
-                {  otherwise, guarantee 4 byte alignment for dest for starters  }
+          {  load the begin of the source in the data cache }
+          dcbt    0,r3
+          {  count <= 0 ?  }
+          cmpwi   cr0,r5,0
+          {  check if we have to do the move backwards because of overlap  }
+          sub     r10,r4,r3          { r10 := dest - source }
+          {  carry := boolean(dest-source < count) = boolean(overlap) }
+          subc    r10,r10,r5
+
+          {  count < 11 ? (to decide whether we will move dwords or bytes)  }
+          cmpwi   cr1,r5,11
+
+          {  if overlap, then r10 := -1 else r10 := 0  }
+          subfe   r10,r10,r10
+
+          {  count < 39 ? (32 + max. alignment (7)) }
+          cmpwi   cr7,r5,39
+
+          {  if count <= 0, stop  }
+          ble     cr0,LMoveDone
+
+          {  if overlap, then r0 := count else r0 := 0  }
+          and     r0,r5,r10
+          {  if overlap, then point source and dest to the end  }
+          add     r3,r3,r0
+          add     r4,r4,r0
+          {  if overlap, then r0 := 0, else r0 := -1  }
+          not     r0,r10
+          {  if overlap, then r10 := -2, else r10 := 0  }
+          slwi    r10,r10,1
+          {  if overlap, then r10 := -1, else r10 := 1  }
+          addi    r10,r10,1
+          {  if overlap, then source/dest += -1, otherwise they stay }
+          {  After the next instruction, r3/r4 + r10 = next position }
+          {  to load/store from/to                                   }
+          add     r3,r3,r0
+          add     r4,r4,r0
+
+          {  if count < 11, copy everything byte by byte  }
+          blt     cr1,LMoveBytes
+
+          {  otherwise, guarantee 4 byte alignment for dest for starters  }
 LMove4ByteAlignLoop:
-                lbzux   r29,r3,r30
-                stbux   r29,r4,r30
-                {  is dest now 4 aligned?  }
-                andi.   r29,r4,3
-                subi    r5,r5,1
-                {  while not aligned, continue  }
-                bne     cr0,LMove4ByteAlignLoop
-
-                { check for 8 byte alignment }
-                andi.   r29,r4,7
-                { we are going to copy one byte again (the one at the newly }
-                { aligned address), so increase count again                 }
-                addi    r5,r5,1
-                { count div 4 for number of dwords to copy }
-                srwi    r29,r5,2
-                {  if 11 <= count < 39, copy using dwords }
-                blt     cr7,LMoveDWords
-
-                { multiply the update count with 4 }
-                slwi    r30,r30,2
-
-                beq     cr0,L8BytesAligned
-
-                {  count >= 39 -> align to 8 byte boundary and then use the FPU  }
-                {  since we're already at 4 byte alignment, use dword store      }
-                lwz     r29,0(r3)
-                add     r3,r3,r30
-                stw     r29,0(r4)
-                add     r4,r4,r30
+          lbzux   r0,r3,r10
+          stbux   r0,r4,r10
+          {  is dest now 4 aligned?  }
+          andi.   r0,r4,3
+          subi    r5,r5,1
+          {  while not aligned, continue  }
+          bne     cr0,LMove4ByteAlignLoop
+
+          { check for 8 byte alignment }
+          andi.   r0,r4,7
+          { we are going to copy one byte again (the one at the newly }
+          { aligned address), so increase count by 1                  }
+          addi    r5,r5,1
+          { count div 4 for number of dwords to copy }
+          srwi    r0,r5,2
+          {  if 11 <= count < 39, copy using dwords }
+          blt     cr7,LMoveDWords
+
+          { multiply the update count with 4 }
+          slwi    r10,r10,2
+
+          beq     cr0,L8BytesAligned
+
+          {  count >= 39 -> align to 8 byte boundary and then use the FPU  }
+          {  since we're already at 4 byte alignment, use dword store      }
+          lwzux   r0,r3,r10          { NOTE(review): update form reads at r3+r10, whereas the removed lwz read at 0(r3) -- confirm the dword at the freshly aligned address is still copied }
+          stwux   r0,r4,r10          { same remark: writes at r4+r10, not at 0(r4) }
+          subi    r5,r5,4            { account for the dword just copied }
 L8BytesAligned:
-                { count div 32 ( >= 1, since count was >=39 }
-                srwi    r29,r5,5
-                { remainder }
-                andi.   r5,r5,31
-                { to decide if we will do some dword stores afterwards or not }
-                cmpwi   cr1,r5,11
-                mtctr   r29
-
-                {  r29 := count div 4, will be moved to ctr when copying dwords  }
-                srwi    r29,r5,2
-
-                {  adjust the update count: it will now be 8 or -8 depending on overlap  }
-                slwi    r30,r30,1
-
-                {  adjust source and dest pointers: because of the above loop, dest is now   }
-                {  aligned to 8 bytes. So if we substract r30 we will still have an 8 bytes  }
-                { aligned address)                                                           }
-                sub     r3,r3,r30
-                sub     r4,r4,r30
+          { count div 32 ( >= 1, since count was >=39 ) }
+          srwi    r0,r5,5
+          { remainder }
+          andi.   r5,r5,31
+          { to decide if we will do some dword stores (instead of only }
+          { byte stores) afterwards or not                             }
+          cmpwi   cr1,r5,11
+          mtctr   r0
+
+          {  r0 := count div 4, will be moved to ctr when copying dwords  }
+          srwi    r0,r5,2
+
+          {  adjust the update count: it will now be 8 or -8 depending on overlap  }
+          slwi    r10,r10,1
+
+          {  adjust source and dest pointers: because of the above loop, dest is now   }
+          {  aligned to 8 bytes. So if we subtract r10 we will still have an 8 bytes   }
+          {  aligned address                                                           }
+          sub     r3,r3,r10
+          sub     r4,r4,r10
 
 LMove32ByteLoop:
-                lfdux   f31,r3,r30
-                lfdux   f30,r3,r30
-                lfdux   f29,r3,r30
-                lfdux   f28,r3,r30
-                stfdux  f31,r4,r30
-                stfdux  f30,r4,r30
-                stfdux  f29,r4,r30
-                stfdux  f28,r4,r30
-                bdnz    LMove32ByteLoop
-
-                { cr0*4+eq is true if "count and 31" = 0 }
-                beq     cr0,LMoveDone
-
-                {  make r30 again -1 or 1, but first adjust source/dest pointers }
-                add     r3,r3,r30
-                add     r4,r4,r30
-                srawi   r30,r30,3
-                sub     r3,r3,r30
-                sub     r4,r4,r30
-
-                { cr1 contains whether count <= 11 }
-                ble     cr1,LMoveBytes
-                add     r3,r3,r30
-                add     r4,r4,r30
+          lfdux   f13,r3,r10
+          lfdux   f12,r3,r10
+          lfdux   f11,r3,r10
+          lfdux   f0,r3,r10
+          stfdux  f13,r4,r10
+          stfdux  f12,r4,r10
+          stfdux  f11,r4,r10
+          stfdux  f0,r4,r10
+          bdnz    LMove32ByteLoop
+
+          { cr0*4+eq is true if "count and 31" = 0 }
+          beq     cr0,LMoveDone
+
+          {  make r10 again -1 or 1, but first adjust source/dest pointers }
+          add     r3,r3,r10
+          add     r4,r4,r10
+          srawi   r10,r10,3
+          sub     r3,r3,r10
+          sub     r4,r4,r10
+
+          { cr1 contains whether the remaining count (count and 31) <= 11 }
+          ble     cr1,LMoveBytes
+          add     r3,r3,r10
+          add     r4,r4,r10
 
 LMoveDWords:
-                mtctr   r29
-                andi.   r5,r5,3
-                {  r30 * 4  }
-                slwi    r30,r30,2
-                sub     r3,r3,r30
-                sub     r4,r4,r30
+          mtctr   r0
+          andi.   r5,r5,3            { r5 := bytes left after the dword loop; cr0 is tested after that loop }
+          {  r10 * 4  }
+          slwi    r10,r10,2
+          sub     r3,r3,r10
+          sub     r4,r4,r10
 
 LMoveDWordsLoop:
-                lwzux   r29,r3,r30
-                stwux   r29,r4,r30
-                bdnz    LMoveDWordsLoop
-
-                beq     cr0,LMoveDone
-                {  make r30 again -1 or 1  }
-                add     r3,r3,r30
-                add     r4,r4,r30
-                srawi   r30,r30,2
-                sub     r3,r3,r30
-                sub     r4,r4,r30
+          lwzux   r0,r3,r10
+          stwux   r0,r4,r10
+          bdnz    LMoveDWordsLoop
+
+          beq     cr0,LMoveDone
+          {  make r10 again -1 or 1  }
+          add     r3,r3,r10
+          add     r4,r4,r10
+          srawi   r10,r10,2
+          sub     r3,r3,r10
+          sub     r4,r4,r10
 LMoveBytes:
-                mtctr   r5
+          mtctr   r5
 LMoveBytesLoop:
-                lbzux   r29,r3,r30
-                stbux   r29,r4,r30
-                bdnz    LMoveBytesLoop
+          lbzux   r0,r3,r10
+          stbux   r0,r4,r10
+          bdnz    LMoveBytesLoop
 LMoveDone:
-end ['R3','R4','R5','R29','R30','F28','F29','F30','F31','CTR','CR0','CR1','CR7'];
+end ['R0','R3','R4','R5','R10','F0','F11','F12','F13','CTR','CR0','CR1','CR7'];
 
 
 {$define FPC_SYSTEM_HAS_FILLCHAR}
@@ -263,23 +263,23 @@ function IndexByte(var buf;len:longint;b:byte):longint; assembler;
 { output: r3 = position of b in buf (-1 if not found) }
 asm
                 {  load the begin of the buffer in the data cache }
-                dcbt    r0,r3
-                cmpli   r4,0
+                dcbt    0,r3
+                cmplwi  r4,0            { len = 0? cr0 is tested by the beq below }
                 mtctr   r4
-                subi    r30,r3,1
-                mr      r28,r3
+                subi    r10,r3,1        { r10 := buf-1, so the first lbzu reads buf[0] }
+                mr      r0,r3           { r0 := buf, subtracted from the match address below }
                 { assume not found }
                 li      r3,-1
                 beq     LIndexByteDone
 LIndexByteLoop:
-                lbzu    r29,1(r30)
-                cmpl    r29,r5
+                lbzu    r9,1(r10)       { advance r10 by one byte and load that byte }
+                cmplw   r9,r5           { equal to the value in r5? }
                 bdnzf   cr0*4+eq,LIndexByteLoop
                 { r3 still contains -1 here }
                 bne     LIndexByteDone
-                sub     r3,r30,r28
+                sub     r3,r10,r0       { r3 := address of the match - buf }
 LIndexByteDone:
-end ['r3','r28','r29','r30','cr0','ctr'];
+end ['r0','r3','r9','r10','cr0','ctr'];
 
 
 {$define FPC_SYSTEM_HAS_INDEXWORD}
@@ -288,23 +288,24 @@ function IndexWord(var buf;len:longint;b:word):longint; assembler;
 { output: r3 = position of b in buf (-1 if not found) }
 asm
                 {  load the begin of the buffer in the data cache }
-                dcbt    r0,r3
-                cmpli   r4,0
+                dcbt    0,r3
+                cmplwi  r4,0            { len = 0? cr0 is tested by the beq below }
                 mtctr   r4
-                subi    r30,r3,2
-                mr      r28,r3
+                subi    r10,r3,2        { r10 := buf-2, so the first lhzu reads buf[0] }
+                mr      r0,r3           { r0 := buf, subtracted from the match address below }
                 { assume not found }
                 li      r3,-1
                 beq     LIndexWordDone
 LIndexWordLoop:
-                lhzu    r29,2(r30)
-                cmpl    r29,r5
+                lhzu    r9,2(r10)       { advance r10 by one word and load that word }
+                cmplw   r9,r5           { equal to the value in r5? }
                 bdnzf   cr0*4+eq,LIndexWordLoop
                 { r3 still contains -1 here }
                 bne     LIndexWordDone
-                sub     r3,r30,r28
+                sub     r3,r10,r0       { r3 := byte offset of the match }
+                srwi    r3,r3,1         { convert the byte offset to a word index }
 LIndexWordDone:
-end ['r3','r28','r29','r30','cr0','ctr'];
+end ['r0','r3','r9','r10','cr0','ctr'];
 
 
 {$define FPC_SYSTEM_HAS_INDEXDWORD}
@@ -313,23 +313,24 @@ function IndexDWord(var buf;len:longint;b:DWord):longint; assembler;
 { output: r3 = position of b in buf (-1 if not found) }
 asm
                 {  load the begin of the buffer in the data cache }
-                dcbt    r0,r3
-                cmpli   r4,0
+                dcbt    0,r3
+                cmplwi  r4,0            { len = 0? cr0 is tested by the beq below }
                 mtctr   r4
-                subi    r30,r3,4
-                mr      r28,r3
+                subi    r10,r3,4        { r10 := buf-4, so the first lwzu reads buf[0] }
+                mr      r0,r3           { r0 := buf, subtracted from the match address below }
                 { assume not found }
                 li      r3,-1
                 beq     LIndexDWordDone
 LIndexDWordLoop:
-                lwzu    r29,4(r30)
-                cmpl    r29,r5
+                lwzu    r9,4(r10)       { scan pointer is r10 now (r30 is no longer set up here) }
+                cmplw   r9,r5           { equal to the value in r5? }
                 bdnzf   cr0*4+eq, LIndexDWordLoop
                 { r3 still contains -1 here }
                 bne     LIndexDWordDone
-                sub     r3,r30,r28
+                sub     r3,r10,r0       { r3 := byte offset of the match }
+                srwi    r3,r3,2         { convert the byte offset to a dword index }
 LIndexDWordDone:
-end ['r3','r28','r29','r30','cr0','ctr'];
+end ['r0','r3','r9','r10','cr0','ctr'];
 
 {$define FPC_SYSTEM_HAS_COMPAREBYTE}
 function CompareByte(var buf1,buf2;len:longint):longint; assembler;
@@ -338,24 +338,24 @@ function CompareByte(var buf1,buf2;len:longint):longint; assembler;
 { note: almost direct copy of strlcomp() from strings.inc         }
 asm
         {  load the begin of the first buffer in the data cache }
-        dcbt    r0,r3
-        { use r28 instead of r3 for buf1 since r3 contains result }
-        cmpl    r5,0
+        dcbt    0,r3
+        { use r8 (not r0: as a load base that means 0) for buf1, r3 holds the result }
+        cmplwi  r5,0            { len = 0? cr0 is tested by the beq below }
         mtctr   r5
-        subi    r28,r3,1
+        subi    r8,r3,1         { r8 := buf1-1, so the first lbzu reads buf1[0] }
         subi    r4,r4,1
         li      r3,0
         beq     LCompByteDone
 LCompByteLoop:
         { load next chars }
-        lbzu    r29,1(r28)
-        lbzu    r30,1(r4)
+        lbzu    r9,1(r8)
+        lbzu    r10,1(r4)
         { calculate difference }
-        sub.    r3,r29,r30
+        sub.    r3,r9,r10       { r3 := buf1 byte - buf2 byte, also sets cr0 }
         { if chars not equal or at the end, we're ready }
         bdnzt   cr0*4+eq, LCompByteLoop
 LCompByteDone:
-end ['r3','r4','r28','r29','r30','cr0','ctr'];
+end ['r3','r4','r8','r9','r10','cr0','ctr'];
 
 {$define FPC_SYSTEM_HAS_COMPAREWORD}
 function CompareWord(var buf1,buf2;len:longint):longint; assembler;
@@ -364,24 +364,24 @@ function CompareWord(var buf1,buf2;len:longint):longint; assembler;
 { note: almost direct copy of strlcomp() from strings.inc         }
 asm
         {  load the begin of the first buffer in the data cache }
-        dcbt    r0,r3
-        { use r28 instead of r3 for buf1 since r3 contains result }
-        cmpl    r5,0
+        dcbt    0,r3
+        { use r8 (not r0: as a load base that means 0) for buf1, r3 holds the result }
+        cmplwi  r5,0            { len = 0? cr0 is tested by the beq below }
         mtctr   r5
-        subi    r28,r3,2
+        subi    r8,r3,2         { r8 := buf1-2, so the first lhzu reads buf1[0] }
         subi    r4,r4,2
         li      r3,0
         beq     LCompWordDone
 LCompWordLoop:
         { load next chars }
-        lhzu    r29,2(r28)
-        lhzu    r30,2(r4)
+        lhzu    r9,2(r8)
+        lhzu    r10,2(r4)
         { calculate difference }
-        sub.    r3,r29,r30
+        sub.    r3,r9,r10       { r3 := buf1 word - buf2 word, also sets cr0 }
         { if chars not equal or at the end, we're ready }
         bdnzt   cr0*4+eq, LCompWordLoop
 LCompWordDone:
-end ['r3','r4','r28','r29','r30','cr0','ctr'];
+end ['r3','r4','r8','r9','r10','cr0','ctr'];
 
 
 {$define FPC_SYSTEM_HAS_COMPAREDWORD}
@@ -391,24 +391,24 @@ function CompareDWord(var buf1,buf2;len:longint):longint; assembler;
 { note: almost direct copy of strlcomp() from strings.inc         }
 asm
         {  load the begin of the first buffer in the data cache }
-        dcbt    r0,r3
-        { use r28 instead of r3 for buf1 since r3 contains result }
-        cmpl    r5,0
+        dcbt    0,r3
+        { use r8 (not r0: as a load base that means 0) for buf1, r3 holds the result }
+        cmplwi  r5,0            { len = 0? cr0 is tested by the beq below }
         mtctr   r5
-        subi    r28,r3,4
+        subi    r8,r3,4         { r8 := buf1-4, so the first lwzu reads buf1[0] }
         subi    r4,r4,4
         li      r3,0
         beq     LCompDWordDone
 LCompDWordLoop:
         { load next chars }
-        lwzu    r29,4(r28)
-        lwzu    r30,4(r4)
+        lwzu    r9,4(r8)
+        lwzu    r10,4(r4)
         { calculate difference }
-        sub.    r3,r29,r30
+        sub.    r3,r9,r10       { NOTE(review): a 32-bit difference can overflow, so its sign may not reflect the ordering -- confirm callers only test for zero }
         { if chars not equal or at the end, we're ready }
         bdnzt   cr0*4+eq, LCompDWordLoop
 LCompDWordDone:
-end ['r3','r4','r28','r29','r30','cr0','ctr'];
+end ['r3','r4','r8','r9','r10','cr0','ctr'];
 
 {$define FPC_SYSTEM_HAS_INDEXCHAR0}
 function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
@@ -416,66 +416,360 @@ function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
 { output: r3 = position of found position (-1 if not found) }
 asm
         {  load the begin of the buffer in the data cache }
-        dcbt    r0,r3
+        dcbt    0,r3
         { length = 0? }
-        cmpli   r5,0
-        mtctr   r5
-        subi    r29,r3,1
-        mr      r28,r29
+        cmplwi  r4,0            { the count is taken from r4 }
+        mtctr   r4
+        subi    r9,r3,1         { r9 := buf-1, so the first lbzu reads buf[0] }
+        mr      r0,r9           { r0 := buf-1 as well }
         { assume not found }
         li      r3,-1
         { if yes, do nothing }
         beq     LIndexChar0Done
         subi    r3,r3,1
 LIndexChar0Loop:
-        lbzu    r30,1(r29)
-        cmpli   cr1,r30,0
-        cmpl    r30,r4
+        lbzu    r10,1(r9)       { advance r9 by one byte and load that byte }
+        cmplwi  cr1,r10,0       { cr1: terminating #0 reached? }
+        cmplw   r10,r5          { cr0: equal to the value in r5? }
         beq     cr1,LIndexChar0Done
         bdnzf   cr0*4+eq, LIndexChar0Loop
         bne     LIndexChar0Done
-        sub     r3,r29,r28
+        sub     r3,r9,r0        { NOTE(review): r0 is buf-1 here, so this yields offset+1, unlike IndexByte where r0 = buf -- confirm }
 LIndexChar0Done:
-end ['r3','r4','r28','r29','r30','cr0','ctr'];
+end ['r0','r3','r4','r9','r10','cr0','ctr'];
+
+
+{****************************************************************************
+                              Object Helpers
+****************************************************************************}
+
+{define FPC_SYSTEM_HAS_FPC_HELP_CONSTRUCTOR}
+(*
+use generic implementation for now
+procedure fpc_help_constructor; assembler; [public,alias:'FPC_HELP_CONSTRUCTOR']; {$ifdef hascompilerproc} compilerproc; {$endif}
+*)
+
+{$define FPC_SYSTEM_HAS_FPC_HELP_FAIL}
+procedure fpc_help_fail;assembler;[public,alias:'FPC_HELP_FAIL']; {$ifdef hascompilerproc} compilerproc; {$endif}
+assembler
+asm
+!!!!!!!!!!!
+end;
+
+
+{$define FPC_SYSTEM_HAS_FPC_HELP_DESTRUCTOR}
+(*
+use generic implementation for now
+procedure fpc_help_destructor;assembler;[public,alias:'FPC_HELP_DESTRUCTOR']; {$ifdef hascompilerproc} compilerproc; {$endif}
+*)
+
+{$define FPC_SYSTEM_HAS_FPC_NEW_CLASS}
+procedure fpc_new_class;assembler;[public,alias:'FPC_NEW_CLASS']; {$ifdef hascompilerproc} compilerproc; {$endif}
+assembler;
+asm
+!!!!!!!!!!!
+end;
+
+
+{$define FPC_SYSTEM_HAS_FPC_DISPOSE_CLASS}
+procedure fpc_dispose_class;assembler;[public,alias:'FPC_DISPOSE_CLASS']; {$ifdef hascompilerproc} compilerproc; {$endif}
+assembler;
+asm
+!!!!!!!!!!!
+end;
+
+{$define FPC_SYSTEM_HAS_FPC_HELP_FAIL_CLASS}
+procedure fpc_help_fail_class;assembler;[public,alias:'FPC_HELP_FAIL_CLASS']; {$ifdef hascompilerproc} compilerproc; {$endif}
+{ a non-zero class must always be disposed
+  VMT is always at pos 0 }
+assembler;
+asm
+!!!!!!!!!!!
+end;
 
-{ all FPC_HELP_* are still missing (JM) }
 
 
+{define FPC_SYSTEM_HAS_FPC_CHECK_OBJECT}
+{ we want the stack for debugging !! PM }
+(*
+use generic implementation for now
+procedure fpc_check_object(obj : pointer);[public,alias:'FPC_CHECK_OBJECT']; {$ifdef hascompilerproc} compilerproc; {$endif}
+*)
+
+{define FPC_SYSTEM_HAS_FPC_CHECK_OBJECT_EXT}
+(*
+use generic implementation for now
+procedure fpc_check_object_ext;assembler;[public,alias:'FPC_CHECK_OBJECT_EXT']; {$ifdef hascompilerproc} compilerproc; {$endif}
+*)
+
 {****************************************************************************
                                  String
 ****************************************************************************}
 
 {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COPY}
-procedure int_strcopy(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_COPY'];
+function fpc_shortstr_to_shortstr(len:longint; const sstr: shortstring): shortstring; [public,alias: 'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
 assembler;
-{ input: r3: len, sstr: r4, dstr: r5 }
+{ input: r3: pointer to result, r4: len, r5: sstr }
 asm
         { load length source }
-        lbz     r30,0(r4)
+        lbz     r10,0(r5)
+        {  load the begin of the dest buffer in the data cache }
+        dcbtst  0,r3
+
+        { put min(length(sstr),len) in r4 }
+        subc    r0,r4,r10     { r0 := r4 - r10, carry set if r4 >= r10        }
+        subfe   r4,r4,r4      { if r4 >= r10 then r4' := 0 else r4' := -1     }
+        and     r4,r0,r4      { if r4 >= r10 then r4' := 0 else r4' := r4-r10 }
+        add     r4,r4,r10     { if r4 >= r10 then r4' := r10 else r4' := r4   }
+
+        cmplwi  r4,0
+        { put length in ctr }
+        mtctr   r4
+        stb     r4,0(r3)
+        { nothing to copy (bdnz with ctr = 0 would wrap around) }
+        beq     LShortStrToShortStrDone
+LShortStrToShortStrLoop:
+        lbzu    r0,1(r5)
+        stbu    r0,1(r3)
+        bdnz    LShortStrToShortStrLoop
+LShortStrToShortStrDone:
+end ['r0','r3','r4','r5','r10','cr0','ctr'];
+
+
+{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COPY}
+procedure fpc_shortstr_copy(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_COPY'];
+assembler;
+{ input: r3: len, r4: sstr, r5: dstr }
+asm
+        { load length source }
+        lbz     r10,0(r4)
         {  load the begin of the dest buffer in the data cache }
-        dcbtst  r0,r5
+        dcbtst  0,r5
 
         { put min(length(sstr),len) in r3 }
-        subc    r29,r3,r30    { r29 := r3 - r30                              }
-        subme   r3,r3,r3      { if r3 >= r4 then r3' := 0 else r3' := -1     }
-        and     r3,r29,r3     { if r3 >= r4 then r3' := 0 else r3' := r3-r30 }
-        add     r3,r3,r30     { if r3 >= r4 then r3' := r30 else r3' := r3   }
+        subc    r0,r3,r10    { r0 := r3 - r10, carry set if r3 >= r10        }
+        subfe   r3,r3,r3     { if r3 >= r10 then r3' := 0 else r3' := -1     }
+        and     r3,r0,r3     { if r3 >= r10 then r3' := 0 else r3' := r3-r10 }
+        add     r3,r3,r10    { if r3 >= r10 then r3' := r10 else r3' := r3   }
 
-        cmpli   r3,0
+        cmplwi  r3,0
         { put length in ctr }
         mtctr   r3
         stb     r3,0(r5)
-        beq     LShortStrCopyDone
-LShortStrCopyLoop:
-        lbzu    r29,1(r4)
-        stbu    r29,1(r5)
-        bdnz    LShortStrCopyLoop
-end ['r3','r4','r5','r29','r30','cr0','ctr'];
+        { nothing to copy (bdnz with ctr = 0 would wrap around) }
+        beq     LShortStrCopyDone2
+LShortStrCopyLoop2:
+        lbzu    r0,1(r4)
+        stbu    r0,1(r5)
+        bdnz    LShortStrCopyLoop2
+LShortStrCopyDone2:
+end ['r0','r3','r4','r5','r10','cr0','ctr'];
+
+
+function fpc_shortstr_concat(const s1: shortstring): shortstring; compilerproc;
+{ expects that results (r3) contains a pointer to the current string and s1 }
+{ (r4) a pointer to the one that has to be concatenated                     }
+{ NOTE(review): the length byte of the result is not updated here --        }
+{ confirm whether the caller is expected to do that                         }
+assembler;
+asm
+      { load length s1 }
+      lbz     r9, 0(r4)
+      { load length result }
+      lbz     r10, 0(r3)
+      { go to last current character of result }
+      add     r3,r10,r3
+
+      { calculate min(length(s1),255-length(result)) }
+      subfic  r10,r10,255  { r10 := 255 - length(result) = room left       }
+      subc    r8,r9,r10    { r8 := r9 - r10, carry set if r9 >= r10        }
+      subfe   r9,r9,r9     { if r9 >= r10 then r9' := 0 else r9' := -1     }
+      and     r9,r8,r9     { if r9 >= r10 then r9' := 0 else r9' := r9-r10 }
+      add     r9,r9,r10    { if r9 >= r10 then r9' := r10 else r9' := r9   }
+
+      { nothing to append? (bdnz with ctr = 0 would wrap around) }
+      cmplwi  r9,0
+      { and concatenate }
+      mtctr   r9
+      beq     LShortStrConcatDone
+LShortStrConcatLoop:
+      lbzu    r10,1(r4)
+      stbu    r10,1(r3)
+      bdnz    LShortStrConcatLoop
+LShortStrConcatDone:
+end ['r3','r4','r8','r9','r10','cr0','ctr'];
+
+
+{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
+function fpc_shortstr_compare(const dstr,sstr:shortstring): longint; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
+{ input:  r3: dstr, r4: sstr                                              }
+{ output: r3 = difference of the first differing dword/byte (sstr-dstr), }
+{         or length(sstr)-length(dstr) if the common part is equal       }
+assembler;
+asm
+      { load length sstr }
+      lbz     r9,0(r4)
+      { load length dstr }
+      lbz     r10,0(r3)
+      { save their difference for later and      }
+      { calculate min(length(sstr),length(dstr)) }
+      subc    r0,r9,r10    { r0 := r9 - r10, carry set if r9 >= r10        }
+      subfe   r9,r9,r9     { if r9 >= r10 then r9' := 0 else r9' := -1     }
+      and     r9,r0,r9     { if r9 >= r10 then r9' := 0 else r9' := r9-r10 }
+      add     r9,r9,r10    { if r9 >= r10 then r9' := r10 else r9' := r9   }
+
+      { first compare dwords (length/4) }
+      srwi.   r7,r9,2
+      { keep length mod 4 for the ends }
+      rlwinm  r9,r9,0,30,31
+      { already check whether length mod 4 = 0 }
+      cmplwi  cr1,r9,0
+      { keep dstr in r8: r3 receives the result, and the byte loop needs }
+      { r8 to be valid even when the dword loop is skipped               }
+      mr      r8,r3
+      { length div 4 in ctr for loop }
+      mtctr   r7
+      { if length < 4, goto byte comparing }
+      beq     LShortStrCompare1
+      { setup for use of update forms of load/store with dwords }
+      subi    r4,r4,3
+      subi    r8,r8,3
+LShortStrCompare4Loop:
+      lwzu    r3,4(r4)
+      lwzu    r10,4(r8)
+      sub.    r3,r3,r10
+      bdnzt   cr0*4+eq,LShortStrCompare4Loop
+      { r3 contains result if we stopped because of "ne" flag }
+      bne     LShortStrCompareDone
+      { setup for use of update forms of load/store with bytes }
+      addi    r4,r4,3
+      addi    r8,r8,3
+LShortStrCompare1:
+      { if comparelen mod 4 = 0, skip this and return the difference in }
+      { lengths                                                         }
+      beq     cr1,LShortStrCompareLen
+      { ctr is 0 at this point, load it with the number of bytes left }
+      mtctr   r9
+LShortStrCompare1Loop:
+      lbzu    r3,1(r4)
+      lbzu    r10,1(r8)
+      sub.    r3,r3,r10
+      bdnzt   cr0*4+eq,LShortStrCompare1Loop
+      bne     LShortStrCompareDone
+LShortStrCompareLen:
+      { also return result in flags, maybe we can use this in the CG }
+      mr.     r3,r0
+LShortStrCompareDone:
+end ['r0','r3','r4','r7','r8','r9','r10','cr0','cr1','ctr'];
+
+
+{$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
+function fpc_pchar_to_shortstr(p:pchar):shortstring;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
+assembler;
+{$include strpas.inc}
+
+
+{$define FPC_SYSTEM_HAS_STRLEN}
+function strlen(p:pchar):longint;assembler;
+{$include strlen.inc}
+
+
+{$define FPC_SYSTEM_HAS_GET_FRAME}
+function get_frame:longint;assembler;
+asm
+        !!!!!!! depends on ABI !!!!!!!!
+end ['r3'];
+
+
+{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
+function get_caller_addr(framebp:longint):longint;assembler;
+asm
+        !!!!!!! depends on ABI !!!!!!!!
+end ['r3'];
+
+
+{$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
+function get_caller_frame(framebp:longint):longint;assembler;
+asm
+        !!!!!!! depends on ABI !!!!!!!!
+end ['r3'];
+
+{$define FPC_SYSTEM_HAS_ABS_LONGINT}
+function abs(l:longint):longint; assembler;[internconst:in_const_abs];
+asm
+        srawi   r0,r3,31        { r0 := 0 if l >= 0, -1 if l < 0 (sign mask) }
+        add     r3,r0,r3        { r3 := l + mask }
+        xor     r3,r3,r0        { r3 := (l + mask) xor mask = abs(l) }
+end ['r0','r3'];
+
+
+{****************************************************************************
+                                 Math
+****************************************************************************}
+
+{$define FPC_SYSTEM_HAS_ODD_LONGINT}
+function odd(l:longint):boolean;assembler;[internconst:in_const_odd];
+asm
+        rlwinm  r3,r3,0,31,31   { keep only the least significant bit of l }
+end ['r3'];
+
+
+{$define FPC_SYSTEM_HAS_SQR_LONGINT}
+function sqr(l:longint):longint;assembler;[internconst:in_const_sqr];
+asm
+        mullw   r3,r3,r3        { r3 := low 32 bits of l*l }
+end ['r3'];
+
+
+{$define FPC_SYSTEM_HAS_SPTR}
+Function Sptr : Longint;assembler;
+asm
+        mr    r3,sp             { return the current value of the stack pointer register }
+end ['r3'];
+
+
+{****************************************************************************
+                                 Str()
+****************************************************************************}
+
+{ int_str: generic implementation is used for now }
+
+
+{****************************************************************************
+                             Multithreading
+****************************************************************************}
+
+{ do a thread safe inc/dec }
+
+function declocked(var l : longint) : boolean;assembler;
+{ input:  address of l in r3                                      }
+{ output: boolean indicating whether l is zero after decrementing }
+asm
+LDecLockedLoop:
+{$ifdef MTRTL}
+    { atomic read-modify-write: retry if the reservation was lost }
+    lwarx   r10,0,r3
+    subi    r10,r10,1
+    stwcx.  r10,0,r3
+    bne-    LDecLockedLoop
+{$else MTRTL}
+    lwzx    r10,0,r3
+    subi    r10,r10,1
+    stw     r10,0(r3)
+{$endif MTRTL}
+    { r3 := 1 if r10 = 0 (cntlzw gives 32), else 0 }
+    cntlzw  r3,r10
+    srwi    r3,r3,5
+end ['r3','r10','cr0'];
+
+procedure inclocked(var l : longint);assembler;
+{ input: address of l in r3 }
+asm
+LIncLockedLoop:
+{$ifdef MTRTL}
+    { atomic read-modify-write: retry if the reservation was lost }
+    lwarx   r10,0,r3
+    addi    r10,r10,1
+    stwcx.  r10,0,r3
+    bne-    LIncLockedLoop
+{$else MTRTL}
+    lwzx    r10,0,r3
+    addi    r10,r10,1
+    stw     r10,0(r3)
+{$endif MTRTL}
+end ['r3','r10','cr0'];
 
 
 {
   $Log$
-  Revision 1.5  2001-07-07 12:46:12  jonas
+  Revision 1.6  2001-09-27 15:30:29  jonas
+    * conversion to compilerproc and to structure used by i386 rtl
+    * some bugfixes
+    * powerpc.inc is almost complete (only fillchar/word/dword, get_frame etc.
+      and the class helpers are still needed)
+    - removed unnecessary register saving in set.inc (thanks to compilerproc)
+    * use registers reserved for parameters as much as possible instead of
+      those reserved for local vars (since those have to be saved by the
+      caller anyway, while the ones for local vars have to be saved by the
+      callee)
+
+  Revision 1.5  2001/07/07 12:46:12  jonas
     * some small bugfixes and cache optimizations
 
   Revision 1.4  2001/03/03 13:53:36  jonas

+ 249 - 289
rtl/powerpc/set.inc

@@ -15,7 +15,7 @@
 
  **********************************************************************}
 
-procedure do_load_small(p : pointer;l:longint);assembler;[public,alias:'FPC_SET_LOAD_SMALL'];
+function fpc_set_load_small(l: fpc_small_set): fpc_normal_set;assembler;[public,alias:'FPC_SET_LOAD_SMALL']; compilerproc;
 {
   load a normal set p from a smallset l
 
@@ -23,375 +23,324 @@ procedure do_load_small(p : pointer;l:longint);assembler;[public,alias:'FPC_SET_
 }
 asm
         stw     r4,(r3)
-        li      r4,0
-        stw     r4,4(r3)
-        stw     r4,8(r3)
-        stw     r4,12(r3)
-        stw     r4,16(r3)
-        stw     r4,20(r3)
-        stw     r4,24(r3)
-        stw     r4,28(r3)
-end ['R4'];
-
-
-procedure do_create_element(p : pointer;b : byte);assembler;[public,alias:'FPC_SET_CREATE_ELEMENT'];
+        li      r0,0            { r0 := 0, stored into the remaining seven dwords }
+        stw     r0,4(r3)
+        stw     r0,8(r3)
+        stw     r0,12(r3)
+        stw     r0,16(r3)
+        stw     r0,20(r3)
+        stw     r0,24(r3)
+        stw     r0,28(r3)       { last dword: 32 bytes written in total }
+end ['r0'];
+
+
+function fpc_set_create_element(b : byte): fpc_normal_set;assembler;[public,alias:'FPC_SET_CREATE_ELEMENT']; compilerproc;
 {
-  create a new set in p from an element b
+  create a new set (the function result) which contains only the element b
 
-  on entry: p in r3, b in r4
+  on entry: pointer to result in r3, b in r4
+
+  element b is stored as bit (1 shl (b mod 32)) of the dword at byte offset
+  ((b div 32) * 4) of the set
 }
-var
-  saveR5, saveR6: longint;
 asm
-        stw     r5,saveR5
-        li      r5,0
-        stw     r6,saveR6
-        stw     r5,(r3)
-        stw     r5,4(r3)
-        li      r6,1
-        stw     r5,8(r3)
-        stw     r5,12(r3)
-        stw     r5,16(r3)
-        stw     r5,20(r3)
-
-        // r6 := 1 shl r4[27-31] -> bit index in dword (shift instructions
+        // clear all 8 dwords (32 bytes) of the result
+        li      r0,0
+        stw     r0,0(r3)
+        stw     r0,4(r3)
+        stw     r0,8(r3)
+        stw     r0,12(r3)
+        stw     r0,16(r3)
+        stw     r0,20(r3)
+        stw     r0,24(r3)
+        stw     r0,28(r3)
+
-        // with count in register only consider lower 5 bits of this register)
-        slw     r6,r6,r4
-
-        stw     r5,24(r3)
-        stw     r5,28(r3)
+        // r0 := 1 shl r4[27-31] -> bit index in dword. This has to be a rotate:
+        // rlwnm only considers the lower 5 bits of r4, while slw also looks at
+        // the sixth bit and returns 0 for every b with (b and 32) <> 0
+        li      r0,1
+        rlwnm   r0,r0,r4,0,31
 
         // get the index of the correct *dword* in the set
         // (((b div 8) div 4)*4= (b div 8) and not(3))
-        // r5 := (r4 rotl(32-3)) and (0x0fffffff8)
-        rlwinm  r5,r4,29,0,31-2
+        // r4 := (r4 rotl(32-3)) and 0x1c; the mask must not start at bit 0,
+        // because the rotate moves the 3 lowest bits of b into bits 0..2
+        rlwinm  r4,r4,32-3,27,31-2
 
         // store the result
-        stwx    r6,r3,r5
-        lwz     r5,saveR5
-        lwz     r6,saveR6
-end ['R4'];
+        stwx    r0,r3,r4
+end ['r0','r4'];
 
-procedure do_set_byte(p : pointer;b : byte);assembler;[public,alias:'FPC_SET_SET_BYTE'];
+function fpc_set_set_byte(const source: fpc_normal_set; b : byte): fpc_normal_set;assembler; compilerproc;
 {
-  add the element b to the set pointed by p
+  returns a copy of source to which the element b has been added
 
-  on entry: p in r3, b in r4
+  on entry: result in r3, source in r4, b in r5
 }
-var
-  saveR5, saveR6: longint;
 asm
-       stw      r5,saveR5
-       stw      r6,saveR6
+       // copy source to result (8 dwords, counted in ctr)
+       li       r0,8
+       mtctr    r0
+       subi     r4,r4,4
+       subi     r3,r3,4
+Lset_set_byte_copy:
+       lwzu     r0,4(r4)
+       stwu     r0,4(r3)
+       bdnz     Lset_set_byte_copy
+       // r3 points to the last dword of the result here (start + 28, not
+       // start + 32), so go back 28 bytes to get to the start again
+       subi     r3,r3,28
        // get the index of the correct *dword* in the set
-       rlwinm   r5,r4,29,0,31-2   // r5 := (r4 rotl(32-3)) and (0x0fffffff8)
+       // r0 := (r5 rotl(32-3)) and 0x1c; the mask must not start at bit 0,
+       // because the rotate moves the 3 lowest bits of b into bits 0..2
+       rlwinm   r0,r5,32-3,27,31-2
        // load dword in which the bit has to be set (and update r3 to this address)
-       lwzxu    r6,r3,r5
-       li       r5,1
+       lwzux    r4,r3,r0
+       li       r0,1
        // generate bit which has to be inserted
-       slw      r4,r5,r4
+       // (rotate instead of slw: slw returns 0 if (b and 32) <> 0, while
+       //  rlwnm only considers the lower 5 bits of the count)
+       rlwnm    r5,r0,r5,0,31
        // insert it
-       lwz      r5,saveR5
-       or       r4,r7,r4
-       lwz      r6,saveR6
+       or       r5,r4,r5
        // store result
-       stw      r4,(r3)
-end ['R3','R4'];
+       stw      r5,0(r3)
+end ['r0','r3','r4','r5','ctr'];
 
 
-procedure do_unset_byte(p : pointer;b : byte);assembler;[public,alias:'FPC_SET_UNSET_BYTE'];
+function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_set;assembler; compilerproc;
 {
-  suppresses the element b to the set pointed by p
+  returns a copy of source from which the element b has been removed
   used for exclude(set,element)
 
-  on entry: p in r3, b in r4
+  on entry: result in r3, source in r4, b in r5
 }
-var
-  saveR5, saveR6: longint;
 asm
-       stw      r5,saveR5
-       stw      r6,saveR6
+       // copy source to result (8 dwords, counted in ctr)
+       li       r0,8
+       mtctr    r0
+       subi     r4,r4,4
+       subi     r3,r3,4
+Lset_unset_byte_copy:
+       lwzu     r0,4(r4)
+       stwu     r0,4(r3)
+       bdnz     Lset_unset_byte_copy
+       // r3 points to the last dword of the result here (start + 28, not
+       // start + 32), so go back 28 bytes to get to the start again
+       subi     r3,r3,28
        // get the index of the correct *dword* in the set
-       rlwinm   r5,r4,29,0,31-2   // r5 := (r4 rotl(32-3)) and (0x0fffffff8)
-       // load dword in which the bit is (and update r3 to this address)
-       lwzxu    r6,r3,r5
-       li       r5,1
-       // generate bit which has to be cleared
-       slw      r4,r5,r4
-       lwz      r5,saveR5
+       // r0 := (r5 rotl(32-3)) and 0x1c; the mask must not start at bit 0,
+       // because the rotate moves the 3 lowest bits of b into bits 0..2
+       rlwinm   r0,r5,32-3,27,31-2
+       // load dword in which the bit has to be cleared (and update r3 to this address)
+       lwzux    r4,r3,r0
+       li       r0,1
+       // generate bit which has to be removed
+       // (rotate instead of slw: slw returns 0 if (b and 32) <> 0, while
+       //  rlwnm only considers the lower 5 bits of the count)
+       rlwnm    r5,r0,r5,0,31
        // remove it
-       andc     r4,r6,r4
-       lwz      r6,saveR6
+       andc     r5,r4,r5
        // store result
-       stw      r4,(r3)
-end ['R3','R4'];
+       // (the updated dword is in r5; r4 still holds the unmodified value)
+       stw      r5,0(r3)
+end ['r0','r3','r4','r5','ctr'];
 
 
-procedure do_set_range(p : pointer;l,h : byte);assembler;[public,alias:'FPC_SET_SET_RANGE'];
+function fpc_set_set_range(const orgset: fpc_normal_set; l,h : byte): fpc_normal_set;assembler; compilerproc;
 {
-  on entry: p in r3, l in r4, h in r5
+  returns a copy of orgset to which all elements of the range l..h have been
+  added (just the plain copy if l > h)
+
+  on entry: result in r3, ptr to orgset in r4, l in r5, h in r6
+
+  bit layout (the same one fpc_set_set_byte uses): element e is stored as bit
+  (1 shl (e mod 32)) of the dword at byte offset ((e div 32) * 4)
 }
-var
-  saveR6, saveR7, saveR8: longint;
 asm
-  cmplw  cr0,r4,r5
-  bg     cr0,.LSET_RANGE_EXIT
-  stw    r6,saveR6
-  stw    r7,saveR7
-  stw    r8,saveR8
-  rlwinm r6,r4,32-3,0,31-2    // divide by 8 to get starting and ending byte-
-  { load the set the data cache }
-  dcbt   r3,r6
-  rlwinm r7,r5,32-3,0,31-2    // address and clear two lowest bits to get
-                              //  start/end longint address
-  sub.   r7,r6,r7             // are bit lo and hi in the same longint?
-  rlwinm r5,r5,0,31-4,31      // hi := hi mod 32 (= "hi and 31", but the andi
-                              //  instr. only exists in flags modifying form)
-  eqv    r8,r8,r8             // r8 = $0x0ffffffff = bitmask to be inserted
-  subfic r5,r5,31             // hi := 31 - (hi mod 32) = shift count for later
-  srw    r8,r8,r4             // shift bitmask to clear bits below lo
-                              // note: shift right = opposite little endian!!
-  lwzxu  r4,r3,r6             // go to starting pos in set and load value
-                              //  (lo is not necessary anymore)
-  beq    .Lset_range_hi       // if bit lo and hi in same longint, keep
-                              //  current mask and adjust for hi bit
-  subic. r7,r7,4              // bit hi in next longint?
-  or     r4,r4,r8             // merge and
-  stw    r4,(r3)              // store current mask
-  eqv    r8,r8,r8             // new mask
-  lwzu   r4,4(r3)             // load next longint of set
-  beq    .Lset_range_hi       // bit hi in this longint -> go to adjust for hi
-.Lset_range_loop:
-  subic. r7,r7,4
-  stwu   r8,4(r3)             // fill longints in between with full mask
-  bne    .Lset_range_loop
-  lwzu    r4,4(r3)            // load next value from set
-.Lset_range_hi:               // in all cases, r3 here contains the address of
-                              //  the longint which contains the hi bit and r4
-                              //  contains this longint
-  slw    r7,r8,r5             // r7 := bitmask shl (31 - (hi mod 32)) =
-                              //  bitmask with bits higher than hi cleared
-                              //  (r8 = $0xffffffff unless the first beq was
-                              //   taken)
-  and    r8,r7,r8             // combine lo and hi bitmasks for this longint
-  or     r4,r4,r8             // and combine with existing set
-  stw    r4,(r3)              // store to set
-  lwz    r6,saver6
-  lwz    r7,saver7
-  lwz    r8,saver8
-.Lset_range_exit:
-end ['R3','R4','R5'];
+  // copy orgset to result (8 dwords, counted in ctr)
+  li       r0,8
+  mtctr    r0
+  subi     r4,r4,4
+  subi     r3,r3,4
+Lset_set_range_copy:
+  lwzu     r0,4(r4)
+  stwu     r0,4(r3)
+  bdnz     Lset_set_range_copy
+  // r3 points to the last dword of the result here (start + 28), so go back
+  // 28 bytes to get to the start again
+  subi     r3,r3,28
+
+  cmplw    cr0,r5,r6
+  // empty range -> the plain copy is the result
+  bgt      cr0,Lset_range_exit
+  // r4 := byte offset of the dword which contains bit lo,
+  // r9 := byte offset of the dword which contains bit hi
+  rlwinm   r4,r5,32-3,27,31-2
+  rlwinm   r9,r6,32-3,27,31-2
+  // r10 := $ffffffff = full bitmask (li sign-extends its operand)
+  li       r10,-1
+  // r9 := distance in bytes between both dwords (>= 0 because lo <= hi);
+  // cr0.eq is set if lo and hi are in the same dword. None of the
+  // instructions up to the beq below modifies cr0.
+  sub.     r9,r9,r4
+  // r5 := lo mod 32, r6 := 31 - (hi mod 32)
+  rlwinm   r5,r5,0,27,31
+  rlwinm   r6,r6,0,27,31
+  subfic   r6,r6,31
+  // r0 := lo mask = full mask with the bits below lo cleared
+  slw      r0,r10,r5
+  // r6 := hi mask = full mask with the bits above hi cleared
+  srw      r6,r10,r6
+  // go to the dword which contains lo and load it
+  lwzux    r5,r3,r4
+  // lo and hi in the same dword -> both masks apply to this dword
+  beq      Lset_range_hi
+  // first dword: set all bits from lo upwards
+  or       r5,r5,r0
+  stw      r5,0(r3)
+  // the dword which contains hi is only limited by the hi mask
+  mr       r0,r10
+  // r9 := number of dwords strictly between the first and the last one
+  srwi     r9,r9,2
+  subic.   r9,r9,1
+  beq      Lset_range_last
+  mtctr    r9
+Lset_range_loop:
+  // fill the dwords in between with the full mask
+  stwu     r10,4(r3)
+  bdnz     Lset_range_loop
+Lset_range_last:
+  // advance to the dword which contains hi and load it
+  lwzu     r5,4(r3)
+Lset_range_hi:
+  // in all cases, r3 contains the address of the dword with the hi bit here,
+  // r5 the current value of this dword, r0 the lo mask and r6 the hi mask
+  and      r0,r0,r6
+  or       r5,r5,r0
+  stw      r5,0(r3)
+Lset_range_exit:
+end ['r0','r3','r4','r5','r6','r9','r10','cr0','ctr'];
 
 
-procedure do_in_byte(p : pointer;b : byte);assembler;[public,alias:'FPC_SET_IN_BYTE'];
+function fpc_set_in_byte(const p: fpc_normal_set; b : byte): boolean;assembler;[public,alias:'FPC_SET_IN_BYTE'];
 {
-  tests if the element b is in the set p, the **zero** flag is cleared if it's present
+  tests if the element b is in the set p; the result is true (1) if it is
+  present and false (0) otherwise. cr0 still reflects the result as well: the
+  **zero** (eq) flag is cleared if b is present
+
+  NOTE(review): unlike the other set helpers in this file, this one is not
+  declared "compilerproc" - confirm whether that is intentional
 
   on entry: p in r3, b in r4
 }
-var
-  saveR5: longint;
 asm
-       stw      r5,saveR5
        // get the index of the correct *dword* in the set
-       // r5 := (r4 rotl(32-3)) and (0x0fffffff8)
-       rlwinm   r5,r4,29,0,31-2
+       // r0 := (r4 rotl(32-3)) and 0x1c; the mask must not start at bit 0,
+       // because the rotate moves the 3 lowest bits of b into bits 0..2
+       rlwinm   r0,r4,32-3,27,31-2
        // load dword in which the bit has to be tested
-       lwzx     r3,r3,r5
-       li       r5,1
-       // generate bit which has to be tested
-       slw      r4,r5,r4
-       lwz      r5,saveR5
-       // test it
-       and.     r3,r3,r4
-end ['R4'];
+       lwzx     r3,r3,r0
+       // r4 := -b, so that rotating left by r4 (only its lower 5 bits count)
+       // is the same as rotating right by (b mod 32)
+       neg      r4,r4
+       // test it: r3 := (r3 shr (b mod 32)) and 1, which gives a proper 0/1
+       // boolean instead of the raw masked bit
+       rlwnm.   r3,r3,r4,31,31
+end ['r0','r3','r4','cr0'];
 
 
 
-procedure do_add_sets(set1,set2,dest : pointer);assembler;[public,alias:'FPC_SET_ADD_SETS'];
+function fpc_set_add_sets(const set1,set2: fpc_normal_set): fpc_normal_set;assembler;[public,alias:'FPC_SET_ADD_SETS']; compilerproc;
 {
-  adds set1 and set2 into set dest
+  returns the union of set1 and set2 (result := set1 or set2, dword by dword)
-
-  on entry: set1 in r3, set2 in r4, dest in r5
+  on entry: result in r3, set1 in r4, set2 in r5
 }
-var
-  saveR6, saveR7, saveR8: longint;
 asm
-       {  load the begin of the first set in the data cache }
-       dcbt    r0,r3
-       stw      r6,saveR6
-       stw      r7,saveR7
+       {  touch the begin of the result set for storing; this has to be     }
+       {  dcbtst, dcbst would write the cache block back to memory instead  }
+       dcbtst   0,r3
+       {  ctr := number of dwords in a set  }
+       li       r0,8
+       mtctr    r0
        subi     r5,r5,4
-       li       r6,8
-       stw      r8,saveR8
-       subi     r3,4
-       subi     r4,4
-   .LMADDSETS1:
-      subic.    r6,r6,1
-      lwzu      r7,4(r3)
-      lwzu      r8,4(r4)
-      or        r7,r7,r8
-      stwu      r7,4(r5)
-      bne       cr0,.LMADDSETS1
-      lwz       r6,saveR6
-      lwz       r7,saveR7
-      lwz       r8,saveR8
-end ['R3','R4','R5'];
-
-
-
-procedure do_mul_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_MUL_SETS'];
+       subi     r4,r4,4
+       subi     r3,r3,4
+   LMADDSETS1:
+      {  r0 := next dword of set1, r10 := next dword of set2  }
+      lwzu      r0,4(r4)
+      lwzu      r10,4(r5)
+      or        r0,r0,r10
+      stwu      r0,4(r3)
+      bdnz      LMADDSETS1
+end ['r0','r3','r4','r5','r10','ctr'];
+
+
+
+function fpc_set_mul_sets(const set1,set2: fpc_normal_set): fpc_normal_set;assembler;[public,alias:'FPC_SET_MUL_SETS']; compilerproc;
 {
-  multiplies (takes common elements of) set1 and set2 result put in dest
+  returns the intersection (common elements) of set1 and set2
+  (result := set1 and set2, dword by dword)
-  on entry: set1 in r3, set2 in r4, dest in r5
+  on entry: result in r3, set1 in r4, set2 in r5
 }
-var
-  saveR6, saveR7, saveR8: longint;
 asm
-       {  load the begin of the first set in the data cache }
-       dcbt    r0,r3
-       stw      r6,saveR6
-       stw      r7,saveR7
+       {  touch the begin of the result set for storing; this has to be     }
+       {  dcbtst, dcbst would write the cache block back to memory instead  }
+       dcbtst   0,r3
+       {  ctr := number of dwords in a set  }
+       li       r0,8
+       mtctr    r0
        subi     r5,r5,4
-       li       r6,8
-       stw      r8,saveR8
-       subi     r3,4
-       subi     r4,4
-   .LMADDSETS1:
-       subic.   r6,r6,1
-       lwzu     r7,4(r3)
-       lwzu     r8,4(r4)
-       and      r7,r7,r8
-       stwu     r7,4(r5)
-       bne      cr0,.LMADDSETS1
-       lwz      r6,saveR6
-       lwz      r7,saveR7
-       lwz      r8,saveR8
-end ['R3','R4','R5'];
-
-
-procedure do_sub_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_SUB_SETS'];
+       subi     r4,r4,4
+       subi     r3,r3,4
+   LMMULSETS1:
+      {  r0 := next dword of set1, r10 := next dword of set2  }
+      lwzu      r0,4(r4)
+      lwzu      r10,4(r5)
+      and       r0,r0,r10
+      stwu      r0,4(r3)
+      bdnz      LMMULSETS1
+end ['r0','r3','r4','r5','r10','ctr'];
+
+
+function fpc_set_sub_sets(const set1,set2: fpc_normal_set): fpc_normal_set;assembler;[public,alias:'FPC_SET_SUB_SETS']; compilerproc;
 {
-  computes the diff from set1 to set2 result in dest
+  returns the difference set1 - set2, i.e. the elements of set1 which are not
+  in set2 (result := set1 and not(set2), dword by dword)
-
-  on entry: set1 in r3, set2 in r4, dest in r5
+  on entry: result in r3, set1 in r4, set2 in r5
 }
-var
-  saveR6, saveR7, saveR8: longint;
 asm
-       {  load the begin of the first set in the data cache }
-       dcbt    r0,r3
-       stw      r6,saveR6
-       stw      r7,saveR7
+       {  touch the begin of the result set for storing; this has to be     }
+       {  dcbtst, dcbst would write the cache block back to memory instead  }
+       dcbtst   0,r3
+       {  ctr := number of dwords in a set  }
+       li       r0,8
+       mtctr    r0
        subi     r5,r5,4
-       li       r6,8
-       stw      r8,saveR8
-       subi     r3,4
-       subi     r4,4
-   .LMSUBSETS1:
-       subi.    r6,r6,1
-       lwzu     r8,4(r4)
-       lwzu     r7,4(r3)
-       andc     r8,r8,r7
-       stwu     r8,4(r5)
-       bne      cr0,.LMSUBSETS1
-       lwz      r6,saveR6
-       lwz      r7,saveR7
-       lwz      r8,saveR8
-end ['R3','R4','R5'];
-
-
-procedure do_symdif_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_SYMDIF_SETS'];
+       subi     r4,r4,4
+       subi     r3,r3,4
+   LMSUBSETS1:
+      {  r0 := next dword of set1, r10 := next dword of set2  }
+      lwzu      r0,4(r4)
+      lwzu      r10,4(r5)
+      {  r0 := r0 and not(r10)  }
+      andc      r0,r0,r10
+      stwu      r0,4(r3)
+      bdnz      LMSUBSETS1
+end ['r0','r3','r4','r5','r10','ctr'];
+
+
+function fpc_set_symdif_sets(const set1,set2: fpc_normal_set): fpc_normal_set;assembler;[public,alias:'FPC_SET_SYMDIF_SETS']; compilerproc;
 {
-   computes the symetric diff from set1 to set2 result in dest
+  returns the symmetric difference of set1 and set2, i.e. the elements which
+  are in exactly one of both sets (result := set1 xor set2, dword by dword)
-
-  on entry: set1 in r3, set2 in r4, dest in r5
+  on entry: result in r3, set1 in r4, set2 in r5
 }
-var
-  saveR6, saveR7, saveR8: longint;
 asm
-       {  load the begin of the first set in the data cache }
-       dcbt    r0,r3
-       stw      r6,saveR6
-       stw      r7,saveR7
+       {  touch the begin of the result set for storing; this has to be     }
+       {  dcbtst, dcbst would write the cache block back to memory instead  }
+       dcbtst   0,r3
+       {  ctr := number of dwords in a set  }
+       li       r0,8
+       mtctr    r0
        subi     r5,r5,4
-       li       r6,8
-       stw      r8,saveR8
-       subi     r3,4
-       subi     r4,4
-   .LMSYMDIFSETS1:
-       subi.    r6,r6,1
-       lwzu     r7,4(r3)
-       lwzu     r8,4(r4)
-       xor      r7,r7,r8
-       stwu     r7,4(r5)
-       bne      cr0,.LMSYMDIFSETS1
-       lwz      r6,saveR6
-       lwz      r7,saveR7
-       lwz      r8,saveR8
-end ['R3','R4','R5'];
-
-
-procedure do_comp_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET_COMP_SETS'];
+       subi     r4,r4,4
+       subi     r3,r3,4
+   LMSYMDIFSETS1:
+      {  r0 := next dword of set1, r10 := next dword of set2  }
+      lwzu      r0,4(r4)
+      lwzu      r10,4(r5)
+      xor       r0,r0,r10
+      stwu      r0,4(r3)
+      bdnz      LMSYMDIFSETS1
+end ['r0','r3','r4','r5','r10','ctr'];
+
+
+function fpc_set_comp_sets(const set1,set2: fpc_normal_set): boolean;assembler;[public,alias:'FPC_SET_COMP_SETS']; compilerproc;
 {
-  compares set1 and set2 zeroflag is set if they are equal
+  compares set1 and set2; the result is true (1) if they are equal and
+  false (0) otherwise. Since cr0 reflects the result, its eq bit is set on
+  exit if the sets are *not* equal
-
   on entry: set1 in r3, set2 in r4
 }
-var
-  saveR5, saveR6, saveR7: longint;
 asm
-       {  load the begin of the first set in the data cache }
-       dcbt    r0,r3
-       stw      r5,saveR5
-       mfctr    r5
-       stw      r6,saveR6
-       li       r6,8
-       stw      r7,saveR7
-       mtctr    r6
-       subi     r3,4
-       subi     r4,4
-    .LMCOMPSETS1:
-       lwzu     r6,4(r3)
-       lwzu     r7,4(r4)
-       cmplw    cr0,r6,r7
-       bdnzt    cr0*4+eq,.LMCOMPSETS1
-       mtctr    r5
-       lwz      r5,saveR5
-       lwz      r6,saveR6
-       lwz      r7,saveR7
-end ['R3','R4'];
-
-{$IfNDef NoSetInclusion}
-procedure do_contains_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET_CONTAINS_SETS'];
+       {  ctr := number of dwords in a set  }
+       li       r0,8
+       mtctr    r0
+       subi     r3,r3,4
+       subi     r4,r4,4
+    LMCOMPSETS1:
+       lwzu     r0,4(r3)
+       lwzu     r10,4(r4)
+       {  r0 := difference of both dwords, 0 if they are equal  }
+       sub.     r0,r0,r10
+       {  loop as long as dwords are left and the last two were equal  }
+       bdnzt    cr0*4+eq,LMCOMPSETS1
+       {  r3 := ord(r0 = 0): cntlzw returns 32 only for 0 and 32 shr 5 = 1  }
+       {  (shifting by 31 would turn the 32 into 0 and always return false) }
+       cntlzw   r3,r0
+       srwi.    r3,r3,5
+end ['r0','r3','r4','r10','cr0','ctr'];
+
+function fpc_set_contains_sets(const set1,set2: fpc_normal_set): boolean;assembler;[public,alias:'FPC_SET_CONTAINS_SETS']; compilerproc;
 {
-  on exit, zero flag is set if set1 <= set2 (set2 contains set1)
+  the result is true (1) if set1 <= set2 (set2 contains set1) and false (0)
+  otherwise. Since cr0 reflects the result, its eq bit is set on exit if
+  set2 does *not* contain set1
   on entry: set1 in r3, set2 in r4
 }
-var
-  saveR5, saveR6, saveR7: longint;
 asm
-       {  load the begin of the first set in the data cache }
-       dcbt    r0,r3
-       stw      r5,saveR5
-       mfctr    r5
-       stw      r6,saveR6
-       li       r6,8
-       stw      r7,saveR7
-       mtctr    r6
-       subi     r3,4
-       subi     r4,4
-    .LMCOMPSETS1:
-       lwzu     r7,4(r4)
-       lwzu     r6,4(r3)
-       andc.    r7,r6,r7
-       bdnzt    cr0*4+eq,.LMCOMPSETS1
-       mtctr    r5
-       lwz      r5,saveR5
-       lwz      r6,saveR6
-       lwz      r7,saveR7
-end ['R3','R4'];
-{$EndIf SetInclusion}
+       {  ctr := number of dwords in a set  }
+       li       r0,8
+       mtctr    r0
+       subi     r3,r3,4
+       subi     r4,r4,4
+    LMCONTAINSSETS1:
+       lwzu     r0,4(r3)
+       lwzu     r10,4(r4)
+       { set1 and not(set2) = 0? }
+       andc.    r0,r0,r10
+       {  loop as long as dwords are left and no stray element was found  }
+       bdnzt    cr0*4+eq,LMCONTAINSSETS1
+       {  r3 := ord(r0 = 0): cntlzw returns 32 only for 0 and 32 shr 5 = 1  }
+       {  (shifting by 31 would turn the 32 into 0 and always return false) }
+       cntlzw   r3,r0
+       srwi.    r3,r3,5
+end ['r0','r3','r4','r10','cr0','ctr'];
+
+
 
 {$ifdef LARGESETS}
 
@@ -442,13 +391,13 @@ asm
       movl set2,%ebx
       movl dest,%edi
       movl size,%ecx
-  .LMADDSETSIZES1:
+  LMADDSETSIZES1:
       lodsl
       orl (%ebx),%eax
       stosl
       addl $4,%ebx
       decl %ecx
-      jnz .LMADDSETSIZES1
+      jnz LMADDSETSIZES1
 end;
 
 
@@ -462,13 +411,13 @@ asm
          movl set2,%ebx
          movl dest,%edi
          movl size,%ecx
-     .LMMULSETSIZES1:
+     LMMULSETSIZES1:
          lodsl
          andl (%ebx),%eax
          stosl
          addl $4,%ebx
          decl %ecx
-         jnz .LMMULSETSIZES1
+         jnz LMMULSETSIZES1
 end;
 
 
@@ -478,7 +427,7 @@ asm
          movl set2,%ebx
          movl dest,%edi
          movl size,%ecx
-     .LMSUBSETSIZES1:
+     LMSUBSETSIZES1:
          lodsl
          movl (%ebx),%edx
          notl %edx
@@ -486,7 +435,7 @@ asm
          stosl
          addl $4,%ebx
          decl %ecx
-         jnz .LMSUBSETSIZES1
+         jnz LMSUBSETSIZES1
 end;
 
 
@@ -499,14 +448,14 @@ asm
       movl set2,%ebx
       movl dest,%edi
       movl size,%ecx
-  .LMSYMDIFSETSIZE1:
+  LMSYMDIFSETSIZE1:
       lodsl
       movl (%ebx),%edx
       xorl %edx,%eax
       stosl
       addl $4,%ebx
       decl %ecx
-      jnz .LMSYMDIFSETSIZE1
+      jnz LMSYMDIFSETSIZE1
 end;
 
 
@@ -515,17 +464,17 @@ asm
       movl set1,%esi
       movl set2,%edi
       movl size,%ecx
-  .LMCOMPSETSIZES1:
+  LMCOMPSETSIZES1:
       lodsl
       movl (%edi),%edx
       cmpl %edx,%eax
-      jne  .LMCOMPSETSIZEEND
+      jne  LMCOMPSETSIZEEND
       addl $4,%edi
       decl %ecx
-      jnz .LMCOMPSETSIZES1
+      jnz LMCOMPSETSIZES1
       { we are here only if the two sets are equal
         we have zero flag set, and that what is expected }
-  .LMCOMPSETSIZEEND:
+  LMCOMPSETSIZEEND:
 end;
 
 {$IfNDef NoSetInclusion}
@@ -537,19 +486,19 @@ asm
         movl set1,%esi
         movl set2,%edi
         movl size,%ecx
-    .LMCONTAINSSETS2:
+    LMCONTAINSSETS2:
         movl (%esi),%eax
         movl (%edi),%edx
         andl %eax,%edx
         cmpl %edx,%eax  {set1 and set2 = set1?}
-        jne  .LMCONTAINSSETEND2
+        jne  LMCONTAINSSETEND2
         addl $4,%esi
         addl $4,%edi
         decl %ecx
-        jnz .LMCONTAINSSETS2
+        jnz LMCONTAINSSETS2
         { we are here only if set2 contains set1
           we have zero flag set, and that what is expected }
-    .LMCONTAINSSETEND2:
+    LMCONTAINSSETEND2:
 end;
 {$EndIf NoSetInclusion}
 
@@ -558,7 +507,18 @@ end;
 
 {
   $Log$
-  Revision 1.8  2001-07-07 12:46:12  jonas
+  Revision 1.9  2001-09-27 15:30:29  jonas
+    * conversion to compilerproc and to structure used by i386 rtl
+    * some bugfixes
+    * powerpc.inc is almost complete (only fillchar/word/dword, get_frame etc
+      and the class helpers are still needed)
+    - removed unnecessary register saving in set.inc (thanks to compilerproc)
+    * use registers reserved for parameters as much as possible instead of
+      those reserved for local vars (since those have to be saved by the
+      caller anyway, while the ones for local vars have to be saved by the
+      callee)
+
+  Revision 1.8  2001/07/07 12:46:12  jonas
     * some small bugfixes and cache optimizations
 
   Revision 1.7  2001/03/03 13:54:26  jonas

+ 145 - 151
rtl/powerpc/strings.inc

@@ -25,61 +25,61 @@ asm
 {  in: dest in r3, source in r4  }
 {  out: result (dest) in r3      }
         {  load the begin of the source string in the data cache }
-        dcbt    r0,r4
+        dcbt    0,r4
         {  get # of misaligned bytes  }
-        rlwinm. r30,r4,0,31-2,31
-        subfic  r30,r30,4
-        mtctr   r30
+        rlwinm. r10,r4,0,31-2,31
+        subfic  r10,r10,4
+        mtctr   r10
         {  since we have to return dest intact, use another register for  }
         {  dest in the copy loop                                          }
-        subi    r29,r3,1
+        subi    r9,r3,1
         subi    r4,r4,1
         beq     LStrCopyAligned
 LStrCopyAlignLoop:
         {  load next byte  }
-        lbzu    r28,1(r4)
+        lbzu    r0,1(r4)
         {  end of string?  }
-        cmpli   cr0,r28,0
+        cmpli   cr0,r0,0
         {  store byte  }
-        stbu    r28,1(r29)
+        stbu    r0,1(r9)
         {  loop if misaligned bytes left and not end of string found }
         bdnzf   eq,LStrCopyAlignLoop
         beq     LStrCopyDone
 LStrCopyAligned:
         subi    r4,r4,3
-        subi    r29,r29,3
+        subi    r9,r9,3
         { setup magic constants }
-        li      r27,0x0feff
-        addis   r27,r27,0x0feff
-        li      r26,0x08080
-        addis    r26,r26,0x08081
+        li      r8,0x0feff
+        addis   r8,r8,0x0feff
+        li      r7,0x08080
+        addis    r7,r7,0x08081
 
         { load first 4 bytes  }
-        lwzu    r28,4(r4)
+        lwzu    r0,4(r4)
 
 LStrCopyAlignedLoop:
         { test for zero byte }
-        add     r30,r28,r27
-        andc    r30,r30,r28
-        and.    r30,r30,r26
+        add     r10,r0,r8
+        andc    r10,r10,r0
+        and.    r10,r10,r7
         bne     LStrCopyEndFound
-        stwu    r28,4(r29)
+        stwu    r0,4(r9)
         { load next 4 bytes (do it here so the load can begin while the }
         { the branch is processed)                                      }
-        lwzu    r28,4(r4)
+        lwzu    r0,4(r4)
         b       LStrCopyAlignedLoop
 LStrCopyEndFound:
         { result is either 0, 8, 16 or 24 depending on which byte is zero }
-        cntlzw  r30,r30
-        addi    r29,r29,3
+        cntlzw  r10,r10
+        addi    r9,r9,3
 LStrCopyWrapUpLoop:
-        subic.  r30,r30,8
-        rlwinm  r28,r28,8,0,31
-        stbu    r28,1(r29)
+        subi    r10,r10,8
+        rlwinm  r0,r0,8,0,31
+        stbu    r0,1(r9)
         bge     LStrCopyWrapUpLoop
 LStrCopyDone:
         {  r3 still contains dest here  }
-end ['r4','r26','r27','r28','r29','r30','cr0','ctr'];
+end ['r4','r7','r8','r0','r9','r10','cr0','ctr'];
 
 
 function strecopy(dest,source : pchar) : pchar;assembler;
@@ -87,21 +87,21 @@ function strecopy(dest,source : pchar) : pchar;assembler;
 { out: result (end of new dest) in r3 }
 asm
         {  load the begin of the source string in the data cache }
-        dcbt    r0,r4
+        dcbt    0,r4
         {  get # of misaligned bytes  }
-        rlwinm. r30,r4,0,31-2,31
-        subfic  r30,r30,4
-        mtctr   r30
+        rlwinm. r10,r4,0,31-2,31
+        subfic  r10,r10,4
+        mtctr   r10
         subi    r3,r3,1
         subi    r4,r4,1
         beq     LStrCopyAligned
 LStrCopyAlignLoop:
         {  load next byte  }
-        lbzu    r28,1(r4)
+        lbzu    r0,1(r4)
         {  end of string?  }
-        cmpli   cr0,r28,0
+        cmpli   cr0,r0,0
         {  store byte  }
-        stbu    r28,1(r3)
+        stbu    r0,1(r3)
         {  loop if misaligned bytes left and not end of string found }
         bdnzf   eq,LStrCopyAlignLoop
         beq     LStrCopyDone
@@ -109,34 +109,34 @@ LStrCopyAligned:
         subi    r4,r4,3
         subi    r3,r3,3
         { setup magic constants }
-        li      r27,0x0feff
-        addis   r27,r27,0x0feff
-        li      r29,0x08080
-        addis    r29,r29,0x08081
+        li      r8,0x0feff
+        addis   r8,r8,0x0feff
+        li      r9,0x08080
+        addis    r9,r9,0x08081
 LStrCopyAlignedLoop:
 
         {  load next 4 bytes  }
-        lwzu    r28,4(r4)
+        lwzu    r0,4(r4)
 
         { test for zero byte }
-        add     r30,r28,r27
-        andc    r30,r30,r28
-        and.    r30,r30,r29
+        add     r10,r0,r8
+        andc    r10,r10,r0
+        and.    r10,r10,r9
         bne     LStrCopyEndFound
-        stwu    r28,4(r3)
+        stwu    r0,4(r3)
         b       LStrCopyAlignedLoop
 LStrCopyEndFound:
         { result is either 0, 8, 16 or 24 depending on which byte is zero }
-        cntlzw  r30,r30
+        cntlzw  r10,r10
         addi    r3,r3,3
 LStrCopyWrapUpLoop:
-        subic.  r30,r30,8
-        rlwinm  r28,r28,8,0,31
-        stbu    r28,1(r3)
+        subic.  r10,r10,8
+        rlwinm  r0,r0,8,0,31
+        stbu    r0,1(r3)
         bge     LStrCopyWrapUpLoop
 LStrCopyDone:
         {  r3 contains new dest here  }
-end ['r3','r4','r27','r28','r3','r30','cr0','ctr'];
+end ['r3','r4','r8','r0','r3','r10','cr0','ctr'];
 
 
 function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
@@ -144,61 +144,44 @@ function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
 { out: result (dest) in r3                   }
 asm
         {  load the begin of the source string in the data cache }
-        dcbt    r0,r4
+        dcbt    0,r4
         mtctr   r5
         subi    r4,r4,1
-        subi    r29,r3,1
+        subi    r0,r3,1
 LStrlCopyLoop:
-        lbzu    r30,1(r4)
-        cmpli   r30,0
-        stbu    r30,1(r29)
+        lbzu    r10,1(r4)
+        cmpli   r10,0
+        stbu    r10,1(r0)
         bdnzf   cr0*4+eq, LStrlCopyLoop
         { if we stopped because we copied a #0, we're done }
         beq     LStrlCopyDone
         { otherwise add the #0 }
-        li      r30,0
-        stb     r30,1(r29)
+        li      r10,0
+        stb     r10,1(r0)
 LStrlCopyDone:
-end ['r4','r29','r30','cr0'];
+end ['r0','r4','r30','cr0'];
 
 
 function strlen(p : pchar) : longint;assembler;
-{ in: p in r3                }
-{ out: result (length) in r3 }
-asm
-        {  load the begin of the string in the data cache }
-        dcbt    r0,r3
-        { empty/invalid string? }
-        cmpli   r3,0
-        { if yes, do nothing }
-        beq     LStrLenDone
-        subi    r29,r3,1
-LStrLenLoop:
-        lbzu    r30,1(r29)
-        cmpli   r30,0
-        bne     LStrLenLoop
-        sub     r3,r29,r3
-LStrLenDone:
-end ['r3','r4','r29','r30','cr0'];
-
+{$i strlen.inc}
 
 function strend(p : pchar) : pchar;assembler;
 { in: p in r3                  }
 { out: result (end of p) in r3 }
+{ scans the string byte by byte until a #0 has been loaded; r3 then holds  }
+{ the address of this #0. If p is nil, it is returned unchanged.            }
 asm
         {  load the begin of the string in the data cache }
-        dcbt    r0,r3
+        dcbt    0,r3
         { empty/invalid string? }
         cmpli   r3,0
         { if yes, do nothing }
         beq     LStrEndDone
+        { pre-decrement, since lbzu adds 1 to r3 before it loads the byte }
         subi    r3,r3,1
 LStrEndLoop:
-        lbzu    r30,1(r3)
-        cmpli   r30,0
+        lbzu    r0,1(r3)
+        cmpli   r0,0
         bne     LStrEndLoop
 LStrEndDone:
-end ['r3','r4','r30','cr0'];
+end ['r0','r3','r4','cr0'];
 
 
 function strcomp(str1,str2 : pchar) : longint;assembler;
@@ -206,17 +189,17 @@ function strcomp(str1,str2 : pchar) : longint;assembler;
 { out: result (= 0 if strings equal, < 0 if str1 < str2, > 0 if str1 > str2 }
 {      in r3                                                                }
 asm
-        { use r28 instead of r3 for str1 since r3 contains result }
-        subi    r28,r3,1
+        { use r0 instead of r3 for str1 since r3 contains result }
+        subi    r0,r3,1
         subi    r4,r4,1
 LStrCompLoop:
         { load next chars }
-        lbzu    r29,1(r28)
+        lbzu    r9,1(r0)
         { check if one is zero }
-        cmpli   cr1,r29,0
-        lbzu    r30,1(r4)
+        cmpli   cr1,r9,0
+        lbzu    r10,1(r4)
         { calculate difference }
-        sub.    r3,r29,r30
+        sub.    r3,r9,r10
         { if chars not equal, we're ready }
         bne     LStrCompDone
         { if they are equal and one is zero, then the other one is zero too }
@@ -224,7 +207,7 @@ LStrCompLoop:
         { otherwise loop                                                    }
         bne     cr1,LStrCompLoop
 LStrCompDone:
-end ['r3','r4','r28','r29','r30','cr0','cr1'];
+end ['r0','r3','r4','r9','r10','cr0','cr1'];
 
 
 function strlcomp(str1,str2 : pchar;l : longint) : longint;assembler;
@@ -234,22 +217,22 @@ function strlcomp(str1,str2 : pchar;l : longint) : longint;assembler;
 {      in r3                                                                }
 asm
         { load the begin of one of the strings in the data cache }
-        dcbt    r0,r3
-        { use r28 instead of r3 for str1 since r3 contains result }
-        cmpl    r5,0
-        subi    r28,r3,1
+        dcbt    0,r3
+        { use r9 instead of r3 for str1 since r3 contains result; r0 only  }
+        { holds character data below, because as the base of a load it is  }
+        { read as "no base register" (and the update form is then invalid) }
+        { l = 0? (immediate compare; cmpl would compare against a register) }
+        cmpli   r5,0
+        subi    r9,r3,1
         li      r3,0
         beq     LStrlCompDone
         mtctr   r5
         subi    r4,r4,1
 LStrlCompLoop:
         { load next chars }
-        lbzu    r29,1(r28)
+        lbzu    r0,1(r9)
         { check if one is zero }
-        cmpli   cr1,r29,0
-        lbzu    r30,1(r4)
+        cmpli   cr1,r0,0
+        lbzu    r10,1(r4)
         { calculate difference }
-        sub.    r3,r29,r30
+        sub.    r3,r0,r10
         { if chars not equal, we're ready }
         bne     LStrlCompDone
         { if they are equal and one is zero, then the other one is zero too }
@@ -257,7 +240,7 @@ LStrlCompLoop:
         { otherwise loop (if ctr <> 0)                                      }
         bdnzf  cr1*4+eq,LStrlCompLoop
 LStrlCompDone:
-end ['r3','r4','r28','r29','r30','cr0','cr1','ctr'];
+end ['r0','r3','r4','r9','r10','cr0','cr1','ctr'];
 
 
 function stricomp(str1,str2 : pchar) : longint;assembler;
@@ -294,7 +277,7 @@ LStriCompLoop:
         subfic   r3,r29,'Z'
         { if r29 < 'A' then r27 := 0 else r27 := $20 }
         andi     r27,r27,0x020
-        { if r29 > Z then r26 := 0 else r26 := $ffffffff
+        { if r29 > Z then r26 := 0 else r26 := $ffffffff }
         subfe    r26,r26,r26
         { same for r30 }
         subfic   r3,r30,'Z'
@@ -329,48 +312,48 @@ function strlicomp(str1,str2 : pchar;l : longint) : longint;assembler;
 { out: result of case insensitive comparison (< 0, = 0, > 0)     }
 asm
         {  load the begin of one of the string in the data cache }
-        dcbt    r0,r3
-        { use r28 instead of r3 for str1 since r3 contains result }
-        cmpl    r5,0
-        subi    r28,r3,1
+        dcbt    0,r3
+        { use r9 instead of r3 for str1 since r3 contains result; r0 only  }
+        { holds character data below, because as the base of a load it is  }
+        { read as "no base register" (and the update form is then invalid) }
+        { l = 0? (immediate compare; cmpl would compare against a register) }
+        cmpli   r5,0
+        subi    r9,r3,1
         li      r3,0
-        beq-    LStrlCompDone
+        { nothing to compare: leave through the exit label of this routine }
+        beq-    LStriCompDone
         mtctr   r5
         subi    r4,r4,1
 LStriCompLoop:
         { load next chars }
-        lbzu    r29,1(r28)
+        lbzu    r0,1(r9)
         { check if one is zero }
-        cmpli   cr1,r29,0
-        lbzu    r30,1(r4)
+        cmpli   cr1,r0,0
+        lbzu    r10,1(r4)
         { calculate difference }
-        sub.    r3,r29,r30
+        sub.    r3,r0,r10
         { if chars are equal, no further test is necessary }
         beq+    LStriCompEqual
 
         { see stricomp for explanation }
-        li       r27,0
-        li       r25,0
-
-        subic    r3,r29,'A'
-        addme    r27,r27
-        subic    r3,r30,'A'
-        addme    r25,r25
-
-        subfic   r3,r29,'Z'
-        andi     r27,r27,0x020
-        subfe    r26,r26,r26
-        subfic   r3,r30,'Z'
-        andi     r25,r25,0x020
-        subfe    r24,r24,r24
+        { r5 is free to use as a temp here: l already lives in ctr }
+        li       r8,0
+        li       r5,0
+
+        subic    r3,r0,'A'
+        addme    r8,r8
+        subic    r3,r10,'A'
+        addme    r5,r5
+
+        subfic   r3,r0,'Z'
+        andi     r8,r8,0x020
+        subfe    r7,r7,r7
+        subfic   r3,r10,'Z'
+        andi     r5,r5,0x020
+        { r6 (parameter register) replaces the callee-saved r24 }
+        subfe    r6,r6,r6
 
-        and      r27,r27,r26
-        and      r25,r25,r24
-        add      r29,r29,r27
-        add      r30,r30,r25
+        and      r8,r8,r7
+        and      r5,r5,r6
+        add      r0,r0,r8
+        add      r10,r10,r5
 
         { compare again }
-        sub.     r3,r29,r30
-        bne      LStrCompDone
+        sub.     r3,r0,r10
+        { still different after case folding: done, result is in r3 }
+        bne      LStriCompDone
 LStriCompEqual:
         { if they are equal and one is zero, then the other one is zero too }
@@ -378,7 +361,7 @@ LStriCompEqual:
         { otherwise loop (if ctr <> 0)                                      }
         bdnzf    cr1*4+eq,LStriCompLoop
 LStriCompDone:
+        { NOTE(review): stricomp also defines a label LStriCompLoop; if asm }
+        { labels are not scoped per routine these names collide - confirm   }
-end ['r3','r4','r26','r27','r28','r29','r30','cr0','cr1','ctr'];
+end ['r0','r3','r4','r5','r6','r7','r8','r9','r10','cr0','cr1','ctr'];
 
 
 function strscan(p : pchar;c : char) : pchar;assembler;
@@ -389,13 +372,13 @@ asm
         beq     LStrScanDone
         subi    r3,r3,1
 LStrScanLoop:
-        lbzu    r30,1(r3)
-        cmpl    cr1,r30,r4
-        cmpli   r30,0
+        lbzu    r0,1(r3)
+        cmpl    cr1,r0,r4
+        cmpli   r0,0
         beq     cr1,LStrScanDone
         bne     LStrScanLoop
 LStrScanDone:
-end ['r3','r4','r30','cr0','cr1'];
+end ['r0','r3','r4','cr0','cr1'];
 
 
 function strrscan(p : pchar;c : char) : pchar;assembler;
@@ -404,73 +387,84 @@ asm
         cmpli   r3,0
         { if yes, do nothing }
         beq     LStrrScanDone
-        { make r29 $ffffffff, later on we take min(r29,r3) }
-        li      r29,0x0ffff
+        { make r0 $ffffffff, later on we take min(r0,r3) }
+        li      r0,0x0ffff
         subi    r3,r3,1
 LStrrScanLoop:
-        lbzu    r30,1(r3)
-        cmpl    cr1,r30,r4
-        cmpli   cr0,r30,0
+        lbzu    r10,1(r3)
+        cmpl    cr1,r10,r4
+        cmpli   cr0,r10,0
         bne+    cr1,LStrrScanNotFound
         { store address of found position }
-        mr      r29,r3
+        mr      r0,r3
 LStrrScanNotFound:
         bne     LStrrScanLoop
-        { Select min of r3 and r29 -> end of string or found position    }
+        { Select min of r3 and r0 -> end of string or found position    }
         { From the PPC compiler writer's guide, not sure if I could ever }
         { come up with something like this :)                            }
 
-        subfc   r30,r3,r29   { r30 = r29 - r3, CA = (r29 >= r3) ? 1 : 0 }
-        subfe   r29,r29,r29  { r29' = (r29 >= r3) ? 0 : -1              }
-        and     r30,r30,r29  { r30 = (r29 >= r3) ? 0 : r29 - r3         }
-        add     r3,r30,r3    { r3  = (r29 >= r3) ?  r3 : r29            }
+        subfc   r10,r3,r0   { r10 = r0 - r3, CA = (r0 >= r3) ? 1 : 0 }
+        subfe   r0,r0,r0    { r0' = (r0 >= r3) ? 0 : -1              }
+        and     r10,r10,r0  { r10 = (r0 >= r3) ? 0 : r0 - r3         }
+        add     r3,r10,r3   { r3  = (r0 >= r3) ?  r3 : r0            }
 LStrrScanDone:
-end ['r3','r4','r29','r30','cr0','cr1'];
+end ['r0','r3','r4','r10','cr0','cr1'];
 
 
 function strupper(p : pchar) : pchar;assembler;
+{ in: p in r3                                                            }
+{ out: r3 is never written, so it still holds p on exit                  }
+{ subtracts $20 in place from every char in the range 97..122 ('a'..'z') }
+{ up to the terminating #0; all other chars are left untouched           }
 asm
+        { nil pointer? then there is nothing to convert }
         cmpli   r3,0
         beq     LStrUpperNil
+        { r9 = cursor, one byte before p because lbzu adds 1 before loading }
-        subi    r29,r3,1
+        subi    r9,r3,1
 LStrUpperLoop:
-        lbzu    r30,1(r29)
+        lbzu    r10,1(r9)
         { a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
+        { cr0 = range check result, cr1 = "is this char the terminating #0" }
+        { the converted char is computed unconditionally and only stored    }
+        { when the range check succeeded                                    }
-        subi    r28,r30,97
-        cmpli   r28,122-97
-        cmpli   cr1,r30,0
-        subi    r30,r30,0x20
+        subi    r0,r10,97
+        cmpli   r0,122-97
+        cmpli   cr1,r10,0
+        subi    r10,r10,0x20
         bgt     LStrUpper1
-        stb     r30,0(r29)
+        stb     r10,0(r9)
 LStrUpper1:
+        { loop until the char just handled was the terminating #0 }
         bne     cr1,LStrUpperLoop
 LStrUpperNil:
-end ['r28','r29','r30','cr0','cr1'];
+end ['r0','r9','r10','cr0','cr1'];
 
 
 function strlower(p : pchar) : pchar;assembler;
+{ in: p in r3                                                           }
+{ out: r3 is never written, so it still holds p on exit                 }
+{ adds $20 in place to every char in the range 65..90 ('A'..'Z') up to  }
+{ the terminating #0; all other chars are left untouched                }
 asm
+        { nil pointer? then there is nothing to convert }
         cmpli   r3,0
         beq     LStrLowerNil
+        { r9 = cursor, one byte before p because lbzu adds 1 before loading }
-        subi    r29,r3,1
+        subi    r9,r3,1
 LStrLowerLoop:
-        lbzu    r30,1(r29)
+        lbzu    r10,1(r9)
         { a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
+        { cr0 = range check result, cr1 = "is this char the terminating #0" }
+        { the converted char is computed unconditionally and only stored    }
+        { when the range check succeeded                                    }
-        subi    r28,r30,65
-        cmpli   r28,90-65
-        cmpli   cr1,r30,0
-        addi    r30,r30,0x20
+        subi    r0,r10,65
+        cmpli   r0,90-65
+        cmpli   cr1,r10,0
+        addi    r10,r10,0x20
         bgt     LStrLower1
-        stb     r30,0(r29)
+        stb     r10,0(r9)
 LStrLower1:
+        { loop until the char just handled was the terminating #0 }
         bne     cr1,LStrLowerLoop
 LStrLowerNil:
-end ['r28','r29','r30','cr0','cr1'];
+end ['r0','r9','r10','cr0','cr1'];
 
 
 {
   $Log$
-  Revision 1.8  2001-07-21 15:51:50  jonas
+  Revision 1.9  2001-09-27 15:30:29  jonas
+    * conversion to compilerproc and to structure used by i386 rtl
+    * some bugfixes
+    * powerpc.inc is almost complete (only fillchar/word/dword, get_frame etc
+      and the class helpers are still needed)
+    - removed unnecessary register saving in set.inc (thanks to compilerproc)
+    * use registers reserved for parameters as much as possible instead of
+      those reserved for local vars (since those have to be saved by the
+      caller anyway, while the ones for local vars have to be saved by the
+      callee)
+
+  Revision 1.8  2001/07/21 15:51:50  jonas
     * fixed small bug in stricomp
 
   Revision 1.7  2001/07/07 12:46:12  jonas

+ 22 - 43
rtl/powerpc/stringss.inc

@@ -17,63 +17,42 @@
  **********************************************************************}
 
 function strpas(p : pchar) : string; assembler;
-asm
-        {  load the begin of the string in the data cache }
-        dcbt    r0,r3
-        { load result address in r9 }
-        li      r29,__RESULT@l
-        addis   r29,__RESULT@ha
-        { maxlength }
-        li      r30,255
-        mtctr   r30
-        lwz     r29,0(r29)
-        { save address for at the end }
-        mr      r3,r29
-        { no "subi r29,r29,1" because the first byte = length byte }
-        subi    r4,r4,1
-LStrPasLoop:
-        lbzu    r30,1(r4)
-        cmpli   r30,0
-        stbu    r30,1(r29)
-        bdnzf   cr0*4+eq, LStrPasLoop
-
-        { get remaining count for length }
-        mfctr   r30
-        { if we stopped because of a terminating #0, decrease the length by 1 }
-        mfcr    r4
-        subfic  r30,r30,255
-        { put "equal" condition bit of cr0 in bit position 31 (= rightmost) }
-        { and clear other bits                                              }
-        rlwinm  r4,r4,cr0*4+eq+1,31,31
-        sub     r30,r30,r4
-
-        { store length }
-        stb     r30,0(r3)
-end ['r3','r4','r29','r30','cr0','ctr'];
+{$i strpas.inc}
 
 function strpcopy(d : pchar;const s : string) : pchar;assembler;
+{ in: d in r3, address of s in r4 (length byte first)                   }
+{ out: r3 is never written, so it still holds d on exit                 }
+{ copies the chars of s to d and appends a terminating #0               }
 asm
         { get length  }
-        lbz     r30,0(r4)
+        lbz     r0,0(r4)
         { put in counter }
-        cmpli   r30,0
-        mtctr   r30
-        subi    r29,r3,1
+        cmpli   r0,0
+        mtctr   r0
+        { r10 = write cursor, one byte before d because stbu adds 1 first; }
+        { r0 must not be the cursor: as a store base it is read as "no     }
+        { base register", so the data lives in r0 and the address in r10   }
+        subi    r10,r3,1
         beq     LStrPCopyEmpty
 LStrPCopyLoop:
         { copy everything }
-        lbzu    r30,1(r4)
-        stbu    r30,1(r29)
+        lbzu    r0,1(r4)
+        stbu    r0,1(r10)
         bdnz    LStrPCopyLoop
         { add terminating #0 }
-        li      r30,0
+        li      r0,0
 LStrPCopyEmpty:
-        stb     r30,1(r29)
-end ['r4','r29','r30','cr0','ctr'];
+        { on the empty-string path r0 still holds the length, which is 0 }
+        stb     r0,1(r10)
+end ['r0','r4','r10','cr0','ctr'];
 
 {
   $Log$
-  Revision 1.3  2001-07-07 12:46:12  jonas
+  Revision 1.4  2001-09-27 15:30:29  jonas
+    * conversion to compilerproc and to structure used by i386 rtl
+    * some bugfixes
+    * powerpc.inc is almost complete (only fillchar/word/dword, get_frame etc
+      and the class helpers are still needed)
+    - removed unnecessary register saving in set.inc (thanks to compilerproc)
+    * use registers reserved for parameters as much as possible instead of
+      those reserved for local vars (since those have to be saved by the
+      caller anyway, while the ones for local vars have to be saved by the
+      callee)
+
+  Revision 1.3  2001/07/07 12:46:12  jonas
     * some small bugfixes and cache optimizations
 
   Revision 1.2  2001/02/11 12:15:03  jonas

+ 48 - 0
rtl/powerpc/strlen.inc

@@ -0,0 +1,48 @@
+{
+    $Id$
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 1999-2000 by the Free Pascal development team
+
+    Processor specific implementation of strlen
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+{ in: p in r3                                   }
+{ out: result (length) in r3; 0 if p is nil     }
+{ only volatile registers are used (r9 = read   }
+{ cursor, r10 = current char)                   }
+asm
+        {  load the begin of the string in the data cache }
+        dcbt    0,r3
+        { empty/invalid string? }
+        cmpli   r3,0
+        { if yes, do nothing }
+        beq     LStrLenDone
+        { start one byte before p because lbzu adds 1 before loading }
+        subi    r9,r3,1
+LStrLenLoop:
+        lbzu    r10,1(r9)
+        cmpli   r10,0
+        bne     LStrLenLoop
+        { r9 now points at the terminating #0: length = r9 - p }
+        sub     r3,r9,r3
+LStrLenDone:
+end ['r3','r9','r10','cr0'];
+
+{
+  $Log$
+  Revision 1.1  2001-09-27 15:30:29  jonas
+    * conversion to compilerproc and to structure used by i386 rtl
+    * some bugfixes
+    * powerpc.inc is almost complete (only fillchar/word/dword, get_frame etc
+      and the class helpers are still needed)
+    - removed unnecessary register saving in set.inc (thanks to compilerproc)
+    * use registers reserved for parameters as much as possible instead of
+      those reserved for local vars (since those have to be saved by the
+      caller anyway, while the ones for local vars have to be saved by the
+      callee)
+
+}

+ 64 - 0
rtl/powerpc/strpas.inc

@@ -0,0 +1,64 @@
+{
+    $Id$
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 1999-2000 by the Free Pascal development team
+
+    Processor specific implementation of strpas
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+asm
+        {  load the begin of the string in the data cache }
+        dcbt    0,r3
+        { load result address in r10; r0 cannot hold an address here: as  }
+        { rA of addis or as the base of a load/store it is read as the    }
+        { constant 0, and the update form (stbu) with it is invalid       }
+        li      r10,__RESULT@l
+        addis   r10,r10,__RESULT@ha
+        { maxlength }
+        li      r0,255
+        mtctr   r0
+        lwz     r10,0(r10)
+        { save address for at the end }
+        mr      r3,r10
+        { no "subi r10,r10,1" because the first byte = length byte }
+        { NOTE(review): the loop reads the source through r4, but the     }
+        { prefetch above treats r3 as the string and r4 is never loaded   }
+        { in this block - confirm which register holds p                  }
+        subi    r4,r4,1
+LStrPasLoop:
+        lbzu    r0,1(r4)
+        cmpli   r0,0
+        stbu    r0,1(r10)
+        bdnzf   cr0*4+eq, LStrPasLoop
+
+        { get remaining count for length }
+        mfctr   r0
+        { if we stopped because of a terminating #0, decrease the length by 1 }
+        mfcr    r4
+        subfic  r0,r0,255
+        { put "equal" condition bit of cr0 in bit position 31 (= rightmost) }
+        { and clear other bits                                              }
+        rlwinm  r4,r4,cr0*4+eq+1,31,31
+        sub     r0,r0,r4
+
+        { store length }
+        stb     r0,0(r3)
+end ['r0','r3','r4','r10','cr0','ctr'];
+
+{
+  $Log$
+  Revision 1.1  2001-09-27 15:30:29  jonas
+    * conversion to compilerproc and to structure used by i386 rtl
+    * some bugfixes
+    * powerpc.inc is almost complete (only fillchar/word/dword, get_frame etc
+      and the class helpers are still needed)
+    - removed unnecessary register saving in set.inc (thanks to compilerproc)
+    * use registers reserved for parameters as much as possible instead of
+      those reserved for local vars (since those have to be saved by the
+      caller anyway, while the ones for local vars have to be saved by the
+      callee)
+
+}