Browse Source

* patch and test by Rika: Redirect StrUtils.RPos to StrUtils.RPosEx, fix a bit, improve a bit, resolves #40394

florian 1 year ago
parent
commit
15e7dd3d9f
2 changed files with 97 additions and 113 deletions
  1. 39 113
      packages/rtl-objpas/src/inc/strutils.pp
  2. 58 0
      tests/test/units/strutils/trpos.pp

+ 39 - 113
packages/rtl-objpas/src/inc/strutils.pp

@@ -194,10 +194,10 @@ Function RPosEx(C:AnsiChar;const S : AnsiString;offs:SizeInt):SizeInt; overload;
 Function RPosEx(C:Unicodechar;const S : UnicodeString;offs:SizeInt):SizeInt; overload;
 Function RPosEx(Const Substr : AnsiString; Const Source : AnsiString;offs:SizeInt) : SizeInt; overload;
 Function RPosEx(Const Substr : UnicodeString; Const Source : UnicodeString;offs:SizeInt) : SizeInt; overload;
-Function RPos(c:AnsiChar;const S : AnsiString):SizeInt; overload;
-Function RPos(c:Unicodechar;const S : UnicodeString):SizeInt; overload;
-Function RPos(Const Substr : AnsiString; Const Source : AnsiString) : SizeInt; overload;
-Function RPos(Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt; overload;
+Function RPos(c:AnsiChar;const S : AnsiString):SizeInt; overload; inline;
+Function RPos(c:Unicodechar;const S : UnicodeString):SizeInt; overload; inline;
+Function RPos(Const Substr : AnsiString; Const Source : AnsiString) : SizeInt; overload; inline;
+Function RPos(Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt; overload; inline;
 
 function AddChar(C: AnsiChar; const S: string; N: Integer): string;
 function AddCharR(C: AnsiChar; const S: string; N: Integer): string;
@@ -3059,12 +3059,10 @@ end;
 
 function RPosEx(C: AnsiChar; const S: AnsiString; offs: SizeInt): SizeInt;
 
-var I   : SizeInt;
-    p,p2: PAnsiChar;
+var p,p2: PAnsiChar;
 
 Begin
- I:=Length(S);
- If (I<>0) and (offs<=i) Then
+ If (offs>0) and (offs<=Length(S)) Then
    begin
      p:=@s[offs];
      p2:=@s[1];
@@ -3077,47 +3075,13 @@ End;
 
 function RPos(c: AnsiChar; const S: AnsiString): SizeInt;
 
-var I   : SizeInt;
-    p,p2: PAnsiChar;
-
 Begin
- I:=Length(S);
- If I<>0 Then
-   begin
-     p:=@s[i];
-     p2:=@s[1];
-     while (p2<=p) and (p^<>c) do dec(p);
-     i:=p-p2+1;
-   end;
-  RPos:=i;
+ Result:=RPosEx(c,S,Length(S)); { Length(S) must be used because character version returns 0 on offs > length. }
 End;
 
 function RPos(const Substr: AnsiString; const Source: AnsiString): SizeInt;
-var
-  MaxLen,llen : SizeInt;
-  c : AnsiChar;
-  pc,pc2 : PAnsiChar;
 begin
-  rPos:=0;
-  llen:=Length(SubStr);
-  maxlen:=length(source);
-  if (llen>0) and (maxlen>0) and ( llen<=maxlen) then
-   begin
- //    i:=maxlen;
-     pc:=@source[maxlen];
-     pc2:=@source[llen-1];
-     c:=substr[llen];
-     while pc>=pc2 do
-      begin
-        if (c=pc^) and
-           (CompareChar(Substr[1],PAnsiChar(pc-llen+1)^,llen)=0) then
-         begin
-           rPos:=PAnsiChar(pc-llen+1)-PAnsiChar(@source[1])+1;
-           exit;
-         end;
-        dec(pc);
-      end;
-   end;
+  Result:=RPosEx(Substr,Source,High(Result)); { High(Result) is possible because string version clamps offs > length to offs = length. }
 end;
 
 function RPosEx(const Substr: AnsiString; const Source: AnsiString; offs: SizeInt): SizeInt;
@@ -3126,42 +3090,38 @@ var
   c : AnsiChar;
   pc,pc2 : PAnsiChar;
 begin
-  rPosex:=0;
   llen:=Length(SubStr);
   maxlen:=length(source);
   if offs<maxlen then maxlen:=offs;
   if (llen>0) and (maxlen>0) and ( llen<=maxlen)  then
    begin
-//     i:=maxlen;
-     pc:=@source[maxlen];
-     pc2:=@source[llen-1];
-     c:=substr[llen];
-     while pc>=pc2 do
-      begin
-        if (c=pc^) and
-           (CompareChar(Substr[1],PAnsiChar(pc-llen+1)^,llen)=0) then
-         begin
-           rPosex:=PAnsiChar(pc-llen+1)-PAnsiChar(@source[1])+1;
-           exit;
-         end;
-        dec(pc);
-      end;
+     pc:=@source[maxlen-llen+1];
+     pc2:=@source[1];
+     c:=substr[1];
+     repeat
+       if (c=pc^) and
+          (CompareChar(Substr[1],pc^,llen)=0) then
+        begin
+          rPosex:=pc-pc2+1;
+          exit;
+        end;
+       dec(pc);
+     until pc<pc2;
    end;
+  rPosex:=0;
 end;
 
 function RPosEx(C: unicodechar; const S: UnicodeString; offs: SizeInt): SizeInt;
 
-var I   : SizeInt;
-    p,p2: PUnicodeChar;
+var p,p2: PUnicodeChar;
 
 Begin
- I:=Length(S);
- If (I<>0) and (offs<=i) Then
+ If (offs>0) and (offs<=Length(S)) Then
    begin
      p:=@s[offs];
      p2:=@s[1];
      while (p2<=p) and (p^<>c) do dec(p);
-     RPosEx:=(p-p2)+1;
+     RPosEx:=SizeUint(pointer(p)-pointer(p2)) div sizeof(unicodechar)+1; { p-p2+1 but avoids signed division... }
    end
   else
     RPosEX:=0;
@@ -3169,46 +3129,13 @@ End;
 
 function RPos(c: Unicodechar; const S: UnicodeString): SizeInt;
 
-var I   : SizeInt;
-    p,p2: pUnicodeChar;
-
 Begin
- I:=Length(S);
- If I<>0 Then
-   begin
-     p:=@s[i];
-     p2:=@s[1];
-     while (p2<=p) and (p^<>c) do dec(p);
-     i:=p-p2+1;
-   end;
-  RPos:=i;
+ Result:=RPosEx(c,S,Length(S)); { Length(S) must be used because character version returns 0 on offs > length. }
 End;
 
 function RPos(const Substr: UnicodeString; const Source: UnicodeString): SizeInt;
-var
-  MaxLen,llen : SizeInt;
-  c : Unicodechar;
-  pc,pc2 : PUnicodechar;
 begin
-  rPos:=0;
-  llen:=Length(SubStr);
-  maxlen:=length(source);
-  if (llen>0) and (maxlen>0) and ( llen<=maxlen) then
-   begin
-     pc:=@source[maxlen];
-     pc2:=@source[llen-1];
-     c:=substr[llen];
-     while pc>=pc2 do
-      begin
-        if (c=pc^) and
-           (CompareWord(Substr[1],punicodechar(pc-llen+1)^,llen)=0) then
-         begin
-           rPos:=punicodechar(pc-llen+1)-punicodechar(@source[1])+1;
-           exit;
-         end;
-        dec(pc);
-      end;
-   end;
+  Result:=RPosEx(Substr,Source,High(Result)); { High(Result) is possible because string version clamps offs > length to offs = length. }
 end;
 
 function RPosEx(const Substr: UnicodeString; const Source: UnicodeString; offs: SizeInt): SizeInt;
@@ -3217,26 +3144,25 @@ var
   c : unicodechar;
   pc,pc2 : punicodechar;
 begin
-  rPosex:=0;
   llen:=Length(SubStr);
   maxlen:=length(source);
   if offs<maxlen then maxlen:=offs;
   if (llen>0) and (maxlen>0) and ( llen<=maxlen)  then
    begin
-     pc:=@source[maxlen];
-     pc2:=@source[llen-1];
-     c:=substr[llen];
-     while pc>=pc2 do
-      begin
-        if (c=pc^) and
-           (Compareword(Substr[1],punicodechar(pc-llen+1)^,llen)=0) then
-         begin
-           rPosex:=punicodechar(pc-llen+1)-punicodechar(@source[1])+1;
-           exit;
-         end;
-        dec(pc);
-      end;
+     pc:=@source[maxlen-llen+1];
+     pc2:=@source[1];
+     c:=substr[1];
+     repeat
+       if (c=pc^) and
+          (Compareword(Substr[1],pc^,llen)=0) then
+        begin
+          rPosex:=SizeUint(pointer(pc)-pointer(pc2)) div sizeof(unicodechar)+1; { pc-pc2+1 but avoids signed division... }
+          exit;
+        end;
+       dec(pc);
+     until pc<pc2;
    end;
+  rPosex:=0;
 end;
 
 procedure BinToHex(BinValue: PAnsiChar; HexValue: PAnsiChar; BinBufSize: Integer);

+ 58 - 0
tests/test/units/strutils/trpos.pp

@@ -0,0 +1,58 @@
+{$mode objfpc} {$longstrings on}
+uses
+	StrUtils;
+
+var
+	somethingFailed: boolean = false;
+
+procedure TestRPos(const needle, haystack: string; ofs, expect: SizeInt; const what: string = '');
+var
+	got: SizeInt;
+begin
+	if length(needle) = 1 then
+		got := RPosEx(needle[1], haystack, ofs)
+	else
+		got := RPosEx(needle, haystack, ofs);
+	if got <> expect then
+	begin
+		writeln('RPosEx(', IfThen(what <> '', what, 'needle = ' + needle + ', haystack = ' + haystack) + ', ofs = ', ofs, ') = ', got, ', expected ', expect);
+		somethingFailed := true;
+	end;
+end;
+
+var
+	haystack: string;
+
+begin
+	//           1       9      16     23        33 36  40      48     55     62     69     76
+	//           v       v      v      v         v  v   v       v      v      v      v      v
+	haystack := 'Hitotsu kotoba ari... Esoragoto de ii. Hitotsu karada ari... Tsugou yokute ii.';
+
+	TestRPos('e ii', haystack, High(SizeInt), 74);
+	TestRPos('e ii', haystack, 999, 74);
+	TestRPos('e ii', haystack, 78, 74);
+	TestRPos('e ii', haystack, 77, 74);
+	TestRPos('e ii', haystack, 76, 34);
+	TestRPos('e ii', haystack, 37, 34);
+	TestRPos('e ii', haystack, 36, 0);
+	TestRPos('e ii', haystack, -999, 0);
+	TestRPos('e ii', haystack, Low(SizeInt), 0);
+
+	TestRPos('i', haystack, High(SizeInt), 0); // Single-character version supposedly behaves like this, unlike (string, string) that clamps to length(haystack).
+	TestRPos('i', haystack, 999, 0);
+	TestRPos('i', haystack, 78, 77);
+	TestRPos('i', haystack, 77, 77);
+	TestRPos('i', haystack, 76, 76);
+	TestRPos('i', haystack, 75, 57);
+	TestRPos('i', haystack, 3, 2);
+	TestRPos('i', haystack, 2, 2);
+	TestRPos('i', haystack, 1, 0);
+	TestRPos('i', haystack, -999, 0);
+	TestRPos('i', haystack, Low(SizeInt), 0);
+
+	// Does not actually catch the error because wrong "match" at position 0 (character before first) will return the same 0 as if there was no match.
+	TestRPos(PChar(pointer(haystack))[-1] + 'Hitotsu', haystack, 78, 0, 'character-before-first + Hitotsu');
+
+	if somethingFailed then halt(1);
+	writeln('ok');
+end.