Browse Source

+ adds support for visual insert
* fix v2l algo, still need more verification

mazen 21 years ago
parent
commit
1814389256
1 changed files with 177 additions and 111 deletions
  1. 177 111
      rtl/objpas/utf8bidi.pp

+ 177 - 111
rtl/objpas/utf8bidi.pp

@@ -18,6 +18,7 @@ type
     drRTL,
     drLTR
   );
+  TVisualToLogical = Array[Byte]Of Byte;
 
 {****************************Conversion routines*******************************}
 {Converts an UCS 16/32 bits charcater to UTF8 character}
@@ -33,21 +34,45 @@ function UTF8ToDoubleByte(UTF8Str:PChar; Len:Cardinal; DBStr:PByte):Cardinal;
 {Returns the number of logical characters}
 function LLength(const UTF8Str:TUTF8String):Cardinal;
 {Converts visual position to logical position}
-function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
+function LPos(const UTF8Str:TUTF8String; vp:Integer; pDir:TDirection):Cardinal;
 {Returns character at a given logical position according to paragraph direction}
-function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
+function LCharOf(UTF8String:TUTF8String; lp:Integer):TUTF8Char;
 {****************************Visual aspects************************************}
 {Returns the number of visual characters}
 function VLength(const UTF8Str:TUTF8String):Cardinal;
 function VLength(p: PChar; Count:Cardinal):Cardinal;
 {Converts a logical position to a visual position}
-function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
+function VPos(const UTF8Str:TUTF8String; lp:Integer; pDir, cDir:TDirection):Cardinal;
 function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
 {Returns character at a given visual position according to paragraph direction}
-function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
+function VCharOf(UTF8Str:TUTF8String; vp:Integer; dir:TDirection):TUTF8Char;
+{Inserts a string into an other paying attention of RTL/LTR direction}
+procedure VInsert(const Src:TUTF8String; var Dest:TUTF8String; vp:Integer; pDir:TDirection);
+{****************************Helper routines***********************************}
+{Returns direction of a character}
+function DirectionOf(UTF8Char:TUTF8Char):TDirection;
+{Inserts a char as if it was typed using keyboard in the most user friendly way.
+Returns the new cursor position after insersion depending on the new visual text}
+function InsertChar(Src:TUTF8Char; var Dest:TUTF8String; vp:Integer; pDir:TDirection):Integer;
+{Returns a table mapping each visual position to its logical position in an UTF8*
+string}
+function VisualToLogical(const UTF8String:TUTF8String; pDir:TDirection):TVisualToLogical;
 
 implementation
 
+function UTF8Str(const s:TUTF8String):String;
+var
+  i:Integer;
+begin
+  Result := '';
+  for i:= 1 to Length(s) do
+    case s[i] of
+      #0..#127:
+         Result := Result + s[i];
+    else
+      Result := Result + '$' + HexStr(Ord(s[i]),2);
+    end;
+end;
 function ComputeCharLength(p:PChar):Cardinal;
 begin
   if ord(p^)<%11000000
@@ -80,6 +105,7 @@ begin
   else
     Result:=1
 end;
+
 {****************************Conversion routines*******************************}
 function UnicodeToUTF8(aChar:TUCS32Char):TUTF8Char;
 begin
@@ -179,65 +205,31 @@ begin
   Result := Length(UTF8Str);
 end;
 
-function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
+function LPos(const UTF8Str:TUTF8String; vp:Integer; pDir:TDirection):Cardinal;
 var
-{At beginning of the line we don't know which direction, thus the first
- character usually decides of paragrph direction}
-  LeftCursorPos, RightCursorPos:Integer;
-  uLen:Integer;
+  v2l:TVisualToLogical;
+  i:integer;
 begin
-  uLen := Length(UTF8Str);
-  LeftCursorPos := 1;
-  RightCursorPos := 1;
-  Result := 1;
-  if(uLen > 0) then
-    repeat
-      case UTF8Str[Result] of
-        #32,'{','}','/'://Does not change direction, this is a neutral character;
-          begin
-            if(pDir = drLTR) then
-              Inc(RightCursorPos);
-          end;
-        #$d8,#$d9://Arabic
-          begin
-            pDir := drRTL;
-            Inc(Result);//Consume control character
-          end;
-      else //Latin
-        begin
-          pDir := drLTR;
-          RightCursorPos := LeftCursorPos + 1;
-        end;
-      end;
-      Inc(LeftCursorPos);
-      Inc(Result);
-    until(Result > uLen) or
-         ((pDir = drLTR) and (LeftCursorPos > vp)) or
-         ((pDir = drRTL) and (RightCursorPos > vp));
-//WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
-  if(Result > uLen)
-  then begin
-    if(vp > LeftCursorPos) then begin
-      Inc(Result, vp - LeftCursorPos);
-      LeftCursorPos := vp;
-    end;
-    Inc(LeftCursorPos);
-    if(vp > RightCursorPos) then
-      if(pDir = drLTR) then
-        RightCursorPos := vp;
-  end;
-//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
-  Result := Result;
+  v2l := VisualToLogical(UTF8Str, pDir);
+  for i:= 0 to v2l[0] do Write(v2l[i],' ');writeln('vp=',vp,' v2l[vp]=',v2l[vp]);
+  if vp <= v2l[0]
+  then
+    Result := v2l[vp]
+  else
+    Result := Length(UTF8Str) + 1;
 end;
 
-function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
+function LCharOf(UTF8String:TUTF8String; lp:Integer):TUTF8Char;
 begin
-  while(lp > 0) and (UTF8Str[lp] > #128) do
+  if lp > Length(UTF8String)
+  then
+    Exit('');
+  while(lp > 0) and ((Ord(UTF8String[lp]) and $F0) = $80) do
     Dec(lp);
   if lp = 0
   then
     Exit('');
-  Move(Result, UTF8Str[lp], SizeOf(Result));
+  Move(UTF8String[lp], Result[1], SizeOf(TUTF8Char) - 1);
   SetLength(Result, ComputeCharLength(@Result[1]));
 end;
 {****************************Visual aspects************************************}
@@ -250,7 +242,7 @@ function VLength(p:PChar; Count:Cardinal):Cardinal;
 var
   CharLen: LongInt;
 begin
-  VLength:=0;
+  Result := 0;
   while (Count>0) do begin
     inc(Result);
     CharLen:=ComputeCharLength(p);
@@ -259,64 +251,19 @@ begin
   end;
 end;
 
-function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
+function VPos(const UTF8Str:TUTF8String; lp:Integer; pDir, cDir:TDirection):Cardinal;
 var
-{At beginning of the line we don't know which direction, thus the first
- character usually decides of paragrph direction}
-  LeftCursorPos, RightCursorPos:Integer;
-  uLen:Integer;
+  v2l:TVisualToLogical;
+  vp:Integer;
 begin
-  uLen := Length(UTF8Str);
-  LeftCursorPos := 1;
-  RightCursorPos := 1;
-  Result := 1;
-  if(uLen > 0) then
-    repeat
-      case UTF8Str[Result] of
-        #32,'{','}','/'://Does not change direction, this is a neutral character;
-          begin
-            if(pDir = drLTR) then
-              Inc(RightCursorPos);
-          end;
-        #$d8,#$d9://Arabic
-          begin
-            pDir := drRTL;
-            Inc(Result);//Consume control character
-          end;
-      else //Latin
-        begin
-          pDir := drLTR;
-          RightCursorPos := LeftCursorPos + 1;
-        end;
-      end;
-      Inc(LeftCursorPos);
-      Inc(Result);
-    until(Result > uLen) or
-         ((pDir = drLTR) and (LeftCursorPos > lp)) or
-         ((pDir = drRTL) and (RightCursorPos > lp));
-//WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
-  if(Result > uLen)
-  then begin
-    if(lp > LeftCursorPos) then begin
-      Inc(Result, lp - LeftCursorPos);
-      LeftCursorPos := lp;
+  v2l := VisualToLogical(UTF8Str, pDir);
+  for vp := 1 to v2l[0] do
+  if lp = v2l[vp]
+  then
+    begin
+      Exit(vp);
     end;
-    Inc(LeftCursorPos);
-    if(lp > RightCursorPos) then
-      if(pDir = drLTR) then
-        RightCursorPos := lp;
-  end;
-//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
-  Result := Result;
-{  case dir of
-    #32:
-      CursorPos := LeftCursorPos;
-    #$d8,#$d9:
-      CursorPos := RightCursorPos;
-    else
-      CursorPos := LeftCursorPos;
-  end;}
-//WriteLn('Result=',Result,' New CursorPos=',CursorPos);
+  Result := v2l[0];
 end;
 
 function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
@@ -324,12 +271,131 @@ begin
 end;
 
 
-function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
+function VCharOf(UTF8Str:TUTF8String; vp:Integer; dir:TDirection):TUTF8Char;
 var
   CharLen: LongInt;
 begin
   Result:=LCharOf(UTF8Str,LPos(UTF8Str, vp, dir));
 end;
 
+{****************************Helper routines***********************************}
+function DirectionOf(UTF8Char:TUTF8Char):TDirection;
+begin
+  case UTF8Char[1] of
+    #9,#32,'/','{','}','[',']','(',')':
+      Result := drNONE;
+    #$D8,#$D9:
+      Result := drRTL;
+  else
+    Result := drLTR;
+  end;
+end;
+
+function VisualToLogical(const UTF8String:TUTF8String; pDir:TDirection):TVisualToLogical;
+  procedure Insert(value:Byte; var v2l:TVisualToLogical; InsPos:Byte);
+    var
+      l:Byte;
+    begin
+      if v2l[0] < 255
+      then
+        Inc(InsPos);
+      if InsPos > v2l[0]
+      then
+        InsPos := v2l[0];
+      for l := v2l[0] downto InsPos do
+        v2l[l] := v2l[l-1];
+      v2l[InsPos] := Value;
+    end;
+var
+  lp, vp : Integer;
+  cDir:TDirection;
+  Character:TUTF8Char;
+i:Integer;
+begin
+  Result[0] := 0;
+  lp := 1;
+  vp := 1;
+  while lp <= Length(UTF8String) do
+  begin
+    Character := LCharOf(UTF8String, lp);
+    cDir := DirectionOf(Character);
+    Inc(Result[0]);
+          WriteLn('lpos=',lp,' vpos=',vp,' cDir=',Byte(cDir));
+    case cDir of
+      drRTL:
+        begin
+          pDir := drRTL;
+        end;
+      drLTR:
+        begin
+          pDir := drLTR;
+          vp := Result[0];
+        end;
+    else
+      case pDir of
+        drRTL:;
+        drLTR:
+          vp := Result[0];
+      else
+        vp := vp;
+      end;
+    end;
+    Insert(lp, Result, vp);
+    for i := 1 to Result[0] do Write('v2l[',i,']=',Result[vp],'/',lp);
+    Inc(lp, Length(Character));
+  end;
+end;
+
+function InsertChar(Src:TUTF8Char; var Dest:TUTF8String; vp:Integer; pDir:TDirection):Integer;
+var
+  v2l:TVisualToLogical;
+  lp:Integer;
+begin
+  v2l := VisualToLogical(Dest, pDir);
+  if vp > v2l[0]
+  then
+    begin
+      lp := Length(Dest) + 1
+    end
+  else
+    lp := v2l[vp];
+Write('vp=',vp,' lp=',lp,' len=', Length(Dest));
+  case DirectionOf(Src) of
+    drRTL:
+      begin
+        if lp > Length(Dest)
+        then
+          Insert(Src, Dest, v2l[v2l[0]])
+        else
+          if(vp > v2l[0]) or (DirectionOf(LCharOf(Dest,v2l[vp])) = drRTL)
+          then
+            Insert(Src, Dest, lp + Length(LCharOf(Dest, lp)))
+          else
+            Insert(Src, Dest, lp);
+        Result := vp;
+      end;
+    drLTR:
+      begin
+        Insert(Src, Dest, lp);
+        Result := vp + 1;
+      end;
+  else
+    begin
+      Insert(Src, Dest, lp);
+      if lp > Length(Dest)
+      then
+        Result := lp
+      else
+        Result := lp + 1;
+    end;
+  end;
+WriteLn(' Result=', Result);
+end;
+
+procedure VInsert(const Src:TUTF8String;var Dest:TUTF8String; vp:Integer; pDir:TDirection);
+begin
+  Insert(Src, Dest, LPos(Dest, vp, pDir));
+end;
+
 end.