|
@@ -10,110 +10,326 @@ interface
|
|
|
|
|
|
type
|
|
|
TUCS32Char = Cardinal;
|
|
|
- TUTF8Char = String[3];
|
|
|
- TUTF8Str = UTF8String;
|
|
|
+ TUCS16Char = Word;
|
|
|
+ TUTF8Char = String[4];
|
|
|
+ TUTF8String = UTF8String;
|
|
|
TDirection=(
|
|
|
drNONE,
|
|
|
drRTL,
|
|
|
drLTR
|
|
|
);
|
|
|
|
|
|
-function UnicodeToUtf8(aChar:TUCS32Char):TUTF8Char;
|
|
|
-function UnicodeToUtf8(aChar:WideChar):TUTF8Char;
|
|
|
-procedure insert(CharToInsert:TUTF8Char; var uString:TUTF8Str; var CursorPos:Integer);
|
|
|
+{****************************Conversion routines*******************************}
|
|
|
+{Converts an UCS 16/32 bits charcater to UTF8 character}
|
|
|
+function UnicodeToUTF8(aChar:TUCS32Char):TUTF8Char;
|
|
|
+{Converts a wide char UCS 16 bits chcarcter to UTF8 character}
|
|
|
+function UnicodeToUTF8(aChar:WideChar):TUTF8Char;
|
|
|
+{Converts an UTF8 character to UCS 32 bits character}
|
|
|
+function UTF8ToUnicode(const UTF8Char:TUTF8Char):TUCS32Char;
|
|
|
+{Converts an UTF8 string to a double byte string}
|
|
|
+function UTF8ToDoubleByteString(const UTF8Str:TUTF8String):String;
|
|
|
+function UTF8ToDoubleByte(UTF8Str:PChar; Len:Cardinal; DBStr:PByte):Cardinal;
|
|
|
+{****************************Logical aspects***********************************}
|
|
|
+{Returns the number of logical characters}
|
|
|
+function LLength(const UTF8Str:TUTF8String):Cardinal;
|
|
|
+{Converts visual position to logical position}
|
|
|
+function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
|
|
|
+{Returns character at a given logical position according to paragraph direction}
|
|
|
+function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
|
|
|
+{****************************Visual aspects************************************}
|
|
|
+{Returns the number of visual characters}
|
|
|
+function VLength(const UTF8Str:TUTF8String):Cardinal;
|
|
|
+function VLength(p: PChar; Count:Cardinal):Cardinal;
|
|
|
+{Converts a logical position to a visual position}
|
|
|
+function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
|
|
|
+function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
|
|
|
+{Returns character at a given visual position according to paragraph direction}
|
|
|
+function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
|
|
|
|
|
|
implementation
|
|
|
|
|
|
-function UnicodeToUtf8(aChar:TUCS32Char):TUTF8Char;
|
|
|
+function ComputeCharLength(p:PChar):Cardinal;
|
|
|
+begin
|
|
|
+ if ord(p^)<%11000000
|
|
|
+ then
|
|
|
+{regular single byte character (#0 is a normal char, this is UTF8Charascal ;)}
|
|
|
+ Result:=1
|
|
|
+ else if ((ord(p^) and %11100000) = %11000000)
|
|
|
+ then
|
|
|
+ if (ord(p[1]) and %11000000) = %10000000 then
|
|
|
+ Result:=2
|
|
|
+ else
|
|
|
+ Result:=1
|
|
|
+ else if ((ord(p^) and %11110000) = %11100000)
|
|
|
+ then
|
|
|
+ if ((ord(p[1]) and %11000000) = %10000000)
|
|
|
+ and ((ord(p[2]) and %11000000) = %10000000)
|
|
|
+ then
|
|
|
+ Result:=3
|
|
|
+ else
|
|
|
+ Result:=1
|
|
|
+ else if ((ord(p^) and %11111000) = %11110000)
|
|
|
+ then
|
|
|
+ if ((ord(p[1]) and %11000000) = %10000000)
|
|
|
+ and ((ord(p[2]) and %11000000) = %10000000)
|
|
|
+ and ((ord(p[3]) and %11000000) = %10000000)
|
|
|
+ then
|
|
|
+ Result:=4
|
|
|
+ else
|
|
|
+ Result:=1
|
|
|
+ else
|
|
|
+ Result:=1
|
|
|
+end;
|
|
|
+{****************************Conversion routines*******************************}
|
|
|
+function UnicodeToUTF8(aChar:TUCS32Char):TUTF8Char;
|
|
|
begin
|
|
|
case aChar of
|
|
|
0..$7f:
|
|
|
begin
|
|
|
Result[1]:=char(aChar);
|
|
|
- SetLength(Result,1);
|
|
|
+ SetLength(UnicodeToUTF8,1);
|
|
|
end;
|
|
|
$80..$7ff:
|
|
|
begin
|
|
|
Result[1]:=char($c0 or (aChar shr 6));
|
|
|
Result[2]:=char($80 or (aChar and $3f));
|
|
|
- SetLength(Result,2);
|
|
|
+ SetLength(UnicodeToUTF8,2);
|
|
|
end;
|
|
|
- else
|
|
|
+ $800..$ffff:
|
|
|
begin
|
|
|
+ SetLength(Result,3);
|
|
|
Result[1]:=char($e0 or (aChar shr 12));
|
|
|
- Result[2]:=char($80 or ((aChar shr 6)and $3f));
|
|
|
+ Result[2]:=char($80 or ((aChar shr 6) and $3f));
|
|
|
Result[3]:=char($80 or (aChar and $3f));
|
|
|
- SetLength(Result,3);
|
|
|
end;
|
|
|
+ $10000..$1fffff:
|
|
|
+ begin
|
|
|
+ SetLength(UnicodeToUTF8,4);
|
|
|
+ Result[1]:=char($f0 or (aChar shr 18));
|
|
|
+ Result[2]:=char($80 or ((aChar shr 12) and $3f));
|
|
|
+ Result[3]:=char($80 or ((aChar shr 6) and $3f));
|
|
|
+ Result[4]:=char($80 or (aChar and $3f));
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ SetLength(UnicodeToUTF8, 0);
|
|
|
end;
|
|
|
end;
|
|
|
|
|
|
-function UnicodeToUtf8(aChar:WideChar):TUTF8Char;
|
|
|
+function UnicodeToUTF8(aChar:WideChar):TUTF8Char;
|
|
|
begin
|
|
|
- UnicodeToUtf8(Word(aChar));
|
|
|
+ Result := UnicodeToUTF8(Word(aChar));
|
|
|
end;
|
|
|
|
|
|
-procedure insert(CharToInsert:TUTF8Char; var uString:TUTF8Str; var CursorPos:Integer);
|
|
|
+function UTF8ToUnicode(const UTF8Char:TUTF8Char):TUCS32Char;
|
|
|
+begin
|
|
|
+ case ComputeCharLength(@UTF8Char[1]) of
|
|
|
+ 1:{regular single byte character (#0 is a normal char, this is UTF8Charascal ;)}
|
|
|
+ Result := ord(UTF8Char[1]);
|
|
|
+ 2:
|
|
|
+ Result := ((ord(UTF8Char[1]) and %00011111) shl 6)
|
|
|
+ or (ord(UTF8Char[2]) and %00111111);
|
|
|
+ 3:
|
|
|
+ Result := ((ord(UTF8Char[1]) and %00011111) shl 12)
|
|
|
+ or ((ord(UTF8Char[1]) and %00111111) shl 6)
|
|
|
+ or (ord(UTF8Char[2]) and %00111111);
|
|
|
+ 4:
|
|
|
+ Result := ((ord(UTF8Char[1]) and %00011111) shl 18)
|
|
|
+ or ((ord(UTF8Char[2]) and %00111111) shl 12)
|
|
|
+ or ((ord(UTF8Char[3]) and %00111111) shl 6)
|
|
|
+ or (ord(UTF8Char[4]) and %00111111);
|
|
|
+ else
|
|
|
+ Result := $FFFFFFFF;
|
|
|
+ end
|
|
|
+end;
|
|
|
+
|
|
|
+function UTF8ToDoubleByteString(const UTF8Str: TUTF8String): string;
|
|
|
+var
|
|
|
+ Len: Integer;
|
|
|
+begin
|
|
|
+ Len:=VLength(UTF8Str);
|
|
|
+ SetLength(Result,Len*2);
|
|
|
+ if Len=0 then exit;
|
|
|
+ UTF8ToDoubleByte(PChar(UTF8Str),length(UTF8Str),PByte(Result));
|
|
|
+end;
|
|
|
+
|
|
|
+function UTF8ToDoubleByte(UTF8Str: PChar; Len:Cardinal; DBStr: PByte):Cardinal;
|
|
|
+var
|
|
|
+ SrcPos: PChar;
|
|
|
+ CharLen: LongInt;
|
|
|
+ DestPos: PByte;
|
|
|
+ u: Cardinal;
|
|
|
+begin
|
|
|
+ SrcPos:=UTF8Str;
|
|
|
+ DestPos:=DBStr;
|
|
|
+ Result:=0;
|
|
|
+ while Len>0 do begin
|
|
|
+ u:=UTF8ToUnicode(SrcPos);
|
|
|
+ DestPos^:=byte((u shr 8) and $ff);
|
|
|
+ inc(DestPos);
|
|
|
+ DestPos^:=byte(u and $ff);
|
|
|
+ inc(DestPos);
|
|
|
+ inc(SrcPos,CharLen);
|
|
|
+ dec(Len,CharLen);
|
|
|
+ inc(Result);
|
|
|
+ end;
|
|
|
+end;
|
|
|
+{****************************Logical aspects***********************************}
|
|
|
+function LLength(const UTF8Str:TUTF8String):Cardinal;
|
|
|
+begin
|
|
|
+ Result := Length(UTF8Str);
|
|
|
+end;
|
|
|
+
|
|
|
+function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
|
|
|
var
|
|
|
{At beginning of the line we don't know which direction, thus the first
|
|
|
character usually decides of paragrph direction}
|
|
|
- dir:TDirection;
|
|
|
- LeftCursorPos, RightCursorPos, InsertPos:Integer;
|
|
|
+ LeftCursorPos, RightCursorPos:Integer;
|
|
|
uLen:Integer;
|
|
|
begin
|
|
|
- dir := drNONE;
|
|
|
- uLen := Length(uString);
|
|
|
+ uLen := Length(UTF8Str);
|
|
|
LeftCursorPos := 1;
|
|
|
RightCursorPos := 1;
|
|
|
- InsertPos := 1;
|
|
|
+ Result := 1;
|
|
|
if(uLen > 0) then
|
|
|
repeat
|
|
|
- case uString[InsertPos] of
|
|
|
+ case UTF8Str[Result] of
|
|
|
#32,'{','}','/'://Does not change direction, this is a neutral character;
|
|
|
begin
|
|
|
- if(dir = drLTR) then
|
|
|
+ if(pDir = drLTR) then
|
|
|
Inc(RightCursorPos);
|
|
|
end;
|
|
|
#$d8,#$d9://Arabic
|
|
|
begin
|
|
|
- dir := drRTL;
|
|
|
- Inc(InsertPos);//Consume control character
|
|
|
+ pDir := drRTL;
|
|
|
+ Inc(Result);//Consume control character
|
|
|
end;
|
|
|
else //Latin
|
|
|
begin
|
|
|
- dir := drLTR;
|
|
|
+ pDir := drLTR;
|
|
|
RightCursorPos := LeftCursorPos + 1;
|
|
|
end;
|
|
|
end;
|
|
|
Inc(LeftCursorPos);
|
|
|
- Inc(InsertPos);
|
|
|
- until(InsertPos > uLen) or
|
|
|
- ((dir = drLTR) and (LeftCursorPos > CursorPos)) or
|
|
|
- ((dir = drRTL) and (RightCursorPos > CursorPos));
|
|
|
-//WriteLn('uLen=',uLen,' InsertPos=',InsertPos,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
|
|
- if(InsertPos > uLen)
|
|
|
+ Inc(Result);
|
|
|
+ until(Result > uLen) or
|
|
|
+ ((pDir = drLTR) and (LeftCursorPos > vp)) or
|
|
|
+ ((pDir = drRTL) and (RightCursorPos > vp));
|
|
|
+//WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
|
|
+ if(Result > uLen)
|
|
|
then begin
|
|
|
- if(CursorPos > LeftCursorPos) then begin
|
|
|
- Inc(InsertPos, CursorPos - LeftCursorPos);
|
|
|
- LeftCursorPos := CursorPos;
|
|
|
+ if(vp > LeftCursorPos) then begin
|
|
|
+ Inc(Result, vp - LeftCursorPos);
|
|
|
+ LeftCursorPos := vp;
|
|
|
end;
|
|
|
Inc(LeftCursorPos);
|
|
|
- if(CursorPos > RightCursorPos) then
|
|
|
- if(dir = drLTR) then
|
|
|
- RightCursorPos := CursorPos;
|
|
|
- uString := uString + StringOfChar(' ', InsertPos - uLen);
|
|
|
+ if(vp > RightCursorPos) then
|
|
|
+ if(pDir = drLTR) then
|
|
|
+ RightCursorPos := vp;
|
|
|
end;
|
|
|
//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
|
|
- System.insert(CharToInsert, uString,InsertPos);
|
|
|
- case CharToInsert[1] of
|
|
|
+ Result := Result;
|
|
|
+end;
|
|
|
+
|
|
|
+function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
|
|
|
+begin
|
|
|
+ while(lp > 0) and (UTF8Str[lp] > #128) do
|
|
|
+ Dec(lp);
|
|
|
+ if lp = 0
|
|
|
+ then
|
|
|
+ Exit('');
|
|
|
+ Move(Result, UTF8Str[lp], SizeOf(Result));
|
|
|
+ SetLength(Result, ComputeCharLength(@Result[1]));
|
|
|
+end;
|
|
|
+{****************************Visual aspects************************************}
|
|
|
+function VLength(const UTF8Str:TUTF8String):Cardinal;
|
|
|
+begin
|
|
|
+ Result := VLength(PChar(UTF8Str),LLength(UTF8Str));
|
|
|
+end;
|
|
|
+
|
|
|
+function VLength(p:PChar; Count:Cardinal):Cardinal;
|
|
|
+var
|
|
|
+ CharLen: LongInt;
|
|
|
+begin
|
|
|
+ VLength:=0;
|
|
|
+ while (Count>0) do begin
|
|
|
+ inc(Result);
|
|
|
+ CharLen:=ComputeCharLength(p);
|
|
|
+ inc(p,CharLen);
|
|
|
+ dec(Count,CharLen);
|
|
|
+ end;
|
|
|
+end;
|
|
|
+
|
|
|
+function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
|
|
|
+var
|
|
|
+{At beginning of the line we don't know which direction, thus the first
|
|
|
+ character usually decides of paragrph direction}
|
|
|
+ LeftCursorPos, RightCursorPos:Integer;
|
|
|
+ uLen:Integer;
|
|
|
+begin
|
|
|
+ uLen := Length(UTF8Str);
|
|
|
+ LeftCursorPos := 1;
|
|
|
+ RightCursorPos := 1;
|
|
|
+ Result := 1;
|
|
|
+ if(uLen > 0) then
|
|
|
+ repeat
|
|
|
+ case UTF8Str[Result] of
|
|
|
+ #32,'{','}','/'://Does not change direction, this is a neutral character;
|
|
|
+ begin
|
|
|
+ if(pDir = drLTR) then
|
|
|
+ Inc(RightCursorPos);
|
|
|
+ end;
|
|
|
+ #$d8,#$d9://Arabic
|
|
|
+ begin
|
|
|
+ pDir := drRTL;
|
|
|
+ Inc(Result);//Consume control character
|
|
|
+ end;
|
|
|
+ else //Latin
|
|
|
+ begin
|
|
|
+ pDir := drLTR;
|
|
|
+ RightCursorPos := LeftCursorPos + 1;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ Inc(LeftCursorPos);
|
|
|
+ Inc(Result);
|
|
|
+ until(Result > uLen) or
|
|
|
+ ((pDir = drLTR) and (LeftCursorPos > lp)) or
|
|
|
+ ((pDir = drRTL) and (RightCursorPos > lp));
|
|
|
+//WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
|
|
+ if(Result > uLen)
|
|
|
+ then begin
|
|
|
+ if(lp > LeftCursorPos) then begin
|
|
|
+ Inc(Result, lp - LeftCursorPos);
|
|
|
+ LeftCursorPos := lp;
|
|
|
+ end;
|
|
|
+ Inc(LeftCursorPos);
|
|
|
+ if(lp > RightCursorPos) then
|
|
|
+ if(pDir = drLTR) then
|
|
|
+ RightCursorPos := lp;
|
|
|
+ end;
|
|
|
+//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
|
|
+ Result := Result;
|
|
|
+{ case dir of
|
|
|
#32:
|
|
|
CursorPos := LeftCursorPos;
|
|
|
#$d8,#$d9:
|
|
|
CursorPos := RightCursorPos;
|
|
|
else
|
|
|
CursorPos := LeftCursorPos;
|
|
|
- end;
|
|
|
-//WriteLn('InsertPos=',InsertPos,' New CursorPos=',CursorPos);
|
|
|
+ end;}
|
|
|
+//WriteLn('Result=',Result,' New CursorPos=',CursorPos);
|
|
|
end;
|
|
|
+
|
|
|
+function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
|
|
|
+begin
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
|
|
|
+var
|
|
|
+ CharLen: LongInt;
|
|
|
+begin
|
|
|
+ Result:=LCharOf(UTF8Str,LPos(UTF8Str, vp, dir));
|
|
|
+end;
|
|
|
+
|
|
|
end.
|
|
|
|