4 years ago · 8c06529ae2
--- a/packages/rtl-unicode/src/inc/graphemebreakproperty.pp
+++ b/packages/rtl-unicode/src/inc/graphemebreakproperty.pp
@@ -55,6 +55,29 @@ type
 
				     gbpGlue_After_Zwj,
			
 
				     gbpE_Base_GAZ);
			
 
				 
			
 
				+  { TUnicodeStringExtendedGraphemeClustersEnumerator }
				+
				+  { Enumerator that walks a UnicodeString and yields it one extended
				+    grapheme cluster at a time. Cluster boundaries are decided in MoveNext
				+    from the grapheme break property of the code point just consumed and
				+    of a one-code-point look-ahead. }
				+  TUnicodeStringExtendedGraphemeClustersEnumerator = class
				+  private
				+    { The string being enumerated (assigned in Create). }
				+    FStr: UnicodeString;
				+    { Index in FStr of the first UTF-16 code unit of the current cluster. }
				+    FCurrentIndexStart: SizeInt;
				+    { Index in FStr of the last UTF-16 code unit of the current cluster. }
				+    FCurrentIndexEnd: SizeInt;
				+    { Index in FStr of the last code unit of the look-ahead code point;
				+      greater than Length(FStr) once the input is exhausted. }
				+    FNextIndexEnd: SizeInt;
				+    { Grapheme break property of the look-ahead code point. }
				+    FNextGBP: TGraphemeBreakProperty;
				+    { The look-ahead code point (0 once the input is exhausted). }
				+    FNextCodePoint: UCS4Char;
				+    { Grapheme break property of the most recently consumed code point. }
				+    FCurrentGBP: TGraphemeBreakProperty;
				+    { The most recently consumed code point. }
				+    FCurrentCodePoint: UCS4Char;
				+    { Number of consecutive regional indicator code points consumed so far,
				+      including the current one; reset to 0 by any other code point. }
				+    FRI_Sequence_Length: Integer;
				+    { True while the consumed code points form an E_Base or E_Base_GAZ
				+      followed by zero or more Extend code points. }
				+    FE_Base_EBG_Extend_Sequence: Boolean;
				+    { Returns the substring of FStr from FCurrentIndexStart to FCurrentIndexEnd. }
				+    function GetCurrent: UnicodeString;
				+    { Decodes the next code point (combining a surrogate pair if present)
				+      into FNextCodePoint and looks up its break property into FNextGBP. }
				+    procedure FetchNextChar;
				+  public
				+    { Stores S, resets the enumeration state and primes the look-ahead. }
				+    constructor Create(const S: UnicodeString);
				+    { Returns Self. }
				+    function GetEnumerator: TUnicodeStringExtendedGraphemeClustersEnumerator;
				+    { Advances to the next cluster; returns False when no cluster is left. }
				+    function MoveNext: Boolean;
				+    { The cluster selected by the last successful MoveNext. }
				+    property Current: UnicodeString read GetCurrent;
				+  end;
			
 
				+
			
 
				 function GetGraphemeBreakProperty(Ch: UCS4Char): TGraphemeBreakProperty;
			
 
				 
			
 
				 implementation
			
@@ -64,4 +87,94 @@ begin
 
				   {$I graphemebreakproperty_code.inc}
			
 
				 end;
			
 
				 
			
 
				+{ TUnicodeStringExtendedGraphemeClustersEnumerator }
			
 
				+
			
 
				+{ Returns the current grapheme cluster, i.e. the code units of FStr from
				+  FCurrentIndexStart up to and including FCurrentIndexEnd. }
				+function TUnicodeStringExtendedGraphemeClustersEnumerator.GetCurrent: UnicodeString;
				+var
				+  ClusterLength: SizeInt;
				+begin
				+  { Both bounds are inclusive, hence the +1. }
				+  ClusterLength := FCurrentIndexEnd - FCurrentIndexStart + 1;
				+  Result := Copy(FStr, FCurrentIndexStart, ClusterLength);
				+end;
			
 
				+
			
 
				+{ Advances the look-ahead by one code point: FNextIndexEnd ends up on the
				+  last code unit of that code point, FNextCodePoint holds its value and
				+  FNextGBP its grapheme break property. Past the end of FStr the code
				+  point is reported as 0. }
				+procedure TUnicodeStringExtendedGraphemeClustersEnumerator.FetchNextChar;
				+var
				+  StrLen: SizeInt;
				+  TrailUnit: UCS4Char;
				+begin
				+  StrLen := Length(FStr);
				+  Inc(FNextIndexEnd);
				+  if FNextIndexEnd > StrLen then
				+    FNextCodePoint := 0
				+  else
				+  begin
				+    FNextCodePoint := Ord(FStr[FNextIndexEnd]);
				+    { A high surrogate is combined only when a low surrogate really follows;
				+      an unpaired surrogate is passed on unchanged. }
				+    if (FNextCodePoint >= $D800) and (FNextCodePoint <= $DBFF) and (FNextIndexEnd < StrLen) then
				+    begin
				+      TrailUnit := Ord(FStr[FNextIndexEnd + 1]);
				+      if (TrailUnit >= $DC00) and (TrailUnit <= $DFFF) then
				+      begin
				+        Inc(FNextIndexEnd);
				+        FNextCodePoint := $10000 + (((FNextCodePoint - $D800) shl 10) or (TrailUnit - $DC00));
				+      end;
				+    end;
				+  end;
				+  FNextGBP := GetGraphemeBreakProperty(FNextCodePoint);
				+end;
			
 
				+
			
 
				+{ Creates an enumerator over S. All positions start at 0 (before the first
				+  code unit) and the look-ahead is primed with the first code point. }
				+constructor TUnicodeStringExtendedGraphemeClustersEnumerator.Create(const S: UnicodeString);
				+begin
				+  { No sequence state has been accumulated yet. }
				+  FE_Base_EBG_Extend_Sequence := False;
				+  FRI_Sequence_Length := 0;
				+  { Nothing consumed, nothing looked at. }
				+  FNextIndexEnd := 0;
				+  FCurrentIndexEnd := 0;
				+  FCurrentIndexStart := 0;
				+  FStr := S;
				+  { Needs FStr and FNextIndexEnd to be set; loads the first code point. }
				+  FetchNextChar;
				+end;
			
 
				+
			
 
				+{ The object is its own enumerator: hands back Self. }
				+function TUnicodeStringExtendedGraphemeClustersEnumerator.GetEnumerator: TUnicodeStringExtendedGraphemeClustersEnumerator;
				+begin
				+  Exit(Self);
				+end;
			
 
				+
			
 
				+{ Advances to the next extended grapheme cluster.
				+  Returns False when the start of the next cluster lies past the end of
				+  FStr; otherwise sets FCurrentIndexStart/FCurrentIndexEnd to the bounds of
				+  the new cluster and returns True.
				+  Each loop iteration consumes the look-ahead code point, fetches a new
				+  look-ahead and then decides whether a boundary lies between the two.
				+  The rules are tested in priority order; the first one that matches wins. }
				+function TUnicodeStringExtendedGraphemeClustersEnumerator.MoveNext: Boolean;
				+begin
				+  { The new cluster starts right after the previous one. }
				+  FCurrentIndexStart := FCurrentIndexEnd + 1;
				+  if FCurrentIndexStart > Length(FStr) then
				+    Exit(false);
				+  repeat
				+    { Consume the look-ahead code point: it becomes part of the cluster. }
				+    FCurrentGBP := FNextGBP;
				+    FCurrentCodePoint := FNextCodePoint;
				+    FCurrentIndexEnd := FNextIndexEnd;
				+    { Count the run of regional indicators ending at the current code point.
				+      The counter is not reset at cluster boundaries, only by a non-RI. }
				+    if FCurrentGBP = gpbRegional_Indicator then
				+      Inc(FRI_Sequence_Length)
				+    else
				+      FRI_Sequence_Length := 0;
				+    { Track whether the consumed code points match (E_Base | E_Base_GAZ) Extend*. }
				+    FE_Base_EBG_Extend_Sequence := (FCurrentGBP in [gbpE_Base, gbpE_Base_GAZ]) or (FE_Base_EBG_Extend_Sequence and (FCurrentGBP = gbpExtend));
				+    FetchNextChar;
				+    { The look-ahead ran past the end of the string: the cluster ends here. }
				+    if FNextIndexEnd > Length(FStr) then
				+      Exit(True);
				+
				+    { In the chain below, "continue" means no boundary here (keep extending
				+      the cluster) and "Exit(True)" means the cluster is complete. }
				+    { Do not break between a CR and LF. Otherwise, break before and after controls. }
				+    if (FCurrentGBP = gbpCR) and (FNextGBP = gbpLF) then
				+      continue
				+    else if (FCurrentGBP in [gbpControl, gbpCR, gbpLF]) or (FNextGBP in [gbpControl, gbpCR, gbpLF]) then
				+      Exit(True)
				+    { Do not break Hangul syllable sequences. }
				+    else if ((FCurrentGBP = gbpL) and (FNextGBP in [gbpL, gbpV, gbpLV, gbpLVT])) or
				+            ((FCurrentGBP in [gbpLV, gbpV]) and (FNextGBP in [gbpV, gbpT])) or
				+            ((FCurrentGBP in [gbpLVT, gbpT]) and (FNextGBP = gbpT)) then
				+      continue
				+    { Do not break before extending characters or ZWJ. }
				+    else if FNextGBP in [gbpExtend, gbpZWJ] then
				+      continue
				+    { Only for extended grapheme clusters:
				+      Do not break before SpacingMarks, or after Prepend characters. }
				+    else if (FCurrentGBP = gbpPrepend) or (FNextGBP = gbpSpacingMark) then
				+      continue
				+    { Do not break within emoji modifier sequences or emoji zwj sequences. }
				+    else if ((FCurrentGBP = gbpZWJ) and (FNextGBP in [gbpGlue_After_Zwj, gbpE_Base_GAZ])) or
				+            (FE_Base_EBG_Extend_Sequence and (FNextGBP = gbpE_Modifier)) then
				+      continue
				+    { Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. }
				+    else if (FCurrentGBP = gpbRegional_Indicator) and (FNextGBP = gpbRegional_Indicator) and Odd(FRI_Sequence_Length) then
				+      continue
				+    { Otherwise, break everywhere. }
				+    else
				+      Exit(True);
				+  { The loop is only ever left through one of the Exit calls above. }
				+  until False;
				+end;
			
 
				+
			
 
				 end.