12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855 |
- { Unicode "Character" properties handler.
- Copyright (c) 2012 by Inoussa OUEDRAOGO
- The source code is distributed under the Library GNU
- General Public License with the following modification:
- - object files and libraries linked into an application may be
- distributed without source code.
- If you didn't receive a copy of the file COPYING, contact:
- Free Software Foundation
- 675 Mass Ave
- Cambridge, MA 02139
- USA
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
- unit Character;
- interface
- {$ifndef VER2_4}
- {$mode objfpc}
- {$H+}
- {$PACKENUM 1}
- {$SCOPEDENUMS ON}
- uses
- unicodedata;
type
  { Unicode General Category of a code point.
    The declaration order must not change: the implementation type-casts the
    Category field of the unicodedata property records directly to this
    enumeration. }
  TUnicodeCategory = (
    // --- Letters -----------------------------------------------------------
    ucUppercaseLetter,     // Lu : Letter, uppercase
    ucLowercaseLetter,     // Ll : Letter, lowercase
    ucTitlecaseLetter,     // Lt : Letter, titlecase
    ucModifierLetter,      // Lm : Letter, modifier
    ucOtherLetter,         // Lo : Letter, other

    // --- Marks -------------------------------------------------------------
    ucNonSpacingMark,      // Mn : Mark, nonspacing
    ucCombiningMark,       // Mc : Mark, spacing combining
    ucEnclosingMark,       // Me : Mark, enclosing

    // --- Numbers -----------------------------------------------------------
    ucDecimalNumber,       // Nd : Number, decimal digit
    ucLetterNumber,        // Nl : Number, letter
    ucOtherNumber,         // No : Number, other

    // --- Punctuation -------------------------------------------------------
    ucConnectPunctuation,  // Pc : Punctuation, connector
    ucDashPunctuation,     // Pd : Punctuation, dash
    ucOpenPunctuation,     // Ps : Punctuation, open
    ucClosePunctuation,    // Pe : Punctuation, close
    ucInitialPunctuation,  // Pi : Punctuation, initial quote (acts like Ps or Pe depending on usage)
    ucFinalPunctuation,    // Pf : Punctuation, final quote (acts like Ps or Pe depending on usage)
    ucOtherPunctuation,    // Po : Punctuation, other

    // --- Symbols -----------------------------------------------------------
    ucMathSymbol,          // Sm : Symbol, math
    ucCurrencySymbol,      // Sc : Symbol, currency
    ucModifierSymbol,      // Sk : Symbol, modifier
    ucOtherSymbol,         // So : Symbol, other

    // --- Separators --------------------------------------------------------
    ucSpaceSeparator,      // Zs : Separator, space
    ucLineSeparator,       // Zl : Separator, line
    ucParagraphSeparator,  // Zp : Separator, paragraph

    // --- Others ------------------------------------------------------------
    ucControl,             // Cc : Other, control
    ucFormat,              // Cf : Other, format
    ucSurrogate,           // Cs : Other, surrogate
    ucPrivateUse,          // Co : Other, private use
    ucUnassigned           // Cn : Other, not assigned (including noncharacters)
  );

  // Any combination of general categories.
  TUnicodeCategorySet = set of TUnicodeCategory;

  // Options accepted by the string overloads of TCharacter.ToLower / ToUpper.
  TCharacterOption = (
    coIgnoreInvalidSequence  // do not fail on an invalid code point sequence
  );
  TCharacterOptions = set of TCharacterOption;
{ TCharacter }

  { Container for static Unicode helpers working on UTF-16 data
    (UnicodeChar / UnicodeString).
    - The class cannot be instantiated: Create always raises.
    - Overloads taking (AString, AIndex) use a 1-based index and raise
      EArgumentOutOfRangeException when AIndex is outside 1..Length(AString). }
  TCharacter = class sealed
  private
    // Shared implementation of the category predicates for string input.
    class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static;
    class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static;
  public
    constructor Create;

    // --- UTF-32 <-> UTF-16 conversions ---------------------------------------
    class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
    class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
    class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
    class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;

    // --- Property lookups ----------------------------------------------------
    class function GetNumericValue(AChar : UnicodeChar) : Double; overload; static;
    class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;

    class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
    class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;

    // --- Category predicates -------------------------------------------------
    class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    // --- Surrogate predicates ------------------------------------------------
    class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
    class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
    class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
    class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

    // --- More category predicates --------------------------------------------
    class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
    class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
    class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
    class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
    class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
    class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
    class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

    // --- Case conversion -----------------------------------------------------
    class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
    class function ToLower(const AString : UnicodeString) : UnicodeString; overload; static; inline;
    class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
    class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
    class function ToUpper(const AString : UnicodeString) : UnicodeString; overload; static; inline;
    class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
  end;
- // flat functions
- function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
- function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
- function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
- function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
- function GetNumericValue(AChar : UnicodeChar) : Double; overload;
- function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload;
- function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload;
- function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload;
- function IsControl(AChar : UnicodeChar) : Boolean; overload;
- function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsDigit(AChar : UnicodeChar) : Boolean; overload;
- function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsSurrogate(AChar : UnicodeChar) : Boolean; overload;
- function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload;
- function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload;
- function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload;
- function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLetter(AChar : UnicodeChar) : Boolean; overload;
- function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload;
- function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLower(AChar : UnicodeChar) : Boolean; overload;
- function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsNumber(AChar : UnicodeChar) : Boolean; overload;
- function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsPunctuation(AChar : UnicodeChar) : Boolean; overload;
- function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsSeparator(AChar : UnicodeChar) : Boolean; overload;
- function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsSymbol(AChar : UnicodeChar) : Boolean; overload;
- function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsUpper(AChar : UnicodeChar) : Boolean; overload;
- function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload;
- function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function ToLower(AChar : UnicodeChar) : UnicodeChar; overload;
- function ToLower(const AString : UnicodeString) : UnicodeString; overload;
- function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload;
- function ToUpper(const AString : UnicodeString) : UnicodeString; overload;
- {$endif VER2_4}
- implementation
- {$ifndef VER2_4}
- uses
- SysUtils,
- RtlConsts;
const
  { Category groups used by the Is* predicates.
    They are written as enumeration ranges and therefore rely on the
    declaration order of TUnicodeCategory (each group is declared
    contiguously there). }

  // Lu, Ll, Lt, Lm, Lo
  LETTER_CATEGORIES =
    [TUnicodeCategory.ucUppercaseLetter .. TUnicodeCategory.ucOtherLetter];

  // All letters plus Nd and Nl (No is not part of this group)
  LETTER_OR_DIGIT_CATEGORIES =
    LETTER_CATEGORIES +
    [TUnicodeCategory.ucDecimalNumber .. TUnicodeCategory.ucLetterNumber];

  // Nd, Nl, No
  NUMBER_CATEGORIES =
    [TUnicodeCategory.ucDecimalNumber .. TUnicodeCategory.ucOtherNumber];

  // Pc, Pd, Ps, Pe, Pi, Pf, Po
  PUNCTUATION_CATEGORIES =
    [TUnicodeCategory.ucConnectPunctuation .. TUnicodeCategory.ucOtherPunctuation];

  // Zs, Zl, Zp
  SEPARATOR_CATEGORIES =
    [TUnicodeCategory.ucSpaceSeparator .. TUnicodeCategory.ucParagraphSeparator];

  // Sm, Sc, Sk, So
  SYMBOL_CATEGORIES =
    [TUnicodeCategory.ucMathSymbol .. TUnicodeCategory.ucOtherSymbol];
// Unit-level alias for TCharacter.ConvertFromUtf32.
function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
begin
  Exit(TCharacter.ConvertFromUtf32(AChar));
end;
// Unit-level aliases for the three TCharacter.ConvertToUtf32 overloads.

// Code point starting at the 1-based position AIndex of AString.
function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AString, AIndex));
end;

// Same as above; ACharLength receives the number of UTF-16 units consumed.
function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer; out ACharLength: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AString, AIndex, ACharLength));
end;

// Code point encoded by an explicit surrogate pair.
function ConvertToUtf32(const AHighSurrogate, ALowSurrogate: UnicodeChar): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate));
end;
// Unit-level aliases for the TCharacter.GetNumericValue overloads.

function GetNumericValue(AChar: UnicodeChar): Double;
begin
  Exit(TCharacter.GetNumericValue(AChar));
end;

function GetNumericValue(const AString: UnicodeString; AIndex: Integer): Double;
begin
  Exit(TCharacter.GetNumericValue(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.GetUnicodeCategory overloads.

function GetUnicodeCategory(AChar: UnicodeChar): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(AChar));
end;

function GetUnicodeCategory(const AString: UnicodeString; AIndex: Integer): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsControl overloads.

function IsControl(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsControl(AChar));
end;

function IsControl(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsControl(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsDigit overloads.

function IsDigit(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsDigit(AChar));
end;

function IsDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsDigit(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsSurrogate overloads.

function IsSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSurrogate(AChar));
end;

function IsSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogate(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsHighSurrogate overloads.

function IsHighSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(AChar));
end;

function IsHighSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsLowSurrogate overloads.

function IsLowSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(AChar));
end;

function IsLowSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsSurrogatePair overloads.

function IsSurrogatePair(const AHighSurrogate, ALowSurrogate: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate));
end;

function IsSurrogatePair(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsLetter overloads.

function IsLetter(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLetter(AChar));
end;

function IsLetter(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLetter(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsLetterOrDigit overloads.

function IsLetterOrDigit(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(AChar));
end;

function IsLetterOrDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsLower overloads.

function IsLower(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLower(AChar));
end;

function IsLower(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLower(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsNumber overloads.

function IsNumber(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsNumber(AChar));
end;

function IsNumber(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsNumber(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsPunctuation overloads.

function IsPunctuation(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AChar));
end;

function IsPunctuation(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsSeparator overloads.

function IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSeparator(AChar));
end;

function IsSeparator(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSeparator(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsSymbol overloads.

function IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSymbol(AChar));
end;

function IsSymbol(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSymbol(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsUpper overloads.

function IsUpper(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsUpper(AChar));
end;

function IsUpper(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsUpper(AString, AIndex));
end;
// Unit-level aliases for the TCharacter.IsWhiteSpace overloads.

function IsWhiteSpace(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AChar));
end;

function IsWhiteSpace(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AString, AIndex));
end;
// Unit-level aliases for TCharacter.ToLower (single unit and whole string).

function ToLower(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToLower(AChar));
end;

function ToLower(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToLower(AString));
end;
// Unit-level aliases for TCharacter.ToUpper (single unit and whole string).

function ToUpper(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToUpper(AChar));
end;

function ToUpper(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToUpper(AString));
end;
- { TCharacter }
{ Tests whether the code point starting at AString[AIndex] has category
  ACategory. Implemented as the set overload with a one-element set, so the
  validation and the exceptions raised are the same. }
class function TCharacter.TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean;
var
  wanted : TUnicodeCategorySet;
begin
  wanted := [ACategory];
  Result := TestCategory(AString, AIndex, wanted);
end;

{ Tests whether the category of the code point starting at AString[AIndex]
  (1-based) belongs to ACategory.
  Raises:
    EArgumentOutOfRangeException - AIndex is outside 1..Length(AString)
                                   (also propagated from IsSurrogatePair);
    EArgumentException           - the unit at AIndex is a surrogate that does
                                   not start a valid surrogate pair. }
class function TCharacter.TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean;
var
  props : PUC_Prop;
  len   : SizeInt;
begin
  len := Length(AString);
  if (AIndex < 1) or (AIndex > len) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, len]);

  // Properties of the single UTF-16 unit first ...
  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    // ... replaced by those of the whole pair when the unit is a surrogate.
    if IsSurrogatePair(AString, AIndex) then
      props := GetProps(AString[AIndex], AString[AIndex + 1])
    else
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  end;

  Result := TUnicodeCategory(props^.Category) in ACategory;
end;
{ TCharacter is not meant to be instantiated: constructing it always raises
  ENoConstructException carrying the class name. }
constructor TCharacter.Create;
begin
  raise ENoConstructException.CreateFmt(
    SClassCantBeConstructed,
    [ClassName]
  );
end;
{ Encodes the UTF-32 value AChar as a UTF-16 string of one or two units.
  Raises EArgumentOutOfRangeException when AChar is a surrogate value or is
  greater than MAX_LEGAL_UTF32. }
class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
var
  offset : UCS4Char;
begin
  if AChar >= UCS4_HALF_BASE then
  begin
    // Needs two units: validate, then split the offset into a surrogate pair.
    if AChar > MAX_LEGAL_UTF32 then
      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
    offset := AChar - UCS4_HALF_BASE;
    SetLength(Result, 2);
    Result[1] := UnicodeChar((offset shr 10) + HIGH_SURROGATE_BEGIN);
    Result[2] := UnicodeChar((offset and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
    Exit;
  end;

  // Fits in one unit, but a surrogate value is not a valid code point.
  if IsSurrogate(UnicodeChar(AChar)) then
    raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
  Result := UnicodeChar(AChar);
end;
{ Returns the UTF-32 value of the code point starting at AString[AIndex]
  (1-based). Same checks and result as the overload reporting the length;
  the length is simply discarded. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
var
  ignoredLength : Integer;
begin
  Result := ConvertToUtf32(AString, AIndex, ignoredLength);
end;

{ Returns the UTF-32 value of the code point starting at AString[AIndex]
  (1-based); ACharLength receives the number of UTF-16 units used (1 or 2).
  A unit that is not a high surrogate is returned as is, with length 1.
  Raises:
    EArgumentOutOfRangeException - AIndex is outside 1..Length(AString), or the
                                   unit following a high surrogate is not a low
                                   surrogate (raised by the pair overload);
    EArgumentException           - a high surrogate is the last unit of AString. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
var
  lead : UnicodeChar;
  len  : SizeInt;
begin
  len := Length(AString);
  if (AIndex < 1) or (AIndex > len) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, len]);

  lead := AString[AIndex];
  if not IsHighSurrogate(lead) then
  begin
    Result := Word(lead);
    ACharLength := 1;
    Exit;
  end;

  // A high surrogate needs a following unit to form a pair.
  if AIndex >= len then
    raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
  Result := ConvertToUtf32(lead, AString[AIndex + 1]);
  ACharLength := 2;
end;

{ Combines a surrogate pair into its UTF-32 value.
  Raises EArgumentOutOfRangeException when AHighSurrogate is not a high
  surrogate or ALowSurrogate is not a low surrogate (checked in that order). }
class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
begin
  if not IsHighSurrogate(AHighSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(
      SHighSurrogateOutOfRange, [Word(AHighSurrogate)]
    );
  if not IsLowSurrogate(ALowSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(
      SLowSurrogateOutOfRange, [Word(ALowSurrogate)]
    );
  Result := ToUCS4(AHighSurrogate, ALowSurrogate);
end;
{ Returns the NumericValue field of the property record of the UTF-16 unit
  AChar. }
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Result := props^.NumericValue;
end;

{ Returns the NumericValue of the code point starting at AString[AIndex]
  (1-based); a surrogate pair is looked up as a whole.
  Raises:
    EArgumentOutOfRangeException - AIndex is outside 1..Length(AString)
                                   (also propagated from IsSurrogatePair);
    EArgumentException           - the unit at AIndex is a surrogate that does
                                   not start a valid surrogate pair. }
class function TCharacter.GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double;
var
  props : PUC_Prop;
  len   : SizeInt;
begin
  len := Length(AString);
  if (AIndex < 1) or (AIndex > len) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, len]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if IsSurrogatePair(AString, AIndex) then
      props := GetProps(AString[AIndex], AString[AIndex + 1])
    else
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  end;

  Result := props^.NumericValue;
end;
{ Returns the general category stored in the property record of the UTF-16
  unit AChar. }
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Result := TUnicodeCategory(props^.Category);
end;

{ Returns the general category of the code point starting at AString[AIndex]
  (1-based); a surrogate pair is looked up as a whole.
  Raises:
    EArgumentOutOfRangeException - AIndex is outside 1..Length(AString)
                                   (also propagated from IsSurrogatePair);
    EArgumentException           - the unit at AIndex is a surrogate that does
                                   not start a valid surrogate pair. }
class function TCharacter.GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory;
var
  props : PUC_Prop;
  len   : SizeInt;
begin
  len := Length(AString);
  if (AIndex < 1) or (AIndex > len) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, len]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if IsSurrogatePair(AString, AIndex) then
      props := GetProps(AString[AIndex], AString[AIndex + 1])
    else
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  end;

  Result := TUnicodeCategory(props^.Category);
end;
// True when the UTF-16 unit AChar has general category Cc (control).
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucControl;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucControl);
end;
// True when the UTF-16 unit AChar has general category Nd (decimal digit).
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucDecimalNumber;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucDecimalNumber);
end;
// True when the UTF-16 unit AChar has general category Cs (surrogate).
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucSurrogate;
end;

{ True when the single unit AString[AIndex] (1-based) is a surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean;
var
  len : SizeInt;
begin
  len := Length(AString);
  if (AIndex < 1) or (AIndex > len) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, len]);
  Result := IsSurrogate(AString[AIndex]);
end;
{ True when AChar is a UTF-16 high (leading) surrogate, i.e. its code unit
  lies in HIGH_SURROGATE_BEGIN..HIGH_SURROGATE_END.
  The range test alone decides the answer. The earlier additional
  general-category lookup through GetProps was redundant (assuming the two
  constants bound the standard U+D800..U+DBFF block, whose code units all have
  category Cs) and only added a table access to this inline routine. }
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
var
  codeUnit : Word;
begin
  codeUnit := Word(AChar);
  Result := (codeUnit >= HIGH_SURROGATE_BEGIN) and (codeUnit <= HIGH_SURROGATE_END);
end;

{ True when the single unit AString[AIndex] (1-based) is a high surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsHighSurrogate(AString[AIndex]);
end;
{ True when AChar is a UTF-16 low (trailing) surrogate, i.e. its code unit
  lies in LOW_SURROGATE_BEGIN..LOW_SURROGATE_END.
  The range test alone decides the answer. The earlier additional
  general-category lookup through GetProps was redundant (assuming the two
  constants bound the standard U+DC00..U+DFFF block, whose code units all have
  category Cs) and only added a table access to this inline routine. }
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
var
  codeUnit : Word;
begin
  codeUnit := Word(AChar);
  Result := (codeUnit >= LOW_SURROGATE_BEGIN) and (codeUnit <= LOW_SURROGATE_END);
end;

{ True when the single unit AString[AIndex] (1-based) is a low surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsLowSurrogate(AString[AIndex]);
end;
// True when the two units form a surrogate pair, as decided by
// UnicodeIsSurrogatePair from the unicodedata unit.
class function TCharacter.IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean;
begin
  Result := UnicodeIsSurrogatePair(AHighSurrogate, ALowSurrogate);
end;

{ True when AString[AIndex] and AString[AIndex+1] form a surrogate pair
  (AIndex is 1-based). Returns False when the unit at AIndex is not a high
  surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), and also when a high surrogate is the last unit of
  AString.
  NOTE(review): raising in the latter case, rather than returning False, makes
  this predicate throw for a valid index - confirm this is intended. }
class function TCharacter.IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean;
var
  len : SizeInt;
begin
  len := Length(AString);
  if (AIndex < 1) or (AIndex > len) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, len]);

  if not IsHighSurrogate(AString[AIndex]) then
    Exit(False);

  if (AIndex + 1) > len then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex + 1, len]);
  Result := IsSurrogatePair(AString[AIndex], AString[AIndex + 1]);
end;
// True when the category of the UTF-16 unit AChar is in LETTER_CATEGORIES.
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in LETTER_CATEGORIES;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, LETTER_CATEGORIES);
end;
// True when the category of the UTF-16 unit AChar is in
// LETTER_OR_DIGIT_CATEGORIES.
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in LETTER_OR_DIGIT_CATEGORIES;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, LETTER_OR_DIGIT_CATEGORIES);
end;
// True when the UTF-16 unit AChar has general category Ll (lowercase letter).
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucLowercaseLetter;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucLowercaseLetter);
end;
// True when the category of the UTF-16 unit AChar is in NUMBER_CATEGORIES.
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in NUMBER_CATEGORIES;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, NUMBER_CATEGORIES);
end;
// True when the category of the UTF-16 unit AChar is in
// PUNCTUATION_CATEGORIES.
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in PUNCTUATION_CATEGORIES;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, PUNCTUATION_CATEGORIES);
end;
// True when the category of the UTF-16 unit AChar is in SEPARATOR_CATEGORIES.
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Result := GetUnicodeCategory(AChar) in SEPARATOR_CATEGORIES;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, SEPARATOR_CATEGORIES);
end;
// True when the category of the UTF-16 unit AChar is in SYMBOL_CATEGORIES.
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Result := GetUnicodeCategory(AChar) in SYMBOL_CATEGORIES;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, SYMBOL_CATEGORIES);
end;
// True when the UTF-16 unit AChar has general category Lu (uppercase letter).
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucUppercaseLetter;
end;

// Same test for the code point starting at AString[AIndex] (1-based);
// validation and exceptions are those of TestCategory.
class function TCharacter.IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucUppercaseLetter);
end;
// Returns the WhiteSpace flag of the property record of the UTF-16 unit AChar.
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Result := props^.WhiteSpace;
end;

{ Returns the WhiteSpace flag of the code point starting at AString[AIndex]
  (1-based); a surrogate pair is looked up as a whole.
  Raises:
    EArgumentOutOfRangeException - AIndex is outside 1..Length(AString)
                                   (also propagated from IsSurrogatePair);
    EArgumentException           - the unit at AIndex is a surrogate that does
                                   not start a valid surrogate pair. }
class function TCharacter.IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean;
var
  props : PUC_Prop;
  len   : SizeInt;
begin
  len := Length(AString);
  if (AIndex < 1) or (AIndex > len) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, len]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if IsSurrogatePair(AString, AIndex) then
      props := GetProps(AString[AIndex], AString[AIndex + 1])
    else
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  end;

  Result := props^.WhiteSpace;
end;
{ Lower-cases a single UTF-16 unit using the SimpleLowerCase field of its
  property record; when that field yields unit 0, AChar is returned unchanged. }
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
  if mapped <> UnicodeChar(0) then
    Result := mapped
  else
    Result := AChar;
end;

// Lower-cases a whole string with no options set.
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToLower(AString, []);
end;

{ Lower-cases a whole string through UnicodeToLower.
  coIgnoreInvalidSequence in AOptions is forwarded as the "ignore invalid
  sequence" flag. Raises EArgumentException when UnicodeToLower returns a
  non-zero status. }
class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := TCharacterOption.coIgnoreInvalidSequence in AOptions;
  if UnicodeToLower(AString, ignoreInvalid, Result) <> 0 then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;
{ Upper-cases a single UTF-16 unit using the SimpleUpperCase field of its
  property record; when that field yields unit 0, AChar is returned unchanged. }
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
  if mapped <> UnicodeChar(0) then
    Result := mapped
  else
    Result := AChar;
end;

// Upper-cases a whole string with no options set.
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToUpper(AString, []);
end;

{ Upper-cases a whole string through UnicodeToUpper.
  coIgnoreInvalidSequence in AOptions is forwarded as the "ignore invalid
  sequence" flag. Raises EArgumentException when UnicodeToUpper returns a
  non-zero status. }
class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := TCharacterOption.coIgnoreInvalidSequence in AOptions;
  if UnicodeToUpper(AString, ignoreInvalid, Result) <> 0 then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;
- {$endif VER2_4}
- end.
|