12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677 |
- { Unicode "Character" properties handler.
- Copyright (c) 2012 by Inoussa OUEDRAOGO
- The source code is distributed under the Library GNU
- General Public License with the following modification:
- - object files and libraries linked into an application may be
- distributed without source code.
- If you didn't receive a copy of the file COPYING, contact:
- Free Software Foundation
- 675 Mass Ave
- Cambridge, MA 02139
- USA
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
- {$IFNDEF FPC_DOTTEDUNITS}
- unit Character;
- {$ENDIF FPC_DOTTEDUNITS}
- interface
- {$mode objfpc}
- {$H+}
- {$PACKENUM 1}
- {$SCOPEDENUMS ON}
- {$modeswitch typehelpers}
- {$IFDEF FPC_DOTTEDUNITS}
- uses
- System.CodePages.unicodedata;
- {$ELSE FPC_DOTTEDUNITS}
- uses
- unicodedata;
- {$ENDIF FPC_DOTTEDUNITS}
- type
- { Unicode General Category.
-   One member per general-category value; the two-letter category code and
-   its meaning are noted beside each member. The unit is compiled with
-   SCOPEDENUMS ON, so members are referenced in qualified form, e.g.
-   TUnicodeCategory.ucUppercaseLetter, and with PACKENUM 1.
-   NOTE(review): the member order presumably mirrors the category numbering
-   of the unicodedata unit - confirm before reordering or inserting members. }
- TUnicodeCategory = (
- ucUppercaseLetter, // Lu = Letter, uppercase
- ucLowercaseLetter, // Ll = Letter, lowercase
- ucTitlecaseLetter, // Lt = Letter, titlecase
- ucModifierLetter, // Lm = Letter, modifier
- ucOtherLetter, // Lo = Letter, other
-
- ucNonSpacingMark, // Mn = Mark, nonspacing
- ucCombiningMark, // Mc = Mark, spacing combining
- ucEnclosingMark, // Me = Mark, enclosing
-
- ucDecimalNumber, // Nd = Number, decimal digit
- ucLetterNumber, // Nl = Number, letter
- ucOtherNumber, // No = Number, other
-
- ucConnectPunctuation, // Pc = Punctuation, connector
- ucDashPunctuation, // Pd = Punctuation, dash
- ucOpenPunctuation, // Ps = Punctuation, open
- ucClosePunctuation, // Pe = Punctuation, close
- ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
- ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
- ucOtherPunctuation, // Po = Punctuation, other
-
- ucMathSymbol, // Sm = Symbol, math
- ucCurrencySymbol, // Sc = Symbol, currency
- ucModifierSymbol, // Sk = Symbol, modifier
- ucOtherSymbol, // So = Symbol, other
-
- ucSpaceSeparator, // Zs = Separator, space
- ucLineSeparator, // Zl = Separator, line
- ucParagraphSeparator, // Zp = Separator, paragraph
-
- ucControl, // Cc = Other, control
- ucFormat, // Cf = Other, format
- ucSurrogate, // Cs = Other, surrogate
- ucPrivateUse, // Co = Other, private use
- ucUnassigned // Cn = Other, not assigned (including noncharacters)
- );
- TUnicodeCategorySet = set of TUnicodeCategory; // set form, accepted by the TestCategory overload taking ACategory as a set
- TCharacterOption = (coIgnoreInvalidSequence); // option for the ToLower/ToUpper overloads taking AOptions; NOTE(review): presumably tolerates malformed surrogate sequences - confirm against the implementation
- TCharacterOptions = set of TCharacterOption; // type of the AOptions parameter
- { TCharacter
-   Sealed class exposing Unicode character queries and conversions as static
-   class functions. Most operations are offered in three overloads: one
-   UnicodeChar (a single UTF-16 code unit), a position AIndex inside a
-   UnicodeString, and a UCS4Char code point.
-   NOTE(review): the indexing base of AIndex and the behaviour on an invalid
-   index are not visible in this declaration - confirm against the
-   implementation. }
- TCharacter = class sealed
- private
- class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static; // single-category test at a string position
- class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static; // category-set test at a string position
- public
- const // bounds of the UTF-16 surrogate ranges, expressed as UnicodeChar
- MaxHighSurrogate = UnicodeChar(HIGH_SURROGATE_END);
- MaxLowSurrogate = UnicodeChar(LOW_SURROGATE_END);
- MaxSurrogate = UnicodeChar(LOW_SURROGATE_END); // upper bound of the combined range: same value as MaxLowSurrogate
- MinHighSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN);
- MinLowSurrogate = UnicodeChar(LOW_SURROGATE_BEGIN);
- MinSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN); // lower bound of the combined range: same value as MinHighSurrogate
- public
- constructor Create; // NOTE(review): every other member is static; the purpose of this constructor is not visible here
- class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
- class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
- class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static; // ACharLength is an output parameter
- class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;
-
- class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
- class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
- class function GetNumericValue(aChar: UCS4Char): Double; overload; inline; static;
- class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
- class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;
- class function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; inline; static;
- class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsControl(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsDigit(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
- class function IsSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
- class function IsHighSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
- class function IsLowSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
- class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
- class function IsLetter(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsLetterOrDigit(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
-
- class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsLower(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
- class function IsNumber(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsPunctuation(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsSeparator(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsSymbol(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
- class function IsUpper(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
- class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
- class function IsWhiteSpace(aChar: UCS4Char): Boolean; overload; inline; static;
- class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
- class function ToLower(const AString : UnicodeString) : UnicodeString; inline;overload; static;
- class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static; // whole-string variant with explicit options
- class function ToLower(aChar: UCS4Char): UCS4Char; overload; inline; static;
- class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
- class function ToUpper(const AString : UnicodeString) : UnicodeString; inline; overload; static;
- class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static; // whole-string variant with explicit options
- class function ToUpper(aChar: UCS4Char): UCS4Char; overload; inline; static;
- class function MaxCodePoint: Integer; static;
- class function UnicodeDataVersion: UnicodeString; static;
- class function IsDefined(aChar: UnicodeChar): Boolean; overload; inline; static;
- class function IsDefined(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsDefined(const S: UnicodeString; Index: Integer): Boolean; overload; inline; static;
- end;
- { TCharHelper
-   Type helper for UnicodeChar. The instance functions (declared without
-   "class") act on the helped character itself; the static class functions
-   take either a position Index inside a UnicodeString S or a UCS4Char code
-   point. The surrogate-bound constants repeat those declared on TCharacter.
-   NOTE(review): the indexing base of Index is not visible in this
-   declaration - confirm against the implementation. }
- TCharHelper = type helper for UnicodeChar
- public
- const // bounds of the UTF-16 surrogate ranges, expressed as UnicodeChar
- MaxHighSurrogate = UnicodeChar(HIGH_SURROGATE_END);
- MaxLowSurrogate = UnicodeChar(LOW_SURROGATE_END);
- MaxSurrogate = UnicodeChar(LOW_SURROGATE_END); // same value as MaxLowSurrogate
- MinHighSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN);
- MinLowSurrogate = UnicodeChar(LOW_SURROGATE_BEGIN);
- MinSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN); // same value as MinHighSurrogate
- class function MaxCodePoint: Integer; static;
- class function UnicodeDataVersion: UnicodeString; static;
- class function ConvertFromUtf32(aChar: UCS4Char): UnicodeString; static;
- class function ConvertToUtf32(const S: UnicodeString; Index: Integer): UCS4Char; overload; inline; static;
- class function ConvertToUtf32(const S: UnicodeString; Index: Integer; out CharLength: Integer): UCS4Char; overload; static; // CharLength is an output parameter
- class function ConvertToUtf32(const HighSurrogate, LowSurrogate: UnicodeChar): UCS4Char; overload; static;
- function GetNumericValue: Double; overload; // instance form: operates on the helped character
- class function GetNumericValue(const S: UnicodeString; Index: Integer): Double; overload; static;
- function GetUnicodeCategory: TUnicodeCategory; overload;
- class function GetUnicodeCategory(const S: UnicodeString; Index: Integer): TUnicodeCategory; overload; static;
- function IsControl: Boolean; overload;
- function IsDefined: Boolean; overload;
- function IsDigit: Boolean; overload;
- function IsHighSurrogate: Boolean; overload; inline;
- function IsInArray(const SomeChars: array of UnicodeChar): Boolean; overload; // open-array parameter: any number of candidate characters
- function IsLetter: Boolean; overload;
- function IsLetterOrDigit: Boolean; overload;
- function IsLower: Boolean; overload;
- function IsLowSurrogate: Boolean; overload; inline;
- function IsNumber: Boolean; overload;
- function IsPunctuation: Boolean; overload;
- function IsSeparator: Boolean; overload;
- function IsSurrogate: Boolean; overload; inline;
- function IsSymbol: Boolean; overload;
- function IsUpper: Boolean; overload;
- function IsWhiteSpace: Boolean; overload;
- function ToLower: UnicodeChar; overload;
- function ToUpper: UnicodeChar; overload;
- function ToUCS4Char: UCS4Char; inline;
- class function IsControl(const S: UnicodeString; Index: Integer): Boolean; overload; static; inline; // string-position overloads start here
- class function IsDefined(const S: UnicodeString; Index: Integer): Boolean; overload; static; inline;
- class function IsDigit(const S: UnicodeString; Index: Integer): Boolean; overload; static; inline;
- class function IsHighSurrogate(const S: UnicodeString; Index: Integer): Boolean; overload; inline; static;
- class function IsInArray(const S: UnicodeString; Index: Integer; const SomeChars: array of UnicodeChar): Boolean; overload; static; inline;
- class function IsLetter(const S: UnicodeString; Index: Integer): Boolean; overload; static;inline;
- class function IsLetterOrDigit(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsLower(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsLowSurrogate(const S: UnicodeString; Index: Integer): Boolean; overload; inline; static;
- class function IsNumber(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsPunctuation(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsSeparator(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsSurrogate(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsSurrogatePair(const HighSurrogate, LowSurrogate: UnicodeChar): Boolean; overload; inline; static;
- class function IsSurrogatePair(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsSymbol(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsUpper(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function IsWhiteSpace(const S: UnicodeString; Index: Integer): Boolean; overload; static;
- class function ToLower(const S: UnicodeString): UnicodeString; overload; static;
- class function ToUpper(const S: UnicodeString): UnicodeString; overload; static;
- class function GetNumericValue(aChar: UCS4Char): Double; overload; static; // UCS4Char code-point overloads start here
- class function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; static;
- class function IsControl(aChar: UCS4Char): Boolean; overload; static;
- class function IsDefined(aChar: UCS4Char): Boolean; overload; static;
- class function IsDigit(aChar: UCS4Char): Boolean; overload; static;
- class function IsHighSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsLetter(aChar: UCS4Char): Boolean; overload; static;
- class function IsLetterOrDigit(aChar: UCS4Char): Boolean; overload; static;
- class function IsLower(aChar: UCS4Char): Boolean; overload; static;
- class function IsLowSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
- class function IsNumber(aChar: UCS4Char): Boolean; overload; static;
- class function IsPunctuation(aChar: UCS4Char): Boolean; overload; static;
- class function IsSeparator(aChar: UCS4Char): Boolean; overload; static;
- class function IsSurrogate(Surrogate: UCS4Char): Boolean; overload; inline; static;
- class function IsSymbol(aChar: UCS4Char): Boolean; overload; static;
- class function IsUpper(aChar: UCS4Char): Boolean; overload; static;
- class function IsWhiteSpace(aChar: UCS4Char): Boolean; overload; static;
- class function ToLower(aChar: UCS4Char): UCS4Char; overload; static;
- class function ToUpper(aChar: UCS4Char): UCS4Char; overload; static;
- end;
- // flat functions: unit-level counterparts of the TCharacter class functions, callable without a class qualifier; offered per UnicodeChar, per string position and per UCS4Char
- function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
- function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
- function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; // ACharLength is an output parameter
- function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
- function GetNumericValue(AChar : UnicodeChar) : Double; overload;
- function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload;
- function GetNumericValue(aChar: UCS4Char): Double; overload; inline;
- function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload;
- function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload;
- function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; inline;
- function IsControl(AChar : UnicodeChar) : Boolean; overload;
- function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsControl(aChar: UCS4Char): Boolean; overload; inline;
- function IsDigit(AChar : UnicodeChar) : Boolean; overload;
- function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsDigit(aChar: UCS4Char): Boolean; overload; inline;
- function IsSurrogate(AChar : UnicodeChar) : Boolean; overload;
- function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsSurrogate(Surrogate: UCS4Char): Boolean; overload; inline;
- function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload;
- function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsHighSurrogate(aChar: UCS4Char): Boolean; overload; inline;
- function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload;
- function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLowSurrogate(aChar: UCS4Char): Boolean; overload; inline;
- function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload;
- function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLetter(AChar : UnicodeChar) : Boolean; overload;
- function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLetter(aChar: UCS4Char): Boolean; overload; inline;
- function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload;
- function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLetterOrDigit(aChar: UCS4Char): Boolean; overload; inline;
- function IsLower(AChar : UnicodeChar) : Boolean; overload;
- function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsLower(aChar: UCS4Char): Boolean; overload; inline;
- function IsNumber(AChar : UnicodeChar) : Boolean; overload;
- function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsNumber(aChar: UCS4Char): Boolean; overload; inline;
- function IsPunctuation(AChar : UnicodeChar) : Boolean; overload;
- function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsPunctuation(aChar: UCS4Char): Boolean; overload; inline;
- function IsSeparator(AChar : UnicodeChar) : Boolean; overload;
- function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsSeparator(aChar: UCS4Char): Boolean; overload; inline;
- function IsSymbol(AChar : UnicodeChar) : Boolean; overload;
- function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsSymbol(aChar: UCS4Char): Boolean; overload; inline;
- function IsUpper(AChar : UnicodeChar) : Boolean; overload;
- function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsUpper(aChar: UCS4Char): Boolean; overload; inline;
- function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload;
- function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
- function IsWhiteSpace(aChar: UCS4Char): Boolean; overload; inline;
- function ToLower(AChar : UnicodeChar) : UnicodeChar; overload;
- function ToLower(const AString : UnicodeString) : UnicodeString; overload;
- function ToLower(aChar: UCS4Char): UCS4Char; overload; inline;
- function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload;
- function ToUpper(const AString : UnicodeString) : UnicodeString; overload;
- function ToUpper(aChar: UCS4Char): UCS4Char; overload; inline;
- function IsDefined(aChar: UnicodeChar): Boolean; overload; inline;
- function IsDefined(aChar: UCS4Char): Boolean; overload; inline;
- function IsDefined(const S: Unicodestring; Index: Integer): Boolean; overload; inline;
- implementation
- {$IFDEF FPC_DOTTEDUNITS}
- uses
- System.SysUtils,
- System.RtlConsts;
- {$ELSE FPC_DOTTEDUNITS}
- uses
- SysUtils,
- RtlConsts;
- {$ENDIF FPC_DOTTEDUNITS}
- const { Implementation-private sets of TUnicodeCategory members, one set
-   per character family. NOTE(review): presumably passed to the TestCategory
-   overload that takes a category set - the call sites are outside this view. }
- LETTER_CATEGORIES = [ // Lu, Ll, Lt, Lm, Lo
- TUnicodeCategory.ucUppercaseLetter, TUnicodeCategory.ucLowercaseLetter,
- TUnicodeCategory.ucTitlecaseLetter, TUnicodeCategory.ucModifierLetter,
- TUnicodeCategory.ucOtherLetter
- ];
- LETTER_OR_DIGIT_CATEGORIES = // all letter categories plus Nd and Nl
- LETTER_CATEGORIES +
- [TUnicodeCategory.ucDecimalNumber,TUnicodeCategory.ucLetterNumber]; // NOTE(review): Nl (letter numbers) is included besides Nd - confirm this is the intended "digit" notion
- NUMBER_CATEGORIES = // Nd, Nl, No
- [ TUnicodeCategory.ucDecimalNumber, TUnicodeCategory.ucLetterNumber,
- TUnicodeCategory.ucOtherNumber
- ];
- PUNCTUATION_CATEGORIES = [ // Pc, Pd, Ps, Pe, Pi, Pf, Po
- TUnicodeCategory.ucConnectPunctuation, TUnicodeCategory.ucDashPunctuation,
- TUnicodeCategory.ucOpenPunctuation, TUnicodeCategory.ucClosePunctuation,
- TUnicodeCategory.ucInitialPunctuation, TUnicodeCategory.ucFinalPunctuation,
- TUnicodeCategory.ucOtherPunctuation
- ];
- SEPARATOR_CATEGORIES = // Zs, Zl, Zp
- [ TUnicodeCategory.ucSpaceSeparator, TUnicodeCategory.ucLineSeparator,
- TUnicodeCategory.ucParagraphSeparator
- ];
- SYMBOL_CATEGORIES = // Sm, Sc, Sk, So
- [ TUnicodeCategory.ucMathSymbol, TUnicodeCategory.ucCurrencySymbol,
- TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
- ];
- { Returns the property record of the UCS-4 code point aChar.
-   A code point up to $FFFF is a single UTF-16 code unit and has no
-   surrogate-pair form, so it must not be pushed through FromUCS4; it is
-   looked up through the single-code-unit GetProps overload instead. Only
-   supplementary code points are split into a high/low surrogate pair and
-   resolved through the pair overload.
-   NOTE(review): relies on a GetProps(Word) overload being exported by the
-   unicodedata unit - confirm. }
- Function GetProps(aChar: UCS4Char) : PUC_Prop; inline; overload;
- var
-   aHigh,aLow : UnicodeChar;
- begin
-   // Basic Multilingual Plane: direct lookup, no surrogate decomposition.
-   if aChar<=High(Word) then
-     Exit(GetProps(Word(aChar)));
-   // Supplementary planes: decompose, then look the pair up.
-   FromUCS4(aChar,aHigh,aLow);
-   Result:=GetProps(aHigh,aLow);
- end;
- { Unit-level IsDefined overloads. Each one hands its arguments unchanged to
-   the TCharacter.IsDefined overload with the same parameter list and returns
-   that result. }
- function IsDefined(aChar: UnicodeChar): Boolean; overload; inline;
- begin
-   Exit(TCharacter.IsDefined(aChar));
- end;
-
- function IsDefined(aChar: UCS4Char): Boolean; overload; inline;
- begin
-   Exit(TCharacter.IsDefined(aChar));
- end;
-
- function IsDefined(const S: UnicodeString; Index: Integer): Boolean; overload; inline;
- begin
-   Exit(TCharacter.IsDefined(S, Index));
- end;
- { Unit-level ConvertFromUtf32: returns whatever
-   TCharacter.ConvertFromUtf32 produces for the code point AChar. }
- function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
- begin
-   Exit(TCharacter.ConvertFromUtf32(AChar));
- end;
- { Unit-level ConvertToUtf32 overloads. Arguments, including the
-   ACharLength output parameter, are passed straight through to the
-   TCharacter.ConvertToUtf32 overload with the same parameter list. }
- function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer): UCS4Char;
- begin
-   Exit(TCharacter.ConvertToUtf32(AString, AIndex));
- end;
-
- function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer; out ACharLength: Integer): UCS4Char;
- begin
-   Exit(TCharacter.ConvertToUtf32(AString, AIndex, ACharLength));
- end;
-
- function ConvertToUtf32(const AHighSurrogate, ALowSurrogate: UnicodeChar): UCS4Char;
- begin
-   Exit(TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate));
- end;
- { Unit-level GetNumericValue overloads; each returns the Double computed by
-   the TCharacter.GetNumericValue overload taking the same arguments. }
- function GetNumericValue(AChar: UnicodeChar): Double;
- begin
-   Exit(TCharacter.GetNumericValue(AChar));
- end;
-
- function GetNumericValue(const AString: UnicodeString; AIndex: Integer): Double;
- begin
-   Exit(TCharacter.GetNumericValue(AString, AIndex));
- end;
-
- function GetNumericValue(aChar: UCS4Char): Double;
- begin
-   Exit(TCharacter.GetNumericValue(aChar));
- end;
- { Unit-level GetUnicodeCategory overloads; each returns the
-   TUnicodeCategory reported by the matching TCharacter.GetUnicodeCategory
-   overload. }
- function GetUnicodeCategory(AChar: UnicodeChar): TUnicodeCategory;
- begin
-   Exit(TCharacter.GetUnicodeCategory(AChar));
- end;
-
- function GetUnicodeCategory(const AString: UnicodeString; AIndex: Integer): TUnicodeCategory;
- begin
-   Exit(TCharacter.GetUnicodeCategory(AString, AIndex));
- end;
-
- function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; inline;
- begin
-   Exit(TCharacter.GetUnicodeCategory(aChar));
- end;
- { Unit-level IsControl overloads; each forwards to the TCharacter.IsControl
-   overload with the same parameter list. }
- function IsControl(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsControl(AChar));
- end;
-
- function IsControl(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsControl(AString, AIndex));
- end;
-
- function IsControl(aChar: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsControl(aChar));
- end;
- { Unit-level IsDigit overloads; each forwards to the TCharacter.IsDigit
-   overload with the same parameter list. }
- function IsDigit(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsDigit(AChar));
- end;
-
- function IsDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsDigit(AString, AIndex));
- end;
-
- function IsDigit(aChar: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsDigit(aChar));
- end;
- { Unit-level IsSurrogate overloads; each forwards to the
-   TCharacter.IsSurrogate overload accepting the same argument types. }
- function IsSurrogate(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsSurrogate(AChar));
- end;
-
- function IsSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsSurrogate(AString, AIndex));
- end;
-
- function IsSurrogate(Surrogate: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsSurrogate(Surrogate));
- end;
- { Unit-level IsHighSurrogate overloads; each forwards to the
-   TCharacter.IsHighSurrogate overload with the same parameter list. }
- function IsHighSurrogate(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsHighSurrogate(AChar));
- end;
-
- function IsHighSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsHighSurrogate(AString, AIndex));
- end;
-
- function IsHighSurrogate(aChar: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsHighSurrogate(aChar));
- end;
- { Unit-level IsLowSurrogate overloads; each forwards to the
-   TCharacter.IsLowSurrogate overload with the same parameter list. }
- function IsLowSurrogate(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsLowSurrogate(AChar));
- end;
-
- function IsLowSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsLowSurrogate(AString, AIndex));
- end;
-
- function IsLowSurrogate(aChar: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsLowSurrogate(aChar));
- end;
- { Unit-level IsSurrogatePair overloads (two code units, or a string
-   position); each forwards to the matching TCharacter.IsSurrogatePair
-   overload. }
- function IsSurrogatePair(const AHighSurrogate, ALowSurrogate: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate));
- end;
-
- function IsSurrogatePair(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsSurrogatePair(AString, AIndex));
- end;
- { Unit-level IsLetter overloads; each forwards to the TCharacter.IsLetter
-   overload with the same parameter list. }
- function IsLetter(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsLetter(AChar));
- end;
-
- function IsLetter(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsLetter(AString, AIndex));
- end;
-
- function IsLetter(aChar: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsLetter(aChar));
- end;
- { Unit-level IsLetterOrDigit overloads; each forwards to the
-   TCharacter.IsLetterOrDigit overload with the same parameter list. }
- function IsLetterOrDigit(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsLetterOrDigit(AChar));
- end;
-
- function IsLetterOrDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsLetterOrDigit(AString, AIndex));
- end;
-
- function IsLetterOrDigit(aChar: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsLetterOrDigit(aChar));
- end;
- { Unit-level IsLower overloads; each forwards to the TCharacter.IsLower
-   overload with the same parameter list. }
- function IsLower(AChar: UnicodeChar): Boolean;
- begin
-   Exit(TCharacter.IsLower(AChar));
- end;
-
- function IsLower(const AString: UnicodeString; AIndex: Integer): Boolean;
- begin
-   Exit(TCharacter.IsLower(AString, AIndex));
- end;
-
- function IsLower(aChar: UCS4Char): Boolean;
- begin
-   Exit(TCharacter.IsLower(aChar));
- end;
{ Unit-level shortcuts for TCharacter.IsNumber. Each overload forwards its
  arguments to the class function of the same shape. }

{ UTF-16 code unit overload. }
function IsNumber(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsNumber(AChar));
end;

{ String/index overload. }
function IsNumber(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsNumber(AString, AIndex));
end;

{ UCS4 code point overload. }
function IsNumber(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsNumber(aChar));
end;
{ Unit-level shortcuts for TCharacter.IsPunctuation. Each overload forwards
  its arguments to the class function of the same shape. }

{ UTF-16 code unit overload. }
function IsPunctuation(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AChar));
end;

{ String/index overload. }
function IsPunctuation(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AString, AIndex));
end;

{ UCS4 code point overload. }
function IsPunctuation(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsPunctuation(aChar));
end;
{ Unit-level shortcuts for TCharacter.IsSeparator. Each overload forwards its
  arguments to the class function of the same shape. }

{ UTF-16 code unit overload. }
function IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSeparator(AChar));
end;

{ String/index overload. }
function IsSeparator(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSeparator(AString, AIndex));
end;

{ UCS4 code point overload. }
function IsSeparator(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsSeparator(aChar));
end;
{ Unit-level shortcuts for TCharacter.IsSymbol. Each overload forwards its
  arguments to the class function of the same shape. }

{ UTF-16 code unit overload. }
function IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSymbol(AChar));
end;

{ String/index overload. }
function IsSymbol(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSymbol(AString, AIndex));
end;

{ UCS4 code point overload. }
function IsSymbol(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsSymbol(aChar));
end;
{ Unit-level shortcuts for TCharacter.IsUpper. Each overload forwards its
  arguments to the class function of the same shape. }

{ UCS4 code point overload. }
function IsUpper(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsUpper(aChar));
end;

{ UTF-16 code unit overload. }
function IsUpper(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsUpper(AChar));
end;

{ String/index overload. }
function IsUpper(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsUpper(AString, AIndex));
end;
{ Unit-level shortcuts for TCharacter.IsWhiteSpace. Each overload forwards
  its arguments to the class function of the same shape. }

{ UTF-16 code unit overload. }
function IsWhiteSpace(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AChar));
end;

{ String/index overload. }
function IsWhiteSpace(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AString, AIndex));
end;

{ UCS4 code point overload. }
function IsWhiteSpace(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(aChar));
end;
{ Unit-level shortcuts for TCharacter.ToLower. Each overload forwards its
  argument to the class function of the same shape. }

{ Single UTF-16 code unit. }
function ToLower(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToLower(AChar));
end;

{ Whole string. }
function ToLower(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToLower(AString));
end;

{ Single UCS4 code point. }
function ToLower(aChar: UCS4Char): UCS4Char;
begin
  Exit(TCharacter.ToLower(aChar));
end;
{ Unit-level shortcuts for TCharacter.ToUpper. Each overload forwards its
  argument to the class function of the same shape. }

{ Single UTF-16 code unit. }
function ToUpper(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToUpper(AChar));
end;

{ Whole string. }
function ToUpper(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToUpper(AString));
end;

{ Single UCS4 code point. }
function ToUpper(aChar: UCS4Char): UCS4Char;
begin
  Exit(TCharacter.ToUpper(aChar));
end;
- { TCharacter }
{ Tests whether the character starting at AString[AIndex] has exactly the
  general category ACategory.

  Raises EArgumentOutOfRangeException when AIndex is outside 1..Length(AString).
  When the code unit at AIndex is categorised as a surrogate, the properties
  of the pair (AIndex, AIndex+1) are used instead; EArgumentException is
  raised if IsSurrogatePair reports that no valid pair starts there. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategory
) : Boolean;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    { A surrogate on its own carries no category; look up the pair. }
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Exit(TUnicodeCategory(props^.Category) = ACategory);
end;
{ Tests whether the general category of the character starting at
  AString[AIndex] is a member of the set ACategory.

  Raises EArgumentOutOfRangeException when AIndex is outside 1..Length(AString).
  When the code unit at AIndex is categorised as a surrogate, the properties
  of the pair (AIndex, AIndex+1) are used instead; EArgumentException is
  raised if IsSurrogatePair reports that no valid pair starts there. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategorySet
) : Boolean;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    { A surrogate on its own carries no category; look up the pair. }
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Exit(TUnicodeCategory(props^.Category) in ACategory);
end;
{ TCharacter only offers class functions; the constructor unconditionally
  raises ENoConstructException, naming the class in the message. }
constructor TCharacter.Create;
begin
  raise ENoConstructException.CreateFmt(
    SClassCantBeConstructed,
    [ClassName]
  );
end;
{ Encodes the UCS4 code point AChar as a UTF-16 string.

  Values below UCS4_HALF_BASE yield a one-character string, values from
  UCS4_HALF_BASE up to MAX_LEGAL_UTF32 yield a high/low surrogate pair.
  Raises EArgumentOutOfRangeException when AChar is itself a surrogate or
  exceeds MAX_LEGAL_UTF32. }
class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
var
  offset : UCS4Char;
begin
  if AChar >= UCS4_HALF_BASE then
  begin
    if AChar > MAX_LEGAL_UTF32 then
      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
    { Split the offset from UCS4_HALF_BASE: upper bits -> high surrogate,
      masked lower bits -> low surrogate. }
    offset := AChar - UCS4_HALF_BASE;
    SetLength(Result, 2);
    Result[1] := UnicodeChar((offset shr 10) + HIGH_SURROGATE_BEGIN);
    Result[2] := UnicodeChar((offset and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
    Exit;
  end;

  if IsSurrogate(UnicodeChar(AChar)) then
    raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
  Result := UnicodeChar(AChar);
end;
{ Decodes the code point that starts at AString[AIndex].

  A code unit that is not a high surrogate is returned as is. A high
  surrogate is combined with the following code unit through the
  (high, low) overload.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString) and EArgumentException when a high surrogate is the
  last code unit of the string. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
var
  lead : UnicodeChar;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  lead := AString[AIndex];
  if not IsHighSurrogate(lead) then
    Exit(Word(lead));

  if Length(AString) < Succ(AIndex) then
    raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
  Result := ConvertToUtf32(lead, AString[Succ(AIndex)]);
end;
{ Decodes the code point that starts at AString[AIndex] and reports in
  ACharLength how many UTF-16 code units it occupies (1, or 2 for a
  surrogate pair). ACharLength is only assigned when decoding succeeds.

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString) and EArgumentException when a high surrogate is the
  last code unit of the string. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
var
  lead : UnicodeChar;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  lead := AString[AIndex];
  if IsHighSurrogate(lead) then
  begin
    if Length(AString) < Succ(AIndex) then
      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
    Result := ConvertToUtf32(lead, AString[Succ(AIndex)]);
    ACharLength := 2;
    Exit;
  end;

  Result := Word(lead);
  ACharLength := 1;
end;
{ Combines a high and a low surrogate into one UCS4 code point via ToUCS4.

  Raises EArgumentOutOfRangeException when AHighSurrogate is not a high
  surrogate or, checked second, when ALowSurrogate is not a low surrogate. }
class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
var
  highOk : Boolean;
begin
  highOk := IsHighSurrogate(AHighSurrogate);
  if not highOk then
    raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);

  if not IsLowSurrogate(ALowSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(ALowSurrogate)]);

  Exit(ToUCS4(AHighSurrogate, ALowSurrogate));
end;
{ Returns the NumericValue property recorded for the UTF-16 code unit AChar. }
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Exit(props^.NumericValue);
end;

{ Returns the NumericValue property of the character starting at
  AString[AIndex].

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). A code unit categorised as a surrogate is resolved
  together with its successor; EArgumentException is raised when
  IsSurrogatePair reports that no valid pair starts at AIndex. }
class function TCharacter.GetNumericValue(
  const AString : UnicodeString;
        AIndex  : Integer
) : Double;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Exit(props^.NumericValue);
end;

{ Returns the NumericValue property recorded for the UCS4 code point aChar. }
class function TCharacter.GetNumericValue(aChar: UCS4Char): Double;
var
  props : PUC_Prop;
begin
  props := GetProps(aChar);
  Exit(props^.NumericValue);
end;
{ Returns the general category recorded for the UTF-16 code unit AChar. }
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Exit(TUnicodeCategory(props^.Category));
end;

{ Returns the general category of the character starting at AString[AIndex].

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). A code unit categorised as a surrogate is resolved
  together with its successor; EArgumentException is raised when
  IsSurrogatePair reports that no valid pair starts at AIndex. }
class function TCharacter.GetUnicodeCategory(
  const AString : UnicodeString;
        AIndex  : Integer
) : TUnicodeCategory;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Exit(TUnicodeCategory(props^.Category));
end;

{ Returns the general category of the UCS4 code point aChar.
  Yields ucUnassigned when aChar exceeds MAX_LEGAL_UTF32 or when GetProps
  returns nil for it. }
class function TCharacter.GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory;
var
  props : PUC_Prop;
begin
  if Cardinal(Ord(aChar)) <= MAX_LEGAL_UTF32 then
  begin
    props := GetProps(aChar);
    if props <> nil then
      Exit(TUnicodeCategory(props^.Category));
  end;
  Result := TUnicodeCategory.ucUnassigned;
end;
{ True when the UTF-16 code unit AChar has general category ucControl. }
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat = TUnicodeCategory.ucControl);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsControl(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucControl));
end;

{ True when the UCS4 code point aChar has general category ucControl. }
class function TCharacter.IsControl(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat = TUnicodeCategory.ucControl);
end;
{ True when the UTF-16 code unit AChar has general category ucDecimalNumber. }
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat = TUnicodeCategory.ucDecimalNumber);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsDigit(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucDecimalNumber));
end;

{ True when the UCS4 code point aChar has general category ucDecimalNumber. }
class function TCharacter.IsDigit(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat = TUnicodeCategory.ucDecimalNumber);
end;
{ True when the UTF-16 code unit AChar has general category ucSurrogate. }
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat = TUnicodeCategory.ucSurrogate);
end;

{ Tests the single code unit AString[AIndex].
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Exit(IsSurrogate(AString[AIndex]));
end;

{ True when the UCS4 value lies within MinSurrogate..MaxSurrogate. }
class function TCharacter.IsSurrogate(aChar: UCS4Char): Boolean;
begin
  Exit(not ((aChar < UCS4Char(MinSurrogate)) or (aChar > UCS4Char(MaxSurrogate))));
end;
{ True when AChar is categorised as a surrogate and its code lies within
  HIGH_SURROGATE_BEGIN..HIGH_SURROGATE_END. }
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
var
  code : Word;
begin
  code := Word(AChar);
  if TUnicodeCategory(GetProps(code)^.Category) <> TUnicodeCategory.ucSurrogate then
    Exit(False);
  Result := (code >= HIGH_SURROGATE_BEGIN) and (code <= HIGH_SURROGATE_END);
end;

{ Tests the single code unit AString[AIndex].
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsHighSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Exit(IsHighSurrogate(AString[AIndex]));
end;

{ True when the UCS4 value lies within MinHighSurrogate..MaxHighSurrogate. }
class function TCharacter.IsHighSurrogate(aChar: UCS4Char): Boolean;
begin
  Exit(not ((aChar < UCS4Char(MinHighSurrogate)) or (aChar > UCS4Char(MaxHighSurrogate))));
end;
{ True when AChar is categorised as a surrogate and its code lies within
  LOW_SURROGATE_BEGIN..LOW_SURROGATE_END. }
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
var
  code : Word;
begin
  code := Word(AChar);
  if TUnicodeCategory(GetProps(code)^.Category) <> TUnicodeCategory.ucSurrogate then
    Exit(False);
  Result := (code >= LOW_SURROGATE_BEGIN) and (code <= LOW_SURROGATE_END);
end;

{ Tests the single code unit AString[AIndex].
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsLowSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Exit(IsLowSurrogate(AString[AIndex]));
end;

{ True when the UCS4 value lies within MinLowSurrogate..MaxLowSurrogate. }
class function TCharacter.IsLowSurrogate(aChar: UCS4Char): Boolean;
begin
  Exit(not ((aChar < UCS4Char(MinLowSurrogate)) or (aChar > UCS4Char(MaxLowSurrogate))));
end;
{ Delegates to UnicodeIsSurrogatePair to decide whether the two code units
  form a surrogate pair. }
class function TCharacter.IsSurrogatePair(
  const AHighSurrogate,
        ALowSurrogate : UnicodeChar
) : Boolean;
begin
  Exit(UnicodeIsSurrogatePair(AHighSurrogate, ALowSurrogate));
end;

{ Tests whether a surrogate pair starts at AString[AIndex].

  Returns False when the code unit at AIndex is not a high surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), and also when a high surrogate sits at the very end of
  the string so that AIndex+1 is out of range. }
class function TCharacter.IsSurrogatePair(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  if IsHighSurrogate(AString[AIndex]) then
  begin
    if ((AIndex+1) > Length(AString)) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex+1, Length(AString)]);
    Exit(IsSurrogatePair(AString[AIndex], AString[AIndex+1]));
  end;

  Result := False;
end;
{ True when the category of the UTF-16 code unit AChar is in LETTER_CATEGORIES. }
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat in LETTER_CATEGORIES);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsLetter(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, LETTER_CATEGORIES));
end;

{ True when the category of the UCS4 code point aChar is in LETTER_CATEGORIES. }
class function TCharacter.IsLetter(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat in LETTER_CATEGORIES);
end;
{ True when the category of the UTF-16 code unit AChar is in
  LETTER_OR_DIGIT_CATEGORIES. }
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat in LETTER_OR_DIGIT_CATEGORIES);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsLetterOrDigit(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, LETTER_OR_DIGIT_CATEGORIES));
end;

{ True when the category of the UCS4 code point aChar is in
  LETTER_OR_DIGIT_CATEGORIES. }
class function TCharacter.IsLetterOrDigit(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat in LETTER_OR_DIGIT_CATEGORIES);
end;
{ True when the UTF-16 code unit AChar has general category ucLowercaseLetter. }
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat = TUnicodeCategory.ucLowercaseLetter);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsLower(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucLowercaseLetter));
end;

{ True when the UCS4 code point aChar has general category ucLowercaseLetter. }
class function TCharacter.IsLower(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat = TUnicodeCategory.ucLowercaseLetter);
end;
{ True when the category of the UTF-16 code unit AChar is in NUMBER_CATEGORIES. }
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat in NUMBER_CATEGORIES);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsNumber(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, NUMBER_CATEGORIES));
end;

{ True when the category of the UCS4 code point aChar is in NUMBER_CATEGORIES. }
class function TCharacter.IsNumber(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat in NUMBER_CATEGORIES);
end;
{ True when the category of the UTF-16 code unit AChar is in
  PUNCTUATION_CATEGORIES. }
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat in PUNCTUATION_CATEGORIES);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsPunctuation(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, PUNCTUATION_CATEGORIES));
end;

{ True when the category of the UCS4 code point aChar is in
  PUNCTUATION_CATEGORIES. }
class function TCharacter.IsPunctuation(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat in PUNCTUATION_CATEGORIES);
end;
{ True when the category of the UTF-16 code unit AChar is in
  SEPARATOR_CATEGORIES. }
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat in SEPARATOR_CATEGORIES);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsSeparator(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, SEPARATOR_CATEGORIES));
end;
{ True when the category of the UCS4 code point aChar is in
  SEPARATOR_CATEGORIES.

  The code point is handed to GetProps at full width. It was previously cast
  to Word first, which dropped everything above bit 15, so a code point above
  $FFFF was classified using the properties of an unrelated 16-bit value. }
class function TCharacter.IsSeparator(aChar: UCS4Char): Boolean;
begin
  Result := (TUnicodeCategory(GetProps(aChar)^.Category) in SEPARATOR_CATEGORIES);
end;
{ True when the category of the UTF-16 code unit AChar is in SYMBOL_CATEGORIES. }
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat in SYMBOL_CATEGORIES);
end;

{ True when the category of the UCS4 code point aChar is in SYMBOL_CATEGORIES. }
class function TCharacter.IsSymbol(aChar: UCS4Char): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(aChar)^.Category);
  Exit(cat in SYMBOL_CATEGORIES);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsSymbol(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, SYMBOL_CATEGORIES));
end;
{ True when the UTF-16 code unit AChar has general category ucUppercaseLetter. }
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Exit(cat = TUnicodeCategory.ucUppercaseLetter);
end;

{ String/index overload; the index check and surrogate-pair handling are
  performed by TestCategory. }
class function TCharacter.IsUpper(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucUppercaseLetter));
end;
{ True when the UCS4 code point aChar has general category ucUppercaseLetter.

  The code point is handed to GetProps at full width. It was previously cast
  to Word first, which dropped everything above bit 15, so a code point above
  $FFFF was classified using the properties of an unrelated 16-bit value. }
class function TCharacter.IsUpper(aChar: UCS4Char): Boolean;
begin
  Result := (TUnicodeCategory(GetProps(aChar)^.Category) = TUnicodeCategory.ucUppercaseLetter);
end;
{ Returns the WhiteSpace property recorded for the UTF-16 code unit AChar. }
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Exit(props^.WhiteSpace);
end;

{ Returns the WhiteSpace property of the character starting at
  AString[AIndex].

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). A code unit categorised as a surrogate is resolved
  together with its successor; EArgumentException is raised when
  IsSurrogatePair reports that no valid pair starts at AIndex. }
class function TCharacter.IsWhiteSpace(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Exit(props^.WhiteSpace);
end;
{ Returns the WhiteSpace property recorded for the UCS4 code point aChar.

  The code point is handed to GetProps at full width. It was previously cast
  to Word first, which dropped everything above bit 15, so a code point above
  $FFFF was answered with the property of an unrelated 16-bit value. }
class function TCharacter.IsWhiteSpace(aChar: UCS4Char): Boolean;
begin
  Result := GetProps(aChar)^.WhiteSpace;
end;
{ Maps a UTF-16 code unit through its SimpleLowerCase property.
  A mapping of 0 means "no mapping": AChar is returned unchanged. }
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
  if mapped <> UnicodeChar(0) then
    Exit(mapped);
  Result := AChar;
end;

{ Lower-cases a whole string using the default (empty) option set. }
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
begin
  Exit(ToLower(AString, []));
end;

{ Lower-cases a whole string through UnicodeToLower.
  coIgnoreInvalidSequence in AOptions is forwarded as the "ignore invalid
  sequence" flag. A non-zero status from UnicodeToLower is reported as
  EArgumentException. }
class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
  if UnicodeToLower(AString, ignoreInvalid, Result) = 0 then
    Exit;
  raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;

{ Maps a UCS4 code point through its SimpleLowerCase property.
  A mapping of 0 means "no mapping": aChar is returned unchanged. }
class function TCharacter.ToLower(aChar: UCS4Char): UCS4Char;
var
  mapped : UCS4Char;
begin
  mapped := UCS4Char(Cardinal(GetProps(aChar)^.SimpleLowerCase));
  if mapped <> UCS4Char(0) then
    Exit(mapped);
  Result := aChar;
end;
{ Maps a UTF-16 code unit through its SimpleUpperCase property.
  A mapping of 0 means "no mapping": AChar is returned unchanged. }
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
  if mapped <> UnicodeChar(0) then
    Exit(mapped);
  Result := AChar;
end;

{ Upper-cases a whole string using the default (empty) option set. }
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
begin
  Exit(ToUpper(AString, []));
end;

{ Upper-cases a whole string through UnicodeToUpper.
  coIgnoreInvalidSequence in AOptions is forwarded as the "ignore invalid
  sequence" flag. A non-zero status from UnicodeToUpper is reported as
  EArgumentException. }
class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
  if UnicodeToUpper(AString, ignoreInvalid, Result) = 0 then
    Exit;
  raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;

{ Maps a UCS4 code point through its SimpleUpperCase property.
  A mapping of 0 means "no mapping": aChar is returned unchanged. }
class function TCharacter.ToUpper(aChar: UCS4Char): UCS4Char;
var
  mapped : UCS4Char;
begin
  mapped := UCS4Char(Cardinal(GetProps(aChar)^.SimpleUpperCase));
  if mapped <> UCS4Char(0) then
    Exit(mapped);
  Result := aChar;
end;
{ Returns the MAX_LEGAL_UTF32 constant. }
class function TCharacter.MaxCodePoint: Integer;
begin
  Exit(MAX_LEGAL_UTF32);
end;

{ Returns an empty string; no version text is provided by this
  implementation. }
class function TCharacter.UnicodeDataVersion: UnicodeString;
begin
  Exit('');
end;
{ True when GetProps yields a non-nil property record for the UTF-16 code
  unit aChar. }
class function TCharacter.IsDefined(aChar: UnicodeChar): Boolean;
begin
  Exit(GetProps(Word(aChar)) <> nil);
end;

{ True when GetProps yields a non-nil property record for the UCS4 code
  point aChar. }
class function TCharacter.IsDefined(aChar: UCS4Char): Boolean;
begin
  Exit(GetProps(aChar) <> nil);
end;
{ Applies IsDefined to the code unit S[Index].

  Raises EArgumentOutOfRangeException when Index is outside 1..Length(S),
  matching the other string/index overloads of this class; the string was
  previously indexed without any range check. }
class function TCharacter.IsDefined(const S: unicodestring; Index: Integer): Boolean;
begin
  if (Index < 1) or (Index > Length(S)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [Index, Length(S)]);
  Result:=IsDefined(S[Index]);
end;
- { TCharHelper }
{ Returns the MAX_LEGAL_UTF32 constant. }
class function TCharHelper.MaxCodePoint: Integer;
begin
  Exit(MAX_LEGAL_UTF32);
end;

{ Returns an empty string; no version text is provided by this
  implementation. }
class function TCharHelper.UnicodeDataVersion: UnicodeString;
begin
  Exit('');
end;
{ Helper form of TCharacter.IsHighSurrogate for the character itself. }
function TCharHelper.IsHighSurrogate: Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(Self));
end;

{ Helper form of TCharacter.IsLowSurrogate for the character itself. }
function TCharHelper.IsLowSurrogate: Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(Self));
end;

{ Helper form of TCharacter.IsSurrogate for the character itself. }
function TCharHelper.IsSurrogate: Boolean;
begin
  Exit(TCharacter.IsSurrogate(Self));
end;

{ Forwards both code units to TCharacter.IsSurrogatePair. }
class function TCharHelper.IsSurrogatePair(const HighSurrogate, LowSurrogate: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(HighSurrogate, LowSurrogate));
end;
{ Converts the character to UCS4Char with a plain type cast; no surrogate
  decoding is attempted. }
function TCharHelper.ToUCS4Char: UCS4Char;
begin
  Exit(UCS4Char(Self));
end;
{ UTF-16 <-> UTF-32 conversion helpers; every function forwards its
  arguments to the TCharacter class function of the same name and shape. }

{ Decodes the code point at S[Index] and reports its length in code units. }
class function TCharHelper.ConvertToUtf32(const S: UnicodeString; Index: Integer; out CharLength: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(S, Index, CharLength));
end;

{ Decodes the code point at S[Index]. }
class function TCharHelper.ConvertToUtf32(const S: UnicodeString; Index: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(S, Index));
end;

{ Encodes a UCS4 code point as a UTF-16 string. }
class function TCharHelper.ConvertFromUtf32(aChar: UCS4Char): UnicodeString;
begin
  Exit(TCharacter.ConvertFromUtf32(aChar));
end;

{ Combines an explicit high/low surrogate pair. }
class function TCharHelper.ConvertToUtf32(const HighSurrogate, LowSurrogate: UnicodeChar): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(HighSurrogate, LowSurrogate));
end;
{ Helper form of TCharacter.IsLetter for the character itself. }
function TCharHelper.IsLetter: Boolean;
begin
  Exit(TCharacter.IsLetter(Self));
end;

{ Helper form of TCharacter.IsLetterOrDigit for the character itself. }
function TCharHelper.IsLetterOrDigit: Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(Self));
end;

{ Forwards to TCharacter.IsLetter(S, Index). }
class function TCharHelper.IsLetter(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsLetter(S, Index));
end;

{ Forwards to TCharacter.IsLetterOrDigit(S, Index). }
class function TCharHelper.IsLetterOrDigit(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(S, Index));
end;
{ Forwards to TCharacter.IsControl(S, Index). }
class function TCharHelper.IsControl(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsControl(S, Index));
end;

{ Forwards to TCharacter.IsDefined(S, Index). }
class function TCharHelper.IsDefined(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsDefined(S, Index));
end;

{ Helper form of TCharacter.IsControl for the character itself. }
function TCharHelper.IsControl: Boolean;
begin
  Exit(TCharacter.IsControl(Self));
end;

{ Helper form of TCharacter.IsDefined for the character itself. }
function TCharHelper.IsDefined: Boolean;
begin
  Exit(TCharacter.IsDefined(Self));
end;

{ Helper form of TCharacter.IsDigit for the character itself. }
function TCharHelper.IsDigit: Boolean;
begin
  Exit(TCharacter.IsDigit(Self));
end;

{ Forwards to TCharacter.IsDigit(S, Index). }
class function TCharHelper.IsDigit(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsDigit(S, Index));
end;
{ True when the character equals at least one element of SomeChars;
  False for an empty array. The scan stops at the first match. }
function TCharHelper.IsInArray(const SomeChars: array of UnicodeChar): Boolean;
var
  i : Integer;
begin
  Result := False;
  for i := Low(SomeChars) to High(SomeChars) do
    if SomeChars[i] = Self then
    begin
      Result := True;
      Break;
    end;
end;
{ Helper form of TCharacter.IsLower for the character itself. }
function TCharHelper.IsLower: Boolean;
begin
  Exit(TCharacter.IsLower(Self));
end;

{ Helper form of TCharacter.GetUnicodeCategory for the character itself. }
function TCharHelper.GetUnicodeCategory: TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(Self));
end;

{ Helper form of TCharacter.GetNumericValue for the character itself. }
function TCharHelper.GetNumericValue: Double;
begin
  Exit(TCharacter.GetNumericValue(Self));
end;

{ Forwards to TCharacter.GetNumericValue(S, Index). }
class function TCharHelper.GetNumericValue(const S: UnicodeString; Index: Integer): Double;
begin
  Exit(TCharacter.GetNumericValue(S, Index));
end;

{ Forwards to TCharacter.GetUnicodeCategory(S, Index). }
class function TCharHelper.GetUnicodeCategory(const S: UnicodeString; Index: Integer): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(S, Index));
end;
{ Forwards to TCharacter.IsHighSurrogate(S, Index), which validates Index
  before inspecting the code unit. }
class function TCharHelper.IsHighSurrogate(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(S, Index));
end;
{ True when the code unit S[Index] equals at least one element of SomeChars.

  Raises EArgumentOutOfRangeException when Index is outside 1..Length(S).
  The string was previously indexed without a range check, unlike the
  string/index functions that go through TCharacter. }
class function TCharHelper.IsInArray(const S: UnicodeString; Index: Integer; const SomeChars: array of UnicodeChar): Boolean;
begin
  if (Index < 1) or (Index > Length(S)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [Index, Length(S)]);
  Result:=S[Index].IsInArray(SomeChars);
end;
{ Forwards to TCharacter.IsLower(S, Index). }
class function TCharHelper.IsLower(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsLower(S, Index));
end;
{ Tests whether the code unit S[Index] is a low surrogate.

  Goes through TCharacter.IsLowSurrogate(S, Index) so that an Index outside
  1..Length(S) raises EArgumentOutOfRangeException, exactly like the sibling
  IsHighSurrogate(S, Index). The string was previously indexed directly,
  with no range check. }
class function TCharHelper.IsLowSurrogate(const S: UnicodeString; Index: Integer): Boolean;
begin
  Result:=TCharacter.IsLowSurrogate(S,Index);
end;
{ Helper form of TCharacter.IsNumber for the character itself. }
function TCharHelper.IsNumber: Boolean;
begin
  Exit(TCharacter.IsNumber(Self));
end;

{ Helper form of TCharacter.IsPunctuation for the character itself. }
function TCharHelper.IsPunctuation: Boolean;
begin
  Exit(TCharacter.IsPunctuation(Self));
end;

{ Forwards to TCharacter.IsNumber(S, Index). }
class function TCharHelper.IsNumber(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsNumber(S, Index));
end;

{ Forwards to TCharacter.IsPunctuation(S, Index). }
class function TCharHelper.IsPunctuation(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsPunctuation(S, Index));
end;
{ Helper form of TCharacter.IsSeparator for the character itself. }
function TCharHelper.IsSeparator: Boolean;
begin
  Exit(TCharacter.IsSeparator(Self));
end;

{ Forwards to TCharacter.IsSeparator(S, Index). }
class function TCharHelper.IsSeparator(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsSeparator(S, Index));
end;

{ Forwards to TCharacter.IsSurrogate(S, Index). }
class function TCharHelper.IsSurrogate(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogate(S, Index));
end;

{ Forwards to TCharacter.IsSurrogatePair(S, Index). }
class function TCharHelper.IsSurrogatePair(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(S, Index));
end;
{ Helper form of TCharacter.IsSymbol for the character itself. }
function TCharHelper.IsSymbol: Boolean;
begin
  Exit(TCharacter.IsSymbol(Self));
end;

{ Forwards to TCharacter.IsSymbol(S, Index). }
class function TCharHelper.IsSymbol(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsSymbol(S, Index));
end;

{ Forwards to TCharacter.IsUpper(S, Index). }
class function TCharHelper.IsUpper(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsUpper(S, Index));
end;

{ Helper form of TCharacter.IsUpper for the character itself. }
function TCharHelper.IsUpper: Boolean;
begin
  Exit(TCharacter.IsUpper(Self));
end;

{ Forwards to TCharacter.IsWhiteSpace(S, Index). }
class function TCharHelper.IsWhiteSpace(const S: UnicodeString; Index: Integer): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(S, Index));
end;
{ Helper form of TCharacter.ToLower for the character itself. }
function TCharHelper.ToLower: UnicodeChar;
begin
  Exit(TCharacter.ToLower(Self));
end;

{ Forwards the whole string to TCharacter.ToLower. }
class function TCharHelper.ToLower(const S: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToLower(S));
end;

{ Helper form of TCharacter.ToUpper for the character itself. }
function TCharHelper.ToUpper: UnicodeChar;
begin
  Exit(TCharacter.ToUpper(Self));
end;

{ Forwards the whole string to TCharacter.ToUpper. }
class function TCharHelper.ToUpper(const S: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToUpper(S));
end;

{ Helper form of TCharacter.IsWhiteSpace for the character itself. }
function TCharHelper.IsWhiteSpace: Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(Self));
end;
{ UCS4 code point predicates. Every function below forwards aChar to the
  TCharacter class function of the same name. }

class function TCharHelper.IsLetterOrDigit(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(aChar));
end;

class function TCharHelper.IsControl(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsControl(aChar));
end;

class function TCharHelper.IsDefined(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsDefined(aChar));
end;

class function TCharHelper.IsDigit(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsDigit(aChar));
end;

class function TCharHelper.IsHighSurrogate(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(aChar));
end;

class function TCharHelper.IsLetter(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsLetter(aChar));
end;

class function TCharHelper.IsLowSurrogate(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(aChar));
end;

class function TCharHelper.IsSurrogate(Surrogate: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsSurrogate(Surrogate));
end;
{ Forwards the UCS4 code point to TCharacter.GetUnicodeCategory. }
class function TCharHelper.GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(aChar));
end;

{ Forwards the UCS4 code point to TCharacter.GetNumericValue. }
class function TCharHelper.GetNumericValue(aChar: UCS4Char): Double;
begin
  Exit(TCharacter.GetNumericValue(aChar));
end;
{ UCS4 code point predicates and case mappings. Every function below
  forwards aChar to the TCharacter class function of the same name. }

class function TCharHelper.IsLower(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsLower(aChar));
end;

class function TCharHelper.IsNumber(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsNumber(aChar));
end;

class function TCharHelper.IsPunctuation(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsPunctuation(aChar));
end;

class function TCharHelper.IsSeparator(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsSeparator(aChar));
end;

class function TCharHelper.IsSymbol(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsSymbol(aChar));
end;

class function TCharHelper.IsUpper(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsUpper(aChar));
end;

class function TCharHelper.ToLower(aChar: UCS4Char): UCS4Char;
begin
  Exit(TCharacter.ToLower(aChar));
end;

class function TCharHelper.ToUpper(aChar: UCS4Char): UCS4Char;
begin
  Exit(TCharacter.ToUpper(aChar));
end;

class function TCharHelper.IsWhiteSpace(aChar: UCS4Char): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(aChar));
end;
- end.
|