character.pas 55 KB


  1. { Unicode "Character" properties handler.
  2. Copyright (c) 2012 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
  15. {$IFNDEF FPC_DOTTEDUNITS}
  16. unit Character;
  17. {$ENDIF FPC_DOTTEDUNITS}
  18. interface
  19. {$mode objfpc}
  20. {$H+}
  21. {$PACKENUM 1}
  22. {$SCOPEDENUMS ON}
  23. {$modeswitch typehelpers}
  24. {$IFDEF FPC_DOTTEDUNITS}
  25. uses
  26. System.CodePages.unicodedata;
  27. {$ELSE FPC_DOTTEDUNITS}
  28. uses
  29. unicodedata;
  30. {$ENDIF FPC_DOTTEDUNITS}
  31. type
  32. // Unicode General Category
  // One member per Unicode General Category; the two-letter abbreviation used by
  // the Unicode standard is shown beside each member.
  // TCharacter.TestCategory casts the Category field of a property record
  // straight to this type (TUnicodeCategory(pu^.Category)), so the declaration
  // order presumably has to match the category numbering stored by the
  // unicodedata unit -- confirm before reordering or inserting members.
  // With {$PACKENUM 1} in effect the enumeration is stored in a single byte.
  33. TUnicodeCategory = (
  34. ucUppercaseLetter, // Lu = Letter, uppercase
  35. ucLowercaseLetter, // Ll = Letter, lowercase
  36. ucTitlecaseLetter, // Lt = Letter, titlecase
  37. ucModifierLetter, // Lm = Letter, modifier
  38. ucOtherLetter, // Lo = Letter, other
  39. ucNonSpacingMark, // Mn = Mark, nonspacing
  40. ucCombiningMark, // Mc = Mark, spacing combining
  41. ucEnclosingMark, // Me = Mark, enclosing
  42. ucDecimalNumber, // Nd = Number, decimal digit
  43. ucLetterNumber, // Nl = Number, letter
  44. ucOtherNumber, // No = Number, other
  45. ucConnectPunctuation, // Pc = Punctuation, connector
  46. ucDashPunctuation, // Pd = Punctuation, dash
  47. ucOpenPunctuation, // Ps = Punctuation, open
  48. ucClosePunctuation, // Pe = Punctuation, close
  49. ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
  50. ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
  51. ucOtherPunctuation, // Po = Punctuation, other
  52. ucMathSymbol, // Sm = Symbol, math
  53. ucCurrencySymbol, // Sc = Symbol, currency
  54. ucModifierSymbol, // Sk = Symbol, modifier
  55. ucOtherSymbol, // So = Symbol, other
  56. ucSpaceSeparator, // Zs = Separator, space
  57. ucLineSeparator, // Zl = Separator, line
  58. ucParagraphSeparator, // Zp = Separator, paragraph
  59. ucControl, // Cc = Other, control
  60. ucFormat, // Cf = Other, format
  61. ucSurrogate, // Cs = Other, surrogate
  62. ucPrivateUse, // Co = Other, private use
  63. ucUnassigned // Cn = Other, not assigned (including noncharacters)
  64. );
  // Set of general categories; this is the type taken by the set-based
  // TCharacter.TestCategory overload and the type of the *_CATEGORIES constants
  // declared in the implementation section.
  65. TUnicodeCategorySet = set of TUnicodeCategory;
  // Option flags accepted by the ToLower/ToUpper overloads that take AOptions.
  // NOTE(review): coIgnoreInvalidSequence presumably suppresses the error raised
  // for malformed surrogate sequences -- confirm against the implementation.
  66. TCharacterOption = (coIgnoreInvalidSequence);
  67. TCharacterOptions = set of TCharacterOption;
  68. { TCharacter }
  // Sealed class used purely as a namespace: every public member is a static
  // class function, and the constructor raises ENoConstructException
  // (SClassCantBeConstructed) instead of creating an instance.
  // Most queries come as a family of overloads taking
  //   - a single UnicodeChar (one UTF-16 code unit),
  //   - a UnicodeString plus an index into it, or
  //   - a UCS4Char (a full code point).
  // The private TestCategory helpers back the string+index overloads: they
  // require 1 <= AIndex <= Length(AString) and raise on an unpaired surrogate.
  69. TCharacter = class sealed
  70. private
  71. class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static;
  72. class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static;
  73. public
  // Bounds of the UTF-16 surrogate code-unit ranges, typed as UnicodeChar.
  // MinSurrogate/MaxSurrogate reuse HIGH_SURROGATE_BEGIN and LOW_SURROGATE_END,
  // i.e. they span both the high and the low range.
  74. const
  75. MaxHighSurrogate = UnicodeChar(HIGH_SURROGATE_END);
  76. MaxLowSurrogate = UnicodeChar(LOW_SURROGATE_END);
  77. MaxSurrogate = UnicodeChar(LOW_SURROGATE_END);
  78. MinHighSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN);
  79. MinLowSurrogate = UnicodeChar(LOW_SURROGATE_BEGIN);
  80. MinSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN);
  81. public
  82. constructor Create; // always raises; the class cannot be instantiated
  // --- conversions between UTF-16 and UTF-32 ---
  83. class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
  84. class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
  85. class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
  86. class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;
  // --- property queries ---
  87. class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
  88. class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
  89. class function GetNumericValue(aChar: UCS4Char): Double; overload; inline; static;
  90. class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
  91. class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;
  92. class function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; inline; static;
  93. class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
  94. class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  95. class function IsControl(aChar: UCS4Char): Boolean; overload; inline; static;
  96. class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
  97. class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  98. class function IsDigit(aChar: UCS4Char): Boolean; overload; inline; static;
  // --- surrogate tests ---
  99. class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  100. class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  101. class function IsSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
  102. class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  103. class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  104. class function IsHighSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
  105. class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  106. class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  107. class function IsLowSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
  108. class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
  109. class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  // --- category-based classification ---
  110. class function IsLetter(aChar: UCS4Char): Boolean; overload; inline; static;
  111. class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
  112. class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  113. class function IsLetterOrDigit(aChar: UCS4Char): Boolean; overload; inline; static;
  114. class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
  115. class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  116. class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline;
  117. class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  118. class function IsLower(aChar: UCS4Char): Boolean; overload; inline; static;
  119. class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
  120. class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  121. class function IsNumber(aChar: UCS4Char): Boolean; overload; inline; static;
  122. class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
  123. class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  124. class function IsPunctuation(aChar: UCS4Char): Boolean; overload; inline; static;
  125. class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
  126. class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  127. class function IsSeparator(aChar: UCS4Char): Boolean; overload; inline; static;
  128. class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
  129. class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  130. class function IsSymbol(aChar: UCS4Char): Boolean; overload; inline; static;
  131. class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
  132. class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  133. class function IsUpper(aChar: UCS4Char): Boolean; overload; inline; static;
  134. class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
  135. class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  136. class function IsWhiteSpace(aChar: UCS4Char): Boolean; overload; inline; static;
  // --- case conversion ---
  137. class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
  138. class function ToLower(const AString : UnicodeString) : UnicodeString; inline;overload; static;
  139. class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
  140. class function ToLower(aChar: UCS4Char): UCS4Char; overload; inline; static;
  141. class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
  142. class function ToUpper(const AString : UnicodeString) : UnicodeString; inline; overload; static;
  143. class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
  144. class function ToUpper(aChar: UCS4Char): UCS4Char; overload; inline; static;
  // --- data-set information and "is assigned" tests ---
  145. class function MaxCodePoint: Integer; static;
  146. class function UnicodeDataVersion: UnicodeString; static;
  147. class function IsDefined(aChar: UnicodeChar): Boolean; overload; inline; static;
  148. class function IsDefined(aChar: UCS4Char): Boolean; overload; inline; static;
  149. class function IsDefined(const S: UnicodeString; Index: Integer): Boolean; overload; inline; static;
  150. end;
  151. { TCharHelper }
  // Type helper attached to UnicodeChar (enabled by {$modeswitch typehelpers}).
  // It declares three groups of members:
  //   - instance functions without parameters, which act on the helped character;
  //   - static class functions taking a UnicodeString plus an index;
  //   - static class functions taking a UCS4Char code point.
  // The surrogate-range constants repeat the ones declared on TCharacter.
  // NOTE(review): the implementations are outside this part of the file; the
  // member names mirror TCharacter, so they presumably forward to it -- confirm.
  152. TCharHelper = type helper for UnicodeChar
  153. public
  154. const
  155. MaxHighSurrogate = UnicodeChar(HIGH_SURROGATE_END);
  156. MaxLowSurrogate = UnicodeChar(LOW_SURROGATE_END);
  157. MaxSurrogate = UnicodeChar(LOW_SURROGATE_END);
  158. MinHighSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN);
  159. MinLowSurrogate = UnicodeChar(LOW_SURROGATE_BEGIN);
  160. MinSurrogate = UnicodeChar(HIGH_SURROGATE_BEGIN);
  161. class function MaxCodePoint: Integer; static;
  162. class function UnicodeDataVersion: UnicodeString; static;
  163. class function ConvertFromUtf32(aChar: UCS4Char): UnicodeString; static;
  164. class function ConvertToUtf32(const S: UnicodeString; Index: Integer): UCS4Char; overload; inline; static;
  165. class function ConvertToUtf32(const S: UnicodeString; Index: Integer; out CharLength: Integer): UCS4Char; overload; static;
  166. class function ConvertToUtf32(const HighSurrogate, LowSurrogate: UnicodeChar): UCS4Char; overload; static;
  // Instance functions (operate on Self) interleaved with their string+index twins.
  167. function GetNumericValue: Double; overload;
  168. class function GetNumericValue(const S: UnicodeString; Index: Integer): Double; overload; static;
  169. function GetUnicodeCategory: TUnicodeCategory; overload;
  170. class function GetUnicodeCategory(const S: UnicodeString; Index: Integer): TUnicodeCategory; overload; static;
  171. function IsControl: Boolean; overload;
  172. function IsDefined: Boolean; overload;
  173. function IsDigit: Boolean; overload;
  174. function IsHighSurrogate: Boolean; overload; inline;
  175. function IsInArray(const SomeChars: array of UnicodeChar): Boolean; overload;
  176. function IsLetter: Boolean; overload;
  177. function IsLetterOrDigit: Boolean; overload;
  178. function IsLower: Boolean; overload;
  179. function IsLowSurrogate: Boolean; overload; inline;
  180. function IsNumber: Boolean; overload;
  181. function IsPunctuation: Boolean; overload;
  182. function IsSeparator: Boolean; overload;
  183. function IsSurrogate: Boolean; overload; inline;
  184. function IsSymbol: Boolean; overload;
  185. function IsUpper: Boolean; overload;
  186. function IsWhiteSpace: Boolean; overload;
  187. function ToLower: UnicodeChar; overload;
  188. function ToUpper: UnicodeChar; overload;
  189. function ToUCS4Char: UCS4Char; inline;
  // Static overloads addressing a character inside a string by index.
  190. class function IsControl(const S: UnicodeString; Index: Integer): Boolean; overload; static; inline;
  191. class function IsDefined(const S: UnicodeString; Index: Integer): Boolean; overload; static; inline;
  192. class function IsDigit(const S: UnicodeString; Index: Integer): Boolean; overload; static; inline;
  193. class function IsHighSurrogate(const S: UnicodeString; Index: Integer): Boolean; overload; inline; static;
  194. class function IsInArray(const S: UnicodeString; Index: Integer; const SomeChars: array of UnicodeChar): Boolean; overload; static; inline;
  195. class function IsLetter(const S: UnicodeString; Index: Integer): Boolean; overload; static;inline;
  196. class function IsLetterOrDigit(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  197. class function IsLower(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  198. class function IsLowSurrogate(const S: UnicodeString; Index: Integer): Boolean; overload; inline; static;
  199. class function IsNumber(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  200. class function IsPunctuation(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  201. class function IsSeparator(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  202. class function IsSurrogate(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  203. class function IsSurrogatePair(const HighSurrogate, LowSurrogate: UnicodeChar): Boolean; overload; inline; static;
  204. class function IsSurrogatePair(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  205. class function IsSymbol(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  206. class function IsUpper(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  207. class function IsWhiteSpace(const S: UnicodeString; Index: Integer): Boolean; overload; static;
  208. class function ToLower(const S: UnicodeString): UnicodeString; overload; static;
  209. class function ToUpper(const S: UnicodeString): UnicodeString; overload; static;
  // Static overloads taking a full UCS4Char code point.
  210. class function GetNumericValue(aChar: UCS4Char): Double; overload; static;
  211. class function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; static;
  212. class function IsControl(aChar: UCS4Char): Boolean; overload; static;
  213. class function IsDefined(aChar: UCS4Char): Boolean; overload; static;
  214. class function IsDigit(aChar: UCS4Char): Boolean; overload; static;
  215. class function IsHighSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
  216. class function IsLetter(aChar: UCS4Char): Boolean; overload; static;
  217. class function IsLetterOrDigit(aChar: UCS4Char): Boolean; overload; static;
  218. class function IsLower(aChar: UCS4Char): Boolean; overload; static;
  219. class function IsLowSurrogate(aChar: UCS4Char): Boolean; overload; inline; static;
  220. class function IsNumber(aChar: UCS4Char): Boolean; overload; static;
  221. class function IsPunctuation(aChar: UCS4Char): Boolean; overload; static;
  222. class function IsSeparator(aChar: UCS4Char): Boolean; overload; static;
  223. class function IsSurrogate(Surrogate: UCS4Char): Boolean; overload; inline; static;
  224. class function IsSymbol(aChar: UCS4Char): Boolean; overload; static;
  225. class function IsUpper(aChar: UCS4Char): Boolean; overload; static;
  226. class function IsWhiteSpace(aChar: UCS4Char): Boolean; overload; static;
  227. class function ToLower(aChar: UCS4Char): UCS4Char; overload; static;
  228. class function ToUpper(aChar: UCS4Char): UCS4Char; overload; static;
  229. end;
  230. // flat functions
  // Unit-level (non-class) entry points. Each one is implemented in this unit as
  // a single call to the TCharacter class function of the same name with the
  // same arguments, so behaviour and raised exceptions are those of TCharacter.
  // Overloads follow the same pattern as on TCharacter: one UnicodeChar,
  // a UnicodeString plus index, or one UCS4Char.
  231. function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
  232. function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
  233. function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
  234. function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
  235. function GetNumericValue(AChar : UnicodeChar) : Double; overload;
  236. function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload;
  237. function GetNumericValue(aChar: UCS4Char): Double; overload; inline;
  238. function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload;
  239. function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload;
  240. function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; inline;
  241. function IsControl(AChar : UnicodeChar) : Boolean; overload;
  242. function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  243. function IsControl(aChar: UCS4Char): Boolean; overload; inline;
  244. function IsDigit(AChar : UnicodeChar) : Boolean; overload;
  245. function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  246. function IsDigit(aChar: UCS4Char): Boolean; overload; inline;
  247. function IsSurrogate(AChar : UnicodeChar) : Boolean; overload;
  248. function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  249. function IsSurrogate(Surrogate: UCS4Char): Boolean; overload; inline;
  250. function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload;
  251. function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  252. function IsHighSurrogate(aChar: UCS4Char): Boolean; overload; inline;
  253. function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload;
  254. function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  255. function IsLowSurrogate(aChar: UCS4Char): Boolean; overload; inline;
  256. function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload;
  257. function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  258. function IsLetter(AChar : UnicodeChar) : Boolean; overload;
  259. function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  260. function IsLetter(aChar: UCS4Char): Boolean; overload; inline;
  261. function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload;
  262. function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  263. function IsLetterOrDigit(aChar: UCS4Char): Boolean; overload; inline;
  264. function IsLower(AChar : UnicodeChar) : Boolean; overload;
  265. function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  266. function IsLower(aChar: UCS4Char): Boolean; overload; inline;
  267. function IsNumber(AChar : UnicodeChar) : Boolean; overload;
  268. function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  269. function IsNumber(aChar: UCS4Char): Boolean; overload; inline;
  270. function IsPunctuation(AChar : UnicodeChar) : Boolean; overload;
  271. function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  272. function IsPunctuation(aChar: UCS4Char): Boolean; overload; inline;
  273. function IsSeparator(AChar : UnicodeChar) : Boolean; overload;
  274. function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  275. function IsSeparator(aChar: UCS4Char): Boolean; overload; inline;
  276. function IsSymbol(AChar : UnicodeChar) : Boolean; overload;
  277. function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  278. function IsSymbol(aChar: UCS4Char): Boolean; overload; inline;
  279. function IsUpper(AChar : UnicodeChar) : Boolean; overload;
  280. function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  281. function IsUpper(aChar: UCS4Char): Boolean; overload; inline;
  282. function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload;
  283. function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  284. function IsWhiteSpace(aChar: UCS4Char): Boolean; overload; inline;
  285. function ToLower(AChar : UnicodeChar) : UnicodeChar; overload;
  286. function ToLower(const AString : UnicodeString) : UnicodeString; overload;
  287. function ToLower(aChar: UCS4Char): UCS4Char; overload; inline;
  288. function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload;
  289. function ToUpper(const AString : UnicodeString) : UnicodeString; overload;
  290. function ToUpper(aChar: UCS4Char): UCS4Char; overload; inline;
  291. function IsDefined(aChar: UnicodeChar): Boolean; overload; inline;
  292. function IsDefined(aChar: UCS4Char): Boolean; overload; inline;
  293. function IsDefined(const S: Unicodestring; Index: Integer): Boolean; overload; inline;
  294. implementation
  295. {$IFDEF FPC_DOTTEDUNITS}
  296. uses
  297. System.SysUtils,
  298. System.RtlConsts;
  299. {$ELSE FPC_DOTTEDUNITS}
  300. uses
  301. SysUtils,
  302. RtlConsts;
  303. {$ENDIF FPC_DOTTEDUNITS}
  // Pre-built TUnicodeCategory sets, one per coarse character class. They have
  // the element type expected by the set-based TCharacter.TestCategory overload.
  // NOTE(review): presumably consumed by the IsLetter/IsNumber/... methods
  // further down the file -- those bodies are not visible here.
  304. const
  // All letter categories: Lu, Ll, Lt, Lm, Lo.
  305. LETTER_CATEGORIES = [
  306. TUnicodeCategory.ucUppercaseLetter, TUnicodeCategory.ucLowercaseLetter,
  307. TUnicodeCategory.ucTitlecaseLetter, TUnicodeCategory.ucModifierLetter,
  308. TUnicodeCategory.ucOtherLetter
  309. ];
  // Letters plus Nd and Nl. Note that No (ucOtherNumber) is NOT included here,
  // unlike NUMBER_CATEGORIES below.
  310. LETTER_OR_DIGIT_CATEGORIES =
  311. LETTER_CATEGORIES +
  312. [TUnicodeCategory.ucDecimalNumber,TUnicodeCategory.ucLetterNumber];
  // All number categories: Nd, Nl, No.
  313. NUMBER_CATEGORIES =
  314. [ TUnicodeCategory.ucDecimalNumber, TUnicodeCategory.ucLetterNumber,
  315. TUnicodeCategory.ucOtherNumber
  316. ];
  // All punctuation categories: Pc, Pd, Ps, Pe, Pi, Pf, Po.
  317. PUNCTUATION_CATEGORIES = [
  318. TUnicodeCategory.ucConnectPunctuation, TUnicodeCategory.ucDashPunctuation,
  319. TUnicodeCategory.ucOpenPunctuation, TUnicodeCategory.ucClosePunctuation,
  320. TUnicodeCategory.ucInitialPunctuation, TUnicodeCategory.ucFinalPunctuation,
  321. TUnicodeCategory.ucOtherPunctuation
  322. ];
  // All separator categories: Zs, Zl, Zp.
  323. SEPARATOR_CATEGORIES =
  324. [ TUnicodeCategory.ucSpaceSeparator, TUnicodeCategory.ucLineSeparator,
  325. TUnicodeCategory.ucParagraphSeparator
  326. ];
  // All symbol categories: Sm, Sc, Sk, So.
  327. SYMBOL_CATEGORIES =
  328. [ TUnicodeCategory.ucMathSymbol, TUnicodeCategory.ucCurrencySymbol,
  329. TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
  330. ];
  { Returns the unicodedata property record for a full UCS-4 code point.

    Code points up to $FFFF fit in one UTF-16 code unit and are looked up with
    the single-unit GetProps(Word) overload. Only code points above $FFFF are
    split into a high/low surrogate pair with FromUCS4, because surrogate
    decomposition is defined for supplementary code points only; feeding a BMP
    value through it would not yield a meaningful pair to look up. }
  function GetProps(aChar: UCS4Char) : PUC_Prop; inline; overload;
  var
    highUnit, lowUnit : UnicodeChar;
  begin
    if aChar <= $FFFF then
      Result := GetProps(Word(aChar))
    else
    begin
      FromUCS4(aChar, highUnit, lowUnit);
      Result := GetProps(highUnit, lowUnit);
    end;
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsDefined. }
  function IsDefined(aChar: UnicodeChar): Boolean; overload; inline;
  begin
    Exit(TCharacter.IsDefined(aChar));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsDefined. }
  function IsDefined(aChar: UCS4Char): Boolean; overload; inline;
  begin
    Exit(TCharacter.IsDefined(aChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsDefined. }
  function IsDefined(const S: UnicodeString; Index: Integer): Boolean; overload; inline;
  begin
    Exit(TCharacter.IsDefined(S, Index));
  end;
  { Unit-level shortcut: returns whatever TCharacter.ConvertFromUtf32 yields for AChar. }
  function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
  begin
    Exit(TCharacter.ConvertFromUtf32(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.ConvertToUtf32. }
  function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer): UCS4Char;
  begin
    Exit(TCharacter.ConvertToUtf32(AString, AIndex));
  end;
  { Unit-level shortcut: forwards to TCharacter.ConvertToUtf32; ACharLength is
    filled in by that call. }
  function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer; out ACharLength: Integer): UCS4Char;
  begin
    Exit(TCharacter.ConvertToUtf32(AString, AIndex, ACharLength));
  end;
  { Unit-level shortcut: forwards a high/low surrogate pair to TCharacter.ConvertToUtf32. }
  function ConvertToUtf32(const AHighSurrogate, ALowSurrogate: UnicodeChar): UCS4Char;
  begin
    Exit(TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.GetNumericValue. }
  function GetNumericValue(AChar: UnicodeChar): Double;
  begin
    Exit(TCharacter.GetNumericValue(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.GetNumericValue. }
  function GetNumericValue(const AString: UnicodeString; AIndex: Integer): Double;
  begin
    Exit(TCharacter.GetNumericValue(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.GetNumericValue. }
  function GetNumericValue(aChar: UCS4Char): Double;
  begin
    Exit(TCharacter.GetNumericValue(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.GetUnicodeCategory. }
  function GetUnicodeCategory(AChar: UnicodeChar): TUnicodeCategory;
  begin
    Exit(TCharacter.GetUnicodeCategory(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.GetUnicodeCategory. }
  function GetUnicodeCategory(const AString: UnicodeString; AIndex: Integer): TUnicodeCategory;
  begin
    Exit(TCharacter.GetUnicodeCategory(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.GetUnicodeCategory. }
  function GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory; overload; inline;
  begin
    Exit(TCharacter.GetUnicodeCategory(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsControl. }
  function IsControl(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsControl(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsControl. }
  function IsControl(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsControl(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsControl. }
  function IsControl(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsControl(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsDigit. }
  function IsDigit(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsDigit(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsDigit. }
  function IsDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsDigit(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsDigit. }
  function IsDigit(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsDigit(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsSurrogate. }
  function IsSurrogate(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsSurrogate(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsSurrogate. }
  function IsSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsSurrogate(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 value to TCharacter.IsSurrogate. }
  function IsSurrogate(Surrogate: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsSurrogate(Surrogate));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsHighSurrogate. }
  function IsHighSurrogate(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsHighSurrogate(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsHighSurrogate. }
  function IsHighSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsHighSurrogate(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 value to TCharacter.IsHighSurrogate. }
  function IsHighSurrogate(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsHighSurrogate(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsLowSurrogate. }
  function IsLowSurrogate(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsLowSurrogate(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsLowSurrogate. }
  function IsLowSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsLowSurrogate(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 value to TCharacter.IsLowSurrogate. }
  function IsLowSurrogate(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLowSurrogate(aChar));
  end;
  { Unit-level shortcut: forwards two UTF-16 code units to TCharacter.IsSurrogatePair. }
  function IsSurrogatePair(const AHighSurrogate, ALowSurrogate: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsSurrogatePair. }
  function IsSurrogatePair(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsSurrogatePair(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsLetter. }
  function IsLetter(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsLetter(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsLetter. }
  function IsLetter(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsLetter(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsLetter. }
  function IsLetter(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLetter(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsLetterOrDigit. }
  function IsLetterOrDigit(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsLetterOrDigit(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsLetterOrDigit. }
  function IsLetterOrDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsLetterOrDigit(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsLetterOrDigit. }
  function IsLetterOrDigit(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLetterOrDigit(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsLower. }
  function IsLower(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsLower(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsLower. }
  function IsLower(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsLower(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsLower. }
  function IsLower(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLower(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsNumber. }
  function IsNumber(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsNumber(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsNumber. }
  function IsNumber(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsNumber(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsNumber. }
  function IsNumber(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsNumber(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsPunctuation. }
  function IsPunctuation(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsPunctuation(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsPunctuation. }
  function IsPunctuation(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsPunctuation(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsPunctuation. }
  function IsPunctuation(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsPunctuation(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsSeparator. }
  function IsSeparator(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsSeparator(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsSeparator. }
  function IsSeparator(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsSeparator(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsSeparator. }
  function IsSeparator(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsSeparator(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsSymbol. }
  function IsSymbol(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsSymbol(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsSymbol. }
  function IsSymbol(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsSymbol(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsSymbol. }
  function IsSymbol(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsSymbol(aChar));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsUpper. }
  function IsUpper(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsUpper(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsUpper. }
  function IsUpper(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsUpper(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsUpper. }
  function IsUpper(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsUpper(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.IsWhiteSpace. }
  function IsWhiteSpace(AChar: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsWhiteSpace(AChar));
  end;
  { Unit-level shortcut: forwards the string and index to TCharacter.IsWhiteSpace. }
  function IsWhiteSpace(const AString: UnicodeString; AIndex: Integer): Boolean;
  begin
    Exit(TCharacter.IsWhiteSpace(AString, AIndex));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.IsWhiteSpace. }
  function IsWhiteSpace(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsWhiteSpace(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.ToLower. }
  function ToLower(AChar: UnicodeChar): UnicodeChar;
  begin
    Exit(TCharacter.ToLower(AChar));
  end;
  { Unit-level shortcut: forwards a whole string to the one-argument
    TCharacter.ToLower overload. }
  function ToLower(const AString: UnicodeString): UnicodeString;
  begin
    Exit(TCharacter.ToLower(AString));
  end;
  { Unit-level shortcut: forwards a UCS-4 code point to TCharacter.ToLower. }
  function ToLower(aChar: UCS4Char): UCS4Char;
  begin
    Exit(TCharacter.ToLower(aChar));
  end;
  { Unit-level shortcut: forwards a single UTF-16 code unit to TCharacter.ToUpper. }
  function ToUpper(AChar: UnicodeChar): UnicodeChar;
  begin
    Exit(TCharacter.ToUpper(AChar));
  end;
  { Returns TCharacter.ToUpper applied to the whole string AString. }
  function ToUpper(const AString: UnicodeString): UnicodeString;
  begin
    Exit(TCharacter.ToUpper(AString));
  end;
  { Returns TCharacter.ToUpper applied to the UCS4 code point aChar. }
  function ToUpper(aChar: UCS4Char): UCS4Char;
  begin
    Exit(TCharacter.ToUpper(aChar));
  end;
  590. { TCharacter }
  { Tests whether the code point starting at AIndex in AString has the
    category ACategory.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), and EArgumentException when the code unit is a
    surrogate for which IsSurrogatePair(AString, AIndex) is False. }
  class function TCharacter.TestCategory(
    const AString : UnicodeString;
    AIndex : Integer;
    ACategory : TUnicodeCategory
  ) : Boolean;
  var
    props : PUC_Prop;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    props := GetProps(Word(AString[AIndex]));
    if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
    begin
      if not IsSurrogatePair(AString, AIndex) then
        raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
      // Valid pair: take the properties from both code units.
      props := GetProps(AString[AIndex], AString[AIndex+1]);
    end;
    Exit(TUnicodeCategory(props^.Category) = ACategory);
  end;
  { Tests whether the category of the code point starting at AIndex in
    AString is a member of the set ACategory.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), and EArgumentException when the code unit is a
    surrogate for which IsSurrogatePair(AString, AIndex) is False. }
  class function TCharacter.TestCategory(
    const AString : UnicodeString;
    AIndex : Integer;
    ACategory : TUnicodeCategorySet
  ) : Boolean;
  var
    props : PUC_Prop;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    props := GetProps(Word(AString[AIndex]));
    if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
    begin
      if not IsSurrogatePair(AString, AIndex) then
        raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
      // Valid pair: take the properties from both code units.
      props := GetProps(AString[AIndex], AString[AIndex+1]);
    end;
    Exit(TUnicodeCategory(props^.Category) in ACategory);
  end;
  { Always raises ENoConstructException, formatted with the class name. }
  constructor TCharacter.Create;
  var
    clsName : ShortString;
  begin
    clsName := ClassName;
    raise ENoConstructException.CreateFmt(SClassCantBeConstructed, [clsName]);
  end;
  { Converts the UCS4 code point AChar to a UnicodeString of one code unit
    (values below UCS4_HALF_BASE) or two surrogate code units (otherwise).
    Raises EArgumentOutOfRangeException for a surrogate value or for a value
    above MAX_LEGAL_UTF32. }
  class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
  var
    offset : UCS4Char;
  begin
    if AChar < UCS4_HALF_BASE then
    begin
      if IsSurrogate(UnicodeChar(AChar)) then
        raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
      Result := UnicodeChar(AChar);
      Exit;
    end;
    if AChar > MAX_LEGAL_UTF32 then
      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
    SetLength(Result, 2);
    // Split the offset from UCS4_HALF_BASE into its high and low parts.
    offset := AChar - UCS4_HALF_BASE;
    Result[1] := UnicodeChar((offset shr 10) + HIGH_SURROGATE_BEGIN);
    Result[2] := UnicodeChar((offset and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
  end;
  { Returns the UCS4 value of the code point starting at AIndex in AString.
    A high surrogate is combined with the following code unit; any other
    code unit is returned as is.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), and EArgumentException when a high surrogate is the
    last code unit of the string. }
  class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
  var
    lead : UnicodeChar;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    lead := AString[AIndex];
    if not IsHighSurrogate(lead) then
      Exit(Word(lead));
    if Length(AString) < Succ(AIndex) then
      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
    Result := ConvertToUtf32(lead, AString[Succ(AIndex)]);
  end;
  { Returns the UCS4 value of the code point starting at AIndex in AString
    and stores in ACharLength the number of code units consumed (2 for a
    surrogate pair, 1 otherwise).
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), and EArgumentException when a high surrogate is the
    last code unit of the string. }
  class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
  var
    lead : UnicodeChar;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    lead := AString[AIndex];
    if IsHighSurrogate(lead) then
    begin
      if Length(AString) < Succ(AIndex) then
        raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
      Result := ConvertToUtf32(lead, AString[Succ(AIndex)]);
      ACharLength := 2;
    end
    else
    begin
      Result := Word(lead);
      ACharLength := 1;
    end;
  end;
  { Combines a high and a low surrogate into a UCS4 code point via ToUCS4.
    Raises EArgumentOutOfRangeException when AHighSurrogate is not a high
    surrogate or, checked second, ALowSurrogate is not a low surrogate. }
  class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
  begin
    if IsHighSurrogate(AHighSurrogate) then
    begin
      if IsLowSurrogate(ALowSurrogate) then
        Exit(ToUCS4(AHighSurrogate, ALowSurrogate));
      raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(ALowSurrogate)]);
    end;
    raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
  end;
  { Returns the NumericValue property stored for the code unit AChar. }
  class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
  var
    props : PUC_Prop;
  begin
    props := GetProps(Word(AChar));
    Exit(props^.NumericValue);
  end;
  { Returns the NumericValue property of the code point starting at AIndex
    in AString.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), and EArgumentException when the code unit is a
    surrogate for which IsSurrogatePair(AString, AIndex) is False. }
  class function TCharacter.GetNumericValue(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Double;
  var
    props : PUC_Prop;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    props := GetProps(Word(AString[AIndex]));
    if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
    begin
      if not IsSurrogatePair(AString, AIndex) then
        raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
      // Valid pair: take the properties from both code units.
      props := GetProps(AString[AIndex], AString[AIndex+1]);
    end;
    Exit(props^.NumericValue);
  end;
  { Returns the NumericValue property stored for the UCS4 code point aChar. }
  class function TCharacter.GetNumericValue(aChar: UCS4Char): Double;
  var
    props : PUC_Prop;
  begin
    props := GetProps(AChar);
    Exit(props^.NumericValue);
  end;
  { Returns the Category property of the code unit AChar as a TUnicodeCategory. }
  class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
  var
    props : PUC_Prop;
  begin
    props := GetProps(Word(AChar));
    Exit(TUnicodeCategory(props^.Category));
  end;
  { Returns the category of the code point starting at AIndex in AString.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), and EArgumentException when the code unit is a
    surrogate for which IsSurrogatePair(AString, AIndex) is False. }
  class function TCharacter.GetUnicodeCategory(
    const AString : UnicodeString;
    AIndex : Integer
  ) : TUnicodeCategory;
  var
    props : PUC_Prop;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    props := GetProps(Word(AString[AIndex]));
    if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
    begin
      if not IsSurrogatePair(AString, AIndex) then
        raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
      // Valid pair: take the properties from both code units.
      props := GetProps(AString[AIndex], AString[AIndex+1]);
    end;
    Exit(TUnicodeCategory(props^.Category));
  end;
  { Returns the category of the UCS4 code point aChar. Yields ucUnassigned
    when aChar exceeds MAX_LEGAL_UTF32 or when GetProps returns nil. }
  class function TCharacter.GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory;
  var
    props: PUC_Prop;
  begin
    if Cardinal(Ord(aChar)) > MAX_LEGAL_UTF32 then
      Exit(TUnicodeCategory.ucUnassigned);
    props := GetProps(aChar);
    if not Assigned(props) then
      Exit(TUnicodeCategory.ucUnassigned);
    Result := TUnicodeCategory(props^.Category);
  end;
  { True when the category stored for the code unit AChar is ucControl. }
  class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat = TUnicodeCategory.ucControl);
  end;
  { True when TestCategory reports ucControl at AIndex in AString. }
  class function TCharacter.IsControl(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucControl));
  end;
  { True when the category stored for the UCS4 code point aChar is ucControl. }
  class function TCharacter.IsControl(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat = TUnicodeCategory.ucControl);
  end;
  { True when the category stored for the code unit AChar is ucDecimalNumber. }
  class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat = TUnicodeCategory.ucDecimalNumber);
  end;
  { True when TestCategory reports ucDecimalNumber at AIndex in AString. }
  class function TCharacter.IsDigit(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucDecimalNumber));
  end;
  { True when the category stored for the UCS4 code point aChar is ucDecimalNumber. }
  class function TCharacter.IsDigit(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat = TUnicodeCategory.ucDecimalNumber);
  end;
  { True when the category stored for the code unit AChar is ucSurrogate. }
  class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat = TUnicodeCategory.ucSurrogate);
  end;
  { True when the code unit at AIndex in AString is a surrogate.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString). }
  class function TCharacter.IsSurrogate(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    if (AIndex >= 1) and (AIndex <= Length(AString)) then
      Exit(IsSurrogate(AString[AIndex]));
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  end;
  { True when aChar lies in the inclusive range MinSurrogate..MaxSurrogate. }
  class function TCharacter.IsSurrogate(aChar: UCS4Char): Boolean;
  begin
    if aChar < UCS4Char(MinSurrogate) then
      Exit(False);
    Result := (aChar <= UCS4Char(MaxSurrogate));
  end;
  { True when AChar has category ucSurrogate and its value lies in
    HIGH_SURROGATE_BEGIN..HIGH_SURROGATE_END. }
  class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
  var
    code : Word;
  begin
    code := Word(AChar);
    Exit(
      (TUnicodeCategory(GetProps(code)^.Category) = TUnicodeCategory.ucSurrogate) and
      (code >= HIGH_SURROGATE_BEGIN) and
      (code <= HIGH_SURROGATE_END)
    );
  end;
  { True when the code unit at AIndex in AString is a high surrogate.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString). }
  class function TCharacter.IsHighSurrogate(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    if (AIndex >= 1) and (AIndex <= Length(AString)) then
      Exit(IsHighSurrogate(AString[AIndex]));
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  end;
  { True when aChar lies in the inclusive range MinHighSurrogate..MaxHighSurrogate. }
  class function TCharacter.IsHighSurrogate(aChar: UCS4Char): Boolean;
  begin
    if aChar < UCS4Char(MinHighSurrogate) then
      Exit(False);
    Result := (aChar <= UCS4Char(MaxHighSurrogate));
  end;
  { True when AChar has category ucSurrogate and its value lies in
    LOW_SURROGATE_BEGIN..LOW_SURROGATE_END. }
  class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
  var
    code : Word;
  begin
    code := Word(AChar);
    Exit(
      (TUnicodeCategory(GetProps(code)^.Category) = TUnicodeCategory.ucSurrogate) and
      (code >= LOW_SURROGATE_BEGIN) and
      (code <= LOW_SURROGATE_END)
    );
  end;
  { True when the code unit at AIndex in AString is a low surrogate.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString). }
  class function TCharacter.IsLowSurrogate(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    if (AIndex >= 1) and (AIndex <= Length(AString)) then
      Exit(IsLowSurrogate(AString[AIndex]));
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  end;
  { True when aChar lies in the inclusive range MinLowSurrogate..MaxLowSurrogate. }
  class function TCharacter.IsLowSurrogate(aChar: UCS4Char): Boolean;
  begin
    if aChar < UCS4Char(MinLowSurrogate) then
      Exit(False);
    Result := (aChar <= UCS4Char(MaxLowSurrogate));
  end;
  { Returns UnicodeIsSurrogatePair for the two given code units. }
  class function TCharacter.IsSurrogatePair(
    const AHighSurrogate,
          ALowSurrogate : UnicodeChar
  ) : Boolean;
  begin
    Exit(UnicodeIsSurrogatePair(AHighSurrogate, ALowSurrogate));
  end;
  { Tests whether a surrogate pair starts at AIndex in AString.
    Returns False when the code unit at AIndex is not a high surrogate.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), or when a high surrogate is the last code unit. }
  class function TCharacter.IsSurrogatePair(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    if not IsHighSurrogate(AString[AIndex]) then
      Exit(False);
    // A high surrogate needs a following code unit to pair with.
    if ((AIndex+1) > Length(AString)) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex+1, Length(AString)]);
    Exit(IsSurrogatePair(AString[AIndex], AString[AIndex+1]));
  end;
  { True when the category stored for the code unit AChar is in LETTER_CATEGORIES. }
  class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat in LETTER_CATEGORIES);
  end;
  { True when TestCategory reports a LETTER_CATEGORIES member at AIndex in AString. }
  class function TCharacter.IsLetter(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, LETTER_CATEGORIES));
  end;
  { True when the category stored for the UCS4 code point aChar is in LETTER_CATEGORIES. }
  class function TCharacter.IsLetter(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat in LETTER_CATEGORIES);
  end;
  { True when the category stored for the code unit AChar is in LETTER_OR_DIGIT_CATEGORIES. }
  class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat in LETTER_OR_DIGIT_CATEGORIES);
  end;
  { True when TestCategory reports a LETTER_OR_DIGIT_CATEGORIES member at AIndex in AString. }
  class function TCharacter.IsLetterOrDigit(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, LETTER_OR_DIGIT_CATEGORIES));
  end;
  { True when the category stored for the UCS4 code point aChar is in LETTER_OR_DIGIT_CATEGORIES. }
  class function TCharacter.IsLetterOrDigit(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat in LETTER_OR_DIGIT_CATEGORIES);
  end;
  { True when the category stored for the code unit AChar is ucLowercaseLetter. }
  class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat = TUnicodeCategory.ucLowercaseLetter);
  end;
  { True when TestCategory reports ucLowercaseLetter at AIndex in AString. }
  class function TCharacter.IsLower(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucLowercaseLetter));
  end;
  { True when the category stored for the UCS4 code point aChar is ucLowercaseLetter. }
  class function TCharacter.IsLower(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat = TUnicodeCategory.ucLowercaseLetter);
  end;
  { True when the category stored for the code unit AChar is in NUMBER_CATEGORIES. }
  class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat in NUMBER_CATEGORIES);
  end;
  { True when TestCategory reports a NUMBER_CATEGORIES member at AIndex in AString. }
  class function TCharacter.IsNumber(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, NUMBER_CATEGORIES));
  end;
  { True when the category stored for the UCS4 code point aChar is in NUMBER_CATEGORIES. }
  class function TCharacter.IsNumber(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat in NUMBER_CATEGORIES);
  end;
  { True when the category stored for the code unit AChar is in PUNCTUATION_CATEGORIES. }
  class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat in PUNCTUATION_CATEGORIES);
  end;
  { True when TestCategory reports a PUNCTUATION_CATEGORIES member at AIndex in AString. }
  class function TCharacter.IsPunctuation(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, PUNCTUATION_CATEGORIES));
  end;
  { True when the category stored for the UCS4 code point aChar is in PUNCTUATION_CATEGORIES. }
  class function TCharacter.IsPunctuation(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat in PUNCTUATION_CATEGORIES);
  end;
  { True when the category stored for the code unit AChar is in SEPARATOR_CATEGORIES. }
  class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat in SEPARATOR_CATEGORIES);
  end;
  { True when TestCategory reports a SEPARATOR_CATEGORIES member at AIndex in AString. }
  class function TCharacter.IsSeparator(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, SEPARATOR_CATEGORIES));
  end;
  { True when the category stored for the UCS4 code point aChar is in
    SEPARATOR_CATEGORIES.
    The lookup passes the full code point to GetProps; narrowing it to Word
    would drop the upper bits of any code point above $FFFF and test a
    different character. }
  class function TCharacter.IsSeparator(aChar: UCS4Char): Boolean;
  begin
    Result := (TUnicodeCategory(GetProps(aChar)^.Category) in SEPARATOR_CATEGORIES);
  end;
  { True when the category stored for the code unit AChar is in SYMBOL_CATEGORIES. }
  class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat in SYMBOL_CATEGORIES);
  end;
  { True when the category stored for the UCS4 code point aChar is in SYMBOL_CATEGORIES. }
  class function TCharacter.IsSymbol(aChar: UCS4Char): Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(aChar)^.Category);
    Exit(cat in SYMBOL_CATEGORIES);
  end;
  { True when TestCategory reports a SYMBOL_CATEGORIES member at AIndex in AString. }
  class function TCharacter.IsSymbol(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, SYMBOL_CATEGORIES));
  end;
  { True when the category stored for the code unit AChar is ucUppercaseLetter. }
  class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
  var
    cat : TUnicodeCategory;
  begin
    cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
    Exit(cat = TUnicodeCategory.ucUppercaseLetter);
  end;
  { True when TestCategory reports ucUppercaseLetter at AIndex in AString. }
  class function TCharacter.IsUpper(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  begin
    Exit(TestCategory(AString, AIndex, TUnicodeCategory.ucUppercaseLetter));
  end;
  { True when the category stored for the UCS4 code point aChar is
    ucUppercaseLetter.
    The lookup passes the full code point to GetProps; narrowing it to Word
    would drop the upper bits of any code point above $FFFF and test a
    different character. }
  class function TCharacter.IsUpper(aChar: UCS4Char): Boolean;
  begin
    Result := (TUnicodeCategory(GetProps(aChar)^.Category) = TUnicodeCategory.ucUppercaseLetter);
  end;
  { Returns the WhiteSpace property stored for the code unit AChar. }
  class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
  var
    props : PUC_Prop;
  begin
    props := GetProps(Word(AChar));
    Exit(props^.WhiteSpace);
  end;
  { Returns the WhiteSpace property of the code point starting at AIndex in
    AString.
    Raises EArgumentOutOfRangeException when AIndex is outside
    1..Length(AString), and EArgumentException when the code unit is a
    surrogate for which IsSurrogatePair(AString, AIndex) is False. }
  class function TCharacter.IsWhiteSpace(
    const AString : UnicodeString;
    AIndex : Integer
  ) : Boolean;
  var
    props : PUC_Prop;
  begin
    if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
    props := GetProps(Word(AString[AIndex]));
    if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
    begin
      if not IsSurrogatePair(AString, AIndex) then
        raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
      // Valid pair: take the properties from both code units.
      props := GetProps(AString[AIndex], AString[AIndex+1]);
    end;
    Exit(props^.WhiteSpace);
  end;
  { Returns the WhiteSpace property stored for the UCS4 code point aChar.
    The lookup passes the full code point to GetProps; narrowing it to Word
    would drop the upper bits of any code point above $FFFF and test a
    different character. }
  class function TCharacter.IsWhiteSpace(aChar: UCS4Char): Boolean;
  begin
    Result := GetProps(aChar)^.WhiteSpace;
  end;
  { Returns the SimpleLowerCase mapping stored for AChar, or AChar itself
    when the stored mapping is zero. }
  class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
  var
    mapped : UnicodeChar;
  begin
    mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
    if mapped = UnicodeChar(0) then
      Exit(AChar);
    Result := mapped;
  end;
  { Lowercases AString using the options overload with an empty option set. }
  class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
  begin
    Exit(ToLower(AString, []));
  end;
  { Lowercases AString through UnicodeToLower, passing whether
    coIgnoreInvalidSequence is set in AOptions.
    Raises EArgumentException when UnicodeToLower returns a non-zero code. }
  class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
  var
    ignoreInvalid : Boolean;
  begin
    ignoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
    if UnicodeToLower(AString, ignoreInvalid, Result) <> 0 then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  end;
  { Returns the SimpleLowerCase mapping stored for the UCS4 code point
    aChar, or aChar itself when the stored mapping is zero. }
  class function TCharacter.ToLower(aChar: UCS4Char): UCS4Char;
  var
    mapped : UCS4Char;
  begin
    mapped := UCS4Char(Cardinal(GetProps(aChar)^.SimpleLowerCase));
    if mapped = UCS4Char(0) then
      Exit(aChar);
    Result := mapped;
  end;
  { Returns the SimpleUpperCase mapping stored for AChar, or AChar itself
    when the stored mapping is zero. }
  class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
  var
    mapped : UnicodeChar;
  begin
    mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
    if mapped = UnicodeChar(0) then
      Exit(AChar);
    Result := mapped;
  end;
  { Uppercases AString using the options overload with an empty option set. }
  class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
  begin
    Exit(ToUpper(AString, []));
  end;
  { Uppercases AString through UnicodeToUpper, passing whether
    coIgnoreInvalidSequence is set in AOptions.
    Raises EArgumentException when UnicodeToUpper returns a non-zero code. }
  class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
  var
    ignoreInvalid : Boolean;
  begin
    ignoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
    if UnicodeToUpper(AString, ignoreInvalid, Result) <> 0 then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  end;
  { Returns the SimpleUpperCase mapping stored for the UCS4 code point
    aChar, or aChar itself when the stored mapping is zero. }
  class function TCharacter.ToUpper(aChar: UCS4Char): UCS4Char;
  var
    mapped : UCS4Char;
  begin
    mapped := UCS4Char(Cardinal(GetProps(aChar)^.SimpleUpperCase));
    if mapped = UCS4Char(0) then
      Exit(aChar);
    Result := mapped;
  end;
  { Returns the constant MAX_LEGAL_UTF32. }
  class function TCharacter.MaxCodePoint: Integer;
  begin
    Exit(MAX_LEGAL_UTF32);
  end;
  { Currently returns an empty string. }
  class function TCharacter.UnicodeDataVersion: UnicodeString;
  begin
    Exit('');
  end;
  { True when GetProps yields a non-nil property record for the code unit aChar. }
  class function TCharacter.IsDefined(aChar: UnicodeChar): Boolean;
  begin
    Exit(Assigned(GetProps(Word(aChar))));
  end;
  { True when GetProps yields a non-nil property record for the UCS4 code point aChar. }
  class function TCharacter.IsDefined(aChar: UCS4Char): Boolean;
  begin
    Exit(Assigned(GetProps(aChar)));
  end;
  { Reports IsDefined for the code unit at Index in S.
    Raises EArgumentOutOfRangeException when Index is outside 1..Length(S),
    matching the other string-indexed methods of this class instead of
    reading past the string. }
  class function TCharacter.IsDefined(const S: unicodestring; Index: Integer): Boolean;
  begin
    if (Index < 1) or (Index > Length(S)) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [Index, Length(S)]);
    Result:=IsDefined(S[Index]);
  end;
  1063. { TCharHelper }
  { Returns the constant MAX_LEGAL_UTF32. }
  class function TCharHelper.MaxCodePoint: Integer;
  begin
    Exit(MAX_LEGAL_UTF32);
  end;
  { Currently returns an empty string. }
  class function TCharHelper.UnicodeDataVersion: UnicodeString;
  begin
    Exit('');
  end;
  { Returns TCharacter.IsHighSurrogate for the helped value. }
  function TCharHelper.IsHighSurrogate: Boolean;
  begin
    Exit(TCharacter.IsHighSurrogate(Self));
  end;
  { Returns TCharacter.IsLowSurrogate for the helped value. }
  function TCharHelper.IsLowSurrogate: Boolean;
  begin
    Exit(TCharacter.IsLowSurrogate(Self));
  end;
  { Returns TCharacter.IsSurrogate for the helped value. }
  function TCharHelper.IsSurrogate: Boolean;
  begin
    Exit(TCharacter.IsSurrogate(Self));
  end;
  { Returns TCharacter.IsSurrogatePair for the two given code units. }
  class function TCharHelper.IsSurrogatePair(const HighSurrogate, LowSurrogate: UnicodeChar): Boolean;
  begin
    Exit(TCharacter.IsSurrogatePair(HighSurrogate, LowSurrogate));
  end;
  { Returns the helped value typecast to UCS4Char. }
  function TCharHelper.ToUCS4Char: UCS4Char;
  begin
    Exit(UCS4Char(Self));
  end;
  { Returns TCharacter.ConvertToUtf32(S, Index, CharLength). }
  class function TCharHelper.ConvertToUtf32(const S: UnicodeString; Index: Integer; out CharLength: Integer): UCS4Char;
  begin
    Exit(TCharacter.ConvertToUtf32(S, Index, CharLength));
  end;
  { Returns TCharacter.ConvertToUtf32(S, Index). }
  class function TCharHelper.ConvertToUtf32(const S: UnicodeString; Index: Integer): UCS4Char;
  begin
    Exit(TCharacter.ConvertToUtf32(S, Index));
  end;
  { Returns TCharacter.ConvertFromUtf32(aChar). }
  class function TCharHelper.ConvertFromUtf32(aChar: UCS4Char): UnicodeString;
  begin
    Exit(TCharacter.ConvertFromUtf32(aChar));
  end;
  { Returns TCharacter.ConvertToUtf32(HighSurrogate, LowSurrogate). }
  class function TCharHelper.ConvertToUtf32(const HighSurrogate, LowSurrogate: UnicodeChar): UCS4Char;
  begin
    Exit(TCharacter.ConvertToUtf32(HighSurrogate, LowSurrogate));
  end;
  { Returns TCharacter.IsLetter for the helped value. }
  function TCharHelper.IsLetter: Boolean;
  begin
    Exit(TCharacter.IsLetter(Self));
  end;
  { Returns TCharacter.IsLetterOrDigit for the helped value. }
  function TCharHelper.IsLetterOrDigit: Boolean;
  begin
    Exit(TCharacter.IsLetterOrDigit(Self));
  end;
  { Returns TCharacter.IsLetter(S, Index). }
  class function TCharHelper.IsLetter(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsLetter(S, Index));
  end;
  { Returns TCharacter.IsLetterOrDigit(S, Index). }
  class function TCharHelper.IsLetterOrDigit(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsLetterOrDigit(S, Index));
  end;
  { Returns TCharacter.IsControl(S, Index). }
  class function TCharHelper.IsControl(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsControl(S, Index));
  end;
  { Returns TCharacter.IsDefined(S, Index). }
  class function TCharHelper.IsDefined(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsDefined(S, Index));
  end;
  { Returns TCharacter.IsControl for the helped value. }
  function TCharHelper.IsControl: Boolean;
  begin
    Exit(TCharacter.IsControl(Self));
  end;
  { Returns TCharacter.IsDefined for the helped value. }
  function TCharHelper.IsDefined: Boolean;
  begin
    Exit(TCharacter.IsDefined(Self));
  end;
  { Returns TCharacter.IsDigit for the helped value. }
  function TCharHelper.IsDigit: Boolean;
  begin
    Exit(TCharacter.IsDigit(Self));
  end;
  { Returns TCharacter.IsDigit(S, Index). }
  class function TCharHelper.IsDigit(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsDigit(S, Index));
  end;
  { True when any element of SomeChars equals the helped value;
    False for an empty array or when no element matches. }
  function TCharHelper.IsInArray(const SomeChars: array of UnicodeChar): Boolean;
  var
    i: Integer;
  begin
    Result := False;
    for i := Low(SomeChars) to High(SomeChars) do
      if SomeChars[i] = Self then
        Exit(True);
  end;
  { Returns TCharacter.IsLower for the helped value. }
  function TCharHelper.IsLower: Boolean;
  begin
    Exit(TCharacter.IsLower(Self));
  end;
  { Returns TCharacter.GetUnicodeCategory for the helped value. }
  function TCharHelper.GetUnicodeCategory: TUnicodeCategory;
  begin
    Exit(TCharacter.GetUnicodeCategory(Self));
  end;
  { Returns TCharacter.GetNumericValue for the helped value. }
  function TCharHelper.GetNumericValue: Double;
  begin
    Exit(TCharacter.GetNumericValue(Self));
  end;
  { Returns TCharacter.GetNumericValue(S, Index). }
  class function TCharHelper.GetNumericValue(const S: UnicodeString; Index: Integer): Double;
  begin
    Exit(TCharacter.GetNumericValue(S, Index));
  end;
  { Returns TCharacter.GetUnicodeCategory(S, Index). }
  class function TCharHelper.GetUnicodeCategory(const S: UnicodeString; Index: Integer): TUnicodeCategory;
  begin
    Exit(TCharacter.GetUnicodeCategory(S, Index));
  end;
  { Returns TCharacter.IsHighSurrogate(S, Index). }
  class function TCharHelper.IsHighSurrogate(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsHighSurrogate(S, Index));
  end;
  { True when the code unit at Index in S equals any element of SomeChars.
    Raises EArgumentOutOfRangeException when Index is outside 1..Length(S),
    as the string-indexed TCharacter methods do, instead of indexing the
    string unchecked. }
  class function TCharHelper.IsInArray(const S: UnicodeString; Index: Integer; const SomeChars: array of UnicodeChar): Boolean;
  begin
    if (Index < 1) or (Index > Length(S)) then
      raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [Index, Length(S)]);
    Result:=S[Index].IsInArray(SomeChars);
  end;
  { Returns TCharacter.IsLower(S, Index). }
  class function TCharHelper.IsLower(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsLower(S, Index));
  end;
  { True when the code unit at Index in S is a low surrogate.
    Delegates to TCharacter.IsLowSurrogate(S, Index), which raises
    EArgumentOutOfRangeException when Index is outside 1..Length(S); this
    mirrors TCharHelper.IsHighSurrogate(S, Index) and avoids indexing the
    string unchecked. }
  class function TCharHelper.IsLowSurrogate(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Result:=TCharacter.IsLowSurrogate(S,Index);
  end;
  { Returns TCharacter.IsNumber for the helped value. }
  function TCharHelper.IsNumber: Boolean;
  begin
    Exit(TCharacter.IsNumber(Self));
  end;
  { Returns TCharacter.IsPunctuation for the helped value. }
  function TCharHelper.IsPunctuation: Boolean;
  begin
    Exit(TCharacter.IsPunctuation(Self));
  end;
  { Returns TCharacter.IsNumber(S, Index). }
  class function TCharHelper.IsNumber(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsNumber(S, Index));
  end;
  { Returns TCharacter.IsPunctuation(S, Index). }
  class function TCharHelper.IsPunctuation(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsPunctuation(S, Index));
  end;
  { Returns TCharacter.IsSeparator for the helped value. }
  function TCharHelper.IsSeparator: Boolean;
  begin
    Exit(TCharacter.IsSeparator(Self));
  end;
  { Returns TCharacter.IsSeparator(S, Index). }
  class function TCharHelper.IsSeparator(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsSeparator(S, Index));
  end;
  { Returns TCharacter.IsSurrogate(S, Index). }
  class function TCharHelper.IsSurrogate(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsSurrogate(S, Index));
  end;
  { Returns TCharacter.IsSurrogatePair(S, Index). }
  class function TCharHelper.IsSurrogatePair(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsSurrogatePair(S, Index));
  end;
  { Returns TCharacter.IsSymbol for the helped value. }
  function TCharHelper.IsSymbol: Boolean;
  begin
    Exit(TCharacter.IsSymbol(Self));
  end;
  { Returns TCharacter.IsSymbol(S, Index). }
  class function TCharHelper.IsSymbol(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsSymbol(S, Index));
  end;
  { Returns TCharacter.IsUpper(S, Index). }
  class function TCharHelper.IsUpper(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsUpper(S, Index));
  end;
  { Returns TCharacter.IsUpper for the helped value. }
  function TCharHelper.IsUpper: Boolean;
  begin
    Exit(TCharacter.IsUpper(Self));
  end;
  { Returns TCharacter.IsWhiteSpace(S, Index). }
  class function TCharHelper.IsWhiteSpace(const S: UnicodeString; Index: Integer): Boolean;
  begin
    Exit(TCharacter.IsWhiteSpace(S, Index));
  end;
  { Returns TCharacter.ToLower applied to the helped value. }
  function TCharHelper.ToLower: UnicodeChar;
  begin
    Exit(TCharacter.ToLower(Self));
  end;
  { Returns TCharacter.ToLower(S). }
  class function TCharHelper.ToLower(const S: UnicodeString): UnicodeString;
  begin
    Exit(TCharacter.ToLower(S));
  end;
  { Returns TCharacter.ToUpper applied to the helped value. }
  function TCharHelper.ToUpper: UnicodeChar;
  begin
    Exit(TCharacter.ToUpper(Self));
  end;
  { Returns TCharacter.ToUpper(S). }
  class function TCharHelper.ToUpper(const S: UnicodeString): UnicodeString;
  begin
    Exit(TCharacter.ToUpper(S));
  end;
  { Returns TCharacter.IsWhiteSpace for the helped value. }
  function TCharHelper.IsWhiteSpace: Boolean;
  begin
    Exit(TCharacter.IsWhiteSpace(Self));
  end;
  { Returns TCharacter.IsLetterOrDigit for the UCS4 code point aChar. }
  class function TCharHelper.IsLetterOrDigit(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLetterOrDigit(aChar));
  end;
  { Returns TCharacter.IsControl for the UCS4 code point aChar. }
  class function TCharHelper.IsControl(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsControl(aChar));
  end;
  { Returns TCharacter.IsDefined for the UCS4 code point aChar. }
  class function TCharHelper.IsDefined(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsDefined(aChar));
  end;
  { Returns TCharacter.IsDigit for the UCS4 code point aChar. }
  class function TCharHelper.IsDigit(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsDigit(aChar));
  end;
  { Returns TCharacter.IsHighSurrogate for the UCS4 code point aChar. }
  class function TCharHelper.IsHighSurrogate(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsHighSurrogate(aChar));
  end;
  { Returns TCharacter.IsLetter for the UCS4 code point aChar. }
  class function TCharHelper.IsLetter(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLetter(aChar));
  end;
  { Returns TCharacter.IsLowSurrogate for the UCS4 code point aChar. }
  class function TCharHelper.IsLowSurrogate(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLowSurrogate(aChar));
  end;
  { Returns TCharacter.IsSurrogate for the UCS4 code point Surrogate. }
  class function TCharHelper.IsSurrogate(Surrogate: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsSurrogate(Surrogate));
  end;
  { Returns TCharacter.GetUnicodeCategory for the UCS4 code point aChar. }
  class function TCharHelper.GetUnicodeCategory(aChar: UCS4Char): TUnicodeCategory;
  begin
    Exit(TCharacter.GetUnicodeCategory(aChar));
  end;
  { Returns TCharacter.GetNumericValue for the UCS4 code point aChar. }
  class function TCharHelper.GetNumericValue(aChar: UCS4Char): Double;
  begin
    Exit(TCharacter.GetNumericValue(aChar));
  end;
  { Returns TCharacter.IsLower for the UCS4 code point aChar. }
  class function TCharHelper.IsLower(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsLower(aChar));
  end;
  { Returns TCharacter.IsNumber for the UCS4 code point aChar. }
  class function TCharHelper.IsNumber(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsNumber(aChar));
  end;
  { Returns TCharacter.IsPunctuation for the UCS4 code point aChar. }
  class function TCharHelper.IsPunctuation(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsPunctuation(aChar));
  end;
  { Returns TCharacter.IsSeparator for the UCS4 code point aChar. }
  class function TCharHelper.IsSeparator(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsSeparator(aChar));
  end;
  { Returns TCharacter.IsSymbol for the UCS4 code point aChar. }
  class function TCharHelper.IsSymbol(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsSymbol(aChar));
  end;
  { Returns TCharacter.IsUpper for the UCS4 code point aChar. }
  class function TCharHelper.IsUpper(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsUpper(aChar));
  end;
  { Returns TCharacter.ToLower applied to the UCS4 code point aChar. }
  class function TCharHelper.ToLower(aChar: UCS4Char): UCS4Char;
  begin
    Exit(TCharacter.ToLower(aChar));
  end;
  { Returns TCharacter.ToUpper applied to the UCS4 code point aChar. }
  class function TCharHelper.ToUpper(aChar: UCS4Char): UCS4Char;
  begin
    Exit(TCharacter.ToUpper(aChar));
  end;
  { Returns TCharacter.IsWhiteSpace for the UCS4 code point aChar. }
  class function TCharHelper.IsWhiteSpace(aChar: UCS4Char): Boolean;
  begin
    Exit(TCharacter.IsWhiteSpace(aChar));
  end;
  1341. end.