character.pas 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. { Unicode "Character" properties handler.
  2. Copyright (c) 2012 by Inoussa OUEDRAOGO
  3. The source code is distributed under the Library GNU
  4. General Public License with the following modification:
  5. - object files and libraries linked into an application may be
  6. distributed without source code.
  7. If you didn't receive a copy of the file COPYING, contact:
  8. Free Software Foundation
  9. 675 Mass Ave
  10. Cambridge, MA 02139
  11. USA
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
  15. unit Character;
  16. interface
  17. {$ifndef VER2_4}
  18. {$mode objfpc}
  19. {$H+}
  20. {$PACKENUM 1}
  21. {$SCOPEDENUMS ON}
  22. uses
  23. unicodedata;
  24. type
  25. // Unicode General Category
  26. TUnicodeCategory = (
  27. ucUppercaseLetter, // Lu = Letter, uppercase
  28. ucLowercaseLetter, // Ll = Letter, lowercase
  29. ucTitlecaseLetter, // Lt = Letter, titlecase
  30. ucModifierLetter, // Lm = Letter, modifier
  31. ucOtherLetter, // Lo = Letter, other
  32. ucNonSpacingMark, // Mn = Mark, nonspacing
  33. ucCombiningMark, // Mc = Mark, spacing combining
  34. ucEnclosingMark, // Me = Mark, enclosing
  35. ucDecimalNumber, // Nd = Number, decimal digit
  36. ucLetterNumber, // Nl = Number, letter
  37. ucOtherNumber, // No = Number, other
  38. ucConnectPunctuation, // Pc = Punctuation, connector
  39. ucDashPunctuation, // Pd = Punctuation, dash
  40. ucOpenPunctuation, // Ps = Punctuation, open
  41. ucClosePunctuation, // Pe = Punctuation, close
  42. ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
  43. ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
  44. ucOtherPunctuation, // Po = Punctuation, other
  45. ucMathSymbol, // Sm = Symbol, math
  46. ucCurrencySymbol, // Sc = Symbol, currency
  47. ucModifierSymbol, // Sk = Symbol, modifier
  48. ucOtherSymbol, // So = Symbol, other
  49. ucSpaceSeparator, // Zs = Separator, space
  50. ucLineSeparator, // Zl = Separator, line
  51. ucParagraphSeparator, // Zp = Separator, paragraph
  52. ucControl, // Cc = Other, control
  53. ucFormat, // Cf = Other, format
  54. ucSurrogate, // Cs = Other, surrogate
  55. ucPrivateUse, // Co = Other, private use
  56. ucUnassigned // Cn = Other, not assigned (including noncharacters)
  57. );
  58. TUnicodeCategorySet = set of TUnicodeCategory;
  59. TCharacterOption = (coIgnoreInvalidSequence);
  60. TCharacterOptions = set of TCharacterOption;
  61. { TCharacter }
  62. TCharacter = class sealed
  63. private
  64. class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static;
  65. class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static;
  66. public
  67. constructor Create;
  68. class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
  69. class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
  70. class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
  71. class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;
  72. class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
  73. class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
  74. class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
  75. class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;
  76. class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
  77. class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  78. class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
  79. class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  80. class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  81. class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  82. class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  83. class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  84. class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  85. class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  86. class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
  87. class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  88. class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
  89. class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  90. class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
  91. class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  92. class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline;
  93. class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  94. class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
  95. class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  96. class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
  97. class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  98. class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
  99. class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  100. class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
  101. class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  102. class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
  103. class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
  104. class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
  105. class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  106. class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
  107. class function ToLower(const AString : UnicodeString) : UnicodeString; inline;overload; static;
  108. class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
  109. class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
  110. class function ToUpper(const AString : UnicodeString) : UnicodeString; inline; overload; static;
  111. class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
  112. end;
  113. // flat functions
  114. function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
  115. function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
  116. function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
  117. function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
  118. function GetNumericValue(AChar : UnicodeChar) : Double; overload;
  119. function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload;
  120. function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload;
  121. function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload;
  122. function IsControl(AChar : UnicodeChar) : Boolean; overload;
  123. function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  124. function IsDigit(AChar : UnicodeChar) : Boolean; overload;
  125. function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  126. function IsSurrogate(AChar : UnicodeChar) : Boolean; overload;
  127. function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  128. function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload;
  129. function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  130. function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload;
  131. function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  132. function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload;
  133. function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  134. function IsLetter(AChar : UnicodeChar) : Boolean; overload;
  135. function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  136. function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload;
  137. function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  138. function IsLower(AChar : UnicodeChar) : Boolean; overload;
  139. function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  140. function IsNumber(AChar : UnicodeChar) : Boolean; overload;
  141. function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  142. function IsPunctuation(AChar : UnicodeChar) : Boolean; overload;
  143. function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  144. function IsSeparator(AChar : UnicodeChar) : Boolean; overload;
  145. function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  146. function IsSymbol(AChar : UnicodeChar) : Boolean; overload;
  147. function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  148. function IsUpper(AChar : UnicodeChar) : Boolean; overload;
  149. function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  150. function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload;
  151. function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  152. function ToLower(AChar : UnicodeChar) : UnicodeChar; overload;
  153. function ToLower(const AString : UnicodeString) : UnicodeString; overload;
  154. function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload;
  155. function ToUpper(const AString : UnicodeString) : UnicodeString; overload;
  156. {$endif VER2_4}
  157. implementation
  158. {$ifndef VER2_4}
  159. uses
  160. SysUtils,
  161. RtlConsts;
  162. const
  163. LETTER_CATEGORIES = [
  164. TUnicodeCategory.ucUppercaseLetter, TUnicodeCategory.ucLowercaseLetter,
  165. TUnicodeCategory.ucTitlecaseLetter, TUnicodeCategory.ucModifierLetter,
  166. TUnicodeCategory.ucOtherLetter
  167. ];
  168. LETTER_OR_DIGIT_CATEGORIES =
  169. LETTER_CATEGORIES +
  170. [TUnicodeCategory.ucDecimalNumber,TUnicodeCategory.ucLetterNumber];
  171. NUMBER_CATEGORIES =
  172. [ TUnicodeCategory.ucDecimalNumber, TUnicodeCategory.ucLetterNumber,
  173. TUnicodeCategory.ucOtherNumber
  174. ];
  175. PUNCTUATION_CATEGORIES = [
  176. TUnicodeCategory.ucConnectPunctuation, TUnicodeCategory.ucDashPunctuation,
  177. TUnicodeCategory.ucOpenPunctuation, TUnicodeCategory.ucClosePunctuation,
  178. TUnicodeCategory.ucInitialPunctuation, TUnicodeCategory.ucFinalPunctuation,
  179. TUnicodeCategory.ucOtherPunctuation
  180. ];
  181. SEPARATOR_CATEGORIES =
  182. [ TUnicodeCategory.ucSpaceSeparator, TUnicodeCategory.ucLineSeparator,
  183. TUnicodeCategory.ucParagraphSeparator
  184. ];
  185. SYMBOL_CATEGORIES =
  186. [ TUnicodeCategory.ucMathSymbol, TUnicodeCategory.ucCurrencySymbol,
  187. TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
  188. ];
  189. function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
  190. begin
  191. Result := TCharacter.ConvertFromUtf32(AChar);
  192. end;
  193. function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer): UCS4Char;
  194. begin
  195. Result := TCharacter.ConvertToUtf32(AString, AIndex);
  196. end;
  197. function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer; out ACharLength: Integer): UCS4Char;
  198. begin
  199. Result := TCharacter.ConvertToUtf32(AString, AIndex, ACharLength);
  200. end;
  201. function ConvertToUtf32(const AHighSurrogate, ALowSurrogate: UnicodeChar): UCS4Char;
  202. begin
  203. Result := TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate);
  204. end;
  205. function GetNumericValue(AChar: UnicodeChar): Double;
  206. begin
  207. Result := TCharacter.GetNumericValue(AChar);
  208. end;
  209. function GetNumericValue(const AString: UnicodeString; AIndex: Integer): Double;
  210. begin
  211. Result := TCharacter.GetNumericValue(AString, AIndex);
  212. end;
  213. function GetUnicodeCategory(AChar: UnicodeChar): TUnicodeCategory;
  214. begin
  215. Result := TCharacter.GetUnicodeCategory(AChar);
  216. end;
  217. function GetUnicodeCategory(const AString: UnicodeString; AIndex: Integer): TUnicodeCategory;
  218. begin
  219. Result := TCharacter.GetUnicodeCategory(AString, AIndex);
  220. end;
  221. function IsControl(AChar: UnicodeChar): Boolean;
  222. begin
  223. Result := TCharacter.IsControl(AChar);
  224. end;
  225. function IsControl(const AString: UnicodeString; AIndex: Integer): Boolean;
  226. begin
  227. Result := TCharacter.IsControl(AString, AIndex);
  228. end;
  229. function IsDigit(AChar: UnicodeChar): Boolean;
  230. begin
  231. Result := TCharacter.IsDigit(AChar);
  232. end;
  233. function IsDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
  234. begin
  235. Result := TCharacter.IsDigit(AString, AIndex);
  236. end;
  237. function IsSurrogate(AChar: UnicodeChar): Boolean;
  238. begin
  239. Result := TCharacter.IsSurrogate(AChar);
  240. end;
  241. function IsSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
  242. begin
  243. Result := TCharacter.IsSurrogate(AString, AIndex);
  244. end;
  245. function IsHighSurrogate(AChar: UnicodeChar): Boolean;
  246. begin
  247. Result := TCharacter.IsHighSurrogate(AChar);
  248. end;
  249. function IsHighSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
  250. begin
  251. Result := TCharacter.IsHighSurrogate(AString, AIndex);
  252. end;
  253. function IsLowSurrogate(AChar: UnicodeChar): Boolean;
  254. begin
  255. Result := TCharacter.IsLowSurrogate(AChar);
  256. end;
  257. function IsLowSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
  258. begin
  259. Result := TCharacter.IsLowSurrogate(AString, AIndex);
  260. end;
  261. function IsSurrogatePair(const AHighSurrogate, ALowSurrogate: UnicodeChar): Boolean;
  262. begin
  263. Result := TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate);
  264. end;
  265. function IsSurrogatePair(const AString: UnicodeString; AIndex: Integer): Boolean;
  266. begin
  267. Result := TCharacter.IsSurrogatePair(AString, AIndex);
  268. end;
  269. function IsLetter(AChar: UnicodeChar): Boolean;
  270. begin
  271. Result := TCharacter.IsLetter(AChar);
  272. end;
  273. function IsLetter(const AString: UnicodeString; AIndex: Integer): Boolean;
  274. begin
  275. Result := TCharacter.IsLetter(AString, AIndex);
  276. end;
  277. function IsLetterOrDigit(AChar: UnicodeChar): Boolean;
  278. begin
  279. Result := TCharacter.IsLetterOrDigit(AChar);
  280. end;
  281. function IsLetterOrDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
  282. begin
  283. Result := TCharacter.IsLetterOrDigit(AString, AIndex);
  284. end;
  285. function IsLower(AChar: UnicodeChar): Boolean;
  286. begin
  287. Result := TCharacter.IsLower(AChar);
  288. end;
  289. function IsLower(const AString: UnicodeString; AIndex: Integer): Boolean;
  290. begin
  291. Result := TCharacter.IsLower(AString, AIndex);
  292. end;
  293. function IsNumber(AChar: UnicodeChar): Boolean;
  294. begin
  295. Result := TCharacter.IsNumber(AChar);
  296. end;
  297. function IsNumber(const AString: UnicodeString; AIndex: Integer): Boolean;
  298. begin
  299. Result := TCharacter.IsNumber(AString, AIndex);
  300. end;
  301. function IsPunctuation(AChar: UnicodeChar): Boolean;
  302. begin
  303. Result := TCharacter.IsPunctuation(AChar);
  304. end;
  305. function IsPunctuation(const AString: UnicodeString; AIndex: Integer): Boolean;
  306. begin
  307. Result := TCharacter.IsPunctuation(AString, AIndex);
  308. end;
  309. function IsSeparator(AChar: UnicodeChar): Boolean;
  310. begin
  311. Result := TCharacter.IsSeparator(AChar);
  312. end;
  313. function IsSeparator(const AString: UnicodeString; AIndex: Integer): Boolean;
  314. begin
  315. Result := TCharacter.IsSeparator(AString, AIndex);
  316. end;
  317. function IsSymbol(AChar: UnicodeChar): Boolean;
  318. begin
  319. Result := TCharacter.IsSymbol(AChar);
  320. end;
  321. function IsSymbol(const AString: UnicodeString; AIndex: Integer): Boolean;
  322. begin
  323. Result := TCharacter.IsSymbol(AString, AIndex);
  324. end;
  325. function IsUpper(AChar: UnicodeChar): Boolean;
  326. begin
  327. Result := TCharacter.IsUpper(AChar);
  328. end;
  329. function IsUpper(const AString: UnicodeString; AIndex: Integer): Boolean;
  330. begin
  331. Result := TCharacter.IsUpper(AString, AIndex);
  332. end;
  333. function IsWhiteSpace(AChar: UnicodeChar): Boolean;
  334. begin
  335. Result := TCharacter.IsWhiteSpace(AChar);
  336. end;
  337. function IsWhiteSpace(const AString: UnicodeString; AIndex: Integer): Boolean;
  338. begin
  339. Result := TCharacter.IsWhiteSpace(AString, AIndex);
  340. end;
  341. function ToLower(AChar: UnicodeChar): UnicodeChar;
  342. begin
  343. Result := TCharacter.ToLower(AChar);
  344. end;
  345. function ToLower(const AString: UnicodeString): UnicodeString;
  346. begin
  347. Result := TCharacter.ToLower(AString);
  348. end;
  349. function ToUpper(AChar: UnicodeChar): UnicodeChar;
  350. begin
  351. Result := TCharacter.ToUpper(AChar);
  352. end;
  353. function ToUpper(const AString: UnicodeString): UnicodeString;
  354. begin
  355. Result := TCharacter.ToUpper(AString);
  356. end;
  357. { TCharacter }
  358. class function TCharacter.TestCategory(
  359. const AString : UnicodeString;
  360. AIndex : Integer;
  361. ACategory : TUnicodeCategory
  362. ) : Boolean;
  363. var
  364. pu : PUC_Prop;
  365. begin
  366. if (AIndex < 1) or (AIndex > Length(AString)) then
  367. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  368. pu := GetProps(Word(AString[AIndex]));
  369. if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
  370. if not IsSurrogatePair(AString,AIndex) then
  371. raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  372. pu := GetProps(AString[AIndex],AString[AIndex+1]);
  373. end;
  374. Result := (TUnicodeCategory(pu^.Category) = ACategory);
  375. end;
  376. class function TCharacter.TestCategory(
  377. const AString : UnicodeString;
  378. AIndex : Integer;
  379. ACategory : TUnicodeCategorySet
  380. ) : Boolean;
  381. var
  382. pu : PUC_Prop;
  383. begin
  384. if (AIndex < 1) or (AIndex > Length(AString)) then
  385. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  386. pu := GetProps(Word(AString[AIndex]));
  387. if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
  388. if not IsSurrogatePair(AString,AIndex) then
  389. raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  390. pu := GetProps(AString[AIndex],AString[AIndex+1]);
  391. end;
  392. Result := (TUnicodeCategory(pu^.Category) in ACategory);
  393. end;
  394. constructor TCharacter.Create;
  395. begin
  396. raise ENoConstructException.CreateFmt(SClassCantBeConstructed, [ClassName]);
  397. end;
  398. class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
  399. begin
  400. if AChar < UCS4_HALF_BASE then
  401. begin
  402. if IsSurrogate(UnicodeChar(AChar)) then
  403. raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
  404. Result := UnicodeChar(AChar);
  405. end
  406. else
  407. begin
  408. if AChar > MAX_LEGAL_UTF32 then
  409. raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
  410. SetLength(Result, 2);
  411. AChar := AChar - UCS4_HALF_BASE;
  412. Result[1] := UnicodeChar((AChar shr 10) + HIGH_SURROGATE_BEGIN);
  413. Result[2] := UnicodeChar((AChar and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
  414. end;
  415. end;
  416. class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
  417. begin
  418. if (AIndex < 1) or (AIndex > Length(AString)) then
  419. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  420. Result := Word(AString[AIndex]);
  421. if IsHighSurrogate(UnicodeChar(Result)) then
  422. begin
  423. if Length(AString) < Succ(AIndex) then
  424. raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
  425. Result := ConvertToUtf32(UnicodeChar(Result), AString[Succ(AIndex)]);
  426. end;
  427. end;
  428. class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
  429. begin
  430. if (AIndex < 1) or (AIndex > Length(AString)) then
  431. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  432. Result := Word(AString[AIndex]);
  433. if IsHighSurrogate(UnicodeChar(Result)) then
  434. begin
  435. if Length(AString) < Succ(AIndex) then
  436. raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
  437. Result := ConvertToUtf32(UnicodeChar(Result), AString[Succ(AIndex)]);
  438. ACharLength := 2;
  439. end
  440. else
  441. ACharLength := 1;
  442. end;
  443. class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
  444. begin
  445. if not IsHighSurrogate(AHighSurrogate) then
  446. raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
  447. if not IsLowSurrogate(ALowSurrogate) then
  448. raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(ALowSurrogate)]);
  449. Result := ToUCS4(AHighSurrogate, ALowSurrogate);
  450. end;
  451. class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
  452. begin
  453. Result := GetProps(Word(AChar))^.NumericValue;
  454. end;
  455. class function TCharacter.GetNumericValue(
  456. const AString : UnicodeString;
  457. AIndex : Integer
  458. ) : Double;
  459. var
  460. pu : PUC_Prop;
  461. begin
  462. if (AIndex < 1) or (AIndex > Length(AString)) then
  463. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  464. pu := GetProps(Word(AString[AIndex]));
  465. if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
  466. if not IsSurrogatePair(AString,AIndex) then
  467. raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  468. pu := GetProps(AString[AIndex],AString[AIndex+1]);
  469. end;
  470. Result := pu^.NumericValue;
  471. end;
  472. class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
  473. begin
  474. Result := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  475. end;
  476. class function TCharacter.GetUnicodeCategory(
  477. const AString : UnicodeString;
  478. AIndex : Integer
  479. ) : TUnicodeCategory;
  480. var
  481. pu : PUC_Prop;
  482. begin
  483. if (AIndex < 1) or (AIndex > Length(AString)) then
  484. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  485. pu := GetProps(Word(AString[AIndex]));
  486. if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
  487. if not IsSurrogatePair(AString,AIndex) then
  488. raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  489. pu := GetProps(AString[AIndex],AString[AIndex+1]);
  490. end;
  491. Result := TUnicodeCategory(pu^.Category);
  492. end;
  493. class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
  494. begin
  495. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucControl);
  496. end;
  497. class function TCharacter.IsControl(
  498. const AString : UnicodeString;
  499. AIndex : Integer
  500. ) : Boolean;
  501. begin
  502. Result := TestCategory(AString,AIndex,TUnicodeCategory.ucControl);
  503. end;
  504. class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
  505. begin
  506. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucDecimalNumber);
  507. end;
  508. class function TCharacter.IsDigit(
  509. const AString : UnicodeString;
  510. AIndex : Integer
  511. ) : Boolean;
  512. begin
  513. Result := TestCategory(AString,AIndex,TUnicodeCategory.ucDecimalNumber);
  514. end;
  515. class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
  516. begin
  517. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate);
  518. end;
  519. class function TCharacter.IsSurrogate(
  520. const AString : UnicodeString;
  521. AIndex : Integer
  522. ) : Boolean;
  523. begin
  524. if (AIndex < 1) or (AIndex > Length(AString)) then
  525. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  526. Result := IsSurrogate(AString[AIndex]);
  527. end;
  528. class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
  529. begin
  530. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
  531. (Word(AChar) >= HIGH_SURROGATE_BEGIN) and
  532. (Word(AChar) <= HIGH_SURROGATE_END);
  533. end;
  534. class function TCharacter.IsHighSurrogate(
  535. const AString : UnicodeString;
  536. AIndex : Integer
  537. ) : Boolean;
  538. begin
  539. if (AIndex < 1) or (AIndex > Length(AString)) then
  540. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  541. Result := IsHighSurrogate(AString[AIndex]);
  542. end;
  543. class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
  544. begin
  545. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
  546. (Word(AChar) >= LOW_SURROGATE_BEGIN) and
  547. (Word(AChar) <= LOW_SURROGATE_END);
  548. end;
  549. class function TCharacter.IsLowSurrogate(
  550. const AString : UnicodeString;
  551. AIndex : Integer
  552. ) : Boolean;
  553. begin
  554. if (AIndex < 1) or (AIndex > Length(AString)) then
  555. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  556. Result := IsLowSurrogate(AString[AIndex]);
  557. end;
  558. class function TCharacter.IsSurrogatePair(
  559. const AHighSurrogate,
  560. ALowSurrogate : UnicodeChar
  561. ) : Boolean;
  562. begin
  563. Result := UnicodeIsSurrogatePair(AHighSurrogate,ALowSurrogate);
  564. end;
  565. class function TCharacter.IsSurrogatePair(
  566. const AString : UnicodeString;
  567. AIndex : Integer
  568. ) : Boolean;
  569. begin
  570. if (AIndex < 1) or (AIndex > Length(AString)) then
  571. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  572. if not IsHighSurrogate(AString[AIndex]) then begin
  573. Result := False;
  574. exit;
  575. end;
  576. if ((AIndex+1) > Length(AString)) then
  577. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex+1, Length(AString)]);
  578. Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
  579. end;
  580. class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
  581. begin
  582. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_CATEGORIES);
  583. end;
  584. class function TCharacter.IsLetter(
  585. const AString : UnicodeString;
  586. AIndex : Integer
  587. ) : Boolean;
  588. begin
  589. Result := TestCategory(AString,AIndex,LETTER_CATEGORIES);
  590. end;
  591. class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
  592. begin
  593. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_OR_DIGIT_CATEGORIES);
  594. end;
  595. class function TCharacter.IsLetterOrDigit(
  596. const AString : UnicodeString;
  597. AIndex : Integer
  598. ) : Boolean;
  599. begin
  600. Result := TestCategory(AString,AIndex,LETTER_OR_DIGIT_CATEGORIES);
  601. end;
  602. class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
  603. begin
  604. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucLowercaseLetter);
  605. end;
  606. class function TCharacter.IsLower(
  607. const AString : UnicodeString;
  608. AIndex : Integer
  609. ) : Boolean;
  610. begin
  611. Result := TestCategory(AString,AIndex,TUnicodeCategory.ucLowercaseLetter);
  612. end;
  613. class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
  614. begin
  615. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in NUMBER_CATEGORIES);
  616. end;
  617. class function TCharacter.IsNumber(
  618. const AString : UnicodeString;
  619. AIndex : Integer
  620. ) : Boolean;
  621. begin
  622. Result := TestCategory(AString,AIndex,NUMBER_CATEGORIES);
  623. end;
  624. class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
  625. begin
  626. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in PUNCTUATION_CATEGORIES);
  627. end;
  628. class function TCharacter.IsPunctuation(
  629. const AString : UnicodeString;
  630. AIndex : Integer
  631. ) : Boolean;
  632. begin
  633. Result := TestCategory(AString,AIndex,PUNCTUATION_CATEGORIES);
  634. end;
  635. class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
  636. begin
  637. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SEPARATOR_CATEGORIES);
  638. end;
  639. class function TCharacter.IsSeparator(
  640. const AString : UnicodeString;
  641. AIndex : Integer
  642. ) : Boolean;
  643. begin
  644. Result := TestCategory(AString,AIndex,SEPARATOR_CATEGORIES);
  645. end;
  646. class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
  647. begin
  648. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SYMBOL_CATEGORIES);
  649. end;
  650. class function TCharacter.IsSymbol(
  651. const AString : UnicodeString;
  652. AIndex : Integer
  653. ) : Boolean;
  654. begin
  655. Result := TestCategory(AString,AIndex,SYMBOL_CATEGORIES);
  656. end;
  657. class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
  658. begin
  659. Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucUppercaseLetter);
  660. end;
  661. class function TCharacter.IsUpper(
  662. const AString : UnicodeString;
  663. AIndex : Integer
  664. ) : Boolean;
  665. begin
  666. Result := TestCategory(AString,AIndex,TUnicodeCategory.ucUppercaseLetter);
  667. end;
  668. class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
  669. begin
  670. Result := GetProps(Word(AChar))^.WhiteSpace;
  671. end;
  672. class function TCharacter.IsWhiteSpace(
  673. const AString : UnicodeString;
  674. AIndex : Integer
  675. ) : Boolean;
  676. var
  677. pu : PUC_Prop;
  678. begin
  679. if (AIndex < 1) or (AIndex > Length(AString)) then
  680. raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  681. pu := GetProps(Word(AString[AIndex]));
  682. if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
  683. if not IsSurrogatePair(AString,AIndex) then
  684. raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  685. pu := GetProps(AString[AIndex],AString[AIndex+1]);
  686. end;
  687. Result := pu^.WhiteSpace;
  688. end;
  689. class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
  690. begin
  691. Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
  692. if (Result = UnicodeChar(0)) then
  693. Result := AChar;
  694. end;
  695. class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
  696. begin
  697. Result := ToLower(AString,[]);
  698. end;
  699. class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
  700. begin
  701. if (UnicodeToLower(
  702. AString,(TCharacterOption.coIgnoreInvalidSequence in AOptions),Result
  703. ) <> 0
  704. )
  705. then
  706. raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  707. end;
  708. class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
  709. begin
  710. Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
  711. if (Result = UnicodeChar(0)) then
  712. Result := AChar;
  713. end;
  714. class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
  715. begin
  716. Result := ToUpper(AString,[]);
  717. end;
  718. class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
  719. begin
  720. if (UnicodeToUpper(
  721. AString,(TCharacterOption.coIgnoreInvalidSequence in AOptions),Result
  722. ) <> 0
  723. )
  724. then
  725. raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
  726. end;
  727. {$endif VER2_4}
  728. end.