sysencoding.inc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. {$ifndef VER2_4}
  2. { TEncoding }
  { Returns the cached ANSI encoding, building it from DefaultSystemCodePage
    the first time it is requested. }
  class function TEncoding.GetANSI: TEncoding;
  begin
    Result := FStandardEncodings[seAnsi];
    if Result = nil then
    begin
      Result := TMBCSEncoding.Create(DefaultSystemCodePage);
      FStandardEncodings[seAnsi] := Result;
    end;
  end;
  { Returns the cached ASCII encoding, building a TMBCSEncoding for CP_ASCII
    the first time it is requested. }
  class function TEncoding.GetASCII: TEncoding;
  begin
    Result := FStandardEncodings[seAscii];
    if Result = nil then
    begin
      Result := TMBCSEncoding.Create(CP_ASCII);
      FStandardEncodings[seAscii] := Result;
    end;
  end;
  { Returns the cached big-endian UTF-16 encoding, creating it on first use. }
  class function TEncoding.GetBigEndianUnicode: TEncoding;
  begin
    Result := FStandardEncodings[seBigEndianUnicode];
    if Result = nil then
    begin
      Result := TBigEndianUnicodeEncoding.Create;
      FStandardEncodings[seBigEndianUnicode] := Result;
    end;
  end;
  { The default encoding is the ANSI encoding returned by GetANSI. }
  class function TEncoding.GetDefault: TEncoding;
  begin
    Exit(GetANSI);
  end;
  { Returns the cached TUnicodeEncoding instance, creating it on first use. }
  class function TEncoding.GetUnicode: TEncoding;
  begin
    Result := FStandardEncodings[seUnicode];
    if Result = nil then
    begin
      Result := TUnicodeEncoding.Create;
      FStandardEncodings[seUnicode] := Result;
    end;
  end;
  { Returns the cached TUTF7Encoding instance, creating it on first use. }
  class function TEncoding.GetUTF7: TEncoding;
  begin
    Result := FStandardEncodings[seUTF7];
    if Result = nil then
    begin
      Result := TUTF7Encoding.Create;
      FStandardEncodings[seUTF7] := Result;
    end;
  end;
  { Returns the cached TUTF8Encoding instance, creating it on first use. }
  class function TEncoding.GetUTF8: TEncoding;
  begin
    Result := FStandardEncodings[seUTF8];
    if Result = nil then
    begin
      Result := TUTF8Encoding.Create;
      FStandardEncodings[seUTF8] := Result;
    end;
  end;
  { Releases every cached standard encoding and clears its slot.
    The slots must be reset to nil: the lazy getters only test Assigned(),
    so a freed-but-not-cleared slot would be handed out as a dangling
    reference if an encoding were requested after this call. }
  class procedure TEncoding.FreeEncodings;
  var
    E: TStandardEncoding;
  begin
    for E := Low(FStandardEncodings) to High(FStandardEncodings) do
      FreeAndNil(FStandardEncodings[E]);
  end;
  { Class constructor: starts with every standard-encoding slot empty so the
    getters can create the instances on demand. }
  class constructor TEncoding.Create;
  var
    Slot: TStandardEncoding;
  begin
    for Slot := High(FStandardEncodings) downto Low(FStandardEncodings) do
      FStandardEncodings[Slot] := nil;
  end;
  { Class destructor: disposes of the cached standard encodings. }
  class destructor TEncoding.Destroy;
  begin
    TEncoding.FreeEncodings;
  end;
  { Base implementation: produces no copy and returns nil. }
  function TEncoding.Clone: TEncoding;
  begin
    Exit(nil);
  end;
  { Re-encodes Bytes: decodes them with Source, then encodes the resulting
    characters with Destination. }
  class function TEncoding.Convert(Source, Destination: TEncoding;
    const Bytes: TBytes): TBytes;
  var
    Decoded: TUnicodeCharArray;
  begin
    Decoded := Source.GetChars(Bytes);
    Result := Destination.GetBytes(Decoded);
  end;
  { Re-encodes the Count bytes of Bytes starting at StartIndex: decodes them
    with Source, then encodes the resulting characters with Destination. }
  class function TEncoding.Convert(Source, Destination: TEncoding;
    const Bytes: TBytes; StartIndex, Count: Integer): TBytes;
  var
    Decoded: TUnicodeCharArray;
  begin
    Decoded := Source.GetChars(Bytes, StartIndex, Count);
    Result := Destination.GetBytes(Decoded);
  end;
  { True when AEncoding is non-nil and is one of the instances currently held
    in the standard-encoding cache. }
  class function TEncoding.IsStandardEncoding(AEncoding: TEncoding): Boolean;
  var
    Slot: TStandardEncoding;
  begin
    Result := False;
    if AEncoding = nil then
      Exit;
    for Slot := Low(FStandardEncodings) to High(FStandardEncodings) do
      if FStandardEncodings[Slot] = AEncoding then
      begin
        Result := True;
        Break;
      end;
  end;
  { Convenience overload: same as the three-argument version with Default as
    the fallback encoding. }
  class function TEncoding.GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer;
  begin
    Exit(GetBufferEncoding(Buffer, AEncoding, Default));
  end;
  { Detects the encoding of Buffer from its preamble.
    If AEncoding is already assigned, only that encoding's preamble is tested.
    Otherwise Unicode, BigEndianUnicode and UTF8 are tried in that order and
    AEncoding receives the first match, or ADefaultEncoding when none match.
    Returns the length of the matched preamble, or 0 when nothing matched. }
  class function TEncoding.GetBufferEncoding(const Buffer: TBytes;
    var AEncoding: TEncoding; ADefaultEncoding: TEncoding): Integer;

    { True when Buffer begins with Candidate's non-empty preamble.
      BomLen always receives the preamble length. }
    function StartsWithPreamble(Candidate: TEncoding; out BomLen: Integer): Boolean;
    var
      Bom: TBytes;
    begin
      Bom := Candidate.GetPreamble;
      BomLen := Length(Bom);
      if (BomLen <= 0) or (Length(Buffer) < BomLen) then
        Exit(False);
      Result := CompareMem(@Bom[0], @Buffer[0], BomLen);
    end;

  begin
    if Assigned(AEncoding) then
    begin
      if not StartsWithPreamble(AEncoding, Result) then
        Result := 0;
      Exit;
    end;

    if StartsWithPreamble(Unicode, Result) then
    begin
      AEncoding := Unicode;
      Exit;
    end;

    if StartsWithPreamble(BigEndianUnicode, Result) then
    begin
      AEncoding := BigEndianUnicode;
      Exit;
    end;

    if StartsWithPreamble(UTF8, Result) then
    begin
      AEncoding := UTF8;
      Exit;
    end;

    { No preamble recognised: fall back to the caller's default. }
    AEncoding := ADefaultEncoding;
    Result := 0;
  end;
  { Number of bytes needed to encode all of Chars.
    The whole array is by definition a valid range, so the pointer-based
    overload is called directly instead of routing through index validation.
    An empty array yields 0 without taking the address of a non-existent
    element. }
  function TEncoding.GetByteCount(const Chars: TUnicodeCharArray): Integer;
  begin
    if Length(Chars) = 0 then
      Exit(0);
    Result := GetByteCount(@Chars[0], Length(Chars));
  end;
  { Number of bytes needed to encode CharCount characters of Chars starting
    at the zero-based CharIndex.
    Raises EEncodingError when CharCount is negative or the range runs past
    the end of Chars, or when CharIndex is negative. }
  function TEncoding.GetByteCount(const Chars: TUnicodeCharArray; CharIndex,
    CharCount: Integer): Integer;
  begin
    { A range is valid when CharIndex + CharCount <= Length(Chars); the
      comparison must be strict so that a range ending exactly at the last
      element (including the whole array) is accepted. }
    if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 0) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    { Nothing to measure; also avoids indexing one past the end when
      CharIndex = Length(Chars). }
    if CharCount = 0 then
      Exit(0);
    Result := GetByteCount(@Chars[CharIndex], CharCount);
  end;
  { Number of bytes needed to encode the whole string S. }
  function TEncoding.GetByteCount(const S: UnicodeString): Integer;
  var
    CharLen: Integer;
  begin
    CharLen := Length(S);
    Result := GetByteCount(PUnicodeChar(S), CharLen);
  end;
  { Number of bytes needed to encode CharCount characters of S starting at
    the one-based CharIndex.
    Raises EEncodingError when CharCount is negative or the range runs past
    the end of S, or when CharIndex is below 1. }
  function TEncoding.GetByteCount(const S: UnicodeString; CharIndex, CharCount: Integer): Integer;
  begin
    { String indices are 1-based, so the last character touched is
      CharIndex + CharCount - 1; that is the value that must not exceed
      Length(S). }
    if (CharCount < 0) or (Length(S) < CharCount + CharIndex - 1) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 1) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    { Nothing to measure; also avoids indexing past the end when
      CharIndex = Length(S) + 1. }
    if CharCount = 0 then
      Exit(0);
    Result := GetByteCount(@S[CharIndex], CharCount);
  end;
  { Encodes all of Chars and returns the bytes.
    The buffer is sized with the pointer-based GetByteCount directly, since
    the whole array never needs index validation. Empty input (or an empty
    encoding result) returns an empty array without indexing element 0. }
  function TEncoding.GetBytes(const Chars: TUnicodeCharArray): TBytes;
  var
    CharLen: Integer;
  begin
    Result := nil;
    CharLen := Length(Chars);
    if CharLen = 0 then
      Exit;
    SetLength(Result, GetByteCount(@Chars[0], CharLen));
    if Length(Result) > 0 then
      GetBytes(@Chars[0], CharLen, @Result[0], Length(Result));
  end;
  { Encodes CharCount characters of Chars starting at the zero-based
    CharIndex and returns the bytes.
    Raises EEncodingError when CharCount is negative or the range runs past
    the end of Chars, or when CharIndex is negative. }
  function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
    CharCount: Integer): TBytes;
  begin
    { Strict comparison: a range that ends exactly at the last element is
      valid. }
    if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 0) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    Result := nil;
    { Empty range: nothing to encode and no element may be addressed. }
    if CharCount = 0 then
      Exit;
    { The range is already validated, so measure through the pointer
      overload rather than validating a second time. }
    SetLength(Result, GetByteCount(@Chars[CharIndex], CharCount));
    if Length(Result) > 0 then
      GetBytes(@Chars[CharIndex], CharCount, @Result[0], Length(Result));
  end;
  { Encodes CharCount characters of Chars (from zero-based CharIndex) into
    Bytes starting at ByteIndex; returns the number of bytes written.
    Raises EEncodingError for an empty destination with a positive CharCount,
    a ByteIndex outside 0..Length(Bytes), a negative CharCount or a range
    that runs past the end of Chars, or a negative CharIndex. }
  function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
    CharCount: Integer; const Bytes: TBytes; ByteIndex: Integer): Integer;
  var
    ByteLen: Integer;
  begin
    ByteLen := Length(Bytes);
    if (ByteLen = 0) and (CharCount > 0) then
      raise EEncodingError.Create(SInvalidDestinationArray);
    if (ByteIndex < 0) or (ByteLen < ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { Strict comparison: a range that ends exactly at the last element is
      valid. }
    if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 0) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    { No characters to encode, or no room left in the destination: nothing
      can be written, and neither array may be indexed one past its end. }
    if (CharCount = 0) or (ByteIndex = ByteLen) then
      Exit(0);
    Result := GetBytes(@Chars[CharIndex], CharCount, @Bytes[ByteIndex], ByteLen - ByteIndex);
  end;
  { Encodes the whole string S and returns the bytes.
    When the encoded length is 0 (for example S = '') the empty array is
    returned as is, so neither S[1] nor Result[0] is addressed. }
  function TEncoding.GetBytes(const S: UnicodeString): TBytes;
  begin
    SetLength(Result, GetByteCount(S));
    if Length(Result) > 0 then
      GetBytes(PUnicodeChar(S), Length(S), @Result[0], Length(Result));
  end;
  { Encodes CharCount characters of S (from one-based CharIndex) into Bytes
    starting at ByteIndex; returns the number of bytes written.
    Raises EEncodingError for an empty destination with a positive CharCount,
    a ByteIndex outside 0..Length(Bytes), a negative CharCount or a range
    that runs past the end of S, or a CharIndex below 1. }
  function TEncoding.GetBytes(const S: UnicodeString; CharIndex, CharCount: Integer;
    const Bytes: TBytes; ByteIndex: Integer): Integer;
  var
    ByteLen: Integer;
  begin
    ByteLen := Length(Bytes);
    if (ByteLen = 0) and (CharCount > 0) then
      raise EEncodingError.Create(SInvalidDestinationArray);
    if (ByteIndex < 0) or (ByteLen < ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { String indices are 1-based: the last character touched is
      CharIndex + CharCount - 1. }
    if (CharCount < 0) or (Length(S) < CharCount + CharIndex - 1) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 1) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    { No characters to encode, or no room left in the destination: nothing
      can be written, and nothing may be indexed past its end. }
    if (CharCount = 0) or (ByteIndex = ByteLen) then
      Exit(0);
    Result := GetBytes(@S[CharIndex], CharCount, @Bytes[ByteIndex], ByteLen - ByteIndex);
  end;
  { Number of characters produced by decoding all of Bytes.
    An empty array yields 0 without taking the address of element 0. }
  function TEncoding.GetCharCount(const Bytes: TBytes): Integer;
  begin
    if Length(Bytes) = 0 then
      Exit(0);
    Result := GetCharCount(@Bytes[0], Length(Bytes));
  end;
  { Number of characters produced by decoding ByteCount bytes of Bytes
    starting at ByteIndex.
    Raises EEncodingError when ByteIndex is outside 0..Length(Bytes) or when
    ByteCount is negative or extends past the end of Bytes. }
  function TEncoding.GetCharCount(const Bytes: TBytes; ByteIndex,
    ByteCount: Integer): Integer;
  begin
    if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { Without this check the decoder would read beyond the array. }
    if (ByteCount < 0) or (Length(Bytes) - ByteIndex < ByteCount) then
      raise EEncodingError.CreateFmt(SInvalidCount, [ByteCount]);
    { Empty range; also avoids indexing at ByteIndex = Length(Bytes). }
    if ByteCount = 0 then
      Exit(0);
    Result := GetCharCount(@Bytes[ByteIndex], ByteCount);
  end;
  { Decodes all of Bytes and returns the characters.
    Empty input, or input that decodes to no characters, returns an empty
    array without taking the address of element 0 of either array. }
  function TEncoding.GetChars(const Bytes: TBytes): TUnicodeCharArray;
  begin
    Result := nil;
    if Length(Bytes) = 0 then
      Exit;
    SetLength(Result, GetCharCount(@Bytes[0], Length(Bytes)));
    if Length(Result) > 0 then
      GetChars(@Bytes[0], Length(Bytes), @Result[0], Length(Result));
  end;
  { Decodes ByteCount bytes of Bytes starting at ByteIndex and returns the
    characters.
    Raises EEncodingError when ByteIndex is outside 0..Length(Bytes) or when
    ByteCount is negative or extends past the end of Bytes. }
  function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer): TUnicodeCharArray;
  begin
    if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { Without this check the decoder would read beyond the array. }
    if (ByteCount < 0) or (Length(Bytes) - ByteIndex < ByteCount) then
      raise EEncodingError.CreateFmt(SInvalidCount, [ByteCount]);
    Result := nil;
    { Empty range: nothing to decode and no element may be addressed. }
    if ByteCount = 0 then
      Exit;
    SetLength(Result, GetCharCount(@Bytes[ByteIndex], ByteCount));
    if Length(Result) > 0 then
      GetChars(@Bytes[ByteIndex], ByteCount, @Result[0], Length(Result));
  end;
  { Decodes ByteCount bytes of Bytes (from ByteIndex) into Chars starting at
    CharIndex; returns the number of characters written.
    Raises EEncodingError when ByteIndex is not a valid index into Bytes,
    when ByteCount is negative or extends past the end of Bytes, or when
    CharIndex is not a valid index into Chars. }
  function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer;
    const Chars: TUnicodeCharArray; CharIndex: Integer): Integer;
  var
    CharLen: Integer;
  begin
    if (ByteIndex < 0) or (Length(Bytes) <= ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { Without this check the decoder would read beyond the source array. }
    if (ByteCount < 0) or (Length(Bytes) - ByteIndex < ByteCount) then
      raise EEncodingError.CreateFmt(SInvalidCount, [ByteCount]);
    CharLen := Length(Chars);
    if (CharIndex < 0) or (CharLen <= CharIndex) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    Result := GetChars(@Bytes[ByteIndex], ByteCount, @Chars[CharIndex], CharLen - CharIndex);
  end;
  { Creates a new encoding object for CodePage. The UTF-16, UTF-16BE, UTF-7
    and UTF-8 code pages map to their dedicated classes; every other value
    produces a TMBCSEncoding for that code page. }
  class function TEncoding.GetEncoding(CodePage: Integer): TEncoding;
  begin
    if CodePage = CP_UTF16 then
      Result := TUnicodeEncoding.Create
    else if CodePage = CP_UTF16BE then
      Result := TBigEndianUnicodeEncoding.Create
    else if CodePage = CP_UTF7 then
      Result := TUTF7Encoding.Create
    else if CodePage = CP_UTF8 then
      Result := TUTF8Encoding.Create
    else
      Result := TMBCSEncoding.Create(CodePage);
  end;
  { Creates a new encoding object for the code page named EncodingName.
    Raises EEncodingError when CodePageNameToCodePage does not recognise the
    name (it returns $FFFF).
    The lookup result is passed to the numeric overload so that a name
    resolving to a UTF code page yields the same dedicated class as
    requesting that code page by number. }
  class function TEncoding.GetEncoding(const EncodingName: UnicodeString): TEncoding;
  var
    ACodePage: TSystemCodePage;
  begin
    ACodePage := CodePageNameToCodePage(EncodingName);
    if ACodePage = $FFFF then
      raise EEncodingError.CreateFmt(SNotValidCodePageName, [EncodingName]);
    Result := GetEncoding(Integer(ACodePage));
  end;
  { Decodes all of Bytes and returns the characters as a UnicodeString. }
  function TEncoding.GetString(const Bytes: TBytes): UnicodeString;
  var
    Decoded: TUnicodeCharArray;
    DecodedLen: Integer;
  begin
    Decoded := GetChars(Bytes);
    DecodedLen := Length(Decoded);
    SetString(Result, PUnicodeChar(Decoded), DecodedLen);
  end;
  { Decodes ByteCount bytes of Bytes starting at ByteIndex and returns the
    characters as a UnicodeString. }
  function TEncoding.GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
  var
    Decoded: TUnicodeCharArray;
    DecodedLen: Integer;
  begin
    Decoded := GetChars(Bytes, ByteIndex, ByteCount);
    DecodedLen := Length(Decoded);
    SetString(Result, PUnicodeChar(Decoded), DecodedLen);
  end;
  268. { TMBCSEncoding }
  { Converts CharCount characters to this encoding's CodePage through the
    wide-string manager and reports the length of the converted string. }
  function TMBCSEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  var
    Encoded: RawByteString;
  begin
    widestringmanager.Unicode2AnsiMoveProc(Chars, Encoded, CodePage, CharCount);
    Exit(Length(Encoded));
  end;
  { Converts CharCount characters to this encoding's CodePage and copies at
    most ByteCount of the resulting bytes into Bytes.
    Returns the number of bytes copied. }
  function TMBCSEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer; Bytes: PByte;
    ByteCount: Integer): Integer;
  var
    Encoded: RawByteString;
  begin
    widestringmanager.Unicode2AnsiMoveProc(Chars, Encoded, CodePage, CharCount);
    { Clamp to the space the caller provided. }
    if ByteCount < Length(Encoded) then
      Result := ByteCount
    else
      Result := Length(Encoded);
    if Result > 0 then
      Move(Encoded[1], Bytes^, Result);
  end;
  { Converts ByteCount bytes from this encoding's CodePage through the
    wide-string manager and reports the length of the converted string. }
  function TMBCSEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  var
    Decoded: UnicodeString;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Bytes), CodePage, Decoded, ByteCount);
    Exit(Length(Decoded));
  end;
  { Converts ByteCount bytes from this encoding's CodePage and copies at most
    CharCount of the resulting characters into Chars.
    Returns the number of characters copied. }
  function TMBCSEncoding.GetChars(Bytes: PByte; ByteCount: Integer; Chars: PUnicodeChar;
    CharCount: Integer): Integer;
  var
    Decoded: UnicodeString;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Bytes), CodePage, Decoded, ByteCount);
    { Clamp to the space the caller provided. }
    if CharCount < Length(Decoded) then
      Result := CharCount
    else
      Result := Length(Decoded);
    if Result > 0 then
      Move(Decoded[1], Chars^, Result * SizeOf(UnicodeChar));
  end;
  { Returns the code page stored by the constructor. }
  function TMBCSEncoding.GetCodePage: Cardinal;
  begin
    Exit(FCodePage);
  end;
  { Returns the name CodePageToCodePageName reports for this encoding's
    CodePage. }
  function TMBCSEncoding.GetEncodingName: UnicodeString;
  begin
    Exit(CodePageToCodePageName(CodePage));
  end;
  { Creates an encoding for DefaultSystemCodePage with both flag values 0. }
  constructor TMBCSEncoding.Create;
  const
    NoFlags = 0;
  begin
    Create(DefaultSystemCodePage, NoFlags, NoFlags);
  end;
  { Creates an encoding for ACodePage with both flag values 0. }
  constructor TMBCSEncoding.Create(ACodePage: Integer);
  const
    NoFlags = 0;
  begin
    Create(ACodePage, NoFlags, NoFlags);
  end;
  { Stores the code page and the two conversion flag values in the
    instance fields. }
  constructor TMBCSEncoding.Create(ACodePage, MBToWCharFlags,
    WCharToMBFlags: Integer);
  begin
    FWCharToMBFlags := WCharToMBFlags;
    FMBToWCharFlags := MBToWCharFlags;
    FCodePage := ACodePage;
  end;
  { Returns a new TMBCSEncoding carrying this instance's code page and
    flag values. }
  function TMBCSEncoding.Clone: TEncoding;
  begin
    Exit(TMBCSEncoding.Create(FCodePage, FMBToWCharFlags, FWCharToMBFlags));
  end;
  { Returns CharCount unchanged. }
  function TMBCSEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount);
  end;
  { Returns ByteCount unchanged. }
  function TMBCSEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount);
  end;
  { This encoding has no preamble: returns an empty byte array. }
  function TMBCSEncoding.GetPreamble: TBytes;
  begin
    SetLength(Result, 0);
  end;
  346. { TUTF7Encoding }
  { Initialises the inherited encoding with CP_UTF7, then marks the encoding
    as not single-byte. }
  constructor TUTF7Encoding.Create;
  begin
    inherited Create(CP_UTF7);
    Self.FIsSingleByte := False;
  end;
  { Returns a newly created TUTF7Encoding. }
  function TUTF7Encoding.Clone: TEncoding;
  begin
    Exit(TUTF7Encoding.Create);
  end;
  { Returns three bytes per character plus two. }
  function TUTF7Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount * 3 + 2);
  end;
  { Returns ByteCount unchanged. }
  function TUTF7Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount);
  end;
  364. { TUTF8Encoding }
  { Initialises the inherited encoding with CP_UTF8, then marks the encoding
    as not single-byte. }
  constructor TUTF8Encoding.Create;
  begin
    inherited Create(CP_UTF8);
    Self.FIsSingleByte := False;
  end;
  { Returns a newly created TUTF8Encoding. }
  function TUTF8Encoding.Clone: TEncoding;
  begin
    Exit(TUTF8Encoding.Create);
  end;
  { Returns three bytes per character. }
  function TUTF8Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount * 3);
  end;
  { Returns ByteCount unchanged. }
  function TUTF8Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount);
  end;
  { Returns the three-byte preamble EF BB BF. }
  function TUTF8Encoding.GetPreamble: TBytes;
  const
    Bom: array[0..2] of Byte = ($EF, $BB, $BF);
  begin
    SetLength(Result, Length(Bom));
    Move(Bom[0], Result[0], Length(Bom));
  end;
  389. { TUnicodeEncoding }
  { Byte count is CharCount times the size of one UnicodeChar; the character
    data itself is not inspected. }
  function TUnicodeEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  begin
    Exit(CharCount * SizeOf(UnicodeChar));
  end;
  { Copies the raw character memory into Bytes, limited to ByteCount bytes.
    Returns the number of bytes copied. }
  function TUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
    Bytes: PByte; ByteCount: Integer): Integer;
  var
    Needed: Integer;
  begin
    Needed := CharCount * SizeOf(UnicodeChar);
    { Clamp to the space the caller provided. }
    if ByteCount < Needed then
      Result := ByteCount
    else
      Result := Needed;
    if Result > 0 then
      Move(Chars^, Bytes^, Result);
  end;
  { Character count is ByteCount divided by the size of one UnicodeChar; the
    byte data itself is not inspected. }
  function TUnicodeEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  begin
    Exit(ByteCount div SizeOf(UnicodeChar));
  end;
  { Copies raw bytes into Chars as whole two-byte characters, limited to
    CharCount characters. Returns the number of characters copied. }
  function TUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
    Chars: PUnicodeChar; CharCount: Integer): Integer;
  var
    Available: Integer;
  begin
    Available := ByteCount div 2;
    { Clamp to the space the caller provided. }
    if CharCount < Available then
      Result := CharCount
    else
      Result := Available;
    Move(Bytes^, Chars^, Result * SizeOf(UnicodeChar));
  end;
  { Always reports CP_UTF16. }
  function TUnicodeEncoding.GetCodePage: Cardinal;
  begin
    Exit(CP_UTF16);
  end;
  { Returns the name CodePageToCodePageName reports for this encoding's
    CodePage. }
  function TUnicodeEncoding.GetEncodingName: UnicodeString;
  begin
    Exit(CodePageToCodePageName(CodePage));
  end;
  { Runs the inherited constructor, then records that a character occupies
    SizeOf(UnicodeChar) bytes and that the encoding is not single-byte. }
  constructor TUnicodeEncoding.Create;
  begin
    inherited Create;
    FMaxCharSize := SizeOf(UnicodeChar);
    FIsSingleByte := False;
  end;
  { Returns a newly created TUnicodeEncoding. }
  function TUnicodeEncoding.Clone: TEncoding;
  begin
    Exit(TUnicodeEncoding.Create);
  end;
  { Returns CharCount times the size of one UnicodeChar. }
  function TUnicodeEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount * SizeOf(UnicodeChar));
  end;
  { Returns ByteCount divided by the size of one UnicodeChar. }
  function TUnicodeEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount div SizeOf(UnicodeChar));
  end;
  { Returns the two-byte preamble FF FE. }
  function TUnicodeEncoding.GetPreamble: TBytes;
  const
    Bom: array[0..1] of Byte = ($FF, $FE);
  begin
    SetLength(Result, Length(Bom));
    Move(Bom[0], Result[0], Length(Bom));
  end;
  447. { TBigEndianUnicodeEncoding }
  { Writes each character high byte first, then low byte, producing at most
    ByteCount bytes. If the limit is odd, the final character contributes
    only its high byte. Returns the number of bytes written. }
  function TBigEndianUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
    Bytes: PByte; ByteCount: Integer): Integer;
  var
    Pos: Integer;
    CodeUnit: Word;
  begin
    Result := CharCount * SizeOf(UnicodeChar);
    if ByteCount < Result then
      Result := ByteCount;
    Pos := 0;
    while Pos < Result do
    begin
      { Two output bytes per character: even positions take the high byte,
        odd positions the low byte. }
      CodeUnit := Word(Chars[Pos div 2]);
      if Odd(Pos) then
        Bytes[Pos] := Lo(CodeUnit)
      else
        Bytes[Pos] := Hi(CodeUnit);
      Inc(Pos);
    end;
  end;
  { Builds characters from byte pairs stored high byte first, producing at
    most CharCount characters. Returns the number of characters written.
    Exactly Result characters are written: the loop must stop before the
    one-past-the-end position, otherwise one extra character is stored
    beyond the destination and two extra bytes are read from the source. }
  function TBigEndianUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
    Chars: PUnicodeChar; CharCount: Integer): Integer;
  var
    I: Integer;
  begin
    Result := ByteCount div SizeOf(UnicodeChar);
    if CharCount < Result then
      Result := CharCount;
    for I := 0 to Result - 1 do
      Chars[I] := UnicodeChar((Word(Bytes[2 * I]) shl 8) or Bytes[2 * I + 1]);
  end;
  { Always reports CP_UTF16BE. }
  function TBigEndianUnicodeEncoding.GetCodePage: Cardinal;
  begin
    Exit(CP_UTF16BE);
  end;
  { Returns the name CodePageToCodePageName reports for this encoding's
    CodePage. }
  function TBigEndianUnicodeEncoding.GetEncodingName: UnicodeString;
  begin
    Exit(CodePageToCodePageName(CodePage));
  end;
  { Returns a newly created TBigEndianUnicodeEncoding. }
  function TBigEndianUnicodeEncoding.Clone: TEncoding;
  begin
    Exit(TBigEndianUnicodeEncoding.Create);
  end;
  { Returns the two-byte preamble FE FF. }
  function TBigEndianUnicodeEncoding.GetPreamble: TBytes;
  const
    Bom: array[0..1] of Byte = ($FE, $FF);
  begin
    SetLength(Result, Length(Bom));
    Move(Bom[0], Result[0], Length(Bom));
  end;
  501. {$endif VER2_4}