2
0

sysencoding.inc 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762
  1. {
  2. *********************************************************************
  3. Copyright (C) 2012 Paul Ishenin,
  4. member of the Free Pascal Development Team
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. *********************************************************************
  11. }
  12. {$ifndef VER2_4}
  13. { TEncoding }
  { Returns the cached encoding for DefaultSystemCodePage, creating it on
    first use. Creation is performed under FLock when threading is available. }
  class function TEncoding.GetANSI: TEncoding;
  begin
  {$ifdef FPC_HAS_FEATURE_THREADING}
    EnterCriticalSection(FLock);
    try
  {$endif}
      if FStandardEncodings[seAnsi] = nil then
        FStandardEncodings[seAnsi] := TMBCSEncoding.Create(DefaultSystemCodePage);
  {$ifdef FPC_HAS_FEATURE_THREADING}
    finally
      LeaveCriticalSection(FLock);
    end;
  {$endif}
    Exit(FStandardEncodings[seAnsi]);
  end;
  { Encodes the whole of S and returns the resulting bytes.
    An empty string yields an empty (nil) array without touching S[1],
    which does not exist for an empty string. }
  function TEncoding.GetAnsiBytes(const S: string): TBytes;
  begin
    if S = '' then
      Result := nil
    else
      Result := GetAnsiBytes(S, 1, Length(S));
  end;
  { Encodes CharCount characters of S starting at the 1-based CharIndex by
    forwarding the address of S[CharIndex] to the pointer-based overload.
    No bounds validation is performed here. }
  function TEncoding.GetAnsiBytes(const S: string; CharIndex, CharCount: Integer
    ): TBytes;
  var
    FirstChar: Pointer;
  begin
    FirstChar := @S[CharIndex];
    Result := GetAnsiBytes(FirstChar, CharCount);
  end;
  { Decodes the whole byte array into a string.
    An empty array yields '' without indexing Bytes[0], which does not
    exist for an empty array. }
  function TEncoding.GetAnsiString(const Bytes: TBytes): string;
  begin
    if Length(Bytes) = 0 then
      Result := ''
    else
      Result := GetAnsiString(Bytes, 0, Length(Bytes));
  end;
  { Decodes ByteCount bytes starting at the 0-based ByteIndex via the
    pointer-based overload, then tags the result with DefaultSystemCodePage
    without converting its contents. }
  function TEncoding.GetAnsiString(const Bytes: TBytes; ByteIndex,
    ByteCount: Integer): string;
  var
    FirstByte: Pointer;
  begin
    FirstByte := @Bytes[ByteIndex];
    Result := GetAnsiString(FirstByte, ByteCount);
    SetCodePage(RawByteString(Result), DefaultSystemCodePage, False);
  end;
  { Returns the cached CP_ASCII encoding, creating it on first use.
    Creation is performed under FLock when threading is available. }
  class function TEncoding.GetASCII: TEncoding;
  begin
  {$ifdef FPC_HAS_FEATURE_THREADING}
    EnterCriticalSection(FLock);
    try
  {$endif}
      if FStandardEncodings[seAscii] = nil then
        FStandardEncodings[seAscii] := TMBCSEncoding.Create(CP_ASCII);
  {$ifdef FPC_HAS_FEATURE_THREADING}
    finally
      LeaveCriticalSection(FLock);
    end;
  {$endif}
    Exit(FStandardEncodings[seAscii]);
  end;
  { Returns the cached big-endian Unicode encoding, creating it on first use.
    Creation is performed under FLock when threading is available. }
  class function TEncoding.GetBigEndianUnicode: TEncoding;
  begin
  {$ifdef FPC_HAS_FEATURE_THREADING}
    EnterCriticalSection(FLock);
    try
  {$endif}
      if FStandardEncodings[seBigEndianUnicode] = nil then
        FStandardEncodings[seBigEndianUnicode] := TBigEndianUnicodeEncoding.Create;
  {$ifdef FPC_HAS_FEATURE_THREADING}
    finally
      LeaveCriticalSection(FLock);
    end;
  {$endif}
    Exit(FStandardEncodings[seBigEndianUnicode]);
  end;
  { The default encoding is the ANSI encoding returned by GetANSI. }
  class function TEncoding.GetDefault: TEncoding;
  begin
    Exit(GetANSI);
  end;
  { Returns the cached TUnicodeEncoding instance, creating it on first use.
    Creation is performed under FLock when threading is available. }
  class function TEncoding.GetUnicode: TEncoding;
  begin
  {$ifdef FPC_HAS_FEATURE_THREADING}
    EnterCriticalSection(FLock);
    try
  {$endif}
      if FStandardEncodings[seUnicode] = nil then
        FStandardEncodings[seUnicode] := TUnicodeEncoding.Create;
  {$ifdef FPC_HAS_FEATURE_THREADING}
    finally
      LeaveCriticalSection(FLock);
    end;
  {$endif}
    Exit(FStandardEncodings[seUnicode]);
  end;
  { Returns the cached TUTF7Encoding instance, creating it on first use.
    Creation is performed under FLock when threading is available. }
  class function TEncoding.GetUTF7: TEncoding;
  begin
  {$ifdef FPC_HAS_FEATURE_THREADING}
    EnterCriticalSection(FLock);
    try
  {$endif}
      if FStandardEncodings[seUTF7] = nil then
        FStandardEncodings[seUTF7] := TUTF7Encoding.Create;
  {$ifdef FPC_HAS_FEATURE_THREADING}
    finally
      LeaveCriticalSection(FLock);
    end;
  {$endif}
    Exit(FStandardEncodings[seUTF7]);
  end;
  { Returns the cached TUTF8Encoding instance, creating it on first use.
    Creation is performed under FLock when threading is available. }
  class function TEncoding.GetUTF8: TEncoding;
  begin
  {$ifdef FPC_HAS_FEATURE_THREADING}
    EnterCriticalSection(FLock);
    try
  {$endif}
      if FStandardEncodings[seUTF8] = nil then
        FStandardEncodings[seUTF8] := TUTF8Encoding.Create;
  {$ifdef FPC_HAS_FEATURE_THREADING}
    finally
      LeaveCriticalSection(FLock);
    end;
  {$endif}
    Exit(FStandardEncodings[seUTF8]);
  end;
  { Frees every cached standard encoding and resets its slot to nil.
    The sweep runs under FLock when threading is available. }
  class procedure TEncoding.FreeEncodings;
  var
    Slot: TStandardEncoding;
  begin
  {$ifdef FPC_HAS_FEATURE_THREADING}
    EnterCriticalSection(FLock);
    try
  {$endif}
      for Slot := Low(FStandardEncodings) to High(FStandardEncodings) do
        FreeAndNil(FStandardEncodings[Slot]);
  {$ifdef FPC_HAS_FEATURE_THREADING}
    finally
      LeaveCriticalSection(FLock);
    end;
  {$endif}
  end;
  { Class constructor: clears all standard-encoding slots and, when threading
    is available, initialises the lock used by the lazy getters. }
  class constructor TEncoding.Create;
  var
    Slot: TStandardEncoding;
  begin
    for Slot := Low(FStandardEncodings) to High(FStandardEncodings) do
      FStandardEncodings[Slot] := nil;
  {$ifdef FPC_HAS_FEATURE_THREADING}
    InitCriticalSection(FLock);
  {$endif}
  end;
  { Class destructor: releases the cached encodings first (FreeEncodings still
    needs the lock), then tears the lock down. }
  class destructor TEncoding.Destroy;
  begin
    FreeEncodings;
  {$ifdef FPC_HAS_FEATURE_THREADING}
    DoneCriticalSection(FLock);
  {$endif}
  end;
  { Base implementation: returns nil. Other classes in this file provide
    their own Clone that returns a new instance. }
  function TEncoding.Clone: TEncoding;
  begin
    Exit(nil);
  end;
  { Re-encodes all of Bytes: decodes them with Source, then encodes the
    resulting characters with Destination. }
  class function TEncoding.Convert(Source, Destination: TEncoding;
    const Bytes: TBytes): TBytes;
  var
    Decoded: TUnicodeCharArray;
  begin
    Decoded := Source.GetChars(Bytes);
    Result := Destination.GetBytes(Decoded);
  end;

  { Re-encodes Count bytes of Bytes starting at StartIndex: decodes them with
    Source, then encodes the resulting characters with Destination. }
  class function TEncoding.Convert(Source, Destination: TEncoding;
    const Bytes: TBytes; StartIndex, Count: Integer): TBytes;
  var
    Decoded: TUnicodeCharArray;
  begin
    Decoded := Source.GetChars(Bytes, StartIndex, Count);
    Result := Destination.GetBytes(Decoded);
  end;
  { Returns True when AEncoding is non-nil and is one of the instances
    currently stored in FStandardEncodings. }
  class function TEncoding.IsStandardEncoding(AEncoding: TEncoding): Boolean;
  var
    Candidate: TEncoding;
  begin
    Result := False;
    if AEncoding = nil then
      Exit;
    for Candidate in FStandardEncodings do
      if Candidate = AEncoding then
      begin
        Result := True;
        Break;
      end;
  end;
  { Convenience overload: detects the buffer encoding using the Default
    encoding as the fallback. }
  class function TEncoding.GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer;
  begin
    Exit(GetBufferEncoding(Buffer, AEncoding, Default));
  end;
  { Inspects the start of Buffer for an encoding preamble.
    - If AEncoding is already assigned, only that encoding's preamble is
      tested; the result is its length on a match, otherwise 0.
    - Otherwise Unicode, BigEndianUnicode and UTF8 are tried in that order;
      on a match AEncoding is set and the preamble length returned.
    - If nothing matches, AEncoding becomes ADefaultEncoding and 0 is returned. }
  class function TEncoding.GetBufferEncoding(const Buffer: TBytes;
    var AEncoding: TEncoding; ADefaultEncoding: TEncoding): Integer;

    { True when Candidate has a non-empty preamble that Buffer starts with.
      ByteCount always receives the preamble length. }
    function PreambleMatches(Candidate: TEncoding; out ByteCount: Integer): Boolean;
    var
      Mark: TBytes;
    begin
      Mark := Candidate.GetPreamble;
      ByteCount := Length(Mark);
      if (ByteCount <= 0) or (Length(Buffer) < ByteCount) then
        Exit(False);
      Result := CompareMem(@Mark[0], @Buffer[0], ByteCount);
    end;

  begin
    if Assigned(AEncoding) then
    begin
      if not PreambleMatches(AEncoding, Result) then
        Result := 0;
      Exit;
    end;

    if PreambleMatches(Unicode, Result) then
      AEncoding := Unicode
    else if PreambleMatches(BigEndianUnicode, Result) then
      AEncoding := BigEndianUnicode
    else if PreambleMatches(UTF8, Result) then
      AEncoding := UTF8
    else
    begin
      AEncoding := ADefaultEncoding;
      Result := 0;
    end;
  end;
  { Number of bytes needed to encode the entire character array. }
  function TEncoding.GetByteCount(const Chars: TUnicodeCharArray): Integer;
  begin
    Exit(GetByteCount(Chars, 0, Length(Chars)));
  end;
  { Number of bytes needed to encode CharCount characters of Chars starting
    at the 0-based CharIndex.
    Raises EEncodingError when the count is negative or runs past the end of
    the array, or when the index is negative. }
  function TEncoding.GetByteCount(const Chars: TUnicodeCharArray; CharIndex,
    CharCount: Integer): Integer;
  var
    CountIsBad: Boolean;
  begin
    CountIsBad := (CharCount < 0) or (Length(Chars) < CharCount + CharIndex);
    if CountIsBad then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if CharIndex < 0 then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    Result := GetByteCount(@Chars[CharIndex], CharCount);
  end;
  { Number of bytes needed to encode the entire string S. }
  function TEncoding.GetByteCount(const S: UnicodeString): Integer;
  var
    FirstChar: PUnicodeChar;
  begin
    FirstChar := PUnicodeChar(S);
    Result := GetByteCount(FirstChar, Length(S));
  end;
  { Number of bytes needed to encode CharCount characters of S starting at
    the 1-based CharIndex.

    Raises EEncodingError when CharCount is negative or the requested range
    runs past the end of S, or when CharIndex is below 1.

    The range check accounts for the 1-based index: the last character
    touched is CharIndex + CharCount - 1, so (S, 1, Length(S)) is valid.
    The previous check compared against CharIndex + CharCount and rejected
    that call. }
  function TEncoding.GetByteCount(const S: UnicodeString; CharIndex, CharCount: Integer): Integer;
  begin
    if (CharCount < 0) or (Length(S) < CharCount + CharIndex - 1) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 1) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    { PUnicodeChar(S) is used instead of @S[CharIndex] so that an empty S
      with CharIndex = 1 and CharCount = 0 does not index a missing element. }
    Result := GetByteCount(PUnicodeChar(S) + (CharIndex - 1), CharCount);
  end;
  { Encodes the entire character array and returns the bytes.
    When nothing is to be produced the encode call is skipped, so element 0
    of an empty source or result array is never addressed. }
  function TEncoding.GetBytes(const Chars: TUnicodeCharArray): TBytes;
  begin
    SetLength(Result, GetByteCount(Chars));
    if Length(Result) > 0 then
      GetBytes(@Chars[0], Length(Chars), @Result[0], Length(Result));
  end;
  { Encodes CharCount characters of Chars starting at the 0-based CharIndex
    and returns the bytes.
    Raises EEncodingError when the count is negative or runs past the end of
    the array, or when the index is negative.
    When the encoded length is zero the encode call is skipped, so element 0
    of an empty result array is never addressed. }
  function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
    CharCount: Integer): TBytes;
  begin
    if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 0) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    SetLength(Result, GetByteCount(Chars, CharIndex, CharCount));
    if Length(Result) > 0 then
      GetBytes(@Chars[CharIndex], CharCount, @Result[0], Length(Result));
  end;
  { Encodes CharCount characters of Chars (from the 0-based CharIndex) into
    the caller-supplied Bytes array starting at ByteIndex. Returns the value
    of the pointer-based GetBytes overload.
    Raises EEncodingError for an empty destination with characters pending,
    a destination index outside 0..Length(Bytes), a bad count, or a negative
    character index - checked in that order. }
  function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
    CharCount: Integer; const Bytes: TBytes; ByteIndex: Integer): Integer;
  var
    Capacity: Integer;
  begin
    Capacity := Length(Bytes);
    if (CharCount > 0) and (Capacity = 0) then
      raise EEncodingError.Create(SInvalidDestinationArray);
    if (ByteIndex < 0) or (ByteIndex > Capacity) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    if (CharCount < 0) or (CharCount + CharIndex > Length(Chars)) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if CharIndex < 0 then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    Result := GetBytes(@Chars[CharIndex], CharCount, @Bytes[ByteIndex], Capacity - ByteIndex);
  end;
  { Encodes the entire string S and returns the bytes.
    When nothing is to be produced the encode call is skipped, so S[1] of an
    empty string and element 0 of an empty result are never addressed. }
  function TEncoding.GetBytes(const S: UnicodeString): TBytes;
  begin
    SetLength(Result, GetByteCount(S));
    if Length(Result) > 0 then
      GetBytes(@S[1], Length(S), @Result[0], Length(Result));
  end;
  { Encodes CharCount characters of S (from the 1-based CharIndex) into the
    caller-supplied Bytes array starting at ByteIndex. Returns the value of
    the pointer-based GetBytes overload.

    Raises EEncodingError for an empty destination with characters pending,
    a destination index outside 0..Length(Bytes), a negative or over-long
    count, or a character index below 1 - checked in that order.

    The range check accounts for the 1-based index: the last character
    touched is CharIndex + CharCount - 1, so (S, 1, Length(S), ...) is valid.
    The previous check compared against CharIndex + CharCount and rejected
    that call. }
  function TEncoding.GetBytes(const S: UnicodeString; CharIndex, CharCount: Integer;
    const Bytes: TBytes; ByteIndex: Integer): Integer;
  var
    ByteLen: Integer;
  begin
    ByteLen := Length(Bytes);
    if (ByteLen = 0) and (CharCount > 0) then
      raise EEncodingError.Create(SInvalidDestinationArray);
    if (ByteIndex < 0) or (ByteLen < ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    if (CharCount < 0) or (Length(S) < CharCount + CharIndex - 1) then
      raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
    if (CharIndex < 1) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    { PUnicodeChar(S) is used instead of @S[CharIndex] so that an empty S
      with CharIndex = 1 and CharCount = 0 does not index a missing element. }
    Result := GetBytes(PUnicodeChar(S) + (CharIndex - 1), CharCount,
      @Bytes[ByteIndex], ByteLen - ByteIndex);
  end;
  { Number of characters produced by decoding the entire byte array. }
  function TEncoding.GetCharCount(const Bytes: TBytes): Integer;
  var
    Total: Integer;
  begin
    Total := Length(Bytes);
    Result := GetCharCount(@Bytes[0], Total);
  end;
  { Number of characters produced by decoding ByteCount bytes of Bytes
    starting at the 0-based ByteIndex.

    Raises EEncodingError when ByteIndex lies outside 0..Length(Bytes), or
    when ByteCount is negative or extends past the end of Bytes. The count
    was previously unchecked, which let the decoder read beyond the array. }
  function TEncoding.GetCharCount(const Bytes: TBytes; ByteIndex,
    ByteCount: Integer): Integer;
  begin
    if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { Written as a subtraction so a huge ByteCount cannot overflow the sum. }
    if (ByteCount < 0) or (Length(Bytes) - ByteIndex < ByteCount) then
      raise EEncodingError.CreateFmt(SInvalidCount, [ByteCount]);
    Result := GetCharCount(@Bytes[ByteIndex], ByteCount);
  end;
  { Decodes the entire byte array and returns the characters.
    When nothing is to be produced the decode call is skipped, so element 0
    of an empty source or result array is never addressed. }
  function TEncoding.GetChars(const Bytes: TBytes): TUnicodeCharArray;
  begin
    SetLength(Result, GetCharCount(Bytes));
    if Length(Result) > 0 then
      GetChars(@Bytes[0], Length(Bytes), @Result[0], Length(Result));
  end;
  { Decodes ByteCount bytes of Bytes starting at the 0-based ByteIndex and
    returns the characters.

    Raises EEncodingError when ByteIndex lies outside 0..Length(Bytes), or
    when ByteCount is negative or extends past the end of Bytes (previously
    unchecked, which let the decoder read beyond the array).
    When the decoded length is zero the decode call is skipped, so element 0
    of an empty result array is never addressed. }
  function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer): TUnicodeCharArray;
  begin
    if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { Written as a subtraction so a huge ByteCount cannot overflow the sum. }
    if (ByteCount < 0) or (Length(Bytes) - ByteIndex < ByteCount) then
      raise EEncodingError.CreateFmt(SInvalidCount, [ByteCount]);
    SetLength(Result, GetCharCount(Bytes, ByteIndex, ByteCount));
    if Length(Result) > 0 then
      GetChars(@Bytes[ByteIndex], ByteCount, @Result[0], Length(Result));
  end;
  { Decodes ByteCount bytes of Bytes (from the 0-based ByteIndex) into the
    caller-supplied Chars array starting at CharIndex. Returns the value of
    the pointer-based GetChars overload.

    Raises EEncodingError when ByteIndex is not a valid element index of
    Bytes, when ByteCount is negative or extends past the end of Bytes
    (previously unchecked, which let the decoder read beyond the array), or
    when CharIndex is not a valid element index of Chars. }
  function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer;
    const Chars: TUnicodeCharArray; CharIndex: Integer): Integer;
  var
    CharLen: Integer;
  begin
    if (ByteIndex < 0) or (Length(Bytes) <= ByteIndex) then
      raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
    { Written as a subtraction so a huge ByteCount cannot overflow the sum. }
    if (ByteCount < 0) or (Length(Bytes) - ByteIndex < ByteCount) then
      raise EEncodingError.CreateFmt(SInvalidCount, [ByteCount]);
    CharLen := Length(Chars);
    if (CharIndex < 0) or (CharLen <= CharIndex) then
      raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
    Result := GetChars(@Bytes[ByteIndex], ByteCount, @Chars[CharIndex], CharLen - CharIndex);
  end;
  { Creates a new encoding object for CodePage. The UTF-16 (both byte
    orders), UTF-7 and UTF-8 code pages get their dedicated classes; any
    other value yields a TMBCSEncoding for that code page. }
  class function TEncoding.GetEncoding(CodePage: Integer): TEncoding;
  begin
    if CodePage = CP_UTF16 then
      Result := TUnicodeEncoding.Create
    else if CodePage = CP_UTF16BE then
      Result := TBigEndianUnicodeEncoding.Create
    else if CodePage = CP_UTF7 then
      Result := TUTF7Encoding.Create
    else if CodePage = CP_UTF8 then
      Result := TUTF8Encoding.Create
    else
      Result := TMBCSEncoding.Create(CodePage);
  end;
  { Creates a TMBCSEncoding for the code page that EncodingName resolves to.
    Raises EEncodingError when CodePageNameToCodePage returns $FFFF for the
    name. }
  class function TEncoding.GetEncoding(const EncodingName: UnicodeString): TEncoding;
  var
    PageId: TSystemCodePage;
  begin
    PageId := CodePageNameToCodePage(AnsiString(EncodingName));
    if PageId = $FFFF then
      raise EEncodingError.CreateFmt(SNotValidCodePageName, [EncodingName]);
    Exit(TMBCSEncoding.Create(PageId));
  end;
  { Decodes the entire byte array and returns the characters as a string. }
  function TEncoding.GetString(const Bytes: TBytes): UnicodeString;
  var
    Decoded: TUnicodeCharArray;
  begin
    Decoded := GetChars(Bytes);
    SetString(Result, PUnicodeChar(Decoded), Length(Decoded));
  end;

  { Decodes ByteCount bytes starting at ByteIndex and returns the characters
    as a string. }
  function TEncoding.GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
  var
    Decoded: TUnicodeCharArray;
  begin
    Decoded := GetChars(Bytes, ByteIndex, ByteCount);
    SetString(Result, PUnicodeChar(Decoded), Length(Decoded));
  end;
  367. { TMBCSEncoding }
  { Byte count is obtained by actually converting the characters to this
    encoding's code page and measuring the resulting string. }
  function TMBCSEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  var
    Encoded: RawByteString;
  begin
    widestringmanager.Unicode2AnsiMoveProc(Chars, Encoded, CodePage, CharCount);
    Exit(Length(Encoded));
  end;
  { Converts CharCount characters to this encoding's code page and copies at
    most ByteCount of the resulting bytes into Bytes. Returns the number of
    bytes copied. }
  function TMBCSEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer; Bytes: PByte;
    ByteCount: Integer): Integer;
  var
    Encoded: RawByteString;
  begin
    widestringmanager.Unicode2AnsiMoveProc(Chars, Encoded, CodePage, CharCount);
    Result := Length(Encoded);
    { Clamp to the space the caller provided. }
    if Result > ByteCount then
      Result := ByteCount;
    if Result > 0 then
      Move(Encoded[1], Bytes[0], Result);
  end;
  { Character count is obtained by actually converting the bytes from this
    encoding's code page and measuring the resulting string. }
  function TMBCSEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  var
    Decoded: UnicodeString;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Bytes), CodePage, Decoded, ByteCount);
    Exit(Length(Decoded));
  end;
  { Converts ByteCount bytes from this encoding's code page and copies at
    most CharCount of the resulting characters into Chars. Returns the
    number of characters copied. }
  function TMBCSEncoding.GetChars(Bytes: PByte; ByteCount: Integer; Chars: PUnicodeChar;
    CharCount: Integer): Integer;
  var
    Decoded: UnicodeString;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(PChar(Bytes), CodePage, Decoded, ByteCount);
    Result := Length(Decoded);
    { Clamp to the space the caller provided. }
    if Result > CharCount then
      Result := CharCount;
    if Result > 0 then
      Move(Decoded[1], Chars[0], Result * SizeOf(UnicodeChar));
  end;
  { Returns the code page stored by the constructor. }
  function TMBCSEncoding.GetCodePage: Cardinal;
  begin
    Exit(FCodePage);
  end;

  { Returns the name that CodePageToCodePageName reports for this code page. }
  function TMBCSEncoding.GetEncodingName: UnicodeString;
  var
    PageName: UnicodeString;
  begin
    PageName := UnicodeString(CodePageToCodePageName(CodePage));
    Result := PageName;
  end;
  { Creates an encoding for DefaultSystemCodePage with both flag sets zero. }
  constructor TMBCSEncoding.Create;
  const
    NoFlags = 0;
  begin
    Create(DefaultSystemCodePage, NoFlags, NoFlags);
  end;

  { Creates an encoding for ACodePage with both flag sets zero. }
  constructor TMBCSEncoding.Create(ACodePage: Integer);
  const
    NoFlags = 0;
  begin
    Create(ACodePage, NoFlags, NoFlags);
  end;

  { Stores the code page and the two conversion flag values in the
    corresponding fields. }
  constructor TMBCSEncoding.Create(ACodePage, MBToWCharFlags,
    WCharToMBFlags: Integer);
  begin
    FCodePage := ACodePage;
    FMBToWCharFlags := MBToWCharFlags;
    FWCharToMBFlags := WCharToMBFlags;
  end;
  { Returns a new TMBCSEncoding carrying the same code page and flags. }
  function TMBCSEncoding.Clone: TEncoding;
  begin
    Exit(TMBCSEncoding.Create(FCodePage, FMBToWCharFlags, FWCharToMBFlags));
  end;
  { Copies CharCount characters into a temporary string, labels it as
    DefaultSystemCodePage (no conversion), converts it to this encoding's
    code page, and returns the converted bytes. }
  function TMBCSEncoding.GetAnsiBytes(Chars: PChar; CharCount: Integer): TBytes;
  var
    Converted: RawByteString;
  begin
    SetString(Converted, Chars, CharCount);
    SetCodePage(Converted, DefaultSystemCodePage, False);
    SetCodePage(Converted, GetCodePage, True);
    SetLength(Result, Length(Converted));
    if Length(Converted) > 0 then
      Move(Converted[1], Result[0], Length(Converted));
  end;
  { Copies ByteCount bytes into the result, labels them with this encoding's
    code page (no conversion), then converts the string to
    DefaultSystemCodePage. }
  function TMBCSEncoding.GetAnsiString(Bytes: PByte; ByteCount: Integer): string;
  var
    Source: Pointer;
  begin
    Source := Bytes;
    SetString(Result, Source, ByteCount);
    SetCodePage(RawByteString(Result), GetCodePage, False);
    SetCodePage(RawByteString(Result), DefaultSystemCodePage, True);
  end;
  { Reports CharCount unchanged as the byte-count bound. }
  function TMBCSEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount);
  end;

  { Reports ByteCount unchanged as the character-count bound. }
  function TMBCSEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount);
  end;

  { This class has no preamble: returns nil (an empty array). }
  function TMBCSEncoding.GetPreamble: TBytes;
  begin
    Exit(nil);
  end;
  462. { TUTF7Encoding }
  { Creates the encoding on code page CP_UTF7 and clears FIsSingleByte. }
  constructor TUTF7Encoding.Create;
  begin
    inherited Create(CP_UTF7);
    FIsSingleByte := False;
  end;

  { Returns a new TUTF7Encoding instance. }
  function TUTF7Encoding.Clone: TEncoding;
  begin
    Exit(TUTF7Encoding.Create);
  end;
  { Byte-count bound: three bytes per character plus two. }
  function TUTF7Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount * 3 + 2);
  end;

  { Character-count bound: ByteCount unchanged. }
  function TUTF7Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount);
  end;
  480. { TUTF8Encoding }
  { Creates the encoding on code page CP_UTF8 and clears FIsSingleByte. }
  constructor TUTF8Encoding.Create;
  begin
    inherited Create(CP_UTF8);
    FIsSingleByte := False;
  end;

  { Returns a new TUTF8Encoding instance. }
  function TUTF8Encoding.Clone: TEncoding;
  begin
    Exit(TUTF8Encoding.Create);
  end;
  { Byte-count bound: three bytes per character. }
  function TUTF8Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount * 3);
  end;

  { Character-count bound: ByteCount unchanged. }
  function TUTF8Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount);
  end;
  { Returns the three preamble bytes EF BB BF. }
  function TUTF8Encoding.GetPreamble: TBytes;
  const
    Mark: array[0..2] of Byte = ($EF, $BB, $BF);
  begin
    SetLength(Result, Length(Mark));
    Move(Mark[0], Result[0], Length(Mark));
  end;
  505. { TUnicodeEncoding }
  { Byte count is CharCount times the size of one UnicodeChar; the character
    data itself is not inspected. }
  function TUnicodeEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  begin
    Exit(CharCount * SizeOf(UnicodeChar));
  end;
  { Copies the raw character memory into Bytes, limited to ByteCount bytes.
    Returns the number of bytes copied. }
  function TUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
    Bytes: PByte; ByteCount: Integer): Integer;
  begin
    Result := CharCount * SizeOf(UnicodeChar);
    { Clamp to the space the caller provided. }
    if Result > ByteCount then
      Result := ByteCount;
    if Result > 0 then
      Move(Chars[0], Bytes[0], Result);
  end;
  { Character count is ByteCount divided by the size of one UnicodeChar; the
    byte data itself is not inspected. }
  function TUnicodeEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  begin
    Exit(ByteCount div SizeOf(UnicodeChar));
  end;
  { Copies the raw byte memory into Chars, limited to CharCount characters.
    Returns the number of characters copied. }
  function TUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
    Chars: PUnicodeChar; CharCount: Integer): Integer;
  begin
    Result := ByteCount div SizeOf(UnicodeChar);
    { Clamp to the space the caller provided. }
    if Result > CharCount then
      Result := CharCount;
    Move(Bytes[0], Chars[0], Result * SizeOf(UnicodeChar));
  end;
  { This encoding always reports CP_UTF16. }
  function TUnicodeEncoding.GetCodePage: Cardinal;
  begin
    Exit(CP_UTF16);
  end;

  { Returns the name that CodePageToCodePageName reports for this code page. }
  function TUnicodeEncoding.GetEncodingName: UnicodeString;
  var
    PageName: UnicodeString;
  begin
    PageName := UnicodeString(CodePageToCodePageName(CodePage));
    Result := PageName;
  end;
  { Runs the inherited constructor, then clears FIsSingleByte and sets
    FMaxCharSize to the size of one UnicodeChar. }
  constructor TUnicodeEncoding.Create;
  begin
    inherited Create;
    FIsSingleByte := False;
    FMaxCharSize := SizeOf(UnicodeChar);
  end;

  { Returns a new TUnicodeEncoding instance. }
  function TUnicodeEncoding.Clone: TEncoding;
  begin
    Exit(TUnicodeEncoding.Create);
  end;
  { Converts CharCount characters from DefaultSystemCodePage to a
    UnicodeString and returns that string's raw memory as bytes. }
  function TUnicodeEncoding.GetAnsiBytes(Chars: PChar; CharCount: Integer
    ): TBytes;
  var
    Wide: UnicodeString;
  begin
    widestringmanager.Ansi2UnicodeMoveProc(Chars, DefaultSystemCodePage, Wide, CharCount);
    SetLength(Result, Length(Wide) * SizeOf(UnicodeChar));
    if Length(Result) > 0 then
      Move(Wide[1], Result[0], Length(Result));
  end;
  { Treats Bytes as UnicodeChar data and converts the whole characters it
    contains (ByteCount div character size) to DefaultSystemCodePage. }
  function TUnicodeEncoding.GetAnsiString(Bytes: PByte; ByteCount: Integer
    ): string;
  var
    WideCount: Integer;
  begin
    WideCount := ByteCount div SizeOf(UnicodeChar);
    widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Bytes), RawByteString(Result),
      DefaultSystemCodePage, WideCount);
  end;
  { Byte-count bound: CharCount times the size of one UnicodeChar. }
  function TUnicodeEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  begin
    Exit(CharCount * SizeOf(UnicodeChar));
  end;

  { Character-count bound: ByteCount divided by the size of one UnicodeChar. }
  function TUnicodeEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  begin
    Exit(ByteCount div SizeOf(UnicodeChar));
  end;
  { Returns the two preamble bytes FF FE. }
  function TUnicodeEncoding.GetPreamble: TBytes;
  const
    Mark: array[0..1] of Byte = ($FF, $FE);
  begin
    SetLength(Result, Length(Mark));
    Move(Mark[0], Result[0], Length(Mark));
  end;
  578. { TBigEndianUnicodeEncoding }
  { Writes each character high byte first, then low byte, into Bytes.
    Output is limited to ByteCount bytes; when the limit falls in the middle
    of a character only its high byte is written. Returns the number of
    bytes produced. }
  function TBigEndianUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
    Bytes: PByte; ByteCount: Integer): Integer;
  var
    Dst, Stop: PByte;
    Src: PUnicodeChar;
  begin
    Result := CharCount * SizeOf(UnicodeChar);
    if Result > ByteCount then
      Result := ByteCount;

    Src := Chars;
    Dst := Bytes;
    Stop := Bytes + Result;   { one past the last byte to write }
    while Dst < Stop do
    begin
      Dst^ := Hi(Word(Src^));
      Inc(Dst);
      { The limit may cut a character in half; guard the low byte. }
      if Dst < Stop then
        Dst^ := Lo(Word(Src^));
      Inc(Dst);
      Inc(Src);
    end;
  end;
  { Builds each character from two bytes, the first being the high byte.
    Output is limited to CharCount characters. Returns the number of
    characters produced. }
  function TBigEndianUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
    Chars: PUnicodeChar; CharCount: Integer): Integer;
  var
    Src: PByte;
    Dst, Stop: PUnicodeChar;
  begin
    Result := ByteCount div SizeOf(UnicodeChar);
    if Result > CharCount then
      Result := CharCount;

    Src := Bytes;
    Dst := Chars;
    Stop := Chars + Result;   { one past the last character to write }
    while Dst < Stop do
    begin
      Dst^ := UnicodeChar((Src[0] shl 8) or Src[1]);
      Inc(Src, SizeOf(UnicodeChar));
      Inc(Dst);
    end;
  end;
  { This encoding always reports CP_UTF16BE. }
  function TBigEndianUnicodeEncoding.GetCodePage: Cardinal;
  begin
    Exit(CP_UTF16BE);
  end;

  { Returns the name that CodePageToCodePageName reports for this code page. }
  function TBigEndianUnicodeEncoding.GetEncodingName: UnicodeString;
  var
    PageName: UnicodeString;
  begin
    PageName := UnicodeString(CodePageToCodePageName(CodePage));
    Result := PageName;
  end;

  { Returns a new TBigEndianUnicodeEncoding instance. }
  function TBigEndianUnicodeEncoding.Clone: TEncoding;
  begin
    Exit(TBigEndianUnicodeEncoding.Create);
  end;
  { Produces the bytes via the shared Unicode encoding, then swaps every
    byte pair in place with Swap. }
  function TBigEndianUnicodeEncoding.GetAnsiBytes(Chars: PChar; CharCount: Integer
    ): TBytes;
  var
    Source: TEncoding;
  begin
    Source := TEncoding.Unicode;
    Result := Source.GetAnsiBytes(Chars, CharCount);
    Swap(Result);
  end;
  { Copies the input into a scratch array, swaps every byte pair with Swap,
    and decodes the scratch array via the shared Unicode encoding.
    A ByteCount of zero returns ''. }
  function TBigEndianUnicodeEncoding.GetAnsiString(Bytes: PByte;
    ByteCount: Integer): string;
  var
    Scratch: TBytes;
  begin
    Result := '';
    if ByteCount <> 0 then
    begin
      SetLength(Scratch, ByteCount);
      Move(Bytes^, Scratch[0], ByteCount);
      Swap(Scratch);
      Result := TEncoding.Unicode.GetAnsiString(PByte(@Scratch[0]), ByteCount);
    end;
  end;
  { Returns the two preamble bytes FE FF. }
  function TBigEndianUnicodeEncoding.GetPreamble: TBytes;
  const
    Mark: array[0..1] of Byte = ($FE, $FF);
  begin
    SetLength(Result, Length(Mark));
    Move(Mark[0], Result[0], Length(Mark));
  end;
  { Exchanges the two bytes of every complete pair in B, in place.
    A trailing unpaired byte (odd length) is left untouched. }
  procedure TBigEndianUnicodeEncoding.Swap(var B: TBytes);
  var
    Pair, Lo0: Integer;
    Held: Byte;
  begin
    for Pair := 0 to (Length(B) div 2) - 1 do
    begin
      Lo0 := Pair * 2;
      Held := B[Lo0];
      B[Lo0] := B[Lo0 + 1];
      B[Lo0 + 1] := Held;
    end;
  end;
  665. {$endif VER2_4}