2
0

sysencoding.inc 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826
  1. {%MainUnit sysutils.pp}
  2. {
  3. *********************************************************************
  4. Copyright (C) 2012 Paul Ishenin,
  5. member of the Free Pascal Development Team
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. *********************************************************************
  12. }
  13. { TEncoding }
  14. class function TEncoding.GetStandard(Se: TStandardEncoding; Ctr: TCreateEncodingProc): TEncoding;
  15. begin
  16. Result := FStandardEncodings[Se];
  17. if Assigned(Result) then
  18. begin
  19. {$ifdef FPC_HAS_FEATURE_THREADING}
  20. ReadDependencyBarrier; // Read Result contents (by caller) after Result pointer.
  21. {$endif}
  22. Exit;
  23. end;
  24. Result := Ctr();
  25. {$ifdef FPC_HAS_FEATURE_THREADING}
  26. WriteBarrier; // Write FStandardEncodings[Se] after Result contents.
  27. if InterlockedCompareExchange(Pointer(FStandardEncodings[Se]), Pointer(Result), nil) <> nil then
  28. begin
  29. Result.Free;
  30. Result := FStandardEncodings[Se];
  31. end;
  32. {$else}
  33. FStandardEncodings[Se] := Result;
  34. {$endif}
  35. end;
  36. class function TEncoding.CreateANSI: TEncoding;
  37. var
  38. Cp: TSystemCodePage;
  39. begin
  40. Cp := DefaultSystemCodePage;
  41. if Assigned(widestringmanager.GetStandardCodePageProc) then
  42. Cp := widestringmanager.GetStandardCodePageProc(scpAnsi);
  43. Result := TMBCSEncoding.Create(Cp);
  44. end;
  45. class function TEncoding.GetANSI: TEncoding;
  46. begin
  47. Result := GetStandard(seAnsi, @CreateANSI);
  48. end;
  49. function TEncoding.GetAnsiBytes(const S: ansistring): TBytes;
  50. begin
  51. if S='' then
  52. Result := nil
  53. else
  54. Result := GetAnsiBytes(S, 1, Length(S));
  55. end;
  56. function TEncoding.GetAnsiBytes(const S: ansistring; CharIndex, CharCount: Integer
  57. ): TBytes;
  58. begin
  59. Result := GetAnsiBytes(Pointer(@S[CharIndex]), CharCount);
  60. end;
  61. function TEncoding.GetAnsiString(const Bytes: TBytes): ansistring;
  62. begin
  63. if Length(Bytes)=0 then
  64. Result := ''
  65. else
  66. Result := GetAnsiString(Bytes, 0, Length(Bytes));
  67. end;
  68. function TEncoding.GetAnsiString(const Bytes: TBytes; ByteIndex,
  69. ByteCount: Integer): ansistring;
  70. begin
  71. Result := GetAnsiString(Pointer(@Bytes[ByteIndex]), ByteCount);
  72. SetCodePage(RawByteString(Result), DefaultSystemCodePage, False);
  73. end;
  74. class function TEncoding.CreateASCII: TEncoding;
  75. begin
  76. Result := TMBCSEncoding.Create(CP_ASCII);
  77. end;
  78. class function TEncoding.GetASCII: TEncoding;
  79. begin
  80. Result := GetStandard(seAscii, @CreateASCII);
  81. end;
  82. class function TEncoding.CreateBigEndianUnicode: TEncoding;
  83. begin
  84. Result := TBigEndianUnicodeEncoding.Create;
  85. end;
  86. class function TEncoding.GetBigEndianUnicode: TEncoding;
  87. begin
  88. Result := GetStandard(seBigEndianUnicode, @CreateBigEndianUnicode);
  89. end;
  90. class function TEncoding.GetDefault: TEncoding;
  91. begin
  92. Result := GetSystemEncoding;
  93. end;
  94. class function TEncoding.GetSystemEncoding: TEncoding;
  95. var
  96. Cp: TSystemCodePage;
  97. Head: TEncoding;
  98. begin
  99. repeat
  100. Cp := DefaultSystemCodePage;
  101. Head := FSystemEncodingsList; // Must not be re-read until InterlockedCompareExchange to guarantee that search was performed against this head.
  102. {$ifdef FPC_HAS_FEATURE_THREADING}
  103. ReadDependencyBarrier; // Read Head contents after Head pointer.
  104. {$endif}
  105. Result := Head;
  106. while Assigned(Result) do
  107. if Result.CodePage = Cp then
  108. Exit
  109. else
  110. Result := Result.FNext;
  111. // not found - create new encoding at first position
  112. Result := TMBCSEncoding.Create(Cp);
  113. Result.FNext := Head;
  114. {$ifdef FPC_HAS_FEATURE_THREADING}
  115. WriteBarrier; // Write FSystemEncodingsList after Result contents.
  116. if InterlockedCompareExchange(Pointer(FSystemEncodingsList), Pointer(Result), Pointer(Head)) = Pointer(Head) then
  117. break
  118. else
  119. Result.Free;
  120. {$else}
  121. FSystemEncodingsList := Result;
  122. break;
  123. {$endif}
  124. until false;
  125. end;
  126. class function TEncoding.CreateUnicode: TEncoding;
  127. begin
  128. Result := TUnicodeEncoding.Create;
  129. end;
  130. class function TEncoding.GetUnicode: TEncoding;
  131. begin
  132. Result := GetStandard(seUnicode, @CreateUnicode);
  133. end;
  134. class function TEncoding.CreateUTF7: TEncoding;
  135. begin
  136. Result := TUTF7Encoding.Create;
  137. end;
  138. class function TEncoding.GetUTF7: TEncoding;
  139. begin
  140. Result := GetStandard(seUTF7, @CreateUTF7);
  141. end;
  142. class function TEncoding.CreateUTF8: TEncoding;
  143. begin
  144. Result := TUTF8Encoding.Create;
  145. end;
  146. class function TEncoding.GetUTF8: TEncoding;
  147. begin
  148. Result := GetStandard(seUTF8, @CreateUTF8);
  149. end;
  150. class destructor TEncoding.Destroy;
  151. var
  152. E: TStandardEncoding;
  153. Se: TEncoding;
  154. begin
  155. // Synchronization shouldn't be required for class destructors.
  156. for E := Low(FStandardEncodings) to High(FStandardEncodings) do
  157. FreeAndNil(FStandardEncodings[E]);
  158. repeat
  159. Se := FSystemEncodingsList;
  160. if not Assigned(Se) then
  161. break;
  162. FSystemEncodingsList := Se.FNext;
  163. Se.Free;
  164. until false;
  165. end;
  166. function TEncoding.Clone: TEncoding;
  167. begin
  168. Result := nil;
  169. end;
  170. class function TEncoding.Convert(Source, Destination: TEncoding;
  171. const Bytes: TBytes): TBytes;
  172. begin
  173. Result := Destination.GetBytes(Source.GetChars(Bytes));
  174. end;
  175. class function TEncoding.Convert(Source, Destination: TEncoding;
  176. const Bytes: TBytes; StartIndex, Count: Integer): TBytes;
  177. begin
  178. Result := Destination.GetBytes(Source.GetChars(Bytes, StartIndex, Count));
  179. end;
  180. class function TEncoding.IsStandardEncoding(AEncoding: TEncoding): Boolean;
  181. var
  182. Encoding: TEncoding;
  183. begin
  184. if Assigned(AEncoding) then
  185. begin
  186. for Encoding in FStandardEncodings do
  187. if Encoding = AEncoding then
  188. Exit(True);
  189. Encoding := FSystemEncodingsList;
  190. while Assigned(Encoding) do
  191. if Encoding = AEncoding then
  192. Exit(True)
  193. else
  194. Encoding := Encoding.FNext;
  195. end;
  196. Result := False;
  197. end;
  198. class function TEncoding.GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer;
  199. begin
  200. Result := GetBufferEncoding(Buffer, AEncoding, Default);
  201. end;
  202. class function TEncoding.GetBufferEncoding(const Buffer: TBytes;
  203. var AEncoding: TEncoding; ADefaultEncoding: TEncoding): Integer;
  204. function CheckEncoding(AEncoding: TEncoding; out ByteCount: Integer): Boolean;
  205. var
  206. Preamble: TBytes;
  207. begin
  208. Preamble := AEncoding.GetPreamble;
  209. ByteCount := Length(Preamble);
  210. Result := (Length(Buffer) >= ByteCount) and (ByteCount > 0);
  211. if Result then
  212. Result := CompareMem(@Preamble[0], @Buffer[0], ByteCount);
  213. end;
  214. begin
  215. if Assigned(AEncoding) then
  216. begin
  217. if not CheckEncoding(AEncoding, Result) then
  218. Result := 0;
  219. end
  220. else
  221. if CheckEncoding(Unicode, Result) then
  222. AEncoding := Unicode
  223. else
  224. if CheckEncoding(BigEndianUnicode, Result) then
  225. AEncoding := BigEndianUnicode
  226. else
  227. if CheckEncoding(UTF8, Result) then
  228. AEncoding := UTF8
  229. else
  230. begin
  231. AEncoding := ADefaultEncoding;
  232. Result := 0;
  233. end;
  234. end;
  235. function TEncoding.GetByteCount(const Chars: TUnicodeCharArray): Integer;
  236. begin
  237. if Length(Chars)=0 then
  238. Result := 0
  239. else
  240. Result := GetByteCount(Chars, 0, Length(Chars));
  241. end;
  242. function TEncoding.GetByteCount(const Chars: TUnicodeCharArray; CharIndex,
  243. CharCount: Integer): Integer;
  244. begin
  245. if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
  246. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  247. if (CharIndex < 0) then
  248. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  249. Result := GetByteCount(@Chars[CharIndex], CharCount);
  250. end;
  251. function TEncoding.GetByteCount(const S: UnicodeString): Integer;
  252. begin
  253. if S='' then
  254. Result := 0
  255. else
  256. Result := GetByteCount(PUnicodeChar(S), Length(S));
  257. end;
  258. function TEncoding.GetByteCount(const S: UnicodeString; CharIndex, CharCount: Integer): Integer;
  259. begin
  260. if (CharIndex < 1) then
  261. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  262. if (CharCount < 0) or (Length(S) < CharCount + CharIndex - 1) then
  263. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  264. Result := GetByteCount(@S[CharIndex], CharCount);
  265. end;
  266. function TEncoding.GetBytes(const Chars: TUnicodeCharArray): TBytes;
  267. begin
  268. SetLength(Result, GetByteCount(Chars));
  269. if Length(Result)>0 then
  270. GetBytes(@Chars[0], Length(Chars), @Result[0], Length(Result));
  271. end;
  272. function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
  273. CharCount: Integer): TBytes;
  274. begin
  275. if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
  276. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  277. if (CharIndex < 0) then
  278. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  279. SetLength(Result, GetByteCount(Chars, CharIndex, CharCount));
  280. GetBytes(@Chars[CharIndex], CharCount, @Result[0], Length(Result));
  281. end;
  282. function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
  283. CharCount: Integer; const Bytes: TBytes; ByteIndex: Integer): Integer;
  284. var
  285. ByteLen: Integer;
  286. begin
  287. ByteLen := Length(Bytes);
  288. if (ByteLen = 0) and (CharCount > 0) then
  289. raise EEncodingError.Create(SInvalidDestinationArray);
  290. if (ByteIndex < 0) or (ByteLen < ByteIndex) then
  291. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  292. if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
  293. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  294. if (CharIndex < 0) then
  295. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  296. Result := GetBytes(@Chars[CharIndex], CharCount, @Bytes[ByteIndex], ByteLen - ByteIndex);
  297. end;
  298. function TEncoding.GetBytes(const S: UnicodeString): TBytes;
  299. begin
  300. SetLength(Result, GetByteCount(S));
  301. if Length(Result)>0 then
  302. GetBytes(@S[1], Length(S), @Result[0], Length(Result));
  303. end;
  304. function TEncoding.GetBytes(const S: UnicodeString; CharIndex, CharCount: Integer;
  305. const Bytes: TBytes; ByteIndex: Integer): Integer;
  306. var
  307. ByteLen: Integer;
  308. begin
  309. ByteLen := Length(Bytes);
  310. if (ByteLen = 0) and (CharCount > 0) then
  311. raise EEncodingError.Create(SInvalidDestinationArray);
  312. if (ByteIndex < 0) or (ByteLen < ByteIndex) then
  313. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  314. if (CharIndex < 1) then
  315. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  316. if (CharCount < 0) or (Length(S) < CharCount + CharIndex - 1) then
  317. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  318. Result := GetBytes(@S[CharIndex], CharCount, @Bytes[ByteIndex], ByteLen - ByteIndex);
  319. end;
  320. function TEncoding.GetCharCount(const Bytes: TBytes): Integer;
  321. begin
  322. if Length(Bytes)=0 then
  323. Result := 0
  324. else
  325. Result := GetCharCount(@Bytes[0], Length(Bytes));
  326. end;
  327. function TEncoding.GetCharCount(const Bytes: TBytes; ByteIndex,
  328. ByteCount: Integer): Integer;
  329. begin
  330. if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
  331. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  332. Result := GetCharCount(@Bytes[ByteIndex], ByteCount);
  333. end;
  334. function TEncoding.GetChars(const Bytes: TBytes): TUnicodeCharArray;
  335. begin
  336. SetLength(Result, GetCharCount(Bytes));
  337. if Length(Result)>0 then
  338. GetChars(@Bytes[0], Length(Bytes), @Result[0], Length(Result));
  339. end;
  340. function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer): TUnicodeCharArray;
  341. begin
  342. if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
  343. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  344. SetLength(Result, GetCharCount(Bytes, ByteIndex, ByteCount));
  345. GetChars(@Bytes[ByteIndex], ByteCount, @Result[0], Length(Result));
  346. end;
  347. function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer;
  348. const Chars: TUnicodeCharArray; CharIndex: Integer): Integer;
  349. var
  350. CharLen: Integer;
  351. begin
  352. if (ByteIndex < 0) or (Length(Bytes) <= ByteIndex) then
  353. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  354. CharLen := Length(Chars);
  355. if (CharIndex < 0) or (CharLen <= CharIndex) then
  356. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  357. Result := GetChars(@Bytes[ByteIndex], ByteCount, @Chars[CharIndex], CharLen - CharIndex);
  358. end;
  359. class function TEncoding.GetEncoding(CodePage: Integer): TEncoding;
  360. begin
  361. case CodePage of
  362. CP_UTF16: Result := TUnicodeEncoding.Create;
  363. CP_UTF16BE: Result := TBigEndianUnicodeEncoding.Create;
  364. CP_UTF7: Result := TUTF7Encoding.Create;
  365. CP_UTF8: Result := TUTF8Encoding.Create;
  366. else
  367. Result := TMBCSEncoding.Create(CodePage);
  368. end;
  369. end;
  370. class function TEncoding.GetEncoding(const EncodingName: UnicodeString): TEncoding;
  371. var
  372. ACodePage: TSystemCodePage;
  373. begin
  374. ACodePage := CodePageNameToCodePage(AnsiString(EncodingName));
  375. if ACodePage = $FFFF then
  376. raise EEncodingError.CreateFmt(SNotValidCodePageName, [EncodingName]);
  377. Result := GetEncoding(ACodePage);
  378. end;
  379. function TEncoding.GetString(const Bytes: TBytes): UnicodeString;
  380. var
  381. Chars: TUnicodeCharArray;
  382. begin
  383. if Length(Bytes)=0 then
  384. Result := ''
  385. else
  386. begin
  387. Chars := GetChars(Bytes);
  388. SetString(Result, PUnicodeChar(Chars), Length(Chars));
  389. end;
  390. end;
  391. function TEncoding.GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
  392. var
  393. Chars: TUnicodeCharArray;
  394. begin
  395. Chars := GetChars(Bytes, ByteIndex, ByteCount);
  396. SetString(Result, PUnicodeChar(Chars), Length(Chars));
  397. end;
  398. { TMBCSEncoding }
  399. function TMBCSEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  400. var
  401. S: RawByteString;
  402. begin
  403. widestringmanager.Unicode2AnsiMoveProc(Chars, S, CodePage, CharCount);
  404. Result := Length(S);
  405. end;
  406. function TMBCSEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer; Bytes: PByte;
  407. ByteCount: Integer): Integer;
  408. var
  409. S: RawByteString;
  410. begin
  411. widestringmanager.Unicode2AnsiMoveProc(Chars, S, CodePage, CharCount);
  412. Result := Length(S);
  413. if ByteCount < Result then
  414. Result := ByteCount;
  415. if Result > 0 then
  416. Move(S[1], Bytes[0], Result);
  417. end;
  418. function TMBCSEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  419. var
  420. U: UnicodeString;
  421. begin
  422. widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(Bytes), CodePage, U, ByteCount);
  423. Result := Length(U);
  424. end;
  425. function TMBCSEncoding.GetChars(Bytes: PByte; ByteCount: Integer; Chars: PUnicodeChar;
  426. CharCount: Integer): Integer;
  427. var
  428. U: UnicodeString;
  429. begin
  430. widestringmanager.Ansi2UnicodeMoveProc(PAnsiChar(Bytes), CodePage, U, ByteCount);
  431. Result := Length(U);
  432. if CharCount < Result then
  433. Result := CharCount;
  434. if Result > 0 then
  435. Move(U[1], Chars[0], Result * SizeOf(UnicodeChar));
  436. end;
  437. function TMBCSEncoding.GetCodePage: Cardinal;
  438. begin
  439. Result := FCodePage;
  440. end;
  441. function TMBCSEncoding.GetEncodingName: UnicodeString;
  442. begin
  443. Result := UnicodeString(CodePageToCodePageName(CodePage));
  444. end;
  445. constructor TMBCSEncoding.Create;
  446. begin
  447. Create(DefaultSystemCodePage, 0, 0);
  448. end;
  449. constructor TMBCSEncoding.Create(ACodePage: Integer);
  450. begin
  451. Create(ACodePage, 0, 0);
  452. end;
  453. constructor TMBCSEncoding.Create(ACodePage, MBToWCharFlags,
  454. WCharToMBFlags: Integer);
  455. begin
  456. FCodePage := ACodePage;
  457. FMBToWCharFlags := MBToWCharFlags;
  458. FWCharToMBFlags := WCharToMBFlags;
  459. case ACodePage of
  460. CP_UTF7, CP_UTF8, CP_UTF16, CP_UTF16BE: FIsSingleByte := False;
  461. else
  462. FIsSingleByte := True;
  463. end;
  464. end;
  465. function TMBCSEncoding.Clone: TEncoding;
  466. begin
  467. Result := TMBCSEncoding.Create(FCodePage, FMBToWCharFlags, FWCharToMBFlags);
  468. end;
  469. function TMBCSEncoding.GetAnsiBytes(Chars: PAnsiChar; CharCount: Integer): TBytes;
  470. var
  471. S: RawByteString;
  472. begin
  473. SetString(S, Chars, CharCount);
  474. SetCodePage(S, DefaultSystemCodePage, False);
  475. SetCodePage(S, GetCodePage, True);
  476. SetLength(Result, Length(S));
  477. if Length(S)>0 then
  478. Move(S[1], Result[0], Length(S));
  479. end;
  480. function TMBCSEncoding.GetAnsiString(Bytes: PByte; ByteCount: Integer): ansistring;
  481. begin
  482. SetString(Result, Pointer(Bytes), ByteCount);
  483. SetCodePage(RawByteString(Result), GetCodePage, False);
  484. SetCodePage(RawByteString(Result), DefaultSystemCodePage, True);
  485. end;
  486. function TMBCSEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  487. begin
  488. Result := CharCount;
  489. end;
  490. function TMBCSEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  491. begin
  492. Result := ByteCount;
  493. end;
  494. function TMBCSEncoding.GetPreamble: TBytes;
  495. begin
  496. case CodePage of
  497. CP_UTF8:
  498. begin
  499. SetLength(Result, 3);
  500. Result[0] := $EF;
  501. Result[1] := $BB;
  502. Result[2] := $BF;
  503. end;
  504. CP_UTF16:
  505. begin
  506. SetLength(Result, 2);
  507. Result[0] := $FF;
  508. Result[1] := $FE;
  509. end;
  510. CP_UTF16BE:
  511. begin
  512. SetLength(Result, 2);
  513. Result[0] := $FE;
  514. Result[1] := $FF;
  515. end;
  516. else
  517. Result := nil;
  518. end;
  519. end;
  520. { TUTF7Encoding }
  521. constructor TUTF7Encoding.Create;
  522. begin
  523. inherited Create(CP_UTF7);
  524. FIsSingleByte := False;
  525. end;
  526. function TUTF7Encoding.Clone: TEncoding;
  527. begin
  528. Result := TUTF7Encoding.Create;
  529. end;
  530. function TUTF7Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  531. begin
  532. Result := CharCount * 3 + 2;
  533. end;
  534. function TUTF7Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  535. begin
  536. Result := ByteCount;
  537. end;
  538. { TUTF8Encoding }
  539. constructor TUTF8Encoding.Create;
  540. begin
  541. inherited Create(CP_UTF8);
  542. FIsSingleByte := False;
  543. end;
  544. function TUTF8Encoding.Clone: TEncoding;
  545. begin
  546. Result := TUTF8Encoding.Create;
  547. end;
  548. function TUTF8Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  549. begin
  550. Result := CharCount * 3;
  551. end;
  552. function TUTF8Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  553. begin
  554. Result := ByteCount;
  555. end;
  556. function TUTF8Encoding.GetPreamble: TBytes;
  557. begin
  558. SetLength(Result, 3);
  559. Result[0] := $EF;
  560. Result[1] := $BB;
  561. Result[2] := $BF;
  562. end;
  563. { TUnicodeEncoding }
  564. function TUnicodeEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  565. begin
  566. Result := CharCount * SizeOf(UnicodeChar);
  567. end;
  568. function TUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
  569. Bytes: PByte; ByteCount: Integer): Integer;
  570. begin
  571. Result := CharCount * SizeOf(UnicodeChar);
  572. if ByteCount < Result then
  573. Result := ByteCount;
  574. if Result > 0 then
  575. Move(Chars[0], Bytes[0], Result);
  576. end;
  577. function TUnicodeEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  578. begin
  579. Result := ByteCount div SizeOf(UnicodeChar);
  580. end;
  581. function TUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
  582. Chars: PUnicodeChar; CharCount: Integer): Integer;
  583. begin
  584. Result := ByteCount div 2;
  585. if CharCount < Result then
  586. Result := CharCount;
  587. Move(Bytes[0], Chars[0], Result * SizeOf(UnicodeChar));
  588. end;
  589. function TUnicodeEncoding.GetCodePage: Cardinal;
  590. begin
  591. Result := CP_UTF16;
  592. end;
  593. function TUnicodeEncoding.GetEncodingName: UnicodeString;
  594. begin
  595. Result := UnicodeString(CodePageToCodePageName(CodePage));
  596. end;
  597. constructor TUnicodeEncoding.Create;
  598. begin
  599. inherited Create;
  600. FIsSingleByte := False;
  601. FMaxCharSize := SizeOf(UnicodeChar);
  602. end;
  603. function TUnicodeEncoding.Clone: TEncoding;
  604. begin
  605. Result := TUnicodeEncoding.Create;
  606. end;
  607. function TUnicodeEncoding.GetAnsiBytes(Chars: PAnsiChar; CharCount: Integer
  608. ): TBytes;
  609. var
  610. U: UnicodeString;
  611. begin
  612. widestringmanager.Ansi2UnicodeMoveProc(Chars, DefaultSystemCodePage, U, CharCount);
  613. SetLength(Result, Length(U)*SizeOf(UnicodeChar));
  614. if Length(Result)>0 then
  615. Move(U[1], Result[0], Length(Result));
  616. end;
  617. function TUnicodeEncoding.GetAnsiString(Bytes: PByte; ByteCount: Integer
  618. ): ansistring;
  619. begin
  620. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Bytes), RawByteString(Result), DefaultSystemCodePage, ByteCount div SizeOf(UnicodeChar));
  621. end;
  622. function TUnicodeEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  623. begin
  624. Result := CharCount * SizeOf(UnicodeChar);
  625. end;
  626. function TUnicodeEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  627. begin
  628. Result := ByteCount div SizeOf(UnicodeChar);
  629. end;
  630. function TUnicodeEncoding.GetPreamble: TBytes;
  631. begin
  632. SetLength(Result, 2);
  633. Result[0] := $FF;
  634. Result[1] := $FE;
  635. end;
  636. { TBigEndianUnicodeEncoding }
  637. function TBigEndianUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
  638. Bytes: PByte; ByteCount: Integer): Integer;
  639. var
  640. LastByte: PByte;
  641. begin
  642. Result := CharCount * SizeOf(UnicodeChar);
  643. if ByteCount < Result then
  644. Result := ByteCount;
  645. LastByte := @Bytes[Result];
  646. while Bytes < LastByte do
  647. begin
  648. Bytes^ := Hi(Word(Chars^));
  649. inc(Bytes);
  650. if Bytes < LastByte then
  651. Bytes^ := Lo(Word(Chars^));
  652. inc(Bytes);
  653. inc(Chars);
  654. end;
  655. end;
  656. function TBigEndianUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
  657. Chars: PUnicodeChar; CharCount: Integer): Integer;
  658. var
  659. LastChar: PUnicodeChar;
  660. begin
  661. Result := ByteCount div SizeOf(UnicodeChar);
  662. if CharCount < Result then
  663. Result := CharCount;
  664. LastChar := @Chars[Result];
  665. while Chars < LastChar do
  666. begin
  667. Chars^ := UnicodeChar(Bytes[1] + Bytes[0] shl 8);
  668. inc(Bytes, SizeOf(UnicodeChar));
  669. inc(Chars);
  670. end;
  671. end;
  672. function TBigEndianUnicodeEncoding.GetCodePage: Cardinal;
  673. begin
  674. Result := CP_UTF16BE;
  675. end;
  676. function TBigEndianUnicodeEncoding.GetEncodingName: UnicodeString;
  677. begin
  678. Result := UnicodeString(CodePageToCodePageName(CodePage));
  679. end;
  680. function TBigEndianUnicodeEncoding.Clone: TEncoding;
  681. begin
  682. Result := TBigEndianUnicodeEncoding.Create;
  683. end;
  684. function TBigEndianUnicodeEncoding.GetAnsiBytes(Chars: PAnsiChar; CharCount: Integer
  685. ): TBytes;
  686. begin
  687. Result := TEncoding.Unicode.GetAnsiBytes(Chars, CharCount);
  688. Swap(Result);
  689. end;
  690. function TBigEndianUnicodeEncoding.GetAnsiString(Bytes: PByte;
  691. ByteCount: Integer): ansistring;
  692. var
  693. B: TBytes;
  694. begin
  695. if ByteCount=0 then
  696. Exit('');
  697. SetLength(B, ByteCount);
  698. Move(Bytes^, B[0], ByteCount);
  699. Swap(B);
  700. Result := TEncoding.Unicode.GetAnsiString(PByte(@B[0]), ByteCount);
  701. end;
  702. function TBigEndianUnicodeEncoding.GetPreamble: TBytes;
  703. begin
  704. SetLength(Result, 2);
  705. Result[0] := $FE;
  706. Result[1] := $FF;
  707. end;
  708. procedure TBigEndianUnicodeEncoding.Swap(var B: TBytes);
  709. var
  710. LastB, I: Integer;
  711. C: Byte;
  712. begin
  713. LastB := Length(B)-1;
  714. I := 0;
  715. while I < LastB do
  716. begin
  717. C := B[I];
  718. B[I] := B[I+1];
  719. B[I+1] := C;
  720. Inc(I, 2);
  721. end;
  722. end;