sysencoding.inc 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857
  1. {
  2. *********************************************************************
  3. Copyright (C) 2012 Paul Ishenin,
  4. member of the Free Pascal Development Team
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. *********************************************************************
  11. }
  12. { TEncoding }
  13. class function TEncoding.GetANSI: TEncoding;
  14. begin
  15. {$ifdef FPC_HAS_FEATURE_THREADING}
  16. EnterCriticalSection(FLock);
  17. try
  18. {$endif}
  19. if not Assigned(FStandardEncodings[seAnsi]) then
  20. begin
  21. if Assigned(widestringmanager.GetStandardCodePageProc) then
  22. FStandardEncodings[seAnsi] := TMBCSEncoding.Create(widestringmanager.GetStandardCodePageProc(scpAnsi))
  23. else
  24. FStandardEncodings[seAnsi] := TMBCSEncoding.Create(DefaultSystemCodePage);
  25. end;
  26. {$ifdef FPC_HAS_FEATURE_THREADING}
  27. finally
  28. LeaveCriticalSection(FLock);
  29. end;
  30. {$endif}
  31. Result := FStandardEncodings[seAnsi];
  32. end;
  33. function TEncoding.GetAnsiBytes(const S: string): TBytes;
  34. begin
  35. if S='' then
  36. Result := nil
  37. else
  38. Result := GetAnsiBytes(S, 1, Length(S));
  39. end;
  40. function TEncoding.GetAnsiBytes(const S: string; CharIndex, CharCount: Integer
  41. ): TBytes;
  42. begin
  43. Result := GetAnsiBytes(Pointer(@S[CharIndex]), CharCount);
  44. end;
  45. function TEncoding.GetAnsiString(const Bytes: TBytes): string;
  46. begin
  47. if Length(Bytes)=0 then
  48. Result := ''
  49. else
  50. Result := GetAnsiString(Bytes, 0, Length(Bytes));
  51. end;
  52. function TEncoding.GetAnsiString(const Bytes: TBytes; ByteIndex,
  53. ByteCount: Integer): string;
  54. begin
  55. Result := GetAnsiString(Pointer(@Bytes[ByteIndex]), ByteCount);
  56. SetCodePage(RawByteString(Result), DefaultSystemCodePage, False);
  57. end;
  58. class function TEncoding.GetASCII: TEncoding;
  59. begin
  60. {$ifdef FPC_HAS_FEATURE_THREADING}
  61. EnterCriticalSection(FLock);
  62. try
  63. {$endif}
  64. if not Assigned(FStandardEncodings[seAscii]) then
  65. FStandardEncodings[seAscii] := TMBCSEncoding.Create(CP_ASCII);
  66. {$ifdef FPC_HAS_FEATURE_THREADING}
  67. finally
  68. LeaveCriticalSection(FLock);
  69. end;
  70. {$endif}
  71. Result := FStandardEncodings[seAscii];
  72. end;
  73. class function TEncoding.GetBigEndianUnicode: TEncoding;
  74. begin
  75. {$ifdef FPC_HAS_FEATURE_THREADING}
  76. EnterCriticalSection(FLock);
  77. try
  78. {$endif}
  79. if not Assigned(FStandardEncodings[seBigEndianUnicode]) then
  80. FStandardEncodings[seBigEndianUnicode] := TBigEndianUnicodeEncoding.Create;
  81. {$ifdef FPC_HAS_FEATURE_THREADING}
  82. finally
  83. LeaveCriticalSection(FLock);
  84. end;
  85. {$endif}
  86. Result := FStandardEncodings[seBigEndianUnicode];
  87. end;
  88. class function TEncoding.GetDefault: TEncoding;
  89. begin
  90. Result := GetSystemEncoding;
  91. end;
  92. class function TEncoding.GetSystemEncoding: TEncoding;
  93. var
  94. I: Integer;
  95. begin
  96. {$ifdef FPC_HAS_FEATURE_THREADING}
  97. EnterCriticalSection(FLock);
  98. try
  99. {$endif}
  100. for I := Low(FSystemEncodings) to High(FSystemEncodings) do
  101. begin
  102. if FSystemEncodings[I].CodePage=DefaultSystemCodePage then
  103. begin
  104. Result := FSystemEncodings[I];
  105. if I<>Low(FSystemEncodings) then // exchange with first position to find it faster the next time
  106. begin
  107. FSystemEncodings[I] := FSystemEncodings[Low(FSystemEncodings)];
  108. FSystemEncodings[Low(FSystemEncodings)] := Result;
  109. end;
  110. Exit;
  111. end;
  112. end;
  113. // not found - create new encoding at first position
  114. Result := TMBCSEncoding.Create(DefaultSystemCodePage);
  115. SetLength(FSystemEncodings, Length(FSystemEncodings)+1);
  116. if High(FSystemEncodings)<>Low(FSystemEncodings) then
  117. FSystemEncodings[High(FSystemEncodings)] := FSystemEncodings[Low(FSystemEncodings)];
  118. FSystemEncodings[Low(FSystemEncodings)] := Result;
  119. {$ifdef FPC_HAS_FEATURE_THREADING}
  120. finally
  121. LeaveCriticalSection(FLock);
  122. end;
  123. {$endif}
  124. end;
  125. class function TEncoding.GetUnicode: TEncoding;
  126. begin
  127. {$ifdef FPC_HAS_FEATURE_THREADING}
  128. EnterCriticalSection(FLock);
  129. try
  130. {$endif}
  131. if not Assigned(FStandardEncodings[seUnicode]) then
  132. FStandardEncodings[seUnicode] := TUnicodeEncoding.Create;
  133. {$ifdef FPC_HAS_FEATURE_THREADING}
  134. finally
  135. LeaveCriticalSection(FLock);
  136. end;
  137. {$endif}
  138. Result := FStandardEncodings[seUnicode];
  139. end;
  140. class function TEncoding.GetUTF7: TEncoding;
  141. begin
  142. {$ifdef FPC_HAS_FEATURE_THREADING}
  143. EnterCriticalSection(FLock);
  144. try
  145. {$endif}
  146. if not Assigned(FStandardEncodings[seUTF7]) then
  147. FStandardEncodings[seUTF7] := TUTF7Encoding.Create;
  148. {$ifdef FPC_HAS_FEATURE_THREADING}
  149. finally
  150. LeaveCriticalSection(FLock);
  151. end;
  152. {$endif}
  153. Result := FStandardEncodings[seUTF7];
  154. end;
  155. class function TEncoding.GetUTF8: TEncoding;
  156. begin
  157. {$ifdef FPC_HAS_FEATURE_THREADING}
  158. EnterCriticalSection(FLock);
  159. try
  160. {$endif}
  161. if not Assigned(FStandardEncodings[seUTF8]) then
  162. FStandardEncodings[seUTF8] := TUTF8Encoding.Create;
  163. {$ifdef FPC_HAS_FEATURE_THREADING}
  164. finally
  165. LeaveCriticalSection(FLock);
  166. end;
  167. {$endif}
  168. Result := FStandardEncodings[seUTF8];
  169. end;
  170. class procedure TEncoding.FreeEncodings;
  171. var
  172. E: TStandardEncoding;
  173. I: Integer;
  174. begin
  175. {$ifdef FPC_HAS_FEATURE_THREADING}
  176. EnterCriticalSection(FLock);
  177. try
  178. {$endif}
  179. for E := Low(FStandardEncodings) to High(FStandardEncodings) do
  180. FreeAndNil(FStandardEncodings[E]);
  181. for I := Low(FSystemEncodings) to High(FSystemEncodings) do
  182. FSystemEncodings[I].Free;
  183. SetLength(FSystemEncodings, 0);
  184. {$ifdef FPC_HAS_FEATURE_THREADING}
  185. finally
  186. LeaveCriticalSection(FLock);
  187. end;
  188. {$endif}
  189. end;
  190. class constructor TEncoding.Create;
  191. var
  192. E: TStandardEncoding;
  193. begin
  194. for E := Low(FStandardEncodings) to High(FStandardEncodings) do
  195. FStandardEncodings[E] := nil;
  196. {$ifdef FPC_HAS_FEATURE_THREADING}
  197. InitCriticalSection(FLock);
  198. {$endif}
  199. end;
  200. class destructor TEncoding.Destroy;
  201. begin
  202. FreeEncodings;
  203. {$ifdef FPC_HAS_FEATURE_THREADING}
  204. DoneCriticalSection(FLock);
  205. {$endif}
  206. end;
  207. function TEncoding.Clone: TEncoding;
  208. begin
  209. Result := nil;
  210. end;
  211. class function TEncoding.Convert(Source, Destination: TEncoding;
  212. const Bytes: TBytes): TBytes;
  213. begin
  214. Result := Destination.GetBytes(Source.GetChars(Bytes));
  215. end;
  216. class function TEncoding.Convert(Source, Destination: TEncoding;
  217. const Bytes: TBytes; StartIndex, Count: Integer): TBytes;
  218. begin
  219. Result := Destination.GetBytes(Source.GetChars(Bytes, StartIndex, Count));
  220. end;
  221. class function TEncoding.IsStandardEncoding(AEncoding: TEncoding): Boolean;
  222. var
  223. Encoding: TEncoding;
  224. begin
  225. if Assigned(AEncoding) then
  226. begin
  227. for Encoding in FStandardEncodings do
  228. if Encoding = AEncoding then
  229. Exit(True);
  230. for Encoding in FSystemEncodings do
  231. if Encoding = AEncoding then
  232. Exit(True);
  233. end;
  234. Result := False;
  235. end;
  236. class function TEncoding.GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer;
  237. begin
  238. Result := GetBufferEncoding(Buffer, AEncoding, Default);
  239. end;
  240. class function TEncoding.GetBufferEncoding(const Buffer: TBytes;
  241. var AEncoding: TEncoding; ADefaultEncoding: TEncoding): Integer;
  242. function CheckEncoding(AEncoding: TEncoding; out ByteCount: Integer): Boolean;
  243. var
  244. Preamble: TBytes;
  245. begin
  246. Preamble := AEncoding.GetPreamble;
  247. ByteCount := Length(Preamble);
  248. Result := (Length(Buffer) >= ByteCount) and (ByteCount > 0);
  249. if Result then
  250. Result := CompareMem(@Preamble[0], @Buffer[0], ByteCount);
  251. end;
  252. begin
  253. if Assigned(AEncoding) then
  254. begin
  255. if not CheckEncoding(AEncoding, Result) then
  256. Result := 0;
  257. end
  258. else
  259. if CheckEncoding(Unicode, Result) then
  260. AEncoding := Unicode
  261. else
  262. if CheckEncoding(BigEndianUnicode, Result) then
  263. AEncoding := BigEndianUnicode
  264. else
  265. if CheckEncoding(UTF8, Result) then
  266. AEncoding := UTF8
  267. else
  268. begin
  269. AEncoding := ADefaultEncoding;
  270. Result := 0;
  271. end;
  272. end;
  273. function TEncoding.GetByteCount(const Chars: TUnicodeCharArray): Integer;
  274. begin
  275. if Length(Chars)=0 then
  276. Result := 0
  277. else
  278. Result := GetByteCount(Chars, 0, Length(Chars));
  279. end;
  280. function TEncoding.GetByteCount(const Chars: TUnicodeCharArray; CharIndex,
  281. CharCount: Integer): Integer;
  282. begin
  283. if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
  284. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  285. if (CharIndex < 0) then
  286. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  287. Result := GetByteCount(@Chars[CharIndex], CharCount);
  288. end;
  289. function TEncoding.GetByteCount(const S: UnicodeString): Integer;
  290. begin
  291. if S='' then
  292. Result := 0
  293. else
  294. Result := GetByteCount(PUnicodeChar(S), Length(S));
  295. end;
  296. function TEncoding.GetByteCount(const S: UnicodeString; CharIndex, CharCount: Integer): Integer;
  297. begin
  298. if (CharCount < 0) or (Length(S) < CharCount + CharIndex) then
  299. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  300. if (CharIndex < 1) then
  301. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  302. Result := GetByteCount(@S[CharIndex], CharCount);
  303. end;
  304. function TEncoding.GetBytes(const Chars: TUnicodeCharArray): TBytes;
  305. begin
  306. SetLength(Result, GetByteCount(Chars));
  307. if Length(Result)>0 then
  308. GetBytes(@Chars[0], Length(Chars), @Result[0], Length(Result));
  309. end;
  310. function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
  311. CharCount: Integer): TBytes;
  312. begin
  313. if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
  314. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  315. if (CharIndex < 0) then
  316. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  317. SetLength(Result, GetByteCount(Chars, CharIndex, CharCount));
  318. GetBytes(@Chars[CharIndex], CharCount, @Result[0], Length(Result));
  319. end;
  320. function TEncoding.GetBytes(const Chars: TUnicodeCharArray; CharIndex,
  321. CharCount: Integer; const Bytes: TBytes; ByteIndex: Integer): Integer;
  322. var
  323. ByteLen: Integer;
  324. begin
  325. ByteLen := Length(Bytes);
  326. if (ByteLen = 0) and (CharCount > 0) then
  327. raise EEncodingError.Create(SInvalidDestinationArray);
  328. if (ByteIndex < 0) or (ByteLen < ByteIndex) then
  329. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  330. if (CharCount < 0) or (Length(Chars) < CharCount + CharIndex) then
  331. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  332. if (CharIndex < 0) then
  333. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  334. Result := GetBytes(@Chars[CharIndex], CharCount, @Bytes[ByteIndex], ByteLen - ByteIndex);
  335. end;
  336. function TEncoding.GetBytes(const S: UnicodeString): TBytes;
  337. begin
  338. SetLength(Result, GetByteCount(S));
  339. if Length(Result)>0 then
  340. GetBytes(@S[1], Length(S), @Result[0], Length(Result));
  341. end;
  342. function TEncoding.GetBytes(const S: UnicodeString; CharIndex, CharCount: Integer;
  343. const Bytes: TBytes; ByteIndex: Integer): Integer;
  344. var
  345. ByteLen: Integer;
  346. begin
  347. ByteLen := Length(Bytes);
  348. if (ByteLen = 0) and (CharCount > 0) then
  349. raise EEncodingError.Create(SInvalidDestinationArray);
  350. if (ByteIndex < 0) or (ByteLen < ByteIndex) then
  351. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  352. if (CharCount < 0) or (Length(S) < CharCount + CharIndex) then
  353. raise EEncodingError.CreateFmt(SInvalidCount, [CharCount]);
  354. if (CharIndex < 1) then
  355. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  356. Result := GetBytes(@S[CharIndex], CharCount, @Bytes[ByteIndex], ByteLen - ByteIndex);
  357. end;
  358. function TEncoding.GetCharCount(const Bytes: TBytes): Integer;
  359. begin
  360. if Length(Bytes)=0 then
  361. Result := 0
  362. else
  363. Result := GetCharCount(@Bytes[0], Length(Bytes));
  364. end;
  365. function TEncoding.GetCharCount(const Bytes: TBytes; ByteIndex,
  366. ByteCount: Integer): Integer;
  367. begin
  368. if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
  369. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  370. Result := GetCharCount(@Bytes[ByteIndex], ByteCount);
  371. end;
  372. function TEncoding.GetChars(const Bytes: TBytes): TUnicodeCharArray;
  373. begin
  374. SetLength(Result, GetCharCount(Bytes));
  375. if Length(Result)>0 then
  376. GetChars(@Bytes[0], Length(Bytes), @Result[0], Length(Result));
  377. end;
  378. function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer): TUnicodeCharArray;
  379. begin
  380. if (ByteIndex < 0) or (Length(Bytes) < ByteIndex) then
  381. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  382. SetLength(Result, GetCharCount(Bytes, ByteIndex, ByteCount));
  383. GetChars(@Bytes[ByteIndex], ByteCount, @Result[0], Length(Result));
  384. end;
  385. function TEncoding.GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer;
  386. const Chars: TUnicodeCharArray; CharIndex: Integer): Integer;
  387. var
  388. CharLen: Integer;
  389. begin
  390. if (ByteIndex < 0) or (Length(Bytes) <= ByteIndex) then
  391. raise EEncodingError.CreateFmt(SInvalidDestinationIndex, [ByteIndex]);
  392. CharLen := Length(Chars);
  393. if (CharIndex < 0) or (CharLen <= CharIndex) then
  394. raise EEncodingError.CreateFmt(SCharacterIndexOutOfBounds, [CharIndex]);
  395. Result := GetChars(@Bytes[ByteIndex], ByteCount, @Chars[CharIndex], CharLen - CharIndex);
  396. end;
  397. class function TEncoding.GetEncoding(CodePage: Integer): TEncoding;
  398. begin
  399. case CodePage of
  400. CP_UTF16: Result := TUnicodeEncoding.Create;
  401. CP_UTF16BE: Result := TBigEndianUnicodeEncoding.Create;
  402. CP_UTF7: Result := TUTF7Encoding.Create;
  403. CP_UTF8: Result := TUTF8Encoding.Create;
  404. else
  405. Result := TMBCSEncoding.Create(CodePage);
  406. end;
  407. end;
  408. class function TEncoding.GetEncoding(const EncodingName: UnicodeString): TEncoding;
  409. var
  410. ACodePage: TSystemCodePage;
  411. begin
  412. ACodePage := CodePageNameToCodePage(AnsiString(EncodingName));
  413. if ACodePage = $FFFF then
  414. raise EEncodingError.CreateFmt(SNotValidCodePageName, [EncodingName]);
  415. Result := GetEncoding(ACodePage);
  416. end;
  417. function TEncoding.GetString(const Bytes: TBytes): UnicodeString;
  418. var
  419. Chars: TUnicodeCharArray;
  420. begin
  421. if Length(Bytes)=0 then
  422. Result := ''
  423. else
  424. begin
  425. Chars := GetChars(Bytes);
  426. SetString(Result, PUnicodeChar(Chars), Length(Chars));
  427. end;
  428. end;
  429. function TEncoding.GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
  430. var
  431. Chars: TUnicodeCharArray;
  432. begin
  433. Chars := GetChars(Bytes, ByteIndex, ByteCount);
  434. SetString(Result, PUnicodeChar(Chars), Length(Chars));
  435. end;
  436. { TMBCSEncoding }
  437. function TMBCSEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  438. var
  439. S: RawByteString;
  440. begin
  441. widestringmanager.Unicode2AnsiMoveProc(Chars, S, CodePage, CharCount);
  442. Result := Length(S);
  443. end;
  444. function TMBCSEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer; Bytes: PByte;
  445. ByteCount: Integer): Integer;
  446. var
  447. S: RawByteString;
  448. begin
  449. widestringmanager.Unicode2AnsiMoveProc(Chars, S, CodePage, CharCount);
  450. Result := Length(S);
  451. if ByteCount < Result then
  452. Result := ByteCount;
  453. if Result > 0 then
  454. Move(S[1], Bytes[0], Result);
  455. end;
  456. function TMBCSEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  457. var
  458. U: UnicodeString;
  459. begin
  460. widestringmanager.Ansi2UnicodeMoveProc(PChar(Bytes), CodePage, U, ByteCount);
  461. Result := Length(U);
  462. end;
  463. function TMBCSEncoding.GetChars(Bytes: PByte; ByteCount: Integer; Chars: PUnicodeChar;
  464. CharCount: Integer): Integer;
  465. var
  466. U: UnicodeString;
  467. begin
  468. widestringmanager.Ansi2UnicodeMoveProc(PChar(Bytes), CodePage, U, ByteCount);
  469. Result := Length(U);
  470. if CharCount < Result then
  471. Result := CharCount;
  472. if Result > 0 then
  473. Move(U[1], Chars[0], Result * SizeOf(UnicodeChar));
  474. end;
  475. function TMBCSEncoding.GetCodePage: Cardinal;
  476. begin
  477. Result := FCodePage;
  478. end;
  479. function TMBCSEncoding.GetEncodingName: UnicodeString;
  480. begin
  481. Result := UnicodeString(CodePageToCodePageName(CodePage));
  482. end;
  483. constructor TMBCSEncoding.Create;
  484. begin
  485. Create(DefaultSystemCodePage, 0, 0);
  486. end;
  487. constructor TMBCSEncoding.Create(ACodePage: Integer);
  488. begin
  489. Create(ACodePage, 0, 0);
  490. end;
  491. constructor TMBCSEncoding.Create(ACodePage, MBToWCharFlags,
  492. WCharToMBFlags: Integer);
  493. begin
  494. FCodePage := ACodePage;
  495. FMBToWCharFlags := MBToWCharFlags;
  496. FWCharToMBFlags := WCharToMBFlags;
  497. case ACodePage of
  498. CP_UTF7, CP_UTF8, CP_UTF16, CP_UTF16BE: FIsSingleByte := False;
  499. else
  500. FIsSingleByte := True;
  501. end;
  502. end;
  503. function TMBCSEncoding.Clone: TEncoding;
  504. begin
  505. Result := TMBCSEncoding.Create(FCodePage, FMBToWCharFlags, FWCharToMBFlags);
  506. end;
  507. function TMBCSEncoding.GetAnsiBytes(Chars: PChar; CharCount: Integer): TBytes;
  508. var
  509. S: RawByteString;
  510. begin
  511. SetString(S, Chars, CharCount);
  512. SetCodePage(S, DefaultSystemCodePage, False);
  513. SetCodePage(S, GetCodePage, True);
  514. SetLength(Result, Length(S));
  515. if Length(S)>0 then
  516. Move(S[1], Result[0], Length(S));
  517. end;
  518. function TMBCSEncoding.GetAnsiString(Bytes: PByte; ByteCount: Integer): string;
  519. begin
  520. SetString(Result, Pointer(Bytes), ByteCount);
  521. SetCodePage(RawByteString(Result), GetCodePage, False);
  522. SetCodePage(RawByteString(Result), DefaultSystemCodePage, True);
  523. end;
  524. function TMBCSEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  525. begin
  526. Result := CharCount;
  527. end;
  528. function TMBCSEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  529. begin
  530. Result := ByteCount;
  531. end;
  532. function TMBCSEncoding.GetPreamble: TBytes;
  533. begin
  534. case CodePage of
  535. CP_UTF8:
  536. begin
  537. SetLength(Result, 3);
  538. Result[0] := $EF;
  539. Result[1] := $BB;
  540. Result[2] := $BF;
  541. end;
  542. CP_UTF16:
  543. begin
  544. SetLength(Result, 2);
  545. Result[0] := $FF;
  546. Result[1] := $FE;
  547. end;
  548. CP_UTF16BE:
  549. begin
  550. SetLength(Result, 2);
  551. Result[0] := $FE;
  552. Result[1] := $FF;
  553. end;
  554. else
  555. Result := nil;
  556. end;
  557. end;
  558. { TUTF7Encoding }
  559. constructor TUTF7Encoding.Create;
  560. begin
  561. inherited Create(CP_UTF7);
  562. FIsSingleByte := False;
  563. end;
  564. function TUTF7Encoding.Clone: TEncoding;
  565. begin
  566. Result := TUTF7Encoding.Create;
  567. end;
  568. function TUTF7Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  569. begin
  570. Result := CharCount * 3 + 2;
  571. end;
  572. function TUTF7Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  573. begin
  574. Result := ByteCount;
  575. end;
  576. { TUTF8Encoding }
  577. constructor TUTF8Encoding.Create;
  578. begin
  579. inherited Create(CP_UTF8);
  580. FIsSingleByte := False;
  581. end;
  582. function TUTF8Encoding.Clone: TEncoding;
  583. begin
  584. Result := TUTF8Encoding.Create;
  585. end;
  586. function TUTF8Encoding.GetMaxByteCount(CharCount: Integer): Integer;
  587. begin
  588. Result := CharCount * 3;
  589. end;
  590. function TUTF8Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
  591. begin
  592. Result := ByteCount;
  593. end;
  594. function TUTF8Encoding.GetPreamble: TBytes;
  595. begin
  596. SetLength(Result, 3);
  597. Result[0] := $EF;
  598. Result[1] := $BB;
  599. Result[2] := $BF;
  600. end;
  601. { TUnicodeEncoding }
  602. function TUnicodeEncoding.GetByteCount(Chars: PUnicodeChar; CharCount: Integer): Integer;
  603. begin
  604. Result := CharCount * SizeOf(UnicodeChar);
  605. end;
  606. function TUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
  607. Bytes: PByte; ByteCount: Integer): Integer;
  608. begin
  609. Result := CharCount * SizeOf(UnicodeChar);
  610. if ByteCount < Result then
  611. Result := ByteCount;
  612. if Result > 0 then
  613. Move(Chars[0], Bytes[0], Result);
  614. end;
  615. function TUnicodeEncoding.GetCharCount(Bytes: PByte; ByteCount: Integer): Integer;
  616. begin
  617. Result := ByteCount div SizeOf(UnicodeChar);
  618. end;
  619. function TUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
  620. Chars: PUnicodeChar; CharCount: Integer): Integer;
  621. begin
  622. Result := ByteCount div 2;
  623. if CharCount < Result then
  624. Result := CharCount;
  625. Move(Bytes[0], Chars[0], Result * SizeOf(UnicodeChar));
  626. end;
  627. function TUnicodeEncoding.GetCodePage: Cardinal;
  628. begin
  629. Result := CP_UTF16;
  630. end;
  631. function TUnicodeEncoding.GetEncodingName: UnicodeString;
  632. begin
  633. Result := UnicodeString(CodePageToCodePageName(CodePage));
  634. end;
  635. constructor TUnicodeEncoding.Create;
  636. begin
  637. inherited Create;
  638. FIsSingleByte := False;
  639. FMaxCharSize := SizeOf(UnicodeChar);
  640. end;
  641. function TUnicodeEncoding.Clone: TEncoding;
  642. begin
  643. Result := TUnicodeEncoding.Create;
  644. end;
  645. function TUnicodeEncoding.GetAnsiBytes(Chars: PChar; CharCount: Integer
  646. ): TBytes;
  647. var
  648. U: UnicodeString;
  649. begin
  650. widestringmanager.Ansi2UnicodeMoveProc(Chars, DefaultSystemCodePage, U, CharCount);
  651. SetLength(Result, Length(U)*SizeOf(UnicodeChar));
  652. if Length(Result)>0 then
  653. Move(U[1], Result[0], Length(Result));
  654. end;
  655. function TUnicodeEncoding.GetAnsiString(Bytes: PByte; ByteCount: Integer
  656. ): string;
  657. begin
  658. widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Bytes), RawByteString(Result), DefaultSystemCodePage, ByteCount div SizeOf(UnicodeChar));
  659. end;
  660. function TUnicodeEncoding.GetMaxByteCount(CharCount: Integer): Integer;
  661. begin
  662. Result := CharCount * SizeOf(UnicodeChar);
  663. end;
  664. function TUnicodeEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
  665. begin
  666. Result := ByteCount div SizeOf(UnicodeChar);
  667. end;
  668. function TUnicodeEncoding.GetPreamble: TBytes;
  669. begin
  670. SetLength(Result, 2);
  671. Result[0] := $FF;
  672. Result[1] := $FE;
  673. end;
  674. { TBigEndianUnicodeEncoding }
  675. function TBigEndianUnicodeEncoding.GetBytes(Chars: PUnicodeChar; CharCount: Integer;
  676. Bytes: PByte; ByteCount: Integer): Integer;
  677. var
  678. LastByte: PByte;
  679. begin
  680. Result := CharCount * SizeOf(UnicodeChar);
  681. if ByteCount < Result then
  682. Result := ByteCount;
  683. LastByte := @Bytes[Result];
  684. while Bytes < LastByte do
  685. begin
  686. Bytes^ := Hi(Word(Chars^));
  687. inc(Bytes);
  688. if Bytes < LastByte then
  689. Bytes^ := Lo(Word(Chars^));
  690. inc(Bytes);
  691. inc(Chars);
  692. end;
  693. end;
  694. function TBigEndianUnicodeEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
  695. Chars: PUnicodeChar; CharCount: Integer): Integer;
  696. var
  697. LastChar: PUnicodeChar;
  698. begin
  699. Result := ByteCount div SizeOf(UnicodeChar);
  700. if CharCount < Result then
  701. Result := CharCount;
  702. LastChar := @Chars[Result];
  703. while Chars < LastChar do
  704. begin
  705. Chars^ := UnicodeChar(Bytes[1] + Bytes[0] shl 8);
  706. inc(Bytes, SizeOf(UnicodeChar));
  707. inc(Chars);
  708. end;
  709. end;
  710. function TBigEndianUnicodeEncoding.GetCodePage: Cardinal;
  711. begin
  712. Result := CP_UTF16BE;
  713. end;
  714. function TBigEndianUnicodeEncoding.GetEncodingName: UnicodeString;
  715. begin
  716. Result := UnicodeString(CodePageToCodePageName(CodePage));
  717. end;
  718. function TBigEndianUnicodeEncoding.Clone: TEncoding;
  719. begin
  720. Result := TBigEndianUnicodeEncoding.Create;
  721. end;
  722. function TBigEndianUnicodeEncoding.GetAnsiBytes(Chars: PChar; CharCount: Integer
  723. ): TBytes;
  724. begin
  725. Result := TEncoding.Unicode.GetAnsiBytes(Chars, CharCount);
  726. Swap(Result);
  727. end;
  728. function TBigEndianUnicodeEncoding.GetAnsiString(Bytes: PByte;
  729. ByteCount: Integer): string;
  730. var
  731. B: TBytes;
  732. begin
  733. if ByteCount=0 then
  734. Exit('');
  735. SetLength(B, ByteCount);
  736. Move(Bytes^, B[0], ByteCount);
  737. Swap(B);
  738. Result := TEncoding.Unicode.GetAnsiString(PByte(@B[0]), ByteCount);
  739. end;
  740. function TBigEndianUnicodeEncoding.GetPreamble: TBytes;
  741. begin
  742. SetLength(Result, 2);
  743. Result[0] := $FE;
  744. Result[1] := $FF;
  745. end;
  746. procedure TBigEndianUnicodeEncoding.Swap(var B: TBytes);
  747. var
  748. LastB, I: Integer;
  749. C: Byte;
  750. begin
  751. LastB := Length(B)-1;
  752. I := 0;
  753. while I < LastB do
  754. begin
  755. C := B[I];
  756. B[I] := B[I+1];
  757. B[I+1] := C;
  758. Inc(I, 2);
  759. end;
  760. end;