system.netencoding.pp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2019 by Michael Van Canneyt, member of the
  4. Free Pascal development team
  5. VCL compatible TNetEncoding unit
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$H+}
  14. unit System.NetEncoding;
  15. interface
  16. {$IFDEF FPC_DOTTEDUNITS}
  17. uses System.SysUtils, System.Classes, System.Types;
  18. {$ELSE FPC_DOTTEDUNITS}
  19. uses Sysutils, Classes, Types;
  20. {$ENDIF FPC_DOTTEDUNITS}
  21. type
  22. // Not used here
  23. EHTTPException = class(Exception);
  24. UnsafeChar = Byte;
  25. TUnsafeChars = set of UnsafeChar;
  26. TURLEncoding = Class;
  27. { TNetEncoding }
  28. TNetEncoding = class
  29. private
  30. type
  31. TStandardEncoding = (
  32. seBase64,
  33. seBase64String,
  34. seHTML,
  35. seURL);
  36. Class var
  37. FStdEncodings : Array[TStandardEncoding] of TNetEncoding;
  38. Class Function GetStdEncoding(aIndex : TStandardEncoding) : TNetEncoding; Static;
  39. Class Destructor Destroy;
  40. class function GetURLEncoding: TURLEncoding; static;
  41. protected
  42. // These must be implemented by descendents
  43. Function DoDecode(const aInput: RawByteString): RawByteString; overload; virtual; abstract;
  44. Function DoEncode(const aInput: RawByteString): RawByteString; overload; virtual; abstract;
  45. // These can be overridden by descendents for effiency
  46. Function DoDecode(const aInput: UnicodeString): UnicodeString; overload; virtual;
  47. Function DoEncode(const aInput: UnicodeString): UnicodeString; overload; virtual;
  48. Function DoDecode(const aInput, aOutput: TStream): Integer; overload; virtual;
  49. Function DoEncode(const aInput, aOutput: TStream): Integer; overload; virtual;
  50. Function DoDecode(const aInput: array of Byte): TBytes; overload; virtual;
  51. Function DoEncode(const aInput: array of Byte): TBytes; overload; virtual;
  52. Function DoDecodeStringToBytes(const aInput: RawByteString): TBytes; virtual; overload;
  53. Function DoDecodeStringToBytes(const aInput: UnicodeString): TBytes; virtual; overload;
  54. Function DoEncodeBytesToString(const aInput: array of Byte): UnicodeString; overload; virtual;
  55. Function DoEncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString; overload; virtual;
  56. public
  57. Class Procedure FreeStdEncodings;
  58. // Public stubs, they call the Do* versions
  59. // Stream
  60. Function Decode(const aInput, aOutput: TStream): Integer; overload;
  61. Function Encode(const aInput, aOutput: TStream): Integer; overload;
  62. // TBytes
  63. Function Decode(const aInput: array of Byte): TBytes; overload;
  64. Function Encode(const aInput: array of Byte): TBytes; overload;
  65. // Strings
  66. Function Decode(const aInput: UnicodeString): UnicodeString; overload;
  67. Function Encode(const aInput: UnicodeString): UnicodeString; overload;
  68. Function Decode(const aInput: RawByteString): RawByteString; overload;
  69. Function Encode(const aInput: RawByteString): RawByteString; overload;
  70. // UnicodeString to Bytes
  71. Function DecodeStringToBytes(const aInput: UnicodeString): TBytes;
  72. Function DecodeStringToBytes(const aInput: RawByteString): TBytes;
  73. Function EncodeBytesToString(const aInput: array of Byte): UnicodeString; overload;
  74. Function EncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString; overload;
  75. // Default instances
  76. class property Base64: TNetEncoding Index seBase64 read GetStdEncoding;
  77. class property Base64String: TNetEncoding Index seBase64String read GetStdEncoding;
  78. class property HTML: TNetEncoding Index seHTML read GetStdEncoding;
  79. class property URL: TURLEncoding read GetURLEncoding;
  80. end;
  81. { TCustomBase64Encoding }
  82. TCustomBase64Encoding = class(TNetEncoding)
  83. protected const
  84. kCharsPerLine = 76;
  85. kLineSeparator = #13#10;
  86. protected
  87. FCharsPerline: Integer;
  88. FLineSeparator: string;
  89. FPadEnd: Boolean;
  90. protected
  91. Function DoDecode(const aInput, aOutput: TStream): Integer; overload; override;
  92. Function DoEncode(const aInput, aOutput: TStream): Integer; overload; override;
  93. Function DoDecode(const aInput: RawByteString): RawByteString; overload; override;
  94. Function DoEncode(const aInput: RawByteString): RawByteString; overload; override;
  95. Function DoDecode(const aInput: array of Byte): TBytes; overload; override;
  96. Function DoEncode(const aInput: array of Byte): TBytes; overload; override;
  97. end;
  98. { TBase64Encoding }
  99. TBase64Encoding = class(TCustomBase64Encoding)
  100. public
  101. constructor Create; overload; virtual;
  102. constructor Create(CharsPerLine: Integer); overload; virtual;
  103. constructor Create(CharsPerLine: Integer; LineSeparator: string); overload; virtual;
  104. end;
  105. { TBase64StringEncoding }
  106. TBase64StringEncoding = class(TCustomBase64Encoding)
  107. public
  108. constructor Create; overload; virtual;
  109. end;
  110. { TURLEncoding }
  111. TURLEncoding = class(TNetEncoding)
  112. protected
  113. Function DoEncode(const aInput: RawBytestring): RawBytestring; overload; override;
  114. Function DoDecode(const aInput: RawBytestring): RawBytestring; overload; override;
  115. Public
  116. Type
  117. UnsafeChar = Byte;
  118. TUnsafeChars = set of UnsafeChar;
  119. TEncodeOption = (SpacesAsPlus, EncodePercent);
  120. TEncodeOptions = set of TEncodeOption;
  121. TDecodeOption = (PlusAsSpaces);
  122. TDecodeOptions = set of TDecodeOption;
  123. Public
  124. function Encode(const aInput: string; const aSet: TUnsafeChars; const aOptions: TEncodeOptions; aEncoding: TEncoding = nil): string; overload;
  125. function EncodeQuery(const aInput: string; const aExtraUnsafeChars: TUnsafeChars): string;
  126. function EncodePath(const aPath: string; const aExtraUnsafeChars: TUnsafeChars): string;
  127. class function URIDecode(const aValue: string; aPlusAsSpaces: Boolean): string;
  128. end;
  129. THTMLEncoding = class(TNetEncoding)
  130. protected
  131. Function DoDecode(const aInput: UnicodeString): UnicodeString; override;
  132. Function DoDecode(const aInput: RawBytestring): RawBytestring; overload; override;
  133. Function DoEncode(const aInput: UnicodeString): UnicodeString; override;
  134. Function DoEncode(const aInput: RawBytestring): RawBytestring; overload; override;
  135. end;
  136. implementation
  137. {$IFDEF FPC_DOTTEDUNITS}
  138. uses System.Hash.Base64, FpWeb.Http.Protocol, Html.Defs, Xml.Read;
  139. {$ELSE FPC_DOTTEDUNITS}
  140. uses base64, httpprotocol, HTMLDefs, xmlread;
  141. {$ENDIF FPC_DOTTEDUNITS}
  142. Resourcestring
  143. sInvalidHTMLEntity = 'Invalid HTML encoded character: %s';
  144. { TCustomBase64Encoding }
  145. function TCustomBase64Encoding.DoDecode(const aInput, aOutput: TStream): Integer;
  146. Var
  147. S : TBase64DecodingStream;
  148. begin
  149. S:=TBase64DecodingStream.Create(aInput,bdmMIME);
  150. try
  151. Result:=S.Size;
  152. aOutput.CopyFrom(S,Result);
  153. finally
  154. S.Free;
  155. end;
  156. end;
  157. function TCustomBase64Encoding.DoDecode(const aInput: array of Byte): TBytes;
  158. var
  159. Instream : TBytesStream;
  160. Outstream : TBytesStream;
  161. Decoder : TBase64DecodingStream;
  162. const
  163. cPad: AnsiChar = '=';
  164. begin
  165. if Length(aInput)=0 then
  166. Exit(nil);
  167. Instream:=TBytesStream.Create;
  168. try
  169. Instream.WriteBuffer(aInput[0], Length(aInput));
  170. while Instream.Size mod 4 > 0 do
  171. Instream.WriteBuffer(cPad, 1);
  172. Instream.Position:=0;
  173. Outstream:=TBytesStream.Create;
  174. try
  175. Decoder:=TBase64DecodingStream.Create(Instream,bdmMIME);
  176. try
  177. Outstream.CopyFrom(Decoder,Decoder.Size);
  178. Result:=Outstream.Bytes;
  179. SetLength(Result,Outstream.Size);
  180. finally
  181. Decoder.Free;
  182. end;
  183. finally
  184. Outstream.Free;
  185. end;
  186. finally
  187. Instream.Free;
  188. end;
  189. end;
  190. function TCustomBase64Encoding.DoEncode(const aInput, aOutput: TStream): Integer;
  191. Var
  192. S : TBase64EncodingStream;
  193. begin
  194. S:=TBase64EncodingStream.Create(aOutput,FCharsPerline,FLineSeparator,FPadEnd);
  195. try
  196. Result:=S.CopyFrom(aInput,0);
  197. finally
  198. S.Free;
  199. end;
  200. end;
  201. function TCustomBase64Encoding.DoEncode(const aInput: array of Byte): TBytes;
  202. var
  203. Outstream : TBytesStream;
  204. Encoder : TBase64EncodingStream;
  205. begin
  206. if Length(aInput)=0 then
  207. Exit(nil);
  208. Outstream:=TBytesStream.Create;
  209. try
  210. Encoder:=TBase64EncodingStream.create(outstream,FCharsPerline,FLineSeparator,FPadEnd);
  211. try
  212. Encoder.Write(aInput[0],Length(aInput));
  213. finally
  214. Encoder.Free;
  215. end;
  216. Result:=Outstream.Bytes;
  217. SetLength(Result,Outstream.Size);
  218. finally
  219. Outstream.free;
  220. end;
  221. end;
  222. function TCustomBase64Encoding.DoDecode(const aInput: RawByteString): RawByteString;
  223. begin
  224. Result:=DecodeStringBase64(aInput,False);
  225. end;
  226. function TCustomBase64Encoding.DoEncode(const aInput: RawByteString): RawByteString;
  227. var
  228. Outstream : TStringStream;
  229. Encoder : TBase64EncodingStream;
  230. begin
  231. if Length(aInput)=0 then
  232. Exit('');
  233. Outstream:=TStringStream.Create('');
  234. try
  235. Encoder:=TBase64EncodingStream.create(outstream,FCharsPerline,FLineSeparator,FPadEnd);
  236. try
  237. Encoder.Write(aInput[1],Length(aInput));
  238. finally
  239. Encoder.Free;
  240. end;
  241. Result:=Outstream.DataString;
  242. finally
  243. Outstream.free;
  244. end;
  245. end;
  246. { TBase64Encoding }
  247. constructor TBase64Encoding.Create(CharsPerLine: Integer);
  248. begin
  249. Create(CharsPerLine, kLineSeparator);
  250. end;
  251. constructor TBase64Encoding.Create(CharsPerLine: Integer; LineSeparator: string);
  252. begin
  253. inherited Create;
  254. FCharsPerline:=CharsPerLine;
  255. FLineSeparator:=LineSeparator;
  256. FPadEnd:=True;
  257. end;
  258. constructor TBase64Encoding.Create;
  259. begin
  260. Create(kCharsPerLine, kLineSeparator);
  261. end;
  262. { TBase64StringEncoding }
  263. constructor TBase64StringEncoding.Create;
  264. begin
  265. inherited Create;
  266. FCharsPerline:=0;
  267. FLineSeparator:='';
  268. FPadEnd:=True;
  269. end;
  270. { ---------------------------------------------------------------------
  271. TNetEncoding
  272. ---------------------------------------------------------------------}
  273. class procedure TNetEncoding.FreeStdEncodings;
  274. Var
  275. I : TStandardEncoding;
  276. begin
  277. For I in TStandardEncoding do
  278. FreeAndNil(FStdEncodings[i]);
  279. end;
  280. class destructor TNetEncoding.Destroy;
  281. begin
  282. FreeStdEncodings;
  283. end;
  284. class function TNetEncoding.GetURLEncoding: TURLEncoding;
  285. begin
  286. Result:=TURLEncoding(GetStdEncoding(seURL));
  287. end;
  288. class function TNetEncoding.GetStdEncoding(aIndex: TStandardEncoding): TNetEncoding;
  289. begin
  290. Result:=FStdEncodings[aIndex];
  291. if Assigned(Result) then
  292. begin
  293. {$ifdef FPC_HAS_FEATURE_THREADING}
  294. ReadDependencyBarrier; // Read Result contents (by caller) after Result pointer.
  295. {$endif}
  296. Exit;
  297. end;
  298. case aIndex of
  299. seBase64: Result:=TBase64Encoding.Create;
  300. seBase64String: Result:=TBase64StringEncoding.Create;
  301. seHTML: Result:=THTMLEncoding.Create;
  302. seURL: Result:=TURLEncoding.Create;
  303. end;
  304. {$ifdef FPC_HAS_FEATURE_THREADING}
  305. WriteBarrier; // Write FStdEncodings[aIndex] after Result contents.
  306. if InterlockedCompareExchange(Pointer(FStdEncodings[aIndex]), Pointer(Result), nil) <> nil then
  307. begin
  308. Result.Free;
  309. Result := FStdEncodings[aIndex];
  310. end;
  311. {$else}
  312. FStdEncodings[aIndex] := Result;
  313. {$endif}
  314. end;
  315. // Public API
  316. function TNetEncoding.Encode(const aInput: array of Byte): TBytes;
  317. begin
  318. Result:=DoEncode(aInput);
  319. end;
  320. function TNetEncoding.Encode(const aInput, aOutput: TStream): Integer;
  321. begin
  322. Result:=DoEncode(aInput, aOutput);
  323. end;
  324. function TNetEncoding.Decode(const aInput: RawByteString): RawByteString;
  325. begin
  326. Result:=DoDecode(aInput);
  327. end;
  328. function TNetEncoding.Encode(const aInput: RawByteString): RawByteString;
  329. begin
  330. Result:=DoEncode(aInput);
  331. end;
  332. function TNetEncoding.Encode(const aInput: UnicodeString): UnicodeString;
  333. begin
  334. Result:=DoEncode(aInput);
  335. end;
  336. function TNetEncoding.EncodeBytesToString(const aInput: array of Byte): UnicodeString;
  337. begin
  338. Result:=DoEncodeBytesToString(aInput);
  339. end;
  340. function TNetEncoding.EncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString;
  341. begin
  342. Result:=DoEncodeBytesToString(aInput, Size);
  343. end;
  344. function TNetEncoding.Decode(const aInput, aOutput: TStream): Integer;
  345. begin
  346. Result:=DoDecode(aInput,aOutput);
  347. end;
  348. function TNetEncoding.Decode(const aInput: UnicodeString): UnicodeString;
  349. begin
  350. Result:=DoDecode(aInput);
  351. end;
  352. function TNetEncoding.DecodeStringToBytes(const aInput: UnicodeString): TBytes;
  353. begin
  354. Result:=DoDecodeStringToBytes(aInput);
  355. end;
  356. function TNetEncoding.DecodeStringToBytes(const aInput: RawByteString): TBytes;
  357. begin
  358. Result:=DoDecodeStringToBytes(aInput);
  359. end;
  360. function TNetEncoding.Decode(const aInput: array of Byte): TBytes;
  361. begin
  362. Result:=DoDecode(aInput);
  363. end;
  364. // Protected
  365. function TNetEncoding.DoDecode(const aInput: UnicodeString): UnicodeString;
  366. Var
  367. U : UTF8String;
  368. begin
  369. U:=UTF8Encode(aInput);
  370. Result:=UTF8Decode(DoDecode(U));
  371. end;
  372. function TNetEncoding.DoEncode(const aInput: UnicodeString): UnicodeString;
  373. Var
  374. U : UTF8String;
  375. begin
  376. U:=UTF8Encode(aInput);
  377. Result:=UTF8Decode(DoEncode(U));
  378. end;
  379. function TNetEncoding.DoDecode(const aInput: array of Byte): TBytes;
  380. begin
  381. if Length(aInput)=0 then
  382. Result:=Default(TBytes)
  383. else
  384. Result:=TEncoding.UTF8.GetBytes(DoDecode(UTF8ToString(aInput)));
  385. end;
  386. function TNetEncoding.DoDecode(const aInput, aOutput: TStream): Integer;
  387. var
  388. Src,Dest: TBytes;
  389. Len : Integer;
  390. begin
  391. Result:=0;
  392. Len:=aInput.Size;
  393. if Len<>0 then
  394. begin
  395. Src:=Default(TBytes);
  396. SetLength(Src,Len);
  397. aInput.ReadBuffer(Src,Len);
  398. Dest:=DoDecode(Src);
  399. Result:=Length(Dest);
  400. aOutput.WriteBuffer(Dest,Result);
  401. end
  402. end;
  403. function TNetEncoding.DoDecodeStringToBytes(const aInput: UnicodeString): TBytes;
  404. begin
  405. Result:=TEncoding.UTF8.GetBytes(DoDecode(aInput));
  406. end;
  407. function TNetEncoding.DoEncode(const aInput: array of Byte): TBytes;
  408. begin
  409. if Length(aInput)=0 then
  410. Result:=Default(TBytes)
  411. else
  412. Result:=TEncoding.UTF8.GetBytes(DoEncode(UTF8ToString(aInput)))
  413. end;
  414. function TNetEncoding.DoDecodeStringToBytes(const aInput: RawByteString): TBytes;
  415. Var
  416. U : RawByteString;
  417. begin
  418. U:=AInput;
  419. UniqueString(U);
  420. SetCodePage(U,CP_UTF8,True);
  421. Result:=DoDecodeStringToBytes(UTF8Decode(U));
  422. end;
  423. function TNetEncoding.DoEncodeBytesToString(const aInput: array of Byte): UnicodeString;
  424. begin
  425. Result:=TEncoding.UTF8.GetString(DoEncode(aInput));
  426. end;
  427. function TNetEncoding.DoEncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString;
  428. Var
  429. Src : TBytes;
  430. begin
  431. Src:=Default(TBytes);
  432. SetLength(Src,Size);
  433. Move(aInput^,Src[0],Size);
  434. Result:=DoEncodeBytesToString(Src);
  435. end;
  436. function TNetEncoding.DoEncode(const aInput, aOutput: TStream): Integer;
  437. var
  438. InBuf: array of Byte;
  439. OutBuf: TBytes;
  440. begin
  441. if aInput.Size > 0 then
  442. begin
  443. SetLength(InBuf, aInput.Size);
  444. aInput.Read(InBuf[0], aInput.Size);
  445. OutBuf:=DoEncode(InBuf);
  446. Result:=Length(OutBuf);
  447. aOutput.Write(OutBuf, Result);
  448. SetLength(InBuf, 0);
  449. end
  450. else
  451. Result:=0;
  452. end;
  453. { TBase64Encoding }
  454. { TURLEncoding }
  455. function TURLEncoding.DoDecode(const aInput: RawBytestring): RawBytestring;
  456. begin
  457. Result:=HTTPDecode(aInput);
  458. end;
  459. function TURLEncoding.Encode(const aInput: string; const aSet: TUnsafeChars; const aOptions: TEncodeOptions; aEncoding: TEncoding): string;
  460. var
  461. S : TUnsafeChars;
  462. begin
  463. S:=aSet;
  464. if (TEncodeOption.EncodePercent in aOptions) then
  465. S:=aSet+[Ord('%')];
  466. Result:=HttpEncode(aInput,S,TEncodeOption.SpacesAsPlus in aOptions);
  467. end;
  468. function TURLEncoding.DoEncode(const aInput: RawBytestring): RawBytestring;
  469. begin
  470. Result:=HTTPEncode(aInput)
  471. end;
  472. function TURLEncoding.EncodeQuery(const aInput: string; const aExtraUnsafeChars: TUnsafeChars): string;
  473. const
  474. QueryUnsafeChars: TUnsafeChars = [Ord('''')+Ord('%')];
  475. var
  476. Unsafe: TUnsafeChars;
  477. begin
  478. Unsafe:=QueryUnsafeChars+aExtraUnsafeChars;
  479. Result:=HTTPEncode(aInput,Unsafe,True);
  480. end;
  481. function TURLEncoding.EncodePath(const aPath: string; const aExtraUnsafeChars: TUnsafeChars): string;
  482. var
  483. lPaths: TStringDynArray;
  484. I,Last: Integer;
  485. LUnsafeChars: TUnsafeChars;
  486. begin
  487. if APath = '' then
  488. Exit('/');
  489. Result:='';
  490. lPaths:=APath.Split(['/'], TStringSplitOptions.ExcludeEmpty);
  491. Last:=Length(lPaths)-1;
  492. for I:=0 to Last do
  493. Result:=Result+'/'+HTTPEncode(LPaths[I],aExtraUnsafeChars,True);
  494. end;
  495. class function TURLEncoding.URIDecode(const aValue: string; aPlusAsSpaces: Boolean): string;
  496. begin
  497. Result:=HTTPDecode(aValue,aPlusAsSpaces);
  498. end;
  499. { THTMLEncoding }
  500. Function THTMLEncoding.DoEncode(const aInput: UnicodeString): UnicodeString;
  501. Var
  502. S : UTF8String;
  503. begin
  504. S:=UTF8Encode(aInput);
  505. Result:=UTF8Decode(DoEncode(S));
  506. end;
  507. Function THTMLEncoding.DoEncode(const aInput: RawByteString): RawByteString;
  508. var
  509. Src, Curr, OrigDest,Dest : PAnsiChar;
  510. Procedure CopyData(S : String);
  511. Var
  512. len : integer;
  513. begin
  514. Len:=(Curr-Src);
  515. if Len>0 then
  516. Move(Src^,Dest^,Len);
  517. Src:=Curr;
  518. Inc(Src);
  519. inc(Dest,Len);
  520. Len:=Length(S);
  521. if Len>0 then
  522. Move(S[1],Dest^,Len);
  523. inc(Dest,Len);
  524. end;
  525. begin
  526. SetLength(Result,Length(aInput)*6);
  527. if Length(aInput)=0 then exit;
  528. Src:=PAnsiChar(aInput);
  529. Curr:=Src;
  530. OrigDest:=PAnsiChar(Result);
  531. Dest:=OrigDest;
  532. // Convert: &, <, >, "
  533. while Curr^<>#0 do
  534. begin
  535. case Curr^ of
  536. '&': CopyData('&amp;');
  537. '<': CopyData('&lt;');
  538. '>': CopyData('&gt;');
  539. '"': CopyData('&quot;');
  540. end;
  541. Inc(Curr);
  542. end;
  543. CopyData('');
  544. SetLength(Result,Dest-OrigDest);
  545. end;
  546. Function THTMLEncoding.DoDecode(const aInput: RawByteString): RawByteString;
  547. Var
  548. S : RawByteString;
  549. begin
  550. S:=aInput;
  551. UniqueString(S);
  552. SetCodePage(S,CP_UTF8,true);
  553. Result:=UTF8Encode(DoDecode(UTF8Decode(S)));
  554. end;
  555. Function THTMLEncoding.DoDecode(const aInput: UnicodeString): UnicodeString;
  556. var
  557. Src, Curr, Dest : PWideChar;
  558. Procedure CopyData(S : UnicodeString);
  559. Var
  560. len : integer;
  561. begin
  562. Len:=(Curr-Src);
  563. if Len>0 then
  564. begin
  565. Move(Src^,Dest^,Len*Sizeof(UnicodeChar));
  566. inc(Dest,Len);
  567. end;
  568. Len:=Length(S);
  569. if Len>0 then
  570. begin
  571. Move(S[1],Dest^,Len*Sizeof(UnicodeChar));
  572. inc(Dest,Len);
  573. end;
  574. end;
  575. Var
  576. Len : Integer;
  577. U : UnicodeChar;
  578. US : Unicodestring;
  579. Ent,OrigDest : PWideChar;
  580. begin
  581. SetLength(Result, Length(aInput));
  582. if Length(Result)=0 then exit;
  583. Src:=PWideChar(aInput);
  584. OrigDest:=PWideChar(Result);
  585. Dest:=OrigDest;
  586. Curr:=Src;
  587. while Curr^ <> #0 do
  588. begin
  589. If Curr^='&' then
  590. begin
  591. CopyData('');
  592. Src:=Curr;
  593. Ent:=Curr;
  594. While Not (Ent^ in [#0,';']) do
  595. Inc(Ent);
  596. Len:=Ent-Curr-1;
  597. SetLength(US,Len);
  598. if Len>0 then
  599. Move(Curr[1],US[1],Len*SizeOf(UnicodeChar));
  600. if not ResolveHTMLEntityReference(US,U) then
  601. raise EConvertError.CreateFmt(sInvalidHTMLEntity,[US]);
  602. CopyData(U);
  603. Curr:=Ent;
  604. Src:=Curr;
  605. Inc(Src);
  606. end;
  607. Inc(Curr);
  608. end;
  609. CopyData('');
  610. SetLength(Result,Dest-OrigDest);
  611. end;
  612. end.