system.netencoding.pp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2019 by Michael Van Canneyt, member of the
  4. Free Pascal development team
  5. VCL compatible TNetEncoding unit
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$mode objfpc}
  13. {$H+}
  14. unit System.NetEncoding;
  15. interface
  16. uses Sysutils, Classes;
  17. type
  18. // Not used here
  19. EHTTPException = class(Exception);
  20. { TNetEncoding }
  21. TNetEncoding = class
  22. private
  23. Const
  24. StdCount = 3;
  25. Class var
  26. FStdEncodings : Array[1..StdCount] of TNetEncoding;
  27. Class Function GetStdEncoding(aIndex : Integer) : TNetEncoding; Static;
  28. Class Destructor Destroy;
  29. protected
  30. // These must be implemented by descendents
  31. Function DoDecode(const aInput: RawByteString): RawByteString; overload; virtual; abstract;
  32. Function DoEncode(const aInput: RawByteString): RawByteString; overload; virtual; abstract;
  33. // These can be overridden by descendents for effiency
  34. Function DoDecode(const aInput: UnicodeString): UnicodeString; overload; virtual;
  35. Function DoEncode(const aInput: UnicodeString): UnicodeString; overload; virtual;
  36. Function DoDecode(const aInput, aOutput: TStream): Integer; overload; virtual;
  37. Function DoEncode(const aInput, aOutput: TStream): Integer; overload; virtual;
  38. Function DoDecode(const aInput: array of Byte): TBytes; overload; virtual;
  39. Function DoEncode(const aInput: array of Byte): TBytes; overload; virtual;
  40. Function DoDecodeStringToBytes(const aInput: RawByteString): TBytes; virtual; overload;
  41. Function DoDecodeStringToBytes(const aInput: UnicodeString): TBytes; virtual; overload;
  42. Function DoEncodeBytesToString(const aInput: array of Byte): UnicodeString; overload; virtual;
  43. Function DoEncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString; overload; virtual;
  44. public
  45. Class Procedure FreeStdEncodings;
  46. // Public stubs, they call the Do* versions
  47. // Stream
  48. Function Decode(const aInput, aOutput: TStream): Integer; overload;
  49. Function Encode(const aInput, aOutput: TStream): Integer; overload;
  50. // TBytes
  51. Function Decode(const aInput: array of Byte): TBytes; overload;
  52. Function Encode(const aInput: array of Byte): TBytes; overload;
  53. // Strings
  54. Function Decode(const aInput: UnicodeString): UnicodeString; overload;
  55. Function Encode(const aInput: UnicodeString): UnicodeString; overload;
  56. Function Decode(const aInput: RawByteString): RawByteString; overload;
  57. Function Encode(const aInput: RawByteString): RawByteString; overload;
  58. // UnicodeString to Bytes
  59. Function DecodeStringToBytes(const aInput: UnicodeString): TBytes;
  60. Function DecodeStringToBytes(const aInput: RawByteString): TBytes;
  61. Function EncodeBytesToString(const aInput: array of Byte): UnicodeString; overload;
  62. Function EncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString; overload;
  63. // Default instances
  64. class property Base64: TNetEncoding Index 1 read GetStdEncoding;
  65. class property HTML: TNetEncoding Index 2 read GetStdEncoding;
  66. class property URL: TNetEncoding Index 3 read GetStdEncoding;
  67. end;
  68. { TBase64Encoding }
  69. TBase64Encoding = class(TNetEncoding)
  70. protected
  71. Function DoDecode(const aInput, aOutput: TStream): Integer; overload; override;
  72. Function DoEncode(const aInput, aOutput: TStream): Integer; overload; override;
  73. Function DoDecode(const aInput: RawByteString): RawByteString; overload; override;
  74. Function DoEncode(const aInput: RawByteString): RawByteString; overload; override;
  75. end;
  76. TURLEncoding = class(TNetEncoding)
  77. protected
  78. Function DoEncode(const aInput: RawBytestring): RawBytestring; overload; override;
  79. Function DoDecode(const aInput: RawBytestring): RawBytestring; overload; override;
  80. end;
  81. THTMLEncoding = class(TNetEncoding)
  82. protected
  83. Function DoDecode(const aInput: UnicodeString): UnicodeString; override;
  84. Function DoDecode(const aInput: RawBytestring): RawBytestring; overload; override;
  85. Function DoEncode(const aInput: UnicodeString): UnicodeString; override;
  86. Function DoEncode(const aInput: RawBytestring): RawBytestring; overload; override;
  87. end;
  88. implementation
  89. uses base64, httpprotocol, HTMLDefs, xmlread;
  90. Resourcestring
  91. sInvalidHTMLEntity = 'Invalid HTML encoded character: %s';
  92. { TBase64Encoding }
  93. function TBase64Encoding.DoDecode(const aInput, aOutput: TStream): Integer;
  94. Var
  95. S : TBase64DecodingStream;
  96. begin
  97. S:=TBase64DecodingStream.Create(aInput,bdmMIME);
  98. try
  99. Result:=S.Size;
  100. aOutput.CopyFrom(S,Result);
  101. finally
  102. S.Free;
  103. end;
  104. end;
  105. function TBase64Encoding.DoEncode(const aInput, aOutput: TStream): Integer;
  106. Var
  107. S : TBase64DecodingStream;
  108. begin
  109. S:=TBase64DecodingStream.Create(aInput);
  110. try
  111. Result:=S.Size;
  112. aOutput.CopyFrom(S,Result);
  113. finally
  114. S.Free;
  115. end;
  116. end;
  117. function TBase64Encoding.DoDecode(const aInput: RawByteString): RawByteString;
  118. begin
  119. Result:=DecodeStringBase64(aInput,False);
  120. end;
  121. function TBase64Encoding.DoEncode(const aInput: RawByteString): RawByteString;
  122. begin
  123. Result:=EncodeStringBase64(aInput);
  124. end;
  125. { ---------------------------------------------------------------------
  126. TNetEncoding
  127. ---------------------------------------------------------------------}
  128. class procedure TNetEncoding.FreeStdEncodings;
  129. Var
  130. I : Integer;
  131. begin
  132. For I:=1 to StdCount do
  133. FreeAndNil(FStdEncodings[i]);
  134. end;
  135. class destructor TNetEncoding.Destroy;
  136. begin
  137. FreeStdEncodings;
  138. end;
  139. class Function TNetEncoding.GetStdEncoding(aIndex: Integer): TNetEncoding;
  140. begin
  141. if FStdEncodings[aIndex]=Nil then
  142. case aIndex of
  143. 1 : FStdEncodings[1]:=TBase64Encoding.Create;
  144. 2 : FStdEncodings[2]:=THTMLEncoding.Create;
  145. 3 : FStdEncodings[3]:=TURLEncoding.Create;
  146. end;
  147. Result:=FStdEncodings[aIndex];
  148. end;
  149. // Public API
  150. Function TNetEncoding.Encode(const aInput: array of Byte): TBytes;
  151. begin
  152. Result:=DoEncode(aInput);
  153. end;
  154. Function TNetEncoding.Encode(const aInput, aOutput: TStream): Integer;
  155. begin
  156. Result:=DoEncode(aInput, aOutput);
  157. end;
  158. Function TNetEncoding.Decode(const aInput: RawByteString): RawByteString; overload;
  159. begin
  160. Result:=DoDecode(aInput);
  161. end;
  162. Function TNetEncoding.Encode(const aInput: RawByteString): RawByteString; overload;
  163. begin
  164. Result:=DoEncode(aInput);
  165. end;
  166. Function TNetEncoding.Encode(const aInput: UnicodeString): UnicodeString;
  167. begin
  168. Result:=DoEncode(aInput);
  169. end;
  170. Function TNetEncoding.EncodeBytesToString(const aInput: array of Byte): UnicodeString;
  171. begin
  172. Result:=DoEncodeBytesToString(aInput);
  173. end;
  174. Function TNetEncoding.EncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString;
  175. begin
  176. Result:=DoEncodeBytesToString(aInput, Size);
  177. end;
  178. Function TNetEncoding.Decode(const aInput, aOutput: TStream): Integer;
  179. begin
  180. Result:=DoDecode(aInput,aOutput);
  181. end;
  182. Function TNetEncoding.Decode(const aInput: UnicodeString): UnicodeString;
  183. begin
  184. Result:=DoDecode(aInput);
  185. end;
  186. Function TNetEncoding.DecodeStringToBytes(const aInput: UnicodeString): TBytes;
  187. begin
  188. Result:=DoDecodeStringToBytes(aInput);
  189. end;
  190. function TNetEncoding.DecodeStringToBytes(const aInput: RawByteString): TBytes;
  191. begin
  192. Result:=DoDecodeStringToBytes(aInput);
  193. end;
  194. Function TNetEncoding.Decode(const aInput: array of Byte): TBytes;
  195. begin
  196. Result:=DoDecode(aInput);
  197. end;
  198. // Protected
  199. Function TNetEncoding.DoDecode(const aInput: UnicodeString): UnicodeString;
  200. Var
  201. U : UTF8String;
  202. begin
  203. U:=UTF8Encode(aInput);
  204. Result:=UTF8Decode(DoDecode(U));
  205. end;
  206. Function TNetEncoding.DoEncode(const aInput: UnicodeString): UnicodeString;
  207. Var
  208. U : UTF8String;
  209. begin
  210. U:=UTF8Encode(aInput);
  211. Result:=UTF8Decode(DoEncode(U));
  212. end;
  213. Function TNetEncoding.DoDecode(const aInput: array of Byte): TBytes;
  214. begin
  215. if Length(aInput)=0 then
  216. Result:=Default(TBytes)
  217. else
  218. Result:=TEncoding.UTF8.GetBytes(DoDecode(UTF8ToString(aInput)));
  219. end;
  220. Function TNetEncoding.DoDecode(const aInput, aOutput: TStream): Integer;
  221. var
  222. Src,Dest: TBytes;
  223. Len : Integer;
  224. begin
  225. Result:=0;
  226. Len:=aInput.Size;
  227. if Len<>0 then
  228. begin
  229. Src:=Default(TBytes);
  230. SetLength(Src,Len);
  231. aInput.ReadBuffer(Src,Len);
  232. Dest:=DoDecode(Src);
  233. Result:=Length(Dest);
  234. aOutput.WriteBuffer(Dest,Result);
  235. end
  236. end;
  237. Function TNetEncoding.DoDecodeStringToBytes(const aInput: UnicodeString): TBytes;
  238. begin
  239. Result:=TEncoding.UTF8.GetBytes(DoDecode(aInput));
  240. end;
  241. Function TNetEncoding.DoEncode(const aInput: array of Byte): TBytes;
  242. begin
  243. if Length(aInput)=0 then
  244. Result:=Default(TBytes)
  245. else
  246. Result:=TEncoding.UTF8.GetBytes(DoEncode(UTF8ToString(aInput)))
  247. end;
  248. function TNetEncoding.DoDecodeStringToBytes(const aInput: RawByteString): TBytes;
  249. Var
  250. U : RawByteString;
  251. begin
  252. U:=AInput;
  253. UniqueString(U);
  254. SetCodePage(U,CP_UTF8,True);
  255. Result:=DoDecodeStringToBytes(UTF8Decode(U));
  256. end;
  257. Function TNetEncoding.DoEncodeBytesToString(const aInput: array of Byte): UnicodeString;
  258. begin
  259. Result:=TEncoding.UTF8.GetString(DoEncode(aInput));
  260. end;
  261. Function TNetEncoding.DoEncodeBytesToString(const aInput: Pointer; Size: Integer): UnicodeString;
  262. Var
  263. Src : TBytes;
  264. begin
  265. Src:=Default(TBytes);
  266. SetLength(Src,Size);
  267. Move(aInput^,Src[0],Size);
  268. Result:=DoEncodeBytesToString(Src);
  269. end;
  270. Function TNetEncoding.DoEncode(const aInput, aOutput: TStream): Integer;
  271. var
  272. InBuf: array of Byte;
  273. OutBuf: TBytes;
  274. begin
  275. if aInput.Size > 0 then
  276. begin
  277. SetLength(InBuf, aInput.Size);
  278. aInput.Read(InBuf[0], aInput.Size);
  279. OutBuf:=DoEncode(InBuf);
  280. Result:=Length(OutBuf);
  281. aOutput.Write(OutBuf, Result);
  282. SetLength(InBuf, 0);
  283. end
  284. else
  285. Result:=0;
  286. end;
  287. { TBase64Encoding }
  288. { TURLEncoding }
  289. Function TURLEncoding.DoDecode(const aInput: RawByteString): RawByteString;
  290. begin
  291. Result:=HTTPDecode(aInput);
  292. end;
  293. Function TURLEncoding.DoEncode(const aInput: RawByteString): RawByteString;
  294. begin
  295. Result:=HTTPEncode(aInput)
  296. end;
  297. { THTMLEncoding }
  298. Function THTMLEncoding.DoEncode(const aInput: UnicodeString): UnicodeString;
  299. Var
  300. S : UTF8String;
  301. begin
  302. S:=UTF8Encode(aInput);
  303. Result:=UTF8Decode(DoEncode(S));
  304. end;
  305. Function THTMLEncoding.DoEncode(const aInput: RawByteString): RawByteString;
  306. var
  307. Src, Curr, OrigDest,Dest : PAnsiChar;
  308. Procedure CopyData(S : String);
  309. Var
  310. len : integer;
  311. begin
  312. Len:=(Curr-Src);
  313. if Len>0 then
  314. Move(Src^,Dest^,Len);
  315. Src:=Curr;
  316. Inc(Src);
  317. inc(Dest,Len);
  318. Len:=Length(S);
  319. if Len>0 then
  320. Move(S[1],Dest^,Len);
  321. inc(Dest,Len);
  322. end;
  323. begin
  324. SetLength(Result,Length(aInput)*6);
  325. if Length(aInput)=0 then exit;
  326. Src:=PAnsiChar(aInput);
  327. Curr:=Src;
  328. OrigDest:=PAnsiChar(Result);
  329. Dest:=OrigDest;
  330. // Convert: &, <, >, "
  331. while Curr^<>#0 do
  332. begin
  333. case Curr^ of
  334. '&': CopyData('&amp;');
  335. '<': CopyData('&lt;');
  336. '>': CopyData('&gt;');
  337. '"': CopyData('&quot;');
  338. end;
  339. Inc(Curr);
  340. end;
  341. CopyData('');
  342. SetLength(Result,Dest-OrigDest);
  343. end;
  344. Function THTMLEncoding.DoDecode(const aInput: RawByteString): RawByteString;
  345. Var
  346. S : RawByteString;
  347. begin
  348. S:=aInput;
  349. UniqueString(S);
  350. SetCodePage(S,CP_UTF8,true);
  351. Result:=UTF8Encode(DoDecode(UTF8Decode(S)));
  352. end;
  353. Function THTMLEncoding.DoDecode(const aInput: UnicodeString): UnicodeString;
  354. var
  355. Src, Curr, Dest : PWideChar;
  356. Procedure CopyData(S : UnicodeString);
  357. Var
  358. len : integer;
  359. begin
  360. Len:=(Curr-Src);
  361. if Len>0 then
  362. begin
  363. Move(Src^,Dest^,Len*Sizeof(UnicodeChar));
  364. inc(Dest,Len);
  365. end;
  366. Len:=Length(S);
  367. if Len>0 then
  368. begin
  369. Move(S[1],Dest^,Len*Sizeof(UnicodeChar));
  370. inc(Dest,Len);
  371. end;
  372. end;
  373. Var
  374. Len : Integer;
  375. U : UnicodeChar;
  376. US : Unicodestring;
  377. Ent,OrigDest : PWideChar;
  378. begin
  379. SetLength(Result, Length(aInput));
  380. if Length(Result)=0 then exit;
  381. Src:=PWideChar(aInput);
  382. OrigDest:=PWideChar(Result);
  383. Dest:=OrigDest;
  384. Curr:=Src;
  385. while Curr^ <> #0 do
  386. begin
  387. If Curr^='&' then
  388. begin
  389. CopyData('');
  390. Src:=Curr;
  391. Ent:=Curr;
  392. While Not (Ent^ in [#0,';']) do
  393. Inc(Ent);
  394. Len:=Ent-Curr-1;
  395. SetLength(US,Len);
  396. if Len>0 then
  397. Move(Curr[1],US[1],Len*SizeOf(UnicodeChar));
  398. if not ResolveHTMLEntityReference(US,U) then
  399. raise EConvertError.CreateFmt(sInvalidHTMLEntity,[US]);
  400. CopyData(U);
  401. Curr:=Ent;
  402. Src:=Curr;
  403. Inc(Src);
  404. end;
  405. Inc(Curr);
  406. end;
  407. CopyData('');
  408. SetLength(Result,Dest-OrigDest);
  409. end;
  410. end.