htmwrite.pp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559
  1. {
  2. This file is part of the Free Component Library
  3. HTML writing routines
  4. Copyright (c) 2000-2002 by
  5. Areca Systems GmbH / Sebastian Guenther, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$IFNDEF FPC_DOTTEDUNITS}
  13. unit HTMWrite;
  14. {$ENDIF FPC_DOTTEDUNITS}
  15. {$MODE objfpc}
  16. {$H+}
  17. interface
  18. {$IFDEF FPC_DOTTEDUNITS}
  19. uses System.Classes, Xml.Dom;
  20. {$ELSE FPC_DOTTEDUNITS}
  21. uses Classes, DOM;
  22. {$ENDIF FPC_DOTTEDUNITS}
  23. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  24. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  25. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  26. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  27. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  28. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  29. // ===================================================================
  30. implementation
  31. {$IFDEF FPC_DOTTEDUNITS}
  32. uses System.SysUtils, Html.Defs, Xml.Utils;
  33. {$ELSE FPC_DOTTEDUNITS}
  34. uses SysUtils, HTMLDefs, xmlutils;
  35. {$ENDIF FPC_DOTTEDUNITS}
  36. type
  37. TSpecialCharCallback = procedure(c: WideChar) of object;
  38. THTMLWriter = class(TObject)
  39. private
  40. FStream: TStream;
  41. FInsideTextNode: Boolean;
  42. FInsideScript: Boolean;
  43. FBuffer: PAnsiChar;
  44. FBufPos: PAnsiChar;
  45. FCapacity: Integer;
  46. FLineBreak: string;
  47. procedure wrtChars(Src: PWideChar; Length: Integer);
  48. procedure wrtStr(const ws: XMLString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  49. procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  50. procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  51. procedure wrtQuotedLiteral(const ws: XMLString);
  52. procedure ConvWrite(const s: XMLString; const SpecialChars: TSetOfChar;
  53. const SpecialCharCallback: TSpecialCharCallback);
  54. procedure AttrSpecialCharCallback(c: WideChar);
  55. procedure TextNodeSpecialCharCallback(c: WideChar);
  56. protected
  57. procedure WriteNode(Node: TDOMNode);
  58. procedure VisitDocument(Node: TDOMNode);
  59. procedure VisitElement(Node: TDOMNode);
  60. procedure VisitText(Node: TDOMNode);
  61. procedure VisitCDATA(Node: TDOMNode);
  62. procedure VisitComment(Node: TDOMNode);
  63. procedure VisitFragment(Node: TDOMNode);
  64. procedure VisitAttribute(Node: TDOMNode);
  65. procedure VisitEntityRef(Node: TDOMNode);
  66. procedure VisitDocumentType(Node: TDOMNode);
  67. procedure VisitPI(Node: TDOMNode);
  68. public
  69. constructor Create(AStream: TStream; ACapacity : Cardinal = 4096);
  70. destructor Destroy; override;
  71. end;
  72. TTextStream = class(TStream)
  73. Private
  74. F : ^Text;
  75. Public
  76. constructor Create(var AFile: Text);
  77. function Write(Const Buffer; Count: Longint): Longint; override;
  78. end;
  79. { ---------------------------------------------------------------------
  80. TTextStream
  81. ---------------------------------------------------------------------}
  82. constructor TTextStream.Create(var AFile: Text);
  83. begin
  84. inherited Create;
  85. f := @AFile;
  86. end;
  87. function TTextStream.Write(const Buffer; Count: Longint): Longint;
  88. var
  89. s: string;
  90. begin
  91. if Count>0 then
  92. begin
  93. SetString(s, PAnsiChar(@Buffer), Count);
  94. system.Write(f^, s);
  95. end;
  96. Result := Count;
  97. end;
  98. { ---------------------------------------------------------------------
  99. THTMLWriter
  100. ---------------------------------------------------------------------}
  101. constructor THTMLWriter.Create(AStream: TStream; ACapacity : Cardinal = 4096);
  102. begin
  103. inherited Create;
  104. FStream := AStream;
  105. // some overhead - always be able to write at least one extra UCS4
  106. FCapacity := ACapacity;
  107. FBuffer := AllocMem(FCapacity+32);
  108. FBufPos := FBuffer;
  109. // Later on, this may be put under user control
  110. // for now, take OS setting
  111. FLineBreak := sLineBreak;
  112. end;
  113. destructor THTMLWriter.Destroy;
  114. begin
  115. if FBufPos > FBuffer then
  116. FStream.write(FBuffer^, FBufPos-FBuffer);
  117. FreeMem(FBuffer);
  118. inherited Destroy;
  119. end;
  120. procedure THTMLWriter.wrtChars(Src: PWideChar; Length: Integer);
  121. var
  122. pb: PAnsiChar;
  123. wc: Cardinal;
  124. SrcEnd: PWideChar;
  125. begin
  126. pb := FBufPos;
  127. SrcEnd := Src + Length;
  128. while Src < SrcEnd do
  129. begin
  130. if pb >= @FBuffer[FCapacity] then
  131. begin
  132. FStream.write(FBuffer^, FCapacity);
  133. Dec(pb, FCapacity);
  134. if pb > FBuffer then
  135. Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
  136. end;
  137. wc := Cardinal(Src^); Inc(Src);
  138. case wc of
  139. $0A: pb := StrECopy(pb, PAnsiChar(FLineBreak));
  140. $0D: begin
  141. pb := StrECopy(pb, PAnsiChar(FLineBreak));
  142. if (Src < SrcEnd) and (Src^ = #$0A) then
  143. Inc(Src);
  144. end;
  145. 0..$09, $0B, $0C, $0E..$7F: begin
  146. pb^ := AnsiChar(wc); Inc(pb);
  147. end;
  148. $80..$7FF: begin
  149. pb^ := AnsiChar($C0 or (wc shr 6));
  150. pb[1] := AnsiChar($80 or (wc and $3F));
  151. Inc(pb,2);
  152. end;
  153. $D800..$DBFF: begin
  154. if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
  155. begin
  156. wc := ((LongInt(wc) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
  157. Inc(Src);
  158. pb^ := AnsiChar($F0 or (wc shr 18));
  159. pb[1] := AnsiChar($80 or ((wc shr 12) and $3F));
  160. pb[2] := AnsiChar($80 or ((wc shr 6) and $3F));
  161. pb[3] := AnsiChar($80 or (wc and $3F));
  162. Inc(pb,4);
  163. end
  164. else
  165. raise EConvertError.Create('High surrogate without low one');
  166. end;
  167. $DC00..$DFFF:
  168. raise EConvertError.Create('Low surrogate without high one');
  169. else // $800 >= wc > $FFFF, excluding surrogates
  170. begin
  171. pb^ := AnsiChar($E0 or (wc shr 12));
  172. pb[1] := AnsiChar($80 or ((wc shr 6) and $3F));
  173. pb[2] := AnsiChar($80 or (wc and $3F));
  174. Inc(pb,3);
  175. end;
  176. end;
  177. end;
  178. FBufPos := pb;
  179. end;
  180. procedure THTMLWriter.wrtStr(const ws: XMLString); { inline }
  181. begin
  182. wrtChars(PWideChar(ws), Length(ws));
  183. end;
  184. { No checks here - buffer always has 32 extra bytes }
  185. procedure THTMLWriter.wrtChr(c: WideChar); { inline }
  186. begin
  187. FBufPos^ := AnsiChar(ord(c));
  188. Inc(FBufPos);
  189. end;
  190. procedure THTMLWriter.wrtIndent; { inline }
  191. begin
  192. wrtChars(#10, 1);
  193. end;
  194. procedure THTMLWriter.wrtQuotedLiteral(const ws: XMLString);
  195. var
  196. Quote: WideChar;
  197. begin
  198. // TODO: need to check if the string also contains single quote
  199. // both quotes present is a error
  200. if Pos('"', ws) > 0 then
  201. Quote := ''''
  202. else
  203. Quote := '"';
  204. wrtChr(Quote);
  205. wrtStr(ws);
  206. wrtChr(Quote);
  207. end;
  208. const
  209. AttrSpecialChars = ['<', '"', '&'];
  210. TextSpecialChars = ['<', '>', '&'];
  211. procedure THTMLWriter.ConvWrite(const s: XMLString; const SpecialChars: TSetOfChar;
  212. const SpecialCharCallback: TSpecialCharCallback);
  213. var
  214. StartPos, EndPos: Integer;
  215. begin
  216. StartPos := 1;
  217. EndPos := 1;
  218. while EndPos <= Length(s) do
  219. begin
  220. if (s[EndPos] < #255) and (AnsiChar(ord(s[EndPos])) in SpecialChars) then
  221. begin
  222. wrtChars(@s[StartPos], EndPos - StartPos);
  223. SpecialCharCallback(s[EndPos]);
  224. StartPos := EndPos + 1;
  225. end;
  226. Inc(EndPos);
  227. end;
  228. if StartPos <= length(s) then
  229. wrtChars(@s[StartPos], EndPos - StartPos);
  230. end;
  231. const
  232. QuotStr = '&quot;';
  233. AmpStr = '&amp;';
  234. ltStr = '&lt;';
  235. gtStr = '&gt;';
  236. procedure THTMLWriter.AttrSpecialCharCallback(c: WideChar);
  237. begin
  238. case c of
  239. '"': wrtStr(QuotStr);
  240. '&': wrtStr(AmpStr);
  241. '<': wrtStr(ltStr);
  242. else
  243. wrtChr(c);
  244. end;
  245. end;
  246. procedure THTMLWriter.TextnodeSpecialCharCallback(c: WideChar);
  247. begin
  248. case c of
  249. '<': wrtStr(ltStr);
  250. '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
  251. '&': wrtStr(AmpStr);
  252. else
  253. wrtChr(c);
  254. end;
  255. end;
  256. procedure THTMLWriter.WriteNode(node: TDOMNode);
  257. begin
  258. case node.NodeType of
  259. ELEMENT_NODE: VisitElement(node);
  260. ATTRIBUTE_NODE: VisitAttribute(node);
  261. TEXT_NODE: VisitText(node);
  262. CDATA_SECTION_NODE: VisitCDATA(node);
  263. ENTITY_REFERENCE_NODE: VisitEntityRef(node);
  264. PROCESSING_INSTRUCTION_NODE: VisitPI(node);
  265. COMMENT_NODE: VisitComment(node);
  266. DOCUMENT_NODE: VisitDocument(node);
  267. DOCUMENT_TYPE_NODE: VisitDocumentType(node);
  268. ENTITY_NODE,
  269. DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
  270. end;
  271. end;
  272. procedure THTMLWriter.VisitElement(node: TDOMNode);
  273. var
  274. i: Integer;
  275. child: TDOMNode;
  276. SavedInsideTextNode: Boolean;
  277. s: string;
  278. ElFlags: THTMLElementFlags;
  279. j: THTMLElementTag;
  280. meta: Boolean;
  281. begin
  282. if not FInsideTextNode then
  283. wrtIndent;
  284. meta := False;
  285. s := LowerCase(node.NodeName);
  286. ElFlags := [efSubelementContent, efPCDATAContent]; // default flags
  287. for j := Low(THTMLElementTag) to High(THTMLElementTag) do
  288. if HTMLElementProps[J].Name = s then
  289. begin
  290. ElFlags := HTMLElementProps[j].Flags;
  291. if j = etMeta then
  292. meta := True;
  293. FInsideScript := (j=etScript) or (j=etStyle);
  294. break;
  295. end;
  296. wrtChr('<');
  297. wrtStr(TDOMElement(node).TagName);
  298. { Force charset label to utf-8, because it is the encoding we actually write }
  299. if meta then
  300. begin
  301. s := TDOMElement(node).GetAttribute('http-equiv');
  302. if SameText(s, 'content-type') then
  303. begin
  304. wrtStr(' content="text/html; charset=utf-8" http-equiv="Content-Type">');
  305. Exit;
  306. end;
  307. end;
  308. if node.HasAttributes then
  309. for i := 0 to node.Attributes.Length - 1 do
  310. begin
  311. child := node.Attributes.Item[i];
  312. VisitAttribute(child);
  313. end;
  314. wrtChr('>');
  315. Child := node.FirstChild;
  316. if Child <> nil then
  317. begin
  318. SavedInsideTextNode := FInsideTextNode;
  319. FInsideTextNode := efPCDATAContent in ElFlags;
  320. repeat
  321. WriteNode(Child);
  322. Child := Child.NextSibling;
  323. until Child = nil;
  324. FInsideTextNode := SavedInsideTextNode;
  325. end;
  326. if (not FInsideTextNode) and not (efPCDATAContent in ElFlags) then
  327. wrtIndent;
  328. if ElFlags * [efSubelementContent, efPCDATAContent] <> [] then
  329. begin
  330. wrtChars('</', 2);
  331. wrtStr(TDOMElement(Node).TagName);
  332. wrtChr('>');
  333. end;
  334. end;
  335. procedure THTMLWriter.VisitText(node: TDOMNode);
  336. begin
  337. if FInsideScript then
  338. WrtStr(TDOMCharacterData(node).Data)
  339. else
  340. ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback);
  341. end;
  342. procedure THTMLWriter.VisitCDATA(node: TDOMNode);
  343. begin
  344. if not FInsideTextNode then
  345. wrtIndent;
  346. wrtChars('<![CDATA[', 9);
  347. wrtStr(TDOMCharacterData(node).Data);
  348. wrtChars(']]>', 3);
  349. end;
  350. procedure THTMLWriter.VisitEntityRef(node: TDOMNode);
  351. begin
  352. wrtChr('&');
  353. wrtStr(node.NodeName);
  354. wrtChr(';');
  355. end;
  356. procedure THTMLWriter.VisitPI(node: TDOMNode);
  357. begin
  358. if not FInsideTextNode then wrtIndent;
  359. wrtStr('<?');
  360. wrtStr(TDOMProcessingInstruction(node).Target);
  361. wrtChr(' ');
  362. wrtStr(TDOMProcessingInstruction(node).Data);
  363. wrtStr('?>');
  364. end;
  365. procedure THTMLWriter.VisitComment(node: TDOMNode);
  366. begin
  367. if not FInsideTextNode then wrtIndent;
  368. wrtChars('<!--', 4);
  369. wrtStr(TDOMCharacterData(node).Data);
  370. wrtChars('-->', 3);
  371. end;
  372. procedure THTMLWriter.VisitDocument(node: TDOMNode);
  373. var
  374. child: TDOMNode;
  375. begin
  376. child := node.FirstChild;
  377. while Assigned(Child) do
  378. begin
  379. WriteNode(Child);
  380. Child := Child.NextSibling;
  381. end;
  382. wrtChars(#10, 1);
  383. end;
  384. procedure THTMLWriter.VisitAttribute(Node: TDOMNode);
  385. var
  386. Child: TDOMNode;
  387. begin
  388. wrtChr(' ');
  389. wrtStr(TDOMAttr(Node).Name);
  390. wrtChars('="', 2);
  391. Child := Node.FirstChild;
  392. while Assigned(Child) do
  393. begin
  394. case Child.NodeType of
  395. ENTITY_REFERENCE_NODE:
  396. VisitEntityRef(Child);
  397. TEXT_NODE:
  398. ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
  399. end;
  400. Child := Child.NextSibling;
  401. end;
  402. wrtChr('"');
  403. end;
  404. procedure THTMLWriter.VisitDocumentType(Node: TDOMNode);
  405. begin
  406. wrtStr('<!DOCTYPE ');
  407. wrtStr(Node.NodeName);
  408. wrtChr(' ');
  409. with TDOMDocumentType(Node) do
  410. begin
  411. if PublicID <> '' then
  412. begin
  413. wrtStr('PUBLIC ');
  414. wrtQuotedLiteral(PublicID);
  415. if SystemID <> '' then
  416. begin
  417. wrtChr(' ');
  418. wrtQuotedLiteral(SystemID);
  419. end;
  420. end
  421. else if SystemID <> '' then
  422. begin
  423. wrtStr('SYSTEM ');
  424. wrtQuotedLiteral(SystemID);
  425. end;
  426. end;
  427. wrtChr('>');
  428. end;
  429. procedure THTMLWriter.VisitFragment(Node: TDOMNode);
  430. var
  431. Child: TDOMNode;
  432. begin
  433. // Fragment itself should not be written, only its children should...
  434. Child := Node.FirstChild;
  435. while Assigned(Child) do
  436. begin
  437. WriteNode(Child);
  438. Child := Child.NextSibling;
  439. end;
  440. end;
  441. // -------------------------------------------------------------------
  442. // Interface implementation
  443. // -------------------------------------------------------------------
  444. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  445. var
  446. fs: TFileStream;
  447. begin
  448. fs := TFileStream.Create(AFileName, fmCreate);
  449. try
  450. WriteHTMLFile(doc, fs);
  451. finally
  452. fs.Free;
  453. end;
  454. end;
  455. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  456. var
  457. s: TStream;
  458. begin
  459. s := TTextStream.Create(AFile);
  460. try
  461. with THTMLWriter.Create(s) do
  462. try
  463. WriteNode(doc);
  464. finally
  465. Free;
  466. end;
  467. finally
  468. s.Free;
  469. end;
  470. end;
  471. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  472. begin
  473. with THTMLWriter.Create(AStream) do
  474. try
  475. WriteNode(doc);
  476. finally
  477. Free;
  478. end;
  479. end;
  480. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  481. begin
  482. WriteHTMLFile(TXMLDocument(Element), AFileName);
  483. end;
  484. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  485. begin
  486. WriteHTMLFile(TXMLDocument(Element), AFile);
  487. end;
  488. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  489. begin
  490. WriteHTMLFile(TXMLDocument(Element), AStream);
  491. end;
  492. end.