htmwrite.pp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. {
  2. This file is part of the Free Component Library
  3. HTML writing routines
  4. Copyright (c) 2000-2002 by
  5. Areca Systems GmbH / Sebastian Guenther, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit HTMWrite;
  13. {$MODE objfpc}
  14. {$H+}
  15. interface
  16. uses Classes, DOM;
  17. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  18. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  19. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  20. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  21. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  22. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  23. // ===================================================================
  24. implementation
  25. uses SysUtils, HTMLDefs;
  26. type
  27. TSpecialCharCallback = procedure(c: WideChar) of object;
  28. THTMLWriter = class(TObject)
  29. private
  30. FStream: TStream;
  31. FInsideTextNode: Boolean;
  32. FBuffer: PChar;
  33. FBufPos: PChar;
  34. FCapacity: Integer;
  35. FLineBreak: string;
  36. procedure wrtChars(Src: PWideChar; Length: Integer);
  37. procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  38. procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  39. procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  40. procedure wrtQuotedLiteral(const ws: WideString);
  41. procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
  42. const SpecialCharCallback: TSpecialCharCallback);
  43. procedure AttrSpecialCharCallback(c: WideChar);
  44. procedure TextNodeSpecialCharCallback(c: WideChar);
  45. protected
  46. procedure WriteNode(Node: TDOMNode);
  47. procedure VisitDocument(Node: TDOMNode);
  48. procedure VisitElement(Node: TDOMNode);
  49. procedure VisitText(Node: TDOMNode);
  50. procedure VisitCDATA(Node: TDOMNode);
  51. procedure VisitComment(Node: TDOMNode);
  52. procedure VisitFragment(Node: TDOMNode);
  53. procedure VisitAttribute(Node: TDOMNode);
  54. procedure VisitEntityRef(Node: TDOMNode);
  55. procedure VisitDocumentType(Node: TDOMNode);
  56. procedure VisitPI(Node: TDOMNode);
  57. public
  58. constructor Create(AStream: TStream);
  59. destructor Destroy; override;
  60. end;
  61. TTextStream = class(TStream)
  62. Private
  63. F : ^Text;
  64. Public
  65. constructor Create(var AFile: Text);
  66. function Write(Const Buffer; Count: Longint): Longint; override;
  67. end;
  68. { ---------------------------------------------------------------------
  69. TTextStream
  70. ---------------------------------------------------------------------}
  71. constructor TTextStream.Create(var AFile: Text);
  72. begin
  73. inherited Create;
  74. f := @AFile;
  75. end;
  76. function TTextStream.Write(const Buffer; Count: Longint): Longint;
  77. var
  78. s: string;
  79. begin
  80. if Count>0 then
  81. begin
  82. SetString(s, PChar(@Buffer), Count);
  83. system.Write(f^, s);
  84. end;
  85. Result := Count;
  86. end;
  87. { ---------------------------------------------------------------------
  88. THTMLWriter
  89. ---------------------------------------------------------------------}
  90. constructor THTMLWriter.Create(AStream: TStream);
  91. begin
  92. inherited Create;
  93. FStream := AStream;
  94. // some overhead - always be able to write at least one extra UCS4
  95. FBuffer := AllocMem(512+32);
  96. FBufPos := FBuffer;
  97. FCapacity := 512;
  98. // Later on, this may be put under user control
  99. // for now, take OS setting
  100. FLineBreak := sLineBreak;
  101. end;
  102. destructor THTMLWriter.Destroy;
  103. begin
  104. if FBufPos > FBuffer then
  105. FStream.write(FBuffer^, FBufPos-FBuffer);
  106. FreeMem(FBuffer);
  107. inherited Destroy;
  108. end;
  109. procedure THTMLWriter.wrtChars(Src: PWideChar; Length: Integer);
  110. var
  111. pb: PChar;
  112. wc: Cardinal;
  113. SrcEnd: PWideChar;
  114. begin
  115. pb := FBufPos;
  116. SrcEnd := Src + Length;
  117. while Src < SrcEnd do
  118. begin
  119. if pb >= @FBuffer[FCapacity] then
  120. begin
  121. FStream.write(FBuffer^, FCapacity);
  122. Dec(pb, FCapacity);
  123. if pb > FBuffer then
  124. Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
  125. end;
  126. wc := Cardinal(Src^); Inc(Src);
  127. case wc of
  128. $0A: pb := StrECopy(pb, PChar(FLineBreak));
  129. $0D: begin
  130. pb := StrECopy(pb, PChar(FLineBreak));
  131. if (Src < SrcEnd) and (Src^ = #$0A) then
  132. Inc(Src);
  133. end;
  134. 0..$09, $0B, $0C, $0E..$7F: begin
  135. pb^ := char(wc); Inc(pb);
  136. end;
  137. $80..$7FF: begin
  138. pb^ := Char($C0 or (wc shr 6));
  139. pb[1] := Char($80 or (wc and $3F));
  140. Inc(pb,2);
  141. end;
  142. $D800..$DBFF: begin
  143. if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
  144. begin
  145. wc := ((LongInt(wc) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
  146. Inc(Src);
  147. pb^ := Char($F0 or (wc shr 18));
  148. pb[1] := Char($80 or ((wc shr 12) and $3F));
  149. pb[2] := Char($80 or ((wc shr 6) and $3F));
  150. pb[3] := Char($80 or (wc and $3F));
  151. Inc(pb,4);
  152. end
  153. else
  154. raise EConvertError.Create('High surrogate without low one');
  155. end;
  156. $DC00..$DFFF:
  157. raise EConvertError.Create('Low surrogate without high one');
  158. else // $800 >= wc > $FFFF, excluding surrogates
  159. begin
  160. pb^ := Char($E0 or (wc shr 12));
  161. pb[1] := Char($80 or ((wc shr 6) and $3F));
  162. pb[2] := Char($80 or (wc and $3F));
  163. Inc(pb,3);
  164. end;
  165. end;
  166. end;
  167. FBufPos := pb;
  168. end;
  169. procedure THTMLWriter.wrtStr(const ws: WideString); { inline }
  170. begin
  171. wrtChars(PWideChar(ws), Length(ws));
  172. end;
  173. { No checks here - buffer always has 32 extra bytes }
  174. procedure THTMLWriter.wrtChr(c: WideChar); { inline }
  175. begin
  176. FBufPos^ := char(ord(c));
  177. Inc(FBufPos);
  178. end;
  179. procedure THTMLWriter.wrtIndent; { inline }
  180. begin
  181. wrtChars(#10, 1);
  182. end;
  183. procedure THTMLWriter.wrtQuotedLiteral(const ws: WideString);
  184. var
  185. Quote: WideChar;
  186. begin
  187. // TODO: need to check if the string also contains single quote
  188. // both quotes present is a error
  189. if Pos('"', ws) > 0 then
  190. Quote := ''''
  191. else
  192. Quote := '"';
  193. wrtChr(Quote);
  194. wrtStr(ws);
  195. wrtChr(Quote);
  196. end;
  197. const
  198. AttrSpecialChars = ['<', '"', '&'];
  199. TextSpecialChars = ['<', '>', '&'];
  200. procedure THTMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
  201. const SpecialCharCallback: TSpecialCharCallback);
  202. var
  203. StartPos, EndPos: Integer;
  204. begin
  205. StartPos := 1;
  206. EndPos := 1;
  207. while EndPos <= Length(s) do
  208. begin
  209. if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
  210. begin
  211. wrtChars(@s[StartPos], EndPos - StartPos);
  212. SpecialCharCallback(s[EndPos]);
  213. StartPos := EndPos + 1;
  214. end;
  215. Inc(EndPos);
  216. end;
  217. if StartPos <= length(s) then
  218. wrtChars(@s[StartPos], EndPos - StartPos);
  219. end;
  220. const
  221. QuotStr = '&quot;';
  222. AmpStr = '&amp;';
  223. ltStr = '&lt;';
  224. gtStr = '&gt;';
  225. procedure THTMLWriter.AttrSpecialCharCallback(c: WideChar);
  226. begin
  227. case c of
  228. '"': wrtStr(QuotStr);
  229. '&': wrtStr(AmpStr);
  230. '<': wrtStr(ltStr);
  231. else
  232. wrtChr(c);
  233. end;
  234. end;
  235. procedure THTMLWriter.TextnodeSpecialCharCallback(c: WideChar);
  236. begin
  237. case c of
  238. '<': wrtStr(ltStr);
  239. '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
  240. '&': wrtStr(AmpStr);
  241. else
  242. wrtChr(c);
  243. end;
  244. end;
  245. procedure THTMLWriter.WriteNode(node: TDOMNode);
  246. begin
  247. case node.NodeType of
  248. ELEMENT_NODE: VisitElement(node);
  249. ATTRIBUTE_NODE: VisitAttribute(node);
  250. TEXT_NODE: VisitText(node);
  251. CDATA_SECTION_NODE: VisitCDATA(node);
  252. ENTITY_REFERENCE_NODE: VisitEntityRef(node);
  253. PROCESSING_INSTRUCTION_NODE: VisitPI(node);
  254. COMMENT_NODE: VisitComment(node);
  255. DOCUMENT_NODE: VisitDocument(node);
  256. DOCUMENT_TYPE_NODE: VisitDocumentType(node);
  257. ENTITY_NODE,
  258. DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
  259. end;
  260. end;
  261. procedure THTMLWriter.VisitElement(node: TDOMNode);
  262. var
  263. i: Integer;
  264. child: TDOMNode;
  265. SavedInsideTextNode: Boolean;
  266. s: string;
  267. ElFlags: THTMLElementFlags;
  268. j: THTMLElementTag;
  269. meta: Boolean;
  270. begin
  271. if not FInsideTextNode then
  272. wrtIndent;
  273. meta := False;
  274. s := LowerCase(node.NodeName);
  275. ElFlags := [efSubelementContent, efPCDATAContent]; // default flags
  276. for j := Low(THTMLElementTag) to High(THTMLElementTag) do
  277. if HTMLElementProps[J].Name = s then
  278. begin
  279. ElFlags := HTMLElementProps[j].Flags;
  280. if j = etMeta then
  281. meta := True;
  282. break;
  283. end;
  284. wrtChr('<');
  285. wrtStr(TDOMElement(node).TagName);
  286. { Force charset label to utf-8, because it is the encoding we actually write }
  287. if meta then
  288. begin
  289. s := TDOMElement(node).GetAttribute('http-equiv');
  290. if SameText(s, 'content-type') then
  291. begin
  292. wrtStr(' content="text/html; charset=utf-8" http-equiv="Content-Type">');
  293. Exit;
  294. end;
  295. end;
  296. if node.HasAttributes then
  297. for i := 0 to node.Attributes.Length - 1 do
  298. begin
  299. child := node.Attributes.Item[i];
  300. VisitAttribute(child);
  301. end;
  302. wrtChr('>');
  303. Child := node.FirstChild;
  304. if Child <> nil then
  305. begin
  306. SavedInsideTextNode := FInsideTextNode;
  307. FInsideTextNode := efPCDATAContent in ElFlags;
  308. repeat
  309. WriteNode(Child);
  310. Child := Child.NextSibling;
  311. until Child = nil;
  312. FInsideTextNode := SavedInsideTextNode;
  313. end;
  314. if (not FInsideTextNode) and not (efPCDATAContent in ElFlags) then
  315. wrtIndent;
  316. if ElFlags * [efSubelementContent, efPCDATAContent] <> [] then
  317. begin
  318. wrtChars('</', 2);
  319. wrtStr(TDOMElement(Node).TagName);
  320. wrtChr('>');
  321. end;
  322. end;
  323. procedure THTMLWriter.VisitText(node: TDOMNode);
  324. begin
  325. ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback);
  326. end;
  327. procedure THTMLWriter.VisitCDATA(node: TDOMNode);
  328. begin
  329. if not FInsideTextNode then
  330. wrtIndent;
  331. wrtChars('<![CDATA[', 9);
  332. wrtStr(TDOMCharacterData(node).Data);
  333. wrtChars(']]>', 3);
  334. end;
  335. procedure THTMLWriter.VisitEntityRef(node: TDOMNode);
  336. begin
  337. wrtChr('&');
  338. wrtStr(node.NodeName);
  339. wrtChr(';');
  340. end;
  341. procedure THTMLWriter.VisitPI(node: TDOMNode);
  342. begin
  343. if not FInsideTextNode then wrtIndent;
  344. wrtStr('<?');
  345. wrtStr(TDOMProcessingInstruction(node).Target);
  346. wrtChr(' ');
  347. wrtStr(TDOMProcessingInstruction(node).Data);
  348. wrtStr('?>');
  349. end;
  350. procedure THTMLWriter.VisitComment(node: TDOMNode);
  351. begin
  352. if not FInsideTextNode then wrtIndent;
  353. wrtChars('<!--', 4);
  354. wrtStr(TDOMCharacterData(node).Data);
  355. wrtChars('-->', 3);
  356. end;
  357. procedure THTMLWriter.VisitDocument(node: TDOMNode);
  358. var
  359. child: TDOMNode;
  360. begin
  361. child := node.FirstChild;
  362. while Assigned(Child) do
  363. begin
  364. WriteNode(Child);
  365. Child := Child.NextSibling;
  366. end;
  367. wrtChars(#10, 1);
  368. end;
  369. procedure THTMLWriter.VisitAttribute(Node: TDOMNode);
  370. var
  371. Child: TDOMNode;
  372. begin
  373. wrtChr(' ');
  374. wrtStr(TDOMAttr(Node).Name);
  375. wrtChars('="', 2);
  376. Child := Node.FirstChild;
  377. while Assigned(Child) do
  378. begin
  379. case Child.NodeType of
  380. ENTITY_REFERENCE_NODE:
  381. VisitEntityRef(Child);
  382. TEXT_NODE:
  383. ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
  384. end;
  385. Child := Child.NextSibling;
  386. end;
  387. wrtChr('"');
  388. end;
  389. procedure THTMLWriter.VisitDocumentType(Node: TDOMNode);
  390. begin
  391. wrtStr('<!DOCTYPE ');
  392. wrtStr(Node.NodeName);
  393. wrtChr(' ');
  394. with TDOMDocumentType(Node) do
  395. begin
  396. if PublicID <> '' then
  397. begin
  398. wrtStr('PUBLIC ');
  399. wrtQuotedLiteral(PublicID);
  400. if SystemID <> '' then
  401. begin
  402. wrtChr(' ');
  403. wrtQuotedLiteral(SystemID);
  404. end;
  405. end
  406. else if SystemID <> '' then
  407. begin
  408. wrtStr('SYSTEM ');
  409. wrtQuotedLiteral(SystemID);
  410. end;
  411. end;
  412. wrtChr('>');
  413. end;
  414. procedure THTMLWriter.VisitFragment(Node: TDOMNode);
  415. var
  416. Child: TDOMNode;
  417. begin
  418. // Fragment itself should not be written, only its children should...
  419. Child := Node.FirstChild;
  420. while Assigned(Child) do
  421. begin
  422. WriteNode(Child);
  423. Child := Child.NextSibling;
  424. end;
  425. end;
  426. // -------------------------------------------------------------------
  427. // Interface implementation
  428. // -------------------------------------------------------------------
  429. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  430. var
  431. fs: TFileStream;
  432. begin
  433. fs := TFileStream.Create(AFileName, fmCreate);
  434. try
  435. WriteHTMLFile(doc, fs);
  436. finally
  437. fs.Free;
  438. end;
  439. end;
  440. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  441. var
  442. s: TStream;
  443. begin
  444. s := TTextStream.Create(AFile);
  445. try
  446. with THTMLWriter.Create(s) do
  447. try
  448. WriteNode(doc);
  449. finally
  450. Free;
  451. end;
  452. finally
  453. s.Free;
  454. end;
  455. end;
  456. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  457. begin
  458. with THTMLWriter.Create(AStream) do
  459. try
  460. WriteNode(doc);
  461. finally
  462. Free;
  463. end;
  464. end;
  465. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  466. begin
  467. WriteHTMLFile(TXMLDocument(Element), AFileName);
  468. end;
  469. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  470. begin
  471. WriteHTMLFile(TXMLDocument(Element), AFile);
  472. end;
  473. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  474. begin
  475. WriteHTMLFile(TXMLDocument(Element), AStream);
  476. end;
  477. end.