htmwrite.pp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. {
  2. This file is part of the Free Component Library
  3. HTML writing routines
  4. Copyright (c) 2000-2002 by
  5. Areca Systems GmbH / Sebastian Guenther, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit HTMWrite;
  13. {$MODE objfpc}
  14. {$H+}
  15. interface
  16. uses Classes, DOM;
  17. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  18. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  19. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  20. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  21. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  22. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  23. // ===================================================================
  24. implementation
  25. uses SysUtils, HTMLDefs;
  26. type
  27. TSpecialCharCallback = procedure(c: WideChar) of object;
  28. THTMLWriter = class(TObject)
  29. private
  30. FInsideTextNode: Boolean;
  31. FBuffer: PChar;
  32. FBufPos: PChar;
  33. FCapacity: Integer;
  34. FLineBreak: string;
  35. procedure wrtChars(Src: PWideChar; Length: Integer);
  36. procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  37. procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  38. procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  39. procedure wrtQuotedLiteral(const ws: WideString);
  40. procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
  41. const SpecialCharCallback: TSpecialCharCallback);
  42. procedure AttrSpecialCharCallback(c: WideChar);
  43. procedure TextNodeSpecialCharCallback(c: WideChar);
  44. protected
  45. procedure Write(const Buffer; Count: Longint); virtual; abstract;
  46. procedure WriteNode(Node: TDOMNode);
  47. procedure VisitDocument(Node: TDOMNode);
  48. procedure VisitElement(Node: TDOMNode);
  49. procedure VisitText(Node: TDOMNode);
  50. procedure VisitCDATA(Node: TDOMNode);
  51. procedure VisitComment(Node: TDOMNode);
  52. procedure VisitFragment(Node: TDOMNode);
  53. procedure VisitAttribute(Node: TDOMNode);
  54. procedure VisitEntityRef(Node: TDOMNode);
  55. procedure VisitDocumentType(Node: TDOMNode);
  56. procedure VisitPI(Node: TDOMNode);
  57. public
  58. constructor Create;
  59. destructor Destroy; override;
  60. end;
  61. TTextHTMLWriter = Class(THTMLWriter)
  62. Private
  63. F : ^Text;
  64. Protected
  65. Procedure Write(Const Buffer; Count : Longint);override;
  66. Public
  67. constructor Create(var AFile: Text);
  68. end;
  69. TStreamHTMLWriter = Class(THTMLWriter)
  70. Private
  71. F : TStream;
  72. Protected
  73. Procedure Write(Const Buffer; Count : Longint);override;
  74. Public
  75. constructor Create(AStream: TStream);
  76. end;
  77. { ---------------------------------------------------------------------
  78. TTextHTMLWriter
  79. ---------------------------------------------------------------------}
  80. constructor TTextHTMLWriter.Create(var AFile: Text);
  81. begin
  82. inherited Create;
  83. f := @AFile;
  84. end;
  85. procedure TTextHTMLWriter.Write(const Buffer; Count: Longint);
  86. var
  87. s: string;
  88. begin
  89. if Count>0 then
  90. begin
  91. SetString(s, PChar(@Buffer), Count);
  92. system.Write(f^, s);
  93. end;
  94. end;
  95. { ---------------------------------------------------------------------
  96. TStreamHTMLWriter
  97. ---------------------------------------------------------------------}
  98. constructor TStreamHTMLWriter.Create(AStream: TStream);
  99. begin
  100. inherited Create;
  101. F := AStream;
  102. end;
  103. procedure TStreamHTMLWriter.Write(const Buffer; Count: Longint);
  104. begin
  105. if Count > 0 then
  106. F.Write(Buffer, Count);
  107. end;
  108. { ---------------------------------------------------------------------
  109. THTMLWriter
  110. ---------------------------------------------------------------------}
  111. constructor THTMLWriter.Create;
  112. begin
  113. inherited Create;
  114. // some overhead - always be able to write at least one extra UCS4
  115. FBuffer := AllocMem(512+32);
  116. FBufPos := FBuffer;
  117. FCapacity := 512;
  118. // Later on, this may be put under user control
  119. // for now, take OS setting
  120. FLineBreak := sLineBreak;
  121. end;
  122. destructor THTMLWriter.Destroy;
  123. begin
  124. if FBufPos > FBuffer then
  125. write(FBuffer^, FBufPos-FBuffer);
  126. FreeMem(FBuffer);
  127. inherited Destroy;
  128. end;
  129. procedure THTMLWriter.wrtChars(Src: PWideChar; Length: Integer);
  130. var
  131. pb: PChar;
  132. wc: Cardinal;
  133. SrcEnd: PWideChar;
  134. begin
  135. pb := FBufPos;
  136. SrcEnd := Src + Length;
  137. while Src < SrcEnd do
  138. begin
  139. if pb >= @FBuffer[FCapacity] then
  140. begin
  141. write(FBuffer^, FCapacity);
  142. Dec(pb, FCapacity);
  143. if pb > FBuffer then
  144. Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
  145. end;
  146. wc := Cardinal(Src^); Inc(Src);
  147. case wc of
  148. $0A: pb := StrECopy(pb, PChar(FLineBreak));
  149. $0D: begin
  150. pb := StrECopy(pb, PChar(FLineBreak));
  151. if (Src < SrcEnd) and (Src^ = #$0A) then
  152. Inc(Src);
  153. end;
  154. 0..$09, $0B, $0C, $0E..$7F: begin
  155. pb^ := char(wc); Inc(pb);
  156. end;
  157. $80..$7FF: begin
  158. pb^ := Char($C0 or (wc shr 6));
  159. pb[1] := Char($80 or (wc and $3F));
  160. Inc(pb,2);
  161. end;
  162. $D800..$DBFF: begin
  163. if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
  164. begin
  165. wc := ((LongInt(wc) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
  166. Inc(Src);
  167. pb^ := Char($F0 or (wc shr 18));
  168. pb[1] := Char($80 or ((wc shr 12) and $3F));
  169. pb[2] := Char($80 or ((wc shr 6) and $3F));
  170. pb[3] := Char($80 or (wc and $3F));
  171. Inc(pb,4);
  172. end
  173. else
  174. raise EConvertError.Create('High surrogate without low one');
  175. end;
  176. $DC00..$DFFF:
  177. raise EConvertError.Create('Low surrogate without high one');
  178. else // $800 >= wc > $FFFF, excluding surrogates
  179. begin
  180. pb^ := Char($E0 or (wc shr 12));
  181. pb[1] := Char($80 or ((wc shr 6) and $3F));
  182. pb[2] := Char($80 or (wc and $3F));
  183. Inc(pb,3);
  184. end;
  185. end;
  186. end;
  187. FBufPos := pb;
  188. end;
  189. procedure THTMLWriter.wrtStr(const ws: WideString); { inline }
  190. begin
  191. wrtChars(PWideChar(ws), Length(ws));
  192. end;
  193. { No checks here - buffer always has 32 extra bytes }
  194. procedure THTMLWriter.wrtChr(c: WideChar); { inline }
  195. begin
  196. FBufPos^ := char(ord(c));
  197. Inc(FBufPos);
  198. end;
  199. procedure THTMLWriter.wrtIndent; { inline }
  200. begin
  201. wrtChars(#10, 1);
  202. end;
  203. procedure THTMLWriter.wrtQuotedLiteral(const ws: WideString);
  204. var
  205. Quote: WideChar;
  206. begin
  207. // TODO: need to check if the string also contains single quote
  208. // both quotes present is a error
  209. if Pos('"', ws) > 0 then
  210. Quote := ''''
  211. else
  212. Quote := '"';
  213. wrtChr(Quote);
  214. wrtStr(ws);
  215. wrtChr(Quote);
  216. end;
  217. const
  218. AttrSpecialChars = ['<', '"', '&'];
  219. TextSpecialChars = ['<', '>', '&'];
  220. procedure THTMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
  221. const SpecialCharCallback: TSpecialCharCallback);
  222. var
  223. StartPos, EndPos: Integer;
  224. begin
  225. StartPos := 1;
  226. EndPos := 1;
  227. while EndPos <= Length(s) do
  228. begin
  229. if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
  230. begin
  231. wrtChars(@s[StartPos], EndPos - StartPos);
  232. SpecialCharCallback(s[EndPos]);
  233. StartPos := EndPos + 1;
  234. end;
  235. Inc(EndPos);
  236. end;
  237. if StartPos <= length(s) then
  238. wrtChars(@s[StartPos], EndPos - StartPos);
  239. end;
  240. const
  241. QuotStr = '&quot;';
  242. AmpStr = '&amp;';
  243. ltStr = '&lt;';
  244. gtStr = '&gt;';
  245. procedure THTMLWriter.AttrSpecialCharCallback(c: WideChar);
  246. begin
  247. case c of
  248. '"': wrtStr(QuotStr);
  249. '&': wrtStr(AmpStr);
  250. '<': wrtStr(ltStr);
  251. else
  252. wrtChr(c);
  253. end;
  254. end;
  255. procedure THTMLWriter.TextnodeSpecialCharCallback(c: WideChar);
  256. begin
  257. case c of
  258. '<': wrtStr(ltStr);
  259. '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
  260. '&': wrtStr(AmpStr);
  261. else
  262. wrtChr(c);
  263. end;
  264. end;
  265. procedure THTMLWriter.WriteNode(node: TDOMNode);
  266. begin
  267. case node.NodeType of
  268. ELEMENT_NODE: VisitElement(node);
  269. ATTRIBUTE_NODE: VisitAttribute(node);
  270. TEXT_NODE: VisitText(node);
  271. CDATA_SECTION_NODE: VisitCDATA(node);
  272. ENTITY_REFERENCE_NODE: VisitEntityRef(node);
  273. PROCESSING_INSTRUCTION_NODE: VisitPI(node);
  274. COMMENT_NODE: VisitComment(node);
  275. DOCUMENT_NODE: VisitDocument(node);
  276. DOCUMENT_TYPE_NODE: VisitDocumentType(node);
  277. ENTITY_NODE,
  278. DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
  279. end;
  280. end;
  281. procedure THTMLWriter.VisitElement(node: TDOMNode);
  282. var
  283. i: Integer;
  284. child: TDOMNode;
  285. SavedInsideTextNode: Boolean;
  286. s: string;
  287. ElFlags: THTMLElementFlags;
  288. j: THTMLElementTag;
  289. meta: Boolean;
  290. begin
  291. if not FInsideTextNode then
  292. wrtIndent;
  293. meta := False;
  294. s := LowerCase(node.NodeName);
  295. ElFlags := [efSubelementContent, efPCDATAContent]; // default flags
  296. for j := Low(THTMLElementTag) to High(THTMLElementTag) do
  297. if HTMLElementProps[J].Name = s then
  298. begin
  299. ElFlags := HTMLElementProps[j].Flags;
  300. if j = etMeta then
  301. meta := True;
  302. break;
  303. end;
  304. wrtChr('<');
  305. wrtStr(TDOMElement(node).TagName);
  306. { Force charset label to utf-8, because it is the encoding we actually write }
  307. if meta then
  308. begin
  309. s := TDOMElement(node).GetAttribute('http-equiv');
  310. if SameText(s, 'content-type') then
  311. begin
  312. wrtStr(' content="text/html; charset=utf-8" http-equiv="Content-Type">');
  313. Exit;
  314. end;
  315. end;
  316. if node.HasAttributes then
  317. for i := 0 to node.Attributes.Length - 1 do
  318. begin
  319. child := node.Attributes.Item[i];
  320. VisitAttribute(child);
  321. end;
  322. wrtChr('>');
  323. Child := node.FirstChild;
  324. if Child <> nil then
  325. begin
  326. SavedInsideTextNode := FInsideTextNode;
  327. FInsideTextNode := efPCDATAContent in ElFlags;
  328. repeat
  329. WriteNode(Child);
  330. Child := Child.NextSibling;
  331. until Child = nil;
  332. FInsideTextNode := SavedInsideTextNode;
  333. end;
  334. if (not FInsideTextNode) and not (efPCDATAContent in ElFlags) then
  335. wrtIndent;
  336. if ElFlags * [efSubelementContent, efPCDATAContent] <> [] then
  337. begin
  338. wrtChars('</', 2);
  339. wrtStr(TDOMElement(Node).TagName);
  340. wrtChr('>');
  341. end;
  342. end;
  343. procedure THTMLWriter.VisitText(node: TDOMNode);
  344. begin
  345. ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback);
  346. end;
  347. procedure THTMLWriter.VisitCDATA(node: TDOMNode);
  348. begin
  349. if not FInsideTextNode then
  350. wrtIndent;
  351. wrtChars('<![CDATA[', 9);
  352. wrtStr(TDOMCharacterData(node).Data);
  353. wrtChars(']]>', 3);
  354. end;
  355. procedure THTMLWriter.VisitEntityRef(node: TDOMNode);
  356. begin
  357. wrtChr('&');
  358. wrtStr(node.NodeName);
  359. wrtChr(';');
  360. end;
  361. procedure THTMLWriter.VisitPI(node: TDOMNode);
  362. begin
  363. if not FInsideTextNode then wrtIndent;
  364. wrtStr('<?');
  365. wrtStr(TDOMProcessingInstruction(node).Target);
  366. wrtChr(' ');
  367. wrtStr(TDOMProcessingInstruction(node).Data);
  368. wrtStr('?>');
  369. end;
  370. procedure THTMLWriter.VisitComment(node: TDOMNode);
  371. begin
  372. if not FInsideTextNode then wrtIndent;
  373. wrtChars('<!--', 4);
  374. wrtStr(TDOMCharacterData(node).Data);
  375. wrtChars('-->', 3);
  376. end;
  377. procedure THTMLWriter.VisitDocument(node: TDOMNode);
  378. var
  379. child: TDOMNode;
  380. begin
  381. child := node.FirstChild;
  382. while Assigned(Child) do
  383. begin
  384. WriteNode(Child);
  385. Child := Child.NextSibling;
  386. end;
  387. wrtChars(#10, 1);
  388. end;
  389. procedure THTMLWriter.VisitAttribute(Node: TDOMNode);
  390. var
  391. Child: TDOMNode;
  392. begin
  393. wrtChr(' ');
  394. wrtStr(TDOMAttr(Node).Name);
  395. wrtChars('="', 2);
  396. Child := Node.FirstChild;
  397. while Assigned(Child) do
  398. begin
  399. case Child.NodeType of
  400. ENTITY_REFERENCE_NODE:
  401. VisitEntityRef(Child);
  402. TEXT_NODE:
  403. ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
  404. end;
  405. Child := Child.NextSibling;
  406. end;
  407. wrtChr('"');
  408. end;
  409. procedure THTMLWriter.VisitDocumentType(Node: TDOMNode);
  410. begin
  411. wrtStr('<!DOCTYPE ');
  412. wrtStr(Node.NodeName);
  413. wrtChr(' ');
  414. with TDOMDocumentType(Node) do
  415. begin
  416. if PublicID <> '' then
  417. begin
  418. wrtStr('PUBLIC ');
  419. wrtQuotedLiteral(PublicID);
  420. if SystemID <> '' then
  421. begin
  422. wrtChr(' ');
  423. wrtQuotedLiteral(SystemID);
  424. end;
  425. end
  426. else if SystemID <> '' then
  427. begin
  428. wrtStr('SYSTEM ');
  429. wrtQuotedLiteral(SystemID);
  430. end;
  431. end;
  432. wrtChr('>');
  433. end;
  434. procedure THTMLWriter.VisitFragment(Node: TDOMNode);
  435. var
  436. Child: TDOMNode;
  437. begin
  438. // Fragment itself should not be written, only its children should...
  439. Child := Node.FirstChild;
  440. while Assigned(Child) do
  441. begin
  442. WriteNode(Child);
  443. Child := Child.NextSibling;
  444. end;
  445. end;
  446. // -------------------------------------------------------------------
  447. // Interface implementation
  448. // -------------------------------------------------------------------
  449. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  450. var
  451. fs: TFileStream;
  452. begin
  453. fs := TFileStream.Create(AFileName, fmCreate);
  454. try
  455. WriteHTMLFile(doc, fs);
  456. finally
  457. fs.Free;
  458. end;
  459. end;
  460. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  461. begin
  462. with TTextHTMLWriter.Create(AFile) do
  463. try
  464. WriteNode(doc);
  465. finally
  466. Free;
  467. end;
  468. end;
  469. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  470. begin
  471. with TStreamHTMLWriter.Create(AStream) do
  472. try
  473. WriteNode(doc);
  474. finally
  475. Free;
  476. end;
  477. end;
  478. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  479. begin
  480. WriteHTMLFile(TXMLDocument(Element), AFileName);
  481. end;
  482. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  483. begin
  484. WriteHTMLFile(TXMLDocument(Element), AFile);
  485. end;
  486. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  487. begin
  488. WriteHTMLFile(TXMLDocument(Element), AStream);
  489. end;
  490. end.