htmwrite.pp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. {
  2. This file is part of the Free Component Library
  3. HTML writing routines
  4. Copyright (c) 2000-2002 by
  5. Areca Systems GmbH / Sebastian Guenther, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit HTMWrite;
  13. {$MODE objfpc}
  14. {$H+}
  15. interface
  16. uses Classes, DOM;
  17. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  18. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  19. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  20. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  21. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  22. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  23. // ===================================================================
  24. implementation
  25. uses SysUtils, HTMLDefs, xmlutils;
  26. type
  27. TSpecialCharCallback = procedure(c: WideChar) of object;
  28. THTMLWriter = class(TObject)
  29. private
  30. FStream: TStream;
  31. FInsideTextNode: Boolean;
  32. FInsideScript: Boolean;
  33. FBuffer: PChar;
  34. FBufPos: PChar;
  35. FCapacity: Integer;
  36. FLineBreak: string;
  37. procedure wrtChars(Src: PWideChar; Length: Integer);
  38. procedure wrtStr(const ws: XMLString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  39. procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  40. procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  41. procedure wrtQuotedLiteral(const ws: XMLString);
  42. procedure ConvWrite(const s: XMLString; const SpecialChars: TSetOfChar;
  43. const SpecialCharCallback: TSpecialCharCallback);
  44. procedure AttrSpecialCharCallback(c: WideChar);
  45. procedure TextNodeSpecialCharCallback(c: WideChar);
  46. protected
  47. procedure WriteNode(Node: TDOMNode);
  48. procedure VisitDocument(Node: TDOMNode);
  49. procedure VisitElement(Node: TDOMNode);
  50. procedure VisitText(Node: TDOMNode);
  51. procedure VisitCDATA(Node: TDOMNode);
  52. procedure VisitComment(Node: TDOMNode);
  53. procedure VisitFragment(Node: TDOMNode);
  54. procedure VisitAttribute(Node: TDOMNode);
  55. procedure VisitEntityRef(Node: TDOMNode);
  56. procedure VisitDocumentType(Node: TDOMNode);
  57. procedure VisitPI(Node: TDOMNode);
  58. public
  59. constructor Create(AStream: TStream; ACapacity : Cardinal = 4096);
  60. destructor Destroy; override;
  61. end;
  62. TTextStream = class(TStream)
  63. Private
  64. F : ^Text;
  65. Public
  66. constructor Create(var AFile: Text);
  67. function Write(Const Buffer; Count: Longint): Longint; override;
  68. end;
  69. { ---------------------------------------------------------------------
  70. TTextStream
  71. ---------------------------------------------------------------------}
  72. constructor TTextStream.Create(var AFile: Text);
  73. begin
  74. inherited Create;
  75. f := @AFile;
  76. end;
  77. function TTextStream.Write(const Buffer; Count: Longint): Longint;
  78. var
  79. s: string;
  80. begin
  81. if Count>0 then
  82. begin
  83. SetString(s, PChar(@Buffer), Count);
  84. system.Write(f^, s);
  85. end;
  86. Result := Count;
  87. end;
  88. { ---------------------------------------------------------------------
  89. THTMLWriter
  90. ---------------------------------------------------------------------}
  91. constructor THTMLWriter.Create(AStream: TStream; ACapacity : Cardinal = 4096);
  92. begin
  93. inherited Create;
  94. FStream := AStream;
  95. // some overhead - always be able to write at least one extra UCS4
  96. FCapacity := ACapacity;
  97. FBuffer := AllocMem(FCapacity+32);
  98. FBufPos := FBuffer;
  99. // Later on, this may be put under user control
  100. // for now, take OS setting
  101. FLineBreak := sLineBreak;
  102. end;
  103. destructor THTMLWriter.Destroy;
  104. begin
  105. if FBufPos > FBuffer then
  106. FStream.write(FBuffer^, FBufPos-FBuffer);
  107. FreeMem(FBuffer);
  108. inherited Destroy;
  109. end;
  110. procedure THTMLWriter.wrtChars(Src: PWideChar; Length: Integer);
  111. var
  112. pb: PChar;
  113. wc: Cardinal;
  114. SrcEnd: PWideChar;
  115. begin
  116. pb := FBufPos;
  117. SrcEnd := Src + Length;
  118. while Src < SrcEnd do
  119. begin
  120. if pb >= @FBuffer[FCapacity] then
  121. begin
  122. FStream.write(FBuffer^, FCapacity);
  123. Dec(pb, FCapacity);
  124. if pb > FBuffer then
  125. Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
  126. end;
  127. wc := Cardinal(Src^); Inc(Src);
  128. case wc of
  129. $0A: pb := StrECopy(pb, PChar(FLineBreak));
  130. $0D: begin
  131. pb := StrECopy(pb, PChar(FLineBreak));
  132. if (Src < SrcEnd) and (Src^ = #$0A) then
  133. Inc(Src);
  134. end;
  135. 0..$09, $0B, $0C, $0E..$7F: begin
  136. pb^ := char(wc); Inc(pb);
  137. end;
  138. $80..$7FF: begin
  139. pb^ := Char($C0 or (wc shr 6));
  140. pb[1] := Char($80 or (wc and $3F));
  141. Inc(pb,2);
  142. end;
  143. $D800..$DBFF: begin
  144. if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
  145. begin
  146. wc := ((LongInt(wc) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
  147. Inc(Src);
  148. pb^ := Char($F0 or (wc shr 18));
  149. pb[1] := Char($80 or ((wc shr 12) and $3F));
  150. pb[2] := Char($80 or ((wc shr 6) and $3F));
  151. pb[3] := Char($80 or (wc and $3F));
  152. Inc(pb,4);
  153. end
  154. else
  155. raise EConvertError.Create('High surrogate without low one');
  156. end;
  157. $DC00..$DFFF:
  158. raise EConvertError.Create('Low surrogate without high one');
  159. else // $800 >= wc > $FFFF, excluding surrogates
  160. begin
  161. pb^ := Char($E0 or (wc shr 12));
  162. pb[1] := Char($80 or ((wc shr 6) and $3F));
  163. pb[2] := Char($80 or (wc and $3F));
  164. Inc(pb,3);
  165. end;
  166. end;
  167. end;
  168. FBufPos := pb;
  169. end;
  170. procedure THTMLWriter.wrtStr(const ws: XMLString); { inline }
  171. begin
  172. wrtChars(PWideChar(ws), Length(ws));
  173. end;
  174. { No checks here - buffer always has 32 extra bytes }
  175. procedure THTMLWriter.wrtChr(c: WideChar); { inline }
  176. begin
  177. FBufPos^ := char(ord(c));
  178. Inc(FBufPos);
  179. end;
  180. procedure THTMLWriter.wrtIndent; { inline }
  181. begin
  182. wrtChars(#10, 1);
  183. end;
  184. procedure THTMLWriter.wrtQuotedLiteral(const ws: XMLString);
  185. var
  186. Quote: WideChar;
  187. begin
  188. // TODO: need to check if the string also contains single quote
  189. // both quotes present is a error
  190. if Pos('"', ws) > 0 then
  191. Quote := ''''
  192. else
  193. Quote := '"';
  194. wrtChr(Quote);
  195. wrtStr(ws);
  196. wrtChr(Quote);
  197. end;
  198. const
  199. AttrSpecialChars = ['<', '"', '&'];
  200. TextSpecialChars = ['<', '>', '&'];
  201. procedure THTMLWriter.ConvWrite(const s: XMLString; const SpecialChars: TSetOfChar;
  202. const SpecialCharCallback: TSpecialCharCallback);
  203. var
  204. StartPos, EndPos: Integer;
  205. begin
  206. StartPos := 1;
  207. EndPos := 1;
  208. while EndPos <= Length(s) do
  209. begin
  210. if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
  211. begin
  212. wrtChars(@s[StartPos], EndPos - StartPos);
  213. SpecialCharCallback(s[EndPos]);
  214. StartPos := EndPos + 1;
  215. end;
  216. Inc(EndPos);
  217. end;
  218. if StartPos <= length(s) then
  219. wrtChars(@s[StartPos], EndPos - StartPos);
  220. end;
  221. const
  222. QuotStr = '&quot;';
  223. AmpStr = '&amp;';
  224. ltStr = '&lt;';
  225. gtStr = '&gt;';
  226. procedure THTMLWriter.AttrSpecialCharCallback(c: WideChar);
  227. begin
  228. case c of
  229. '"': wrtStr(QuotStr);
  230. '&': wrtStr(AmpStr);
  231. '<': wrtStr(ltStr);
  232. else
  233. wrtChr(c);
  234. end;
  235. end;
  236. procedure THTMLWriter.TextnodeSpecialCharCallback(c: WideChar);
  237. begin
  238. case c of
  239. '<': wrtStr(ltStr);
  240. '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
  241. '&': wrtStr(AmpStr);
  242. else
  243. wrtChr(c);
  244. end;
  245. end;
  246. procedure THTMLWriter.WriteNode(node: TDOMNode);
  247. begin
  248. case node.NodeType of
  249. ELEMENT_NODE: VisitElement(node);
  250. ATTRIBUTE_NODE: VisitAttribute(node);
  251. TEXT_NODE: VisitText(node);
  252. CDATA_SECTION_NODE: VisitCDATA(node);
  253. ENTITY_REFERENCE_NODE: VisitEntityRef(node);
  254. PROCESSING_INSTRUCTION_NODE: VisitPI(node);
  255. COMMENT_NODE: VisitComment(node);
  256. DOCUMENT_NODE: VisitDocument(node);
  257. DOCUMENT_TYPE_NODE: VisitDocumentType(node);
  258. ENTITY_NODE,
  259. DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
  260. end;
  261. end;
  262. procedure THTMLWriter.VisitElement(node: TDOMNode);
  263. var
  264. i: Integer;
  265. child: TDOMNode;
  266. SavedInsideTextNode: Boolean;
  267. s: string;
  268. ElFlags: THTMLElementFlags;
  269. j: THTMLElementTag;
  270. meta: Boolean;
  271. begin
  272. if not FInsideTextNode then
  273. wrtIndent;
  274. meta := False;
  275. s := LowerCase(node.NodeName);
  276. ElFlags := [efSubelementContent, efPCDATAContent]; // default flags
  277. for j := Low(THTMLElementTag) to High(THTMLElementTag) do
  278. if HTMLElementProps[J].Name = s then
  279. begin
  280. ElFlags := HTMLElementProps[j].Flags;
  281. if j = etMeta then
  282. meta := True;
  283. FInsideScript := (j=etScript) or (j=etStyle);
  284. break;
  285. end;
  286. wrtChr('<');
  287. wrtStr(TDOMElement(node).TagName);
  288. { Force charset label to utf-8, because it is the encoding we actually write }
  289. if meta then
  290. begin
  291. s := TDOMElement(node).GetAttribute('http-equiv');
  292. if SameText(s, 'content-type') then
  293. begin
  294. wrtStr(' content="text/html; charset=utf-8" http-equiv="Content-Type">');
  295. Exit;
  296. end;
  297. end;
  298. if node.HasAttributes then
  299. for i := 0 to node.Attributes.Length - 1 do
  300. begin
  301. child := node.Attributes.Item[i];
  302. VisitAttribute(child);
  303. end;
  304. wrtChr('>');
  305. Child := node.FirstChild;
  306. if Child <> nil then
  307. begin
  308. SavedInsideTextNode := FInsideTextNode;
  309. FInsideTextNode := efPCDATAContent in ElFlags;
  310. repeat
  311. WriteNode(Child);
  312. Child := Child.NextSibling;
  313. until Child = nil;
  314. FInsideTextNode := SavedInsideTextNode;
  315. end;
  316. if (not FInsideTextNode) and not (efPCDATAContent in ElFlags) then
  317. wrtIndent;
  318. if ElFlags * [efSubelementContent, efPCDATAContent] <> [] then
  319. begin
  320. wrtChars('</', 2);
  321. wrtStr(TDOMElement(Node).TagName);
  322. wrtChr('>');
  323. end;
  324. end;
  325. procedure THTMLWriter.VisitText(node: TDOMNode);
  326. begin
  327. if FInsideScript then
  328. WrtStr(TDOMCharacterData(node).Data)
  329. else
  330. ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback);
  331. end;
  332. procedure THTMLWriter.VisitCDATA(node: TDOMNode);
  333. begin
  334. if not FInsideTextNode then
  335. wrtIndent;
  336. wrtChars('<![CDATA[', 9);
  337. wrtStr(TDOMCharacterData(node).Data);
  338. wrtChars(']]>', 3);
  339. end;
  340. procedure THTMLWriter.VisitEntityRef(node: TDOMNode);
  341. begin
  342. wrtChr('&');
  343. wrtStr(node.NodeName);
  344. wrtChr(';');
  345. end;
  346. procedure THTMLWriter.VisitPI(node: TDOMNode);
  347. begin
  348. if not FInsideTextNode then wrtIndent;
  349. wrtStr('<?');
  350. wrtStr(TDOMProcessingInstruction(node).Target);
  351. wrtChr(' ');
  352. wrtStr(TDOMProcessingInstruction(node).Data);
  353. wrtStr('?>');
  354. end;
  355. procedure THTMLWriter.VisitComment(node: TDOMNode);
  356. begin
  357. if not FInsideTextNode then wrtIndent;
  358. wrtChars('<!--', 4);
  359. wrtStr(TDOMCharacterData(node).Data);
  360. wrtChars('-->', 3);
  361. end;
  362. procedure THTMLWriter.VisitDocument(node: TDOMNode);
  363. var
  364. child: TDOMNode;
  365. begin
  366. child := node.FirstChild;
  367. while Assigned(Child) do
  368. begin
  369. WriteNode(Child);
  370. Child := Child.NextSibling;
  371. end;
  372. wrtChars(#10, 1);
  373. end;
  374. procedure THTMLWriter.VisitAttribute(Node: TDOMNode);
  375. var
  376. Child: TDOMNode;
  377. begin
  378. wrtChr(' ');
  379. wrtStr(TDOMAttr(Node).Name);
  380. wrtChars('="', 2);
  381. Child := Node.FirstChild;
  382. while Assigned(Child) do
  383. begin
  384. case Child.NodeType of
  385. ENTITY_REFERENCE_NODE:
  386. VisitEntityRef(Child);
  387. TEXT_NODE:
  388. ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
  389. end;
  390. Child := Child.NextSibling;
  391. end;
  392. wrtChr('"');
  393. end;
  394. procedure THTMLWriter.VisitDocumentType(Node: TDOMNode);
  395. begin
  396. wrtStr('<!DOCTYPE ');
  397. wrtStr(Node.NodeName);
  398. wrtChr(' ');
  399. with TDOMDocumentType(Node) do
  400. begin
  401. if PublicID <> '' then
  402. begin
  403. wrtStr('PUBLIC ');
  404. wrtQuotedLiteral(PublicID);
  405. if SystemID <> '' then
  406. begin
  407. wrtChr(' ');
  408. wrtQuotedLiteral(SystemID);
  409. end;
  410. end
  411. else if SystemID <> '' then
  412. begin
  413. wrtStr('SYSTEM ');
  414. wrtQuotedLiteral(SystemID);
  415. end;
  416. end;
  417. wrtChr('>');
  418. end;
  419. procedure THTMLWriter.VisitFragment(Node: TDOMNode);
  420. var
  421. Child: TDOMNode;
  422. begin
  423. // Fragment itself should not be written, only its children should...
  424. Child := Node.FirstChild;
  425. while Assigned(Child) do
  426. begin
  427. WriteNode(Child);
  428. Child := Child.NextSibling;
  429. end;
  430. end;
  431. // -------------------------------------------------------------------
  432. // Interface implementation
  433. // -------------------------------------------------------------------
  434. procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
  435. var
  436. fs: TFileStream;
  437. begin
  438. fs := TFileStream.Create(AFileName, fmCreate);
  439. try
  440. WriteHTMLFile(doc, fs);
  441. finally
  442. fs.Free;
  443. end;
  444. end;
  445. procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
  446. var
  447. s: TStream;
  448. begin
  449. s := TTextStream.Create(AFile);
  450. try
  451. with THTMLWriter.Create(s) do
  452. try
  453. WriteNode(doc);
  454. finally
  455. Free;
  456. end;
  457. finally
  458. s.Free;
  459. end;
  460. end;
  461. procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
  462. begin
  463. with THTMLWriter.Create(AStream) do
  464. try
  465. WriteNode(doc);
  466. finally
  467. Free;
  468. end;
  469. end;
  470. procedure WriteHTML(Element: TDOMNode; const AFileName: String);
  471. begin
  472. WriteHTMLFile(TXMLDocument(Element), AFileName);
  473. end;
  474. procedure WriteHTML(Element: TDOMNode; var AFile: Text);
  475. begin
  476. WriteHTMLFile(TXMLDocument(Element), AFile);
  477. end;
  478. procedure WriteHTML(Element: TDOMNode; AStream: TStream);
  479. begin
  480. WriteHTMLFile(TXMLDocument(Element), AStream);
  481. end;
  482. end.