xmlwrite.pp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. {
  2. This file is part of the Free Component Library
  3. XML writing routines
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit XMLWrite;
  13. {$ifdef fpc}
  14. {$MODE objfpc}
  15. {$H+}
  16. {/$DEFINE HAS_INLINE}
  17. {$INLINE OFF}
  18. {$endif}
  19. interface
  20. uses Classes, DOM;
  21. procedure WriteXMLFile(doc: TXMLDocument; const AFileName: String); overload;
  22. procedure WriteXMLFile(doc: TXMLDocument; var AFile: Text); overload;
  23. procedure WriteXMLFile(doc: TXMLDocument; AStream: TStream); overload;
  24. procedure WriteXML(Element: TDOMNode; const AFileName: String); overload;
  25. procedure WriteXML(Element: TDOMNode; var AFile: Text); overload;
  26. procedure WriteXML(Element: TDOMNode; AStream: TStream); overload;
  27. // ===================================================================
  28. implementation
  29. uses SysUtils;
  30. type
  31. TCharacters = set of Char;
  32. TSpecialCharCallback = procedure(c: WideChar) of object;
  33. TXMLWriter = class(TObject)
  34. private
  35. FInsideTextNode: Boolean;
  36. FIndent: WideString;
  37. FIndentCount: Integer;
  38. FBuffer: PChar;
  39. FBufPos: PChar;
  40. FCapacity: Integer;
  41. FLineBreak: string;
  42. procedure wrtChars(Src: PWideChar; Length: Integer);
  43. procedure IncIndent;
  44. procedure DecIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  45. procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  46. procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  47. procedure wrtLineEnd; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  48. procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  49. procedure wrtQuotedLiteral(const ws: WideString);
  50. procedure ConvWrite(const s: WideString; const SpecialChars: TCharacters;
  51. const SpecialCharCallback: TSpecialCharCallback);
  52. procedure AttrSpecialCharCallback(c: WideChar);
  53. procedure TextNodeSpecialCharCallback(c: WideChar);
  54. protected
  55. procedure Write(const Buffer; Count: Longint); virtual; abstract;
  56. procedure WriteNode(Node: TDOMNode);
  57. procedure VisitDocument(Node: TDOMNode);
  58. procedure VisitElement(Node: TDOMNode);
  59. procedure VisitText(Node: TDOMNode);
  60. procedure VisitCDATA(Node: TDOMNode);
  61. procedure VisitComment(Node: TDOMNode);
  62. procedure VisitFragment(Node: TDOMNode);
  63. procedure VisitAttribute(Node: TDOMNode);
  64. procedure VisitEntityRef(Node: TDOMNode);
  65. procedure VisitDocumentType(Node: TDOMNode);
  66. procedure VisitPI(Node: TDOMNode);
  67. public
  68. constructor Create;
  69. destructor Destroy; override;
  70. end;
  71. TTextXMLWriter = Class(TXMLWriter)
  72. Private
  73. F : ^Text;
  74. Protected
  75. Procedure Write(Const Buffer; Count : Longint);override;
  76. Public
  77. constructor Create(var AFile: Text);
  78. end;
  79. TStreamXMLWriter = Class(TXMLWriter)
  80. Private
  81. F : TStream;
  82. Protected
  83. Procedure Write(Const Buffer; Count : Longint);override;
  84. Public
  85. constructor Create(AStream: TStream);
  86. end;
  87. { ---------------------------------------------------------------------
  88. TTextXMLWriter
  89. ---------------------------------------------------------------------}
  90. constructor TTextXMLWriter.Create(var AFile: Text);
  91. begin
  92. inherited Create;
  93. f := @AFile;
  94. end;
  95. procedure TTextXMLWriter.Write(const Buffer; Count: Longint);
  96. var
  97. s: string;
  98. begin
  99. if Count>0 then
  100. begin
  101. SetString(s, PChar(@Buffer), Count);
  102. system.Write(f^, s);
  103. end;
  104. end;
  105. { ---------------------------------------------------------------------
  106. TStreamXMLWriter
  107. ---------------------------------------------------------------------}
  108. constructor TStreamXMLWriter.Create(AStream: TStream);
  109. begin
  110. inherited Create;
  111. F := AStream;
  112. end;
  113. procedure TStreamXMLWriter.Write(const Buffer; Count: Longint);
  114. begin
  115. if Count > 0 then
  116. F.Write(Buffer, Count);
  117. end;
  118. { ---------------------------------------------------------------------
  119. TXMLWriter
  120. ---------------------------------------------------------------------}
  121. constructor TXMLWriter.Create;
  122. var
  123. I: Integer;
  124. begin
  125. inherited Create;
  126. // some overhead - always be able to write at least one extra UCS4
  127. FBuffer := AllocMem(512+32);
  128. FBufPos := FBuffer;
  129. FCapacity := 512;
  130. // Initialize Indent string
  131. SetLength(FIndent, 100);
  132. for I := 1 to 100 do FIndent[I] := ' ';
  133. FIndentCount := 0;
  134. // Later on, this may be put under user control
  135. // for now, take OS setting
  136. FLineBreak := sLineBreak;
  137. end;
  138. destructor TXMLWriter.Destroy;
  139. begin
  140. if FBufPos > FBuffer then
  141. write(FBuffer^, FBufPos-FBuffer);
  142. FreeMem(FBuffer);
  143. inherited Destroy;
  144. end;
  145. procedure TXMLWriter.wrtChars(Src: PWideChar; Length: Integer);
  146. var
  147. pb: PChar;
  148. wc: Cardinal;
  149. SrcEnd: PWideChar;
  150. I: Integer;
  151. begin
  152. pb := FBufPos;
  153. SrcEnd := Src + Length;
  154. while Src < SrcEnd do
  155. begin
  156. if pb >= @FBuffer[FCapacity] then
  157. begin
  158. write(FBuffer^, FCapacity);
  159. Dec(pb, FCapacity);
  160. if pb > FBuffer then
  161. Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
  162. end;
  163. wc := Cardinal(Src^); Inc(Src);
  164. case wc of
  165. $0A: for I := 1 to System.Length(FLineBreak) do
  166. begin
  167. pb^ := FLineBreak[I]; Inc(pb);
  168. end;
  169. 0..$09, $0B..$7F: begin
  170. pb^ := char(wc); Inc(pb);
  171. end;
  172. $80..$7FF: begin
  173. pb^ := Char($C0 or (wc shr 6)); Inc(pb);
  174. pb^ := Char($80 or (wc and $3F)); Inc(pb);
  175. end;
  176. $D800..$DBFF: begin
  177. if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
  178. begin
  179. wc := ((wc - $D7C0) shl 10) + (word(Src^) xor $DC00);
  180. Inc(Src);
  181. pb^ := Char($F0 or (wc shr 18)); Inc(pb);
  182. pb^ := Char($80 or ((wc shr 12) and $3F)); Inc(pb);
  183. pb^ := Char($80 or ((wc shr 6) and $3F)); Inc(pb);
  184. pb^ := Char($80 or (wc and $3F)); Inc(pb);
  185. end
  186. else
  187. raise EConvertError.Create('High surrogate without low one');
  188. end;
  189. $DC00..$DFFF:
  190. raise EConvertError.Create('Low surrogate without high one');
  191. else // $800 >= wc > $FFFF, excluding surrogates
  192. begin
  193. pb^ := Char($E0 or (wc shr 12)); Inc(pb);
  194. pb^ := Char($80 or ((wc shr 6) and $3F)); Inc(pb);
  195. pb^ := Char($80 or (wc and $3F)); Inc(pb);
  196. end;
  197. end;
  198. end;
  199. FBufPos := pb;
  200. end;
  201. procedure TXMLWriter.wrtStr(const ws: WideString); { inline }
  202. begin
  203. wrtChars(PWideChar(ws), Length(ws));
  204. end;
  205. procedure TXMLWriter.wrtChr(c: WideChar); { inline }
  206. begin
  207. wrtChars(@c,1);
  208. end;
  209. procedure TXMLWriter.wrtLineEnd; { inline }
  210. begin
  211. // line endings now handled in WrtStr!
  212. wrtChr(#10);
  213. end;
  214. procedure TXMLWriter.wrtIndent; { inline }
  215. begin
  216. wrtChars(PWideChar(FIndent), FIndentCount*2);
  217. end;
  218. procedure TXMLWriter.IncIndent;
  219. var
  220. I, NewLen, OldLen: Integer;
  221. begin
  222. Inc(FIndentCount);
  223. if Length(FIndent) < 2 * FIndentCount then
  224. begin
  225. OldLen := Length(FIndent);
  226. NewLen := 4 * FIndentCount;
  227. SetLength(FIndent, NewLen);
  228. for I := OldLen to NewLen do
  229. FIndent[I] := ' ';
  230. end;
  231. end;
  232. procedure TXMLWriter.DecIndent; { inline }
  233. begin
  234. if FIndentCount>0 then dec(FIndentCount);
  235. end;
  236. procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
  237. var
  238. Quote: WideChar;
  239. begin
  240. // TODO: need to check if the string also contains single quote
  241. // both quotes present is a error
  242. if Pos('"', ws) > 0 then
  243. Quote := ''''
  244. else
  245. Quote := '"';
  246. wrtChr(Quote);
  247. wrtStr(ws);
  248. wrtChr(Quote);
  249. end;
  250. const
  251. AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
  252. TextSpecialChars = ['<', '>', '&'];
  253. procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TCharacters;
  254. const SpecialCharCallback: TSpecialCharCallback);
  255. var
  256. StartPos, EndPos: Integer;
  257. begin
  258. StartPos := 1;
  259. EndPos := 1;
  260. while EndPos <= Length(s) do
  261. begin
  262. if (s[EndPos] < #255) and (Char(s[EndPos]) in SpecialChars) then
  263. begin
  264. wrtChars(@s[StartPos], EndPos - StartPos);
  265. SpecialCharCallback(s[EndPos]);
  266. StartPos := EndPos + 1;
  267. end;
  268. Inc(EndPos);
  269. end;
  270. if StartPos <= length(s) then
  271. wrtChars(@s[StartPos], EndPos - StartPos);
  272. end;
  273. const
  274. QuotStr = '&quot;';
  275. AmpStr = '&amp;';
  276. ltStr = '&lt;';
  277. gtStr = '&gt;';
  278. procedure TXMLWriter.AttrSpecialCharCallback(c: WideChar);
  279. begin
  280. case c of
  281. '"': wrtStr(QuotStr);
  282. '&': wrtStr(AmpStr);
  283. '<': wrtStr(ltStr);
  284. // Escape whitespace using CharRefs to be consistent with W3 spec § 3.3.3
  285. #9: wrtStr('&#x9;');
  286. #10: wrtStr('&#xA;');
  287. #13: wrtStr('&#xD;');
  288. else
  289. wrtChr(c);
  290. end;
  291. end;
  292. procedure TXMLWriter.TextnodeSpecialCharCallback(c: WideChar);
  293. begin
  294. case c of
  295. '<': wrtStr(ltStr);
  296. '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
  297. '&': wrtStr(AmpStr);
  298. else
  299. wrtChr(c);
  300. end;
  301. end;
  302. procedure TXMLWriter.WriteNode(node: TDOMNode);
  303. begin
  304. case node.NodeType of
  305. ELEMENT_NODE: VisitElement(node);
  306. ATTRIBUTE_NODE: VisitAttribute(node);
  307. TEXT_NODE: VisitText(node);
  308. CDATA_SECTION_NODE: VisitCDATA(node);
  309. ENTITY_REFERENCE_NODE: VisitEntityRef(node);
  310. PROCESSING_INSTRUCTION_NODE: VisitPI(node);
  311. COMMENT_NODE: VisitComment(node);
  312. DOCUMENT_NODE: VisitDocument(node);
  313. DOCUMENT_TYPE_NODE: VisitDocumentType(node);
  314. DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
  315. end;
  316. end;
  317. procedure TXMLWriter.VisitElement(node: TDOMNode);
  318. var
  319. i: Integer;
  320. attr, child: TDOMNode;
  321. SavedInsideTextNode: Boolean;
  322. IsLeaf: Boolean;
  323. MixedContent: Boolean;
  324. begin
  325. if not FInsideTextNode then
  326. wrtIndent;
  327. wrtChr('<');
  328. wrtStr(node.NodeName);
  329. // FIX: Accessing Attributes was causing them to be created for every element :(
  330. if node.HasAttributes then
  331. for i := 0 to node.Attributes.Length - 1 do
  332. begin
  333. attr := node.Attributes.Item[i];
  334. VisitAttribute(attr);
  335. end;
  336. Child := node.FirstChild;
  337. if Child = nil then
  338. wrtStr('/>')
  339. else
  340. begin
  341. SavedInsideTextNode := FInsideTextNode;
  342. wrtChr('>');
  343. MixedContent := False;
  344. repeat
  345. if Assigned(Child.PreviousSibling) and
  346. (Child.PreviousSibling.InheritsFrom(TDOMText) <> Child.InheritsFrom(TDOMText)) then
  347. MixedContent := True;
  348. Child := Child.NextSibling;
  349. until Child = nil;
  350. Child := node.FirstChild; // restore
  351. IsLeaf := (Child = node.LastChild) and (Child.FirstChild = nil);
  352. if not (FInsideTextNode or MixedContent or IsLeaf) then
  353. wrtLineEnd;
  354. FInsideTextNode := {FInsideTextNode or} MixedContent or IsLeaf;
  355. IncIndent;
  356. repeat
  357. WriteNode(Child);
  358. Child := Child.NextSibling;
  359. until Child = nil;
  360. DecIndent;
  361. if not FInsideTextNode then
  362. wrtIndent;
  363. FInsideTextNode := SavedInsideTextNode;
  364. wrtStr('</');
  365. wrtStr(Node.NodeName);
  366. wrtChr('>');
  367. end;
  368. if not FInsideTextNode then
  369. wrtLineEnd;
  370. end;
  371. procedure TXMLWriter.VisitText(node: TDOMNode);
  372. begin
  373. ConvWrite(node.NodeValue, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback);
  374. end;
  375. procedure TXMLWriter.VisitCDATA(node: TDOMNode);
  376. begin
  377. if not FInsideTextNode then
  378. wrtIndent;
  379. wrtStr('<![CDATA[');
  380. wrtStr(node.NodeValue);
  381. wrtStr(']]>');
  382. if not FInsideTextNode then
  383. wrtLineEnd;
  384. end;
  385. procedure TXMLWriter.VisitEntityRef(node: TDOMNode);
  386. begin
  387. wrtChr('&');
  388. wrtStr(node.NodeName);
  389. wrtChr(';');
  390. end;
  391. procedure TXMLWriter.VisitPI(node: TDOMNode);
  392. begin
  393. if not FInsideTextNode then wrtIndent;
  394. wrtStr('<?');
  395. wrtStr(TDOMProcessingInstruction(node).Target);
  396. wrtChr(' ');
  397. wrtStr(TDOMProcessingInstruction(node).Data);
  398. wrtStr('?>');
  399. if not FInsideTextNode then wrtLineEnd;
  400. end;
  401. procedure TXMLWriter.VisitComment(node: TDOMNode);
  402. begin
  403. if not FInsideTextNode then wrtIndent;
  404. wrtStr('<!--');
  405. wrtStr(node.NodeValue);
  406. wrtStr('-->');
  407. if not FInsideTextNode then wrtLineEnd;
  408. end;
  409. procedure TXMLWriter.VisitDocument(node: TDOMNode);
  410. var
  411. child: TDOMNode;
  412. begin
  413. wrtStr('<?xml version="');
  414. // Definitely should not escape anything here
  415. if Length(TXMLDocument(node).XMLVersion) > 0 then
  416. wrtStr(TXMLDocument(node).XMLVersion)
  417. else
  418. wrtStr('1.0');
  419. wrtChr('"');
  420. // DISABLED - we are only able write in UTF-8 which does not require labeling
  421. // writing incorrect encoding will render xml unreadable...
  422. (*
  423. if Length(TXMLDocument(node).Encoding) > 0 then
  424. begin
  425. wrtStr(' encoding="');
  426. wrtStr(TXMLDocument(node).Encoding);
  427. wrtChr('"');
  428. end;
  429. *)
  430. wrtStr('?>'#10);
  431. // TODO: now handled as a regular PI, remove this?
  432. if Length(TXMLDocument(node).StylesheetType) > 0 then
  433. begin
  434. wrtStr('<?xml-stylesheet type="');
  435. wrtStr(TXMLDocument(node).StylesheetType);
  436. wrtStr('" href="');
  437. wrtStr(TXMLDocument(node).StylesheetHRef);
  438. wrtStr('"?>'#10);
  439. end;
  440. child := node.FirstChild;
  441. while Assigned(Child) do
  442. begin
  443. WriteNode(Child);
  444. Child := Child.NextSibling;
  445. end;
  446. end;
  447. procedure TXMLWriter.VisitAttribute(Node: TDOMNode);
  448. var
  449. Child: TDOMNode;
  450. begin
  451. wrtChr(' ');
  452. wrtStr(Node.NodeName);
  453. wrtStr('="');
  454. Child := Node.FirstChild;
  455. while Assigned(Child) do
  456. begin
  457. if Child.NodeType = ENTITY_REFERENCE_NODE then
  458. VisitEntityRef(Child)
  459. else
  460. ConvWrite(Child.NodeValue, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
  461. Child := Child.NextSibling;
  462. end;
  463. wrtChr('"');
  464. end;
  465. procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
  466. begin
  467. wrtStr('<!DOCTYPE ');
  468. wrtStr(Node.NodeName);
  469. with TDOMDocumentType(Node) do
  470. begin
  471. if PublicID <> '' then
  472. begin
  473. wrtStr(' PUBLIC ');
  474. wrtQuotedLiteral(PublicID);
  475. wrtChr(' ');
  476. wrtQuotedLiteral(SystemID);
  477. end
  478. else if SystemID <> '' then
  479. begin
  480. wrtStr(' SYSTEM ');
  481. wrtQuotedLiteral(SystemID);
  482. end;
  483. if InternalSubset <> '' then
  484. begin
  485. wrtChr('[');
  486. wrtStr(InternalSubset);
  487. wrtChr(']');
  488. end;
  489. end;
  490. wrtStr('>'#10);
  491. end;
  492. procedure TXMLWriter.VisitFragment(Node: TDOMNode);
  493. var
  494. Child: TDOMNode;
  495. begin
  496. // Fragment itself should not be written, only its children should...
  497. Child := Node.FirstChild;
  498. while Assigned(Child) do
  499. begin
  500. WriteNode(Child);
  501. Child := Child.NextSibling;
  502. end;
  503. end;
  504. // -------------------------------------------------------------------
  505. // Interface implementation
  506. // -------------------------------------------------------------------
  507. procedure WriteXMLFile(doc: TXMLDocument; const AFileName: String);
  508. var
  509. fs: TFileStream;
  510. begin
  511. fs := TFileStream.Create(AFileName, fmCreate);
  512. try
  513. WriteXMLFile(doc, fs);
  514. finally
  515. fs.Free;
  516. end;
  517. end;
  518. procedure WriteXMLFile(doc: TXMLDocument; var AFile: Text);
  519. begin
  520. with TTextXMLWriter.Create(AFile) do
  521. try
  522. WriteNode(doc);
  523. finally
  524. Free;
  525. end;
  526. end;
  527. procedure WriteXMLFile(doc: TXMLDocument; AStream: TStream);
  528. begin
  529. with TStreamXMLWriter.Create(AStream) do
  530. try
  531. WriteNode(doc);
  532. finally
  533. Free;
  534. end;
  535. end;
  536. procedure WriteXML(Element: TDOMNode; const AFileName: String);
  537. begin
  538. WriteXMLFile(TXMLDocument(Element), AFileName);
  539. end;
  540. procedure WriteXML(Element: TDOMNode; var AFile: Text);
  541. begin
  542. WriteXMLFile(TXMLDocument(Element), AFile);
  543. end;
  544. procedure WriteXML(Element: TDOMNode; AStream: TStream);
  545. begin
  546. WriteXMLFile(TXMLDocument(Element), AStream);
  547. end;
  548. end.