xmlwrite.pp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. {
  2. This file is part of the Free Component Library
  3. XML writing routines
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit XMLWrite;
  13. {$MODE objfpc}
  14. {$H+}
  15. interface
  16. uses Classes, DOM;
  17. procedure WriteXMLFile(doc: TXMLDocument; const AFileName: String); overload;
  18. procedure WriteXMLFile(doc: TXMLDocument; var AFile: Text); overload;
  19. procedure WriteXMLFile(doc: TXMLDocument; AStream: TStream); overload;
  20. procedure WriteXML(Element: TDOMNode; const AFileName: String); overload;
  21. procedure WriteXML(Element: TDOMNode; var AFile: Text); overload;
  22. procedure WriteXML(Element: TDOMNode; AStream: TStream); overload;
  23. // ===================================================================
  24. implementation
  25. uses SysUtils;
  26. type
  27. TSpecialCharCallback = procedure(c: WideChar) of object;
  28. TXMLWriter = class(TObject)
  29. private
  30. FInsideTextNode: Boolean;
  31. FIndent: WideString;
  32. FIndentCount: Integer;
  33. FBuffer: PChar;
  34. FBufPos: PChar;
  35. FCapacity: Integer;
  36. FLineBreak: string;
  37. procedure wrtChars(Src: PWideChar; Length: Integer);
  38. procedure IncIndent;
  39. procedure DecIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  40. procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  41. procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
  42. procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
  43. procedure wrtQuotedLiteral(const ws: WideString);
  44. procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
  45. const SpecialCharCallback: TSpecialCharCallback);
  46. procedure AttrSpecialCharCallback(c: WideChar);
  47. procedure TextNodeSpecialCharCallback(c: WideChar);
  48. protected
  49. procedure Write(const Buffer; Count: Longint); virtual; abstract;
  50. procedure WriteNode(Node: TDOMNode);
  51. procedure VisitDocument(Node: TDOMNode);
  52. procedure VisitElement(Node: TDOMNode);
  53. procedure VisitText(Node: TDOMNode);
  54. procedure VisitCDATA(Node: TDOMNode);
  55. procedure VisitComment(Node: TDOMNode);
  56. procedure VisitFragment(Node: TDOMNode);
  57. procedure VisitAttribute(Node: TDOMNode);
  58. procedure VisitEntityRef(Node: TDOMNode);
  59. procedure VisitDocumentType(Node: TDOMNode);
  60. procedure VisitPI(Node: TDOMNode);
  61. public
  62. constructor Create;
  63. destructor Destroy; override;
  64. end;
  65. TTextXMLWriter = Class(TXMLWriter)
  66. Private
  67. F : ^Text;
  68. Protected
  69. Procedure Write(Const Buffer; Count : Longint);override;
  70. Public
  71. constructor Create(var AFile: Text);
  72. end;
  73. TStreamXMLWriter = Class(TXMLWriter)
  74. Private
  75. F : TStream;
  76. Protected
  77. Procedure Write(Const Buffer; Count : Longint);override;
  78. Public
  79. constructor Create(AStream: TStream);
  80. end;
  81. { ---------------------------------------------------------------------
  82. TTextXMLWriter
  83. ---------------------------------------------------------------------}
  84. constructor TTextXMLWriter.Create(var AFile: Text);
  85. begin
  86. inherited Create;
  87. f := @AFile;
  88. end;
  89. procedure TTextXMLWriter.Write(const Buffer; Count: Longint);
  90. var
  91. s: string;
  92. begin
  93. if Count>0 then
  94. begin
  95. SetString(s, PChar(@Buffer), Count);
  96. system.Write(f^, s);
  97. end;
  98. end;
  99. { ---------------------------------------------------------------------
  100. TStreamXMLWriter
  101. ---------------------------------------------------------------------}
  102. constructor TStreamXMLWriter.Create(AStream: TStream);
  103. begin
  104. inherited Create;
  105. F := AStream;
  106. end;
  107. procedure TStreamXMLWriter.Write(const Buffer; Count: Longint);
  108. begin
  109. if Count > 0 then
  110. F.Write(Buffer, Count);
  111. end;
  112. { ---------------------------------------------------------------------
  113. TXMLWriter
  114. ---------------------------------------------------------------------}
  115. constructor TXMLWriter.Create;
  116. var
  117. I: Integer;
  118. begin
  119. inherited Create;
  120. // some overhead - always be able to write at least one extra UCS4
  121. FBuffer := AllocMem(512+32);
  122. FBufPos := FBuffer;
  123. FCapacity := 512;
  124. // Initialize Indent string
  125. SetLength(FIndent, 100);
  126. FIndent[1] := #10;
  127. for I := 2 to 100 do FIndent[I] := ' ';
  128. FIndentCount := 0;
  129. // Later on, this may be put under user control
  130. // for now, take OS setting
  131. FLineBreak := sLineBreak;
  132. end;
  133. destructor TXMLWriter.Destroy;
  134. begin
  135. if FBufPos > FBuffer then
  136. write(FBuffer^, FBufPos-FBuffer);
  137. FreeMem(FBuffer);
  138. inherited Destroy;
  139. end;
  140. procedure TXMLWriter.wrtChars(Src: PWideChar; Length: Integer);
  141. var
  142. pb: PChar;
  143. wc: Cardinal;
  144. SrcEnd: PWideChar;
  145. begin
  146. pb := FBufPos;
  147. SrcEnd := Src + Length;
  148. while Src < SrcEnd do
  149. begin
  150. if pb >= @FBuffer[FCapacity] then
  151. begin
  152. write(FBuffer^, FCapacity);
  153. Dec(pb, FCapacity);
  154. if pb > FBuffer then
  155. Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
  156. end;
  157. wc := Cardinal(Src^); Inc(Src);
  158. case wc of
  159. $0A: pb := StrECopy(pb, PChar(FLineBreak));
  160. $0D: begin
  161. pb := StrECopy(pb, PChar(FLineBreak));
  162. if (Src < SrcEnd) and (Src^ = #$0A) then
  163. Inc(Src);
  164. end;
  165. 0..$09, $0B, $0C, $0E..$7F: begin
  166. pb^ := char(wc); Inc(pb);
  167. end;
  168. $80..$7FF: begin
  169. pb^ := Char($C0 or (wc shr 6));
  170. pb[1] := Char($80 or (wc and $3F));
  171. Inc(pb,2);
  172. end;
  173. $D800..$DBFF: begin
  174. if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
  175. begin
  176. wc := ((LongInt(wc) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
  177. Inc(Src);
  178. pb^ := Char($F0 or (wc shr 18));
  179. pb[1] := Char($80 or ((wc shr 12) and $3F));
  180. pb[2] := Char($80 or ((wc shr 6) and $3F));
  181. pb[3] := Char($80 or (wc and $3F));
  182. Inc(pb,4);
  183. end
  184. else
  185. raise EConvertError.Create('High surrogate without low one');
  186. end;
  187. $DC00..$DFFF:
  188. raise EConvertError.Create('Low surrogate without high one');
  189. else // $800 >= wc > $FFFF, excluding surrogates
  190. begin
  191. pb^ := Char($E0 or (wc shr 12));
  192. pb[1] := Char($80 or ((wc shr 6) and $3F));
  193. pb[2] := Char($80 or (wc and $3F));
  194. Inc(pb,3);
  195. end;
  196. end;
  197. end;
  198. FBufPos := pb;
  199. end;
  200. procedure TXMLWriter.wrtStr(const ws: WideString); { inline }
  201. begin
  202. wrtChars(PWideChar(ws), Length(ws));
  203. end;
  204. { No checks here - buffer always has 32 extra bytes }
  205. procedure TXMLWriter.wrtChr(c: WideChar); { inline }
  206. begin
  207. FBufPos^ := char(ord(c));
  208. Inc(FBufPos);
  209. end;
  210. procedure TXMLWriter.wrtIndent; { inline }
  211. begin
  212. wrtChars(PWideChar(FIndent), FIndentCount*2+1);
  213. end;
  214. procedure TXMLWriter.IncIndent;
  215. var
  216. I, NewLen, OldLen: Integer;
  217. begin
  218. Inc(FIndentCount);
  219. if Length(FIndent) < 2 * FIndentCount then
  220. begin
  221. OldLen := Length(FIndent);
  222. NewLen := 4 * FIndentCount;
  223. SetLength(FIndent, NewLen);
  224. for I := OldLen to NewLen do
  225. FIndent[I] := ' ';
  226. end;
  227. end;
  228. procedure TXMLWriter.DecIndent; { inline }
  229. begin
  230. if FIndentCount>0 then dec(FIndentCount);
  231. end;
  232. procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
  233. var
  234. Quote: WideChar;
  235. begin
  236. // TODO: need to check if the string also contains single quote
  237. // both quotes present is a error
  238. if Pos('"', ws) > 0 then
  239. Quote := ''''
  240. else
  241. Quote := '"';
  242. wrtChr(Quote);
  243. wrtStr(ws);
  244. wrtChr(Quote);
  245. end;
  246. const
  247. AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
  248. TextSpecialChars = ['<', '>', '&'];
  249. procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
  250. const SpecialCharCallback: TSpecialCharCallback);
  251. var
  252. StartPos, EndPos: Integer;
  253. begin
  254. StartPos := 1;
  255. EndPos := 1;
  256. while EndPos <= Length(s) do
  257. begin
  258. if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
  259. begin
  260. wrtChars(@s[StartPos], EndPos - StartPos);
  261. SpecialCharCallback(s[EndPos]);
  262. StartPos := EndPos + 1;
  263. end;
  264. Inc(EndPos);
  265. end;
  266. if StartPos <= length(s) then
  267. wrtChars(@s[StartPos], EndPos - StartPos);
  268. end;
  269. const
  270. QuotStr = '&quot;';
  271. AmpStr = '&amp;';
  272. ltStr = '&lt;';
  273. gtStr = '&gt;';
  274. procedure TXMLWriter.AttrSpecialCharCallback(c: WideChar);
  275. begin
  276. case c of
  277. '"': wrtStr(QuotStr);
  278. '&': wrtStr(AmpStr);
  279. '<': wrtStr(ltStr);
  280. // Escape whitespace using CharRefs to be consistent with W3 spec § 3.3.3
  281. #9: wrtStr('&#x9;');
  282. #10: wrtStr('&#xA;');
  283. #13: wrtStr('&#xD;');
  284. else
  285. wrtChr(c);
  286. end;
  287. end;
  288. procedure TXMLWriter.TextnodeSpecialCharCallback(c: WideChar);
  289. begin
  290. case c of
  291. '<': wrtStr(ltStr);
  292. '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
  293. '&': wrtStr(AmpStr);
  294. else
  295. wrtChr(c);
  296. end;
  297. end;
  298. procedure TXMLWriter.WriteNode(node: TDOMNode);
  299. begin
  300. case node.NodeType of
  301. ELEMENT_NODE: VisitElement(node);
  302. ATTRIBUTE_NODE: VisitAttribute(node);
  303. TEXT_NODE: VisitText(node);
  304. CDATA_SECTION_NODE: VisitCDATA(node);
  305. ENTITY_REFERENCE_NODE: VisitEntityRef(node);
  306. PROCESSING_INSTRUCTION_NODE: VisitPI(node);
  307. COMMENT_NODE: VisitComment(node);
  308. DOCUMENT_NODE: VisitDocument(node);
  309. DOCUMENT_TYPE_NODE: VisitDocumentType(node);
  310. ENTITY_NODE,
  311. DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
  312. end;
  313. end;
  314. procedure TXMLWriter.VisitElement(node: TDOMNode);
  315. var
  316. i: Integer;
  317. child: TDOMNode;
  318. SavedInsideTextNode: Boolean;
  319. begin
  320. if not FInsideTextNode then
  321. wrtIndent;
  322. wrtChr('<');
  323. wrtStr(TDOMElement(node).TagName);
  324. // FIX: Accessing Attributes was causing them to be created for every element :(
  325. if node.HasAttributes then
  326. for i := 0 to node.Attributes.Length - 1 do
  327. begin
  328. child := node.Attributes.Item[i];
  329. if TDOMAttr(child).Specified then
  330. VisitAttribute(child);
  331. end;
  332. Child := node.FirstChild;
  333. if Child = nil then
  334. wrtChars('/>', 2)
  335. else
  336. begin
  337. SavedInsideTextNode := FInsideTextNode;
  338. wrtChr('>');
  339. FInsideTextNode := Child.NodeType in [TEXT_NODE, CDATA_SECTION_NODE];
  340. IncIndent;
  341. repeat
  342. WriteNode(Child);
  343. Child := Child.NextSibling;
  344. until Child = nil;
  345. DecIndent;
  346. if not (node.LastChild.NodeType in [TEXT_NODE, CDATA_SECTION_NODE]) then
  347. wrtIndent;
  348. FInsideTextNode := SavedInsideTextNode;
  349. wrtChars('</', 2);
  350. wrtStr(TDOMElement(Node).TagName);
  351. wrtChr('>');
  352. end;
  353. end;
  354. procedure TXMLWriter.VisitText(node: TDOMNode);
  355. begin
  356. ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback);
  357. end;
  358. procedure TXMLWriter.VisitCDATA(node: TDOMNode);
  359. begin
  360. if not FInsideTextNode then
  361. wrtIndent;
  362. wrtChars('<![CDATA[', 9);
  363. wrtStr(TDOMCharacterData(node).Data);
  364. wrtChars(']]>', 3);
  365. end;
  366. procedure TXMLWriter.VisitEntityRef(node: TDOMNode);
  367. begin
  368. wrtChr('&');
  369. wrtStr(node.NodeName);
  370. wrtChr(';');
  371. end;
  372. procedure TXMLWriter.VisitPI(node: TDOMNode);
  373. begin
  374. if not FInsideTextNode then wrtIndent;
  375. wrtStr('<?');
  376. wrtStr(TDOMProcessingInstruction(node).Target);
  377. wrtChr(' ');
  378. wrtStr(TDOMProcessingInstruction(node).Data);
  379. wrtStr('?>');
  380. end;
  381. procedure TXMLWriter.VisitComment(node: TDOMNode);
  382. begin
  383. if not FInsideTextNode then wrtIndent;
  384. wrtChars('<!--', 4);
  385. wrtStr(TDOMCharacterData(node).Data);
  386. wrtChars('-->', 3);
  387. end;
  388. procedure TXMLWriter.VisitDocument(node: TDOMNode);
  389. var
  390. child: TDOMNode;
  391. begin
  392. wrtStr('<?xml version="');
  393. // Definitely should not escape anything here
  394. if Length(TXMLDocument(node).XMLVersion) > 0 then
  395. wrtStr(TXMLDocument(node).XMLVersion)
  396. else
  397. wrtStr('1.0');
  398. wrtChr('"');
  399. // DISABLED - we are only able write in UTF-8 which does not require labeling
  400. // writing incorrect encoding will render xml unreadable...
  401. (*
  402. if Length(TXMLDocument(node).Encoding) > 0 then
  403. begin
  404. wrtStr(' encoding="');
  405. wrtStr(TXMLDocument(node).Encoding);
  406. wrtChr('"');
  407. end;
  408. *)
  409. wrtStr('?>');
  410. // TODO: now handled as a regular PI, remove this?
  411. if Length(TXMLDocument(node).StylesheetType) > 0 then
  412. begin
  413. wrtStr(#10'<?xml-stylesheet type="');
  414. wrtStr(TXMLDocument(node).StylesheetType);
  415. wrtStr('" href="');
  416. wrtStr(TXMLDocument(node).StylesheetHRef);
  417. wrtStr('"?>');
  418. end;
  419. child := node.FirstChild;
  420. while Assigned(Child) do
  421. begin
  422. WriteNode(Child);
  423. Child := Child.NextSibling;
  424. end;
  425. wrtChars(#10, 1);
  426. end;
  427. procedure TXMLWriter.VisitAttribute(Node: TDOMNode);
  428. var
  429. Child: TDOMNode;
  430. begin
  431. wrtChr(' ');
  432. wrtStr(TDOMAttr(Node).Name);
  433. wrtChars('="', 2);
  434. Child := Node.FirstChild;
  435. while Assigned(Child) do
  436. begin
  437. case Child.NodeType of
  438. ENTITY_REFERENCE_NODE:
  439. VisitEntityRef(Child);
  440. TEXT_NODE:
  441. ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
  442. end;
  443. Child := Child.NextSibling;
  444. end;
  445. wrtChr('"');
  446. end;
  447. procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
  448. begin
  449. wrtStr(#10'<!DOCTYPE ');
  450. wrtStr(Node.NodeName);
  451. wrtChr(' ');
  452. with TDOMDocumentType(Node) do
  453. begin
  454. if PublicID <> '' then
  455. begin
  456. wrtStr('PUBLIC ');
  457. wrtQuotedLiteral(PublicID);
  458. wrtChr(' ');
  459. wrtQuotedLiteral(SystemID);
  460. end
  461. else if SystemID <> '' then
  462. begin
  463. wrtStr('SYSTEM ');
  464. wrtQuotedLiteral(SystemID);
  465. end;
  466. if InternalSubset <> '' then
  467. begin
  468. wrtChr('[');
  469. wrtStr(InternalSubset);
  470. wrtChr(']');
  471. end;
  472. end;
  473. wrtChr('>');
  474. end;
  475. procedure TXMLWriter.VisitFragment(Node: TDOMNode);
  476. var
  477. Child: TDOMNode;
  478. begin
  479. // TODO: TextDecl is probably needed
  480. // Fragment itself should not be written, only its children should...
  481. Child := Node.FirstChild;
  482. while Assigned(Child) do
  483. begin
  484. WriteNode(Child);
  485. Child := Child.NextSibling;
  486. end;
  487. end;
  488. // -------------------------------------------------------------------
  489. // Interface implementation
  490. // -------------------------------------------------------------------
  491. procedure WriteXMLFile(doc: TXMLDocument; const AFileName: String);
  492. var
  493. fs: TFileStream;
  494. begin
  495. fs := TFileStream.Create(AFileName, fmCreate);
  496. try
  497. WriteXMLFile(doc, fs);
  498. finally
  499. fs.Free;
  500. end;
  501. end;
  502. procedure WriteXMLFile(doc: TXMLDocument; var AFile: Text);
  503. begin
  504. with TTextXMLWriter.Create(AFile) do
  505. try
  506. WriteNode(doc);
  507. finally
  508. Free;
  509. end;
  510. end;
  511. procedure WriteXMLFile(doc: TXMLDocument; AStream: TStream);
  512. begin
  513. with TStreamXMLWriter.Create(AStream) do
  514. try
  515. WriteNode(doc);
  516. finally
  517. Free;
  518. end;
  519. end;
  520. procedure WriteXML(Element: TDOMNode; const AFileName: String);
  521. begin
  522. WriteXMLFile(TXMLDocument(Element), AFileName);
  523. end;
  524. procedure WriteXML(Element: TDOMNode; var AFile: Text);
  525. begin
  526. WriteXMLFile(TXMLDocument(Element), AFile);
  527. end;
  528. procedure WriteXML(Element: TDOMNode; AStream: TStream);
  529. begin
  530. WriteXMLFile(TXMLDocument(Element), AStream);
  531. end;
  532. end.