123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549 |
- {
- This file is part of the Free Component Library
- HTML writing routines
- Copyright (c) 2000-2002 by
- Areca Systems GmbH / Sebastian Guenther, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit HTMWrite;
- {$MODE objfpc}
- {$H+}
- interface
- uses Classes, DOM;
{ Serialize a whole document as UTF-8 HTML to a named file, an open Text
  file, or a stream. }
procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);

{ Same three targets, but starting from an arbitrary DOM node. }
procedure WriteHTML(Element: TDOMNode; const AFileName: String);
procedure WriteHTML(Element: TDOMNode; var AFile: Text);
procedure WriteHTML(Element: TDOMNode; AStream: TStream);
- // ===================================================================
- implementation
- uses SysUtils, HTMLDefs, xmlutils;
type
  { Method pointer that ConvWrite invokes for every character it finds in
    the caller-supplied special-character set. }
  TSpecialCharCallback = procedure(c: WideChar) of object;

  { Walks a DOM tree and writes it as UTF-8 encoded HTML to a stream,
    going through an internal byte buffer. }
  THTMLWriter = class(TObject)
  private
    FStream: TStream;          // destination of the encoded bytes
    FInsideTextNode: Boolean;  // True while inside PCDATA content; suppresses line breaks between nodes
    FInsideScript: Boolean;    // True for script/style content; text nodes are then written unescaped
    FBuffer: PChar;            // start of the output buffer (FCapacity + 32 bytes)
    FBufPos: PChar;            // next free byte inside the buffer
    FCapacity: Integer;        // nominal buffer size at which wrtChars flushes
    FLineBreak: string;        // sequence written in place of LF / CR / CRLF

    // Low-level output
    procedure wrtChars(Src: PWideChar; Length: Integer);
    procedure wrtStr(const ws: XMLString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
    procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
    procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
    procedure wrtQuotedLiteral(const ws: XMLString);

    // Escaping support
    procedure ConvWrite(const s: XMLString; const SpecialChars: TSetOfChar;
      const SpecialCharCallback: TSpecialCharCallback);
    procedure AttrSpecialCharCallback(c: WideChar);
    procedure TextNodeSpecialCharCallback(c: WideChar);
  protected
    // Dispatcher and one visitor per node kind
    procedure WriteNode(Node: TDOMNode);
    procedure VisitDocument(Node: TDOMNode);
    procedure VisitElement(Node: TDOMNode);
    procedure VisitText(Node: TDOMNode);
    procedure VisitCDATA(Node: TDOMNode);
    procedure VisitComment(Node: TDOMNode);
    procedure VisitFragment(Node: TDOMNode);
    procedure VisitAttribute(Node: TDOMNode);
    procedure VisitEntityRef(Node: TDOMNode);
    procedure VisitDocumentType(Node: TDOMNode);
    procedure VisitPI(Node: TDOMNode);
  public
    constructor Create(AStream: TStream; ACapacity : Cardinal = 4096);
    destructor Destroy; override;
  end;

  { Stream whose Write method forwards the bytes to a Text file variable. }
  TTextStream = class(TStream)
  private
    F : ^Text;  // address of the caller's Text variable
  public
    constructor Create(var AFile: Text);
    function Write(Const Buffer; Count: Longint): Longint; override;
  end;
- { ---------------------------------------------------------------------
- TTextStream
- ---------------------------------------------------------------------}
{ Remembers the address of AFile; the Text variable itself is not copied. }
constructor TTextStream.Create(var AFile: Text);
begin
  inherited Create;
  F := Addr(AFile);
end;
{ Copies Count bytes from Buffer into a string and writes that string to the
  wrapped Text file. Always reports Count as the number of bytes written. }
function TTextStream.Write(const Buffer; Count: Longint): Longint;
var
  Chunk: string;
begin
  Result := Count;
  // Nothing to forward for empty or negative requests
  if Count <= 0 then
    Exit;
  SetString(Chunk, PChar(@Buffer), Count);
  system.Write(F^, Chunk);
end;
- { ---------------------------------------------------------------------
- THTMLWriter
- ---------------------------------------------------------------------}
{ Creates a writer that sends its output to AStream, buffering ACapacity
  bytes (plus slack) before each flush. }
constructor THTMLWriter.Create(AStream: TStream; ACapacity : Cardinal = 4096);
begin
  inherited Create;
  // Later on, the line ending may be put under user control;
  // for now, take the OS setting
  FLineBreak := sLineBreak;
  FStream := AStream;
  FCapacity := ACapacity;
  // 32 bytes of overhead beyond the nominal capacity, so that at least one
  // extra UCS4 character can always be written
  FBuffer := AllocMem(FCapacity + 32);
  FBufPos := FBuffer;
end;
{ Writes whatever is still buffered to the stream, then releases the buffer. }
destructor THTMLWriter.Destroy;
var
  Pending: PtrInt;
begin
  Pending := FBufPos - FBuffer;
  if Pending > 0 then
    FStream.write(FBuffer^, Pending);
  FreeMem(FBuffer);
  inherited Destroy;
end;
{ Encodes Length UTF-16 code units starting at Src as UTF-8 and appends them
  to the output buffer, flushing to the stream whenever the nominal capacity
  is reached. LF, CR and CR+LF are each replaced by FLineBreak.
  Raises EConvertError on an unpaired surrogate. }
procedure THTMLWriter.wrtChars(Src: PWideChar; Length: Integer);
var
  Dest: PChar;
  CodePoint: Cardinal;
  Stop: PWideChar;
begin
  Dest := FBufPos;
  Stop := Src + Length;
  while Src < Stop do
  begin
    // Flush one full buffer; bytes that spilled into the slack area are
    // moved to the front of the buffer.
    if Dest >= @FBuffer[FCapacity] then
    begin
      FStream.write(FBuffer^, FCapacity);
      Dec(Dest, FCapacity);
      if Dest > FBuffer then
        Move(FBuffer[FCapacity], FBuffer^, Dest - FBuffer);
    end;

    CodePoint := Cardinal(Src^);
    Inc(Src);

    if (CodePoint = $0A) or (CodePoint = $0D) then
    begin
      // Line ending: emit the configured break, swallowing the LF of a CR+LF pair
      Dest := StrECopy(Dest, PChar(FLineBreak));
      if (CodePoint = $0D) and (Src < Stop) and (Src^ = #$0A) then
        Inc(Src);
    end
    else if CodePoint < $80 then
    begin
      // Remaining 7-bit characters: one byte
      Dest^ := Char(CodePoint);
      Inc(Dest);
    end
    else if CodePoint < $800 then
    begin
      // $80..$7FF: two bytes
      Dest^ := Char($C0 or (CodePoint shr 6));
      Dest[1] := Char($80 or (CodePoint and $3F));
      Inc(Dest, 2);
    end
    else if (CodePoint >= $D800) and (CodePoint <= $DBFF) then
    begin
      // High surrogate: must be followed by a low surrogate; the pair is
      // combined into one code point and written as four bytes
      if not ((Src < Stop) and (Src^ >= #$DC00) and (Src^ <= #$DFFF)) then
        raise EConvertError.Create('High surrogate without low one');
      CodePoint := ((LongInt(CodePoint) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
      Inc(Src);
      Dest^ := Char($F0 or (CodePoint shr 18));
      Dest[1] := Char($80 or ((CodePoint shr 12) and $3F));
      Dest[2] := Char($80 or ((CodePoint shr 6) and $3F));
      Dest[3] := Char($80 or (CodePoint and $3F));
      Inc(Dest, 4);
    end
    else if (CodePoint >= $DC00) and (CodePoint <= $DFFF) then
      raise EConvertError.Create('Low surrogate without high one')
    else
    begin
      // $800 <= CodePoint <= $FFFF, excluding surrogates: three bytes
      Dest^ := Char($E0 or (CodePoint shr 12));
      Dest[1] := Char($80 or ((CodePoint shr 6) and $3F));
      Dest[2] := Char($80 or (CodePoint and $3F));
      Inc(Dest, 3);
    end;
  end;
  FBufPos := Dest;
end;
{ Writes a whole string through wrtChars. }
procedure THTMLWriter.wrtStr(const ws: XMLString); { inline }
var
  Count: Integer;
begin
  Count := Length(ws);
  wrtChars(PWideChar(ws), Count);
end;
{ Stores the ordinal of c as a single byte at the current buffer position.
  No checks here - buffer always has 32 extra bytes. }
procedure THTMLWriter.wrtChr(c: WideChar); { inline }
begin
  FBufPos[0] := Char(Ord(c));
  FBufPos := FBufPos + 1;
end;
{ Writes a single line feed through wrtChars, which replaces it with FLineBreak. }
procedure THTMLWriter.wrtIndent; { inline }
var
  NewLine: WideChar;
begin
  NewLine := #10;
  wrtChars(@NewLine, 1);
end;
{ Writes ws enclosed in double quotes, or in single quotes when ws itself
  contains a double quote. The text is written as-is, without escaping. }
procedure THTMLWriter.wrtQuotedLiteral(const ws: XMLString);
var
  Delim: WideChar;
begin
  // TODO: need to check if the string also contains a single quote;
  // both quotes present is an error
  Delim := '"';
  if Pos('"', ws) > 0 then
    Delim := '''';
  wrtChr(Delim);
  wrtStr(ws);
  wrtChr(Delim);
end;
const
  // Characters handed to the escape callback when writing attribute values
  AttrSpecialChars = ['<', '"', '&'];
  // Characters handed to the escape callback when writing text nodes
  TextSpecialChars = ['<', '>', '&'];
{ Writes s, passing every character that belongs to SpecialChars to
  SpecialCharCallback instead of writing it directly. The runs of ordinary
  characters in between are written in one wrtChars call each. }
procedure THTMLWriter.ConvWrite(const s: XMLString; const SpecialChars: TSetOfChar;
  const SpecialCharCallback: TSpecialCharCallback);
var
  RunStart, Idx, Len: Integer;
begin
  RunStart := 1;
  Len := Length(s);
  for Idx := 1 to Len do
    // Only characters below #255 are tested against the set
    if (s[Idx] < #255) and (Char(ord(s[Idx])) in SpecialChars) then
    begin
      wrtChars(@s[RunStart], Idx - RunStart);
      SpecialCharCallback(s[Idx]);
      RunStart := Idx + 1;
    end;
  // Trailing run after the last special character
  if RunStart <= Len then
    wrtChars(@s[RunStart], Len + 1 - RunStart);
end;
const
  // Character references written in place of the markup-significant
  // characters. They must be the escaped forms: writing the raw character
  // back would leave attribute values and text content unescaped.
  QuotStr = '&quot;';
  AmpStr = '&amp;';
  ltStr = '&lt;';
  gtStr = '&gt;';
{ Escape handler for attribute values: writes the replacement string for
  a double quote, ampersand or less-than sign; any other character is
  written unchanged. }
procedure THTMLWriter.AttrSpecialCharCallback(c: WideChar);
begin
  if c = '"' then
    wrtStr(QuotStr)
  else if c = '&' then
    wrtStr(AmpStr)
  else if c = '<' then
    wrtStr(ltStr)
  else
    wrtChr(c);
end;
{ Escape handler for text nodes: writes the replacement string for
  less-than, greater-than or ampersand; any other character is written
  unchanged. }
procedure THTMLWriter.TextnodeSpecialCharCallback(c: WideChar);
begin
  if c = '<' then
    wrtStr(ltStr)
  else if c = '>' then
    // Required only in ']]>' literal, otherwise optional
    wrtStr(gtStr)
  else if c = '&' then
    wrtStr(AmpStr)
  else
    wrtChr(c);
end;
{ Dispatches a node to the visitor method for its node type. Node types
  that are not listed here produce no output. }
procedure THTMLWriter.WriteNode(node: TDOMNode);
begin
  case node.NodeType of
    // Containers
    DOCUMENT_NODE:
      VisitDocument(node);
    DOCUMENT_TYPE_NODE:
      VisitDocumentType(node);
    ENTITY_NODE, DOCUMENT_FRAGMENT_NODE:
      VisitFragment(node);
    // Markup
    ELEMENT_NODE:
      VisitElement(node);
    ATTRIBUTE_NODE:
      VisitAttribute(node);
    PROCESSING_INSTRUCTION_NODE:
      VisitPI(node);
    COMMENT_NODE:
      VisitComment(node);
    // Character data
    TEXT_NODE:
      VisitText(node);
    CDATA_SECTION_NODE:
      VisitCDATA(node);
    ENTITY_REFERENCE_NODE:
      VisitEntityRef(node);
  end;
end;
{ Writes an element: start tag with attributes, all child nodes, and the end
  tag when the element's flags say it has content.

  The element name is looked up (lower-cased) in HTMLElementProps to obtain
  its flags; unknown elements get the default flags
  [efSubelementContent, efPCDATAContent].

  A <meta http-equiv="content-type"> element is not copied: it is replaced
  by a fixed declaration of charset=utf-8, because UTF-8 is what this
  writer actually emits.

  FInsideScript is saved on entry and restored on every exit path, so that
  text following a </script> or </style> end tag is escaped again. }
procedure THTMLWriter.VisitElement(node: TDOMNode);
var
  i: Integer;
  child: TDOMNode;
  SavedInsideTextNode: Boolean;
  SavedInsideScript: Boolean;
  s: string;
  ElFlags: THTMLElementFlags;
  j: THTMLElementTag;
  meta: Boolean;
begin
  if not FInsideTextNode then
    wrtIndent;

  // State of the enclosing element; put back before returning
  SavedInsideScript := FInsideScript;

  meta := False;
  s := LowerCase(node.NodeName);
  ElFlags := [efSubelementContent, efPCDATAContent];  // default flags
  for j := Low(THTMLElementTag) to High(THTMLElementTag) do
    if HTMLElementProps[j].Name = s then
    begin
      ElFlags := HTMLElementProps[j].Flags;
      if j = etMeta then
        meta := True;
      // Text inside script/style is written without escaping (see VisitText)
      FInsideScript := (j = etScript) or (j = etStyle);
      break;
    end;

  wrtChr('<');
  wrtStr(TDOMElement(node).TagName);

  { Force charset label to utf-8, because it is the encoding we actually write }
  if meta then
  begin
    s := TDOMElement(node).GetAttribute('http-equiv');
    if SameText(s, 'content-type') then
    begin
      wrtStr(' content="text/html; charset=utf-8" http-equiv="Content-Type">');
      FInsideScript := SavedInsideScript;
      Exit;
    end;
  end;

  if node.HasAttributes then
    for i := 0 to node.Attributes.Length - 1 do
    begin
      child := node.Attributes.Item[i];
      VisitAttribute(child);
    end;
  wrtChr('>');

  child := node.FirstChild;
  if child <> nil then
  begin
    SavedInsideTextNode := FInsideTextNode;
    FInsideTextNode := efPCDATAContent in ElFlags;
    repeat
      WriteNode(child);
      child := child.NextSibling;
    until child = nil;
    FInsideTextNode := SavedInsideTextNode;
  end;

  // Children are done: siblings that follow belong to the enclosing element
  FInsideScript := SavedInsideScript;

  if (not FInsideTextNode) and not (efPCDATAContent in ElFlags) then
    wrtIndent;
  // Elements with neither content flag get no end tag
  if ElFlags * [efSubelementContent, efPCDATAContent] <> [] then
  begin
    wrtChars('</', 2);
    wrtStr(TDOMElement(node).TagName);
    wrtChr('>');
  end;
end;
{ Writes a text node. Inside script/style content the data is written
  verbatim; everywhere else the characters in TextSpecialChars go through
  the text-node escape callback. }
procedure THTMLWriter.VisitText(node: TDOMNode);
begin
  if not FInsideScript then
    ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback)
  else
    WrtStr(TDOMCharacterData(node).Data);
end;
{ Writes a CDATA section; the data goes out unescaped between the
  section delimiters. }
procedure THTMLWriter.VisitCDATA(node: TDOMNode);
const
  OpenLen = 9;
  CloseLen = 3;
begin
  if not FInsideTextNode then
    wrtIndent;
  wrtChars('<![CDATA[', OpenLen);
  wrtStr(TDOMCharacterData(node).Data);
  wrtChars(']]>', CloseLen);
end;
{ Writes an entity reference as '&', the node name, and ';'. }
procedure THTMLWriter.VisitEntityRef(node: TDOMNode);
const
  RefOpen = '&';
  RefClose = ';';
begin
  wrtChr(RefOpen);
  wrtStr(node.NodeName);
  wrtChr(RefClose);
end;
{ Writes a processing instruction: opening delimiter, target, a space,
  the data, and the closing delimiter. }
procedure THTMLWriter.VisitPI(node: TDOMNode);
var
  Instr: TDOMProcessingInstruction;
begin
  if not FInsideTextNode then
    wrtIndent;
  Instr := TDOMProcessingInstruction(node);
  wrtStr('<?');
  wrtStr(Instr.Target);
  wrtChr(' ');
  wrtStr(Instr.Data);
  wrtStr('?>');
end;
{ Writes a comment node; the comment text goes out unescaped between the
  comment delimiters. }
procedure THTMLWriter.VisitComment(node: TDOMNode);
const
  OpenLen = 4;
  CloseLen = 3;
begin
  if not FInsideTextNode then
    wrtIndent;
  wrtChars('<!--', OpenLen);
  wrtStr(TDOMCharacterData(node).Data);
  wrtChars('-->', CloseLen);
end;
{ Writes every child of the document node in order, followed by one
  final line break. }
procedure THTMLWriter.VisitDocument(node: TDOMNode);
var
  Current: TDOMNode;
begin
  Current := node.FirstChild;
  while Current <> nil do
  begin
    WriteNode(Current);
    Current := Current.NextSibling;
  end;
  // Trailing line break after the last top-level node
  wrtChars(#10, 1);
end;
{ Writes one attribute as ' name="value"'. The value is assembled from the
  attribute's child nodes: text children are escaped through the attribute
  callback, entity references are written as references, and children of
  any other type are skipped. }
procedure THTMLWriter.VisitAttribute(Node: TDOMNode);
var
  Part: TDOMNode;
begin
  wrtChr(' ');
  wrtStr(TDOMAttr(Node).Name);
  wrtChars('="', 2);

  Part := Node.FirstChild;
  while Part <> nil do
  begin
    if Part.NodeType = ENTITY_REFERENCE_NODE then
      VisitEntityRef(Part)
    else if Part.NodeType = TEXT_NODE then
      ConvWrite(TDOMCharacterData(Part).Data, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
    Part := Part.NextSibling;
  end;

  wrtChr('"');
end;
{ Writes the DOCTYPE declaration: the node name, then either
  PUBLIC <public id> [<system id>] or SYSTEM <system id>, depending on
  which identifiers are non-empty. }
procedure THTMLWriter.VisitDocumentType(Node: TDOMNode);
var
  DocType: TDOMDocumentType;
begin
  DocType := TDOMDocumentType(Node);
  wrtStr('<!DOCTYPE ');
  wrtStr(Node.NodeName);
  wrtChr(' ');
  if DocType.PublicID <> '' then
  begin
    wrtStr('PUBLIC ');
    wrtQuotedLiteral(DocType.PublicID);
    // The system identifier is optional after a public identifier
    if DocType.SystemID <> '' then
    begin
      wrtChr(' ');
      wrtQuotedLiteral(DocType.SystemID);
    end;
  end
  else if DocType.SystemID <> '' then
  begin
    wrtStr('SYSTEM ');
    wrtQuotedLiteral(DocType.SystemID);
  end;
  wrtChr('>');
end;
{ Writes the children of a fragment (or entity) node. The node itself
  produces no output. }
procedure THTMLWriter.VisitFragment(Node: TDOMNode);
var
  Current: TDOMNode;
begin
  Current := Node.FirstChild;
  while Current <> nil do
  begin
    WriteNode(Current);
    Current := Current.NextSibling;
  end;
end;
- // -------------------------------------------------------------------
- // Interface implementation
- // -------------------------------------------------------------------
{ Creates (or overwrites) the file AFileName and writes doc into it via the
  stream overload. The file stream is released even if writing fails. }
procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String);
var
  Output: TFileStream;
begin
  Output := TFileStream.Create(AFileName, fmCreate);
  try
    WriteHTMLFile(doc, Output);
  finally
    Output.Free;
  end;
end;
{ Writes doc to an already opened Text file by wrapping the file in a
  TTextStream. The writer is freed before the wrapper, so the writer's
  final flush still reaches the file. }
procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
var
  Wrapper: TStream;
  Writer: THTMLWriter;
begin
  Wrapper := TTextStream.Create(AFile);
  try
    Writer := THTMLWriter.Create(Wrapper);
    try
      Writer.WriteNode(doc);
    finally
      Writer.Free;
    end;
  finally
    Wrapper.Free;
  end;
end;
{ Writes doc to AStream using a temporary THTMLWriter. Freeing the writer
  flushes its buffer to the stream; AStream itself is not freed here. }
procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
var
  Writer: THTMLWriter;
begin
  Writer := THTMLWriter.Create(AStream);
  try
    Writer.WriteNode(doc);
  finally
    Writer.Free;
  end;
end;
{ Writes an arbitrary node to the file AFileName. The node is passed on
  through an unchecked typecast to TXMLDocument; the WriteHTMLFile overloads
  only hand it to WriteNode, which accepts any TDOMNode. }
procedure WriteHTML(Element: TDOMNode; const AFileName: String);
var
  Root: TXMLDocument;
begin
  Root := TXMLDocument(Element);
  WriteHTMLFile(Root, AFileName);
end;
{ Writes an arbitrary node to an open Text file. The node is passed on
  through an unchecked typecast to TXMLDocument; the WriteHTMLFile overload
  only hands it to WriteNode, which accepts any TDOMNode. }
procedure WriteHTML(Element: TDOMNode; var AFile: Text);
var
  Root: TXMLDocument;
begin
  Root := TXMLDocument(Element);
  WriteHTMLFile(Root, AFile);
end;
{ Writes an arbitrary node to a stream. The node is passed on through an
  unchecked typecast to TXMLDocument; the WriteHTMLFile overload only hands
  it to WriteNode, which accepts any TDOMNode. }
procedure WriteHTML(Element: TDOMNode; AStream: TStream);
var
  Root: TXMLDocument;
begin
  Root := TXMLDocument(Element);
  WriteHTMLFile(Root, AStream);
end;
- end.
|