Browse Source

* Small cleanups
* Replaced htmldoc unit with dom_html unit
* Added SAX parser framework and SAX HTML parser

sg 22 years ago
parent
commit
d2e9518fbe
6 changed files with 2435 additions and 8 deletions
  1. 2 2
      fcl/xml/Makefile
  2. 2 2
      fcl/xml/Makefile.fpc
  3. 6 4
      fcl/xml/dom.pp
  4. 932 0
      fcl/xml/dom_html.pp
  5. 938 0
      fcl/xml/sax.pp
  6. 555 0
      fcl/xml/sax_html.pp

+ 2 - 2
fcl/xml/Makefile

@@ -213,9 +213,9 @@ UNITSDIR:=$(wildcard $(FPCDIR)/units/$(OS_TARGET))
 endif
 endif
 PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages/base $(FPCDIR)/packages/extra)
 PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages/base $(FPCDIR)/packages/extra)
 override PACKAGE_NAME=fcl
 override PACKAGE_NAME=fcl
-override TARGET_UNITS+=dom htmldoc xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
+override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
 override INSTALL_FPCPACKAGE=y
 override INSTALL_FPCPACKAGE=y
-override COMPILER_OPTIONS+=-S2
+override COMPILER_OPTIONS+=-S2h
 override COMPILER_TARGETDIR+=../$(OS_TARGET)
 override COMPILER_TARGETDIR+=../$(OS_TARGET)
 ifdef REQUIRE_UNITSDIR
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 override UNITSDIR+=$(REQUIRE_UNITSDIR)

+ 2 - 2
fcl/xml/Makefile.fpc

@@ -6,10 +6,10 @@
 main=fcl
 main=fcl
 
 
 [target]
 [target]
-units=dom htmldoc xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
+units=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
 
 
 [compiler]
 [compiler]
-options=-S2
+options=-S2h
 targetdir=../$(OS_TARGET)
 targetdir=../$(OS_TARGET)
 
 
 [install]
 [install]

+ 6 - 4
fcl/xml/dom.pp

@@ -32,9 +32,6 @@
 
 
 unit DOM;
 unit DOM;
 
 
-{$MODE objfpc}
-{$H+}
-
 interface
 interface
 
 
 uses SysUtils, Classes;
 uses SysUtils, Classes;
@@ -1504,7 +1501,12 @@ end.
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.10  2002-09-07 15:15:29  peter
+  Revision 1.11  2002-12-11 21:06:07  sg
+  * Small cleanups
+  * Replaced htmldoc unit with dom_html unit
+  * Added SAX parser framework and SAX HTML parser
+
+  Revision 1.10  2002/09/07 15:15:29  peter
     * old logs removed and tabs fixed
     * old logs removed and tabs fixed
 
 
   Revision 1.9  2002/03/01 10:02:38  sg
   Revision 1.9  2002/03/01 10:02:38  sg

+ 932 - 0
fcl/xml/dom_html.pp

@@ -0,0 +1,932 @@
+{
+    $Id$
+    This file is part of the Free Component Library
+
+    Implementation of DOM HTML interfaces
+    Copyright (c) 2002 by
+      Areca Systems GmbH / Sebastian Guenther, [email protected]
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+{ Please note that this is a very early version, most properties and methods
+  are not implemented yet. }
+
+
+unit DOM_HTML;
+
+interface
+
+uses DOM;
+
+type
+
+  THTMLDocument = class;
+  THTMLFormElement = class;
+  THTMLTableCaptionElement = class;
+  THTMLTableSectionElement = class;
+
+  THTMLCollection = class
+  public
+    property Length: Cardinal;	// !!!: ro
+    function Item(Index: Cardinal): TDOMNode;
+    function NamedItem(const Index: DOMString): TDOMNode;
+  end;
+
+  THTMLOptionsCollection = class
+  public
+    property Length: Cardinal;	// !!!: ro
+    function Item(Index: Cardinal): TDOMNode;
+    function NamedItem(const Index: DOMString): TDOMNode;
+  end;
+
+  THTMLElement = class(TDOMElement)
+  private
+    function GetID: DOMString;
+    procedure SetID(const Value: DOMString);
+    function GetTitle: DOMString;
+    procedure SetTitle(const Value: DOMString);
+    function GetLang: DOMString;
+    procedure SetLang(const Value: DOMString);
+    function GetDir: DOMString;
+    procedure SetDir(const Value: DOMString);
+    function GetClassName: DOMString;
+    procedure SetClassName(const Value: DOMString);
+  protected
+    constructor Create(AOwner: THTMLDocument; const ATagName: DOMString);
+  public
+    property ID: DOMString read GetID write SetID;
+    property Title: DOMString read GetTitle write SetTitle;
+    property Lang: DOMString read GetLang write SetLang;
+    property Dir: DOMString read GetDir write SetDir;
+    property ClassName: DOMString read GetClassName write SetClassName;
+  end;
+
+  THTMLHtmlElement = class(THTMLElement)
+  private
+    function GetVersion: DOMString;
+    procedure SetVersion(const Value: DOMString);
+  public
+    property Version: DOMString read GetVersion write SetVersion;
+  end;
+
+  THTMLHeadElement = class(THTMLElement)
+  private
+    function GetProfile: DOMString;
+    procedure SetProfile(const Value: DOMString);
+  public
+    property Profile: DOMString read GetProfile write SetProfile;
+  end;
+
+  THTMLLinkElement = class(THTMLElement)
+  public
+    property Disabled: Boolean;	// !!!: rw
+    property Charset: DOMString;	// !!!: rw
+    property HRef: DOMString;	// !!!: rw
+    property HRefLang: DOMString;	// !!!: rw
+    property Media: DOMString;	// !!!: rw
+    property Rel: DOMString;	// !!!: rw
+    property Rev: DOMString;	// !!!: rw
+    property Target: DOMString;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+  end;
+
+  THTMLTitleElement = class(THTMLElement)
+  public
+    property Text: DOMString;	// !!!: rw
+  end;
+
+  THTMLMetaElement = class(THTMLElement)
+  public
+    property Content: DOMString;	// !!!: rw
+    property HTTPEqiv: DOMString;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property Scheme: DOMString;	// !!!: rw
+  end;
+
+  THTMLBaseElement = class(THTMLElement)
+  public
+    property HRef: DOMString;	// !!!: rw
+    property Target: DOMString;	// !!!: rw
+  end;
+
+  THTMLIsIndexElement = class(THTMLElement)
+  public
+    property Form: THTMLFormElement;	// !!!: ro
+    property Prompt: DOMString;	// !!!: rw
+  end;
+
+  THTMLStyleElement = class(THTMLElement)
+  public
+    property Disabled: Boolean;	// !!!: rw
+    property Media: DOMString;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+  end;
+
+  THTMLBodyElement = class(THTMLElement)
+  public
+    property ALink: DOMString;	// !!!: rw
+    property Background: DOMString;	// !!!: rw
+    property BgColor: DOMString;	// !!!: rw
+    property Link: DOMString;	// !!!: rw
+    property Text: DOMString;	// !!!: rw
+    property VLink: DOMString;	// !!!: rw
+  end;
+
+  THTMLFormElement = class(THTMLElement)
+  public
+    property Elements: THTMLCollection;	// !!!: ro
+    property Length: Integer;	// !!!: ro
+    property Name: DOMString;	// !!!: rw
+    property AcceptCharset: DOMString;	// !!!: rw
+    property Action: DOMString;	// !!!: rw
+    property EncType: DOMString;	// !!!: rw
+    property Method: DOMString;	// !!!: rw
+    property Target: DOMString;	// !!!: rw
+    procedure Submit; virtual; abstract;
+    procedure Reset; virtual; abstract;
+  end;
+
+  THTMLSelectElement = class(THTMLElement)
+  public
+    property HTMLType: DOMString;	// !!!: ro
+    property SelectedIndex: Integer;	// !!!: rw
+    property Value: DOMString;	// !!!: rw
+    property Length: Cardinal;	// !!!: rw
+    property Form: THTMLFormElement;	// !!!: ro
+    property Options: THTMLOptionsCollection;	// !!!: ro
+    property Disabled: Boolean;	// !!!: rw
+    property Multiple: Boolean;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property Size: Integer;	// !!!: rw
+    property TabIndex: Integer;	// !!!: rw
+    procedure Add(Element, Before: THTMLElement);
+    procedure Remove(Index: Integer);
+    procedure Blur; virtual; abstract;
+    procedure Focus; virtual; abstract;
+  end;
+
+  THTMLOptGroupElement = class(THTMLElement)
+  public
+    property Disabled: Boolean;	// !!!: rw
+    property GroupLabel: DOMString;	// !!!: rw
+  end;
+
+  THTMLOptionElement = class(THTMLElement)
+  public
+    property Form: THTMLFormElement;	// !!!: ro
+    property DefaultSelected: Boolean;	// !!!: rw
+    property Text: DOMString;	// !!!: ro
+    property Index: Integer;	// !!!: ro
+    property Disabled: Boolean;	// !!!: rw
+    property OptionLabel: DOMString;	// !!!: rw
+    property Selected: Boolean;	// !!!: rw
+    property Value: DOMString;	// !!!: rw
+  end;
+
+  THTMLInputElement = class(THTMLElement)
+  public
+    property DefaultValue: DOMString;	// !!!: rw
+    property DefaultChecked: Boolean;	// !!!: rw
+    property Form: THTMLFormElement;	// !!!: ro
+    property Accept: DOMString;	// !!!: rw
+    property AccessKey: DOMString;	// !!!: rw
+    property Align: DOMString;	// !!!: rw
+    property Alt: DOMString;	// !!!: rw
+    property Checked: Boolean;	// !!!: rw
+    property Disabled: Boolean;	// !!!: rw
+    property MaxLength: Integer;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property ReadOnly: Boolean;	// !!!: rw
+    property Size: Cardinal;	// !!!: rw
+    property Src: DOMString;	// !!!: rw
+    property TabIndex: Integer;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+    property UseMap: DOMString;	// !!!: rw
+    property Value: DOMString;	// !!!: rw
+    procedure Blur; virtual; abstract;
+    procedure Focus; virtual; abstract;
+    procedure Select; virtual; abstract;
+    procedure Click; virtual; abstract;
+  end;
+
+  THTMLTextAreaElement = class(THTMLElement)
+  public
+    property DefaultValue: DOMString;	// !!!: rw
+    property Form: THTMLFormElement;	// !!!: ro
+    property AccessKey: DOMString;	// !!!: rw
+    property Cols: Integer;	// !!!: rw
+    property Disabled: Boolean;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property ReadOnly: Boolean;	// !!!: rw
+    property Rows: Integer;	// !!!: rw
+    property TabIndex: Integer;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+    property Value: DOMString;	// !!!: rw
+    procedure Blur; virtual; abstract;
+    procedure Focus; virtual; abstract;
+    procedure Select; virtual; abstract;
+  end;
+
+  THTMLButtonElement = class(THTMLElement)
+  public
+    property Form: THTMLFormElement;	// !!!: ro
+    property AccessKey: DOMString;	// !!!: rw
+    property Disabled: Boolean;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property TabIndex: Integer;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+    property Value: DOMString;	// !!!: rw
+  end;
+
+  THTMLLabelElement = class(THTMLElement)
+  public
+    property Form: THTMLFormElement;	// !!!: ro
+    property AccessKey: DOMString;	// !!!: rw
+    property HtmlFor: DOMString;	// !!!: rw
+  end;
+
+  THTMLFieldSetElement = class(THTMLElement)
+  public
+    property Form: THTMLFormElement;	// !!!: ro
+  end;
+
+  THTMLLegendElement = class(THTMLElement)
+  public
+    property Form: THTMLFormElement;	// !!!: ro
+    property AccessKey: DOMString;	// !!!: rw
+    property Align: DOMString;	// !!!: rw
+  end;
+
+  THTMLUListElement = class(THTMLElement)
+  public
+    property Compact: Boolean;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+  end;
+
+  THTMLOListElement = class(THTMLElement)
+  public
+    property Compact: Boolean;	// !!!: rw
+    property Start: Integer;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+  end;
+
+  THTMLDListElement = class(THTMLElement)
+  public
+    property Compact: Boolean;	// !!!: rw
+  end;
+
+  THTMLDirectoryElement = class(THTMLElement)
+  public
+    property Compact: Boolean;	// !!!: rw
+  end;
+
+  THTMLMenuElement = class(THTMLElement)
+  public
+    property Compact: Boolean;	// !!!: rw
+  end;
+
+  THTMLLIElement = class(THTMLElement)
+  public
+    property HTMLType: DOMString;	// !!!: rw
+    property Value: Integer;	// !!!: rw
+  end;
+
+  THTMLDivElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+  end;
+
+  THTMLParagraphElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+  end;
+
+  THTMLHeadingElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+  end;
+
+  THTMLQuoteElement = class(THTMLElement)
+  public
+    property Cite: DOMString;	// !!!: rw
+  end;
+
+  THTMLPreElement = class(THTMLElement)
+  public
+    property Width: Integer;	// !!!: rw
+  end;
+
+  THTMLBREElement = class(THTMLElement)
+  public
+    property Clear: DOMString;	// !!!: rw
+  end;
+
+  THTMLBaseFontElement = class(THTMLElement)
+  public
+    property Color: DOMString;	// !!!: rw
+    property Face: DOMString;	// !!!: rw
+    property Size: Integer;	// !!!: rw
+  end;
+
+  THTMLFontElement = class(THTMLElement)
+  public
+    property Color: DOMString;	// !!!: rw
+    property Face: DOMString;	// !!!: rw
+    property Size: Integer;	// !!!: rw
+  end;
+
+  THTMLHRElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+    property NoShade: Boolean;	// !!!: rw
+    property Size: DOMString;	// !!!: rw
+    property Width: DOMString;	// !!!: rw
+  end;
+
+  THTMLModElement = class(THTMLElement)
+  public
+    property Cite: DOMString;	// !!!: rw
+    property DateTime: DOMString;	// !!!: rw
+  end;
+
+  THTMLAnchorElement = class(THTMLElement)
+  public
+    property AccessKey: DOMString;	// !!!: rw
+    property Charset: DOMString;	// !!!: rw
+    property Coords: DOMString;	// !!!: rw
+    property HRef: DOMString;	// !!!: rw
+    property HRefLang: DOMString;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property Rel: DOMString;	// !!!: rw
+    property Rev: DOMString;	// !!!: rw
+    property Shape: DOMString;	// !!!: rw
+    property TabIndex: Integer;	// !!!: rw
+    property Target: DOMString;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+    procedure Blur; virtual; abstract;
+    procedure Focus; virtual; abstract;
+  end;
+
+  THTMLImageElement = class(THTMLElement)
+  public
+    property Name: DOMString;	// !!!: rw
+    property Align: DOMString;	// !!!: rw
+    property Alt: DOMString;	// !!!: rw
+    property Border: DOMString;	// !!!: rw
+    property Height: Integer;	// !!!: rw
+    property HSpace: Integer;	// !!!: rw
+    property IsMap: Boolean;	// !!!: rw
+    property LongDesc: DOMString;	// !!!: rw
+    property Src: Integer;	// !!!: rw
+    property UseMap: DOMString;	// !!!: rw
+    property VSpace: Integer;	// !!!: rw
+    property Width: Integer;	// !!!: rw
+  end;
+
+  THTMLObjectElement = class(THTMLElement)
+  public
+    property Form: THTMLFormElement;	// !!!: ro
+    property Code: DOMString;	// !!!: rw
+    property Align: DOMString;	// !!!: rw
+    property Archive: DOMString;	// !!!: rw
+    property Border: DOMString;	// !!!: rw
+    property CodeBase: DOMString;	// !!!: rw
+    property CodeType: DOMString;	// !!!: rw
+    property Data: DOMString;	// !!!: rw
+    property Declare: Boolean;	// !!!: rw
+    property Height: DOMString;	// !!!: rw
+    property HSpace: Integer;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property StandBy: DOMString;	// !!!: rw
+    property TabIndex: Integer;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+    property UseMap: DOMString;	// !!!: rw
+    property VSpace: Integer;	// !!!: rw
+    property Width: Integer;	// !!!: rw
+    property ContentDocument: TDOMDocument;	// !!!: ro
+  end;
+
+  THTMLParamElement = class(THTMLElement)
+  public
+    property Name: DOMString;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+    property Value: DOMString;	// !!!: rw
+    property ValueType: DOMString;	// !!!: rw
+  end;
+
+  THTMLAppletElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+    property Alt: DOMString;	// !!!: rw
+    property Archive: DOMString;	// !!!: rw
+    property Code: DOMString;	// !!!: rw
+    property CodeBase: DOMString;	// !!!: rw
+    property Height: DOMString;	// !!!: rw
+    property HSpace: Integer;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property AppletObject: DOMString;	// !!!: rw
+    property VSpace: Integer;	// !!!: rw
+    property Width: Integer;	// !!!: rw
+  end;
+
+  THTMLMapElement = class(THTMLElement)
+  public
+    property Areas: THTMLCollection;	// !!!: ro
+    property Name: DOMString;	// !!!: rw
+  end;
+
+  THTMLAreaElement = class(THTMLElement)
+  public
+    property AccessKey: DOMString;	// !!!: rw
+    property Alt: DOMString;	// !!!: rw
+    property Coords: DOMString;	// !!!: rw
+    property HRef: DOMString;	// !!!: rw
+    property NoHRef: Boolean;	// !!!: rw
+    property Shape: DOMString;	// !!!: rw
+    property TabIndex: Integer;	// !!!: rw
+    property Target: DOMString;	// !!!: rw
+  end;
+
+  THTMLScriptElement = class(THTMLElement)
+  public
+    property Text: DOMString;	// !!!: rw
+    property HtmlFor: DOMString;	// !!!: rw
+    property Event: DOMString;	// !!!: rw
+    property Charset: DOMString;	// !!!: rw
+    property Defer: Boolean;	// !!!: rw
+    property Src: DOMString;	// !!!: rw
+    property HTMLType: DOMString;	// !!!: rw
+  end;
+
+  THTMLTableElement = class(THTMLElement)
+  public
+    property Caption: THTMLTableCaptionElement;	// !!!: rw
+    property THead: THTMLTableSectionElement;	// !!!: rw
+    property TFoot: THTMLTableSectionElement;	// !!!: rw
+    property Rows: THTMLCollection;	// !!!: ro
+    property TBodies: THTMLCollection;	// !!!: ro
+    property Align: DOMString;	// !!!: rw
+    property BgColor: DOMString;	// !!!: rw
+    property Border: DOMString;	// !!!: rw
+    property CellPadding: DOMString;	// !!!: rw
+    property CellSpacing: DOMString;	// !!!: rw
+    property Frame: DOMString;	// !!!: rw
+    property Rules: DOMString;	// !!!: rw
+    property Summary: DOMString;	// !!!: rw
+    property Width: DOMString;	// !!!: rw
+    function CreateTHead: THTMLElement;
+    procedure DeleteTHead;
+    function CreateTFoot: THTMLElement;
+    procedure DeleteTFoot;
+    function CreateCaption: THTMLElement;
+    procedure DeleteCaption;
+    function InsertRow(Index: Integer): THTMLElement;
+    procedure DeleteRow(Index: Integer);
+  end;
+
+  THTMLTableCaptionElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+  end;
+
+  THTMLTableColElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+    property Ch: DOMString;	// !!!: rw
+    property ChOff: DOMString;	// !!!: rw
+    property Span: Integer;	// !!!: rw
+    property VAlign: DOMString;	// !!!: rw
+    property Width: DOMString;	// !!!: rw
+  end;
+
+  THTMLTableSectionElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+    property Ch: DOMString;	// !!!: rw
+    property ChOff: DOMString;	// !!!: rw
+    property VAlign: DOMString;	// !!!: rw
+    property Rows: THTMLCollection;	// !!!: ro
+    function InsertRow(Index: Integer): THTMLElement;
+    procedure DeleteRow(Index: Integer);
+  end;
+
+  THTMLTableRowElement = class(THTMLElement)
+  public
+    property RowIndex: Integer;	// !!!: ro
+    property SectionRowIndex: Integer;	// !!!: ro
+    property Cells: THTMLCollection;	// !!!: ro
+    property Align: DOMString;	// !!!: rw
+    property BgColor: DOMString;	// !!!: rw
+    property Ch: DOMString;	// !!!: rw
+    property ChOff: DOMString;	// !!!: rw
+    property VAlign: DOMString;	// !!!: rw
+    function InsertCell(Index: Integer): THTMLElement;
+    procedure DeleteCell(Index: Integer);
+  end;
+
+  THTMLTableCellElement = class(THTMLElement)
+  public
+    property CellIndex: Integer;	// !!!: ro
+    property Abbr: DOMString;	// !!!: rw
+    property Align: DOMString;	// !!!: rw
+    property Axis: DOMString;	// !!!: rw
+    property BgColor: DOMString;	// !!!: rw
+    property Ch: DOMString;	// !!!: rw
+    property ChOff: DOMString;	// !!!: rw
+    property ColSpan: Integer;	// !!!: rw
+    property Headers: DOMString;	// !!!: rw
+    property Height: DOMString;	// !!!: rw
+    property NoWrap: Boolean;	// !!!: rw
+    property RowSpan: Integer;	// !!!: rw
+    property Scope: DOMString;	// !!!: rw
+    property VAlign: DOMString;	// !!!: rw
+    property Width: DOMString;	// !!!: rw
+  end;
+
+  THTMLFrameSetElement = class(THTMLElement)
+  public
+    property Cols: DOMString;	// !!!: rw
+    property Rows: DOMString;	// !!!: rw
+  end;
+
+  THTMLFrameElement = class(THTMLElement)
+  public
+    property FrameBorder: DOMString;	// !!!: rw
+    property LongDesc: DOMString;	// !!!: rw
+    property MarginHeight: DOMString;	// !!!: rw
+    property MarginWidth: DOMString;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property NoResize: Boolean;	// !!!: rw
+    property Scrolling: DOMString;	// !!!: rw
+    property Src: DOMString;	// !!!: rw
+    property ContentDocument: TDOMDocument;	// !!!: ro
+  end;
+
+  THTMLIFrameElement = class(THTMLElement)
+  public
+    property Align: DOMString;	// !!!: rw
+    property FrameBorder: DOMString;	// !!!: rw
+    property Height: DOMString;	// !!!: rw
+    property LongDesc: DOMString;	// !!!: rw
+    property MarginHeight: DOMString;	// !!!: rw
+    property MarginWidth: DOMString;	// !!!: rw
+    property Name: DOMString;	// !!!: rw
+    property Scrolling: DOMString;	// !!!: rw
+    property Src: DOMString;	// !!!: rw
+    property Width: DOMString;	// !!!: rw
+    property ContentDocument: TDOMDocument;	// !!!: ro
+  end;
+
+  THTMLDocument = class(TXMLDocument)
+  private
+    function GetTitle: DOMString;
+    procedure SetTitle(const Value: DOMString);
+  public
+    property Title: DOMString read GetTitle write SetTitle;
+    property Referrer: DOMString;	// !!!: ro
+    property Domain: DOMString;	// !!!: ro
+    property URL: DOMString;	// !!!: ro
+    property Body: THTMLElement;	// !!!: rw
+    property Images: THTMLCollection;	// !!!: ro
+    property Applets: THTMLCollection;	// !!!: ro
+    property Links: THTMLCollection;	// !!!: ro
+    property Forms: THTMLCollection;	// !!!: ro
+    property Anchors: THTMLCollection;	// !!!: ro
+    property Cookie: DOMString;		// !!!: rw
+
+    procedure Open; virtual; abstract;
+    procedure Close; virtual; abstract;
+    procedure Write(const AText: DOMString);
+    procedure WriteLn(const AText: DOMString);
+    function GetElementsByName(const ElementName: DOMString): TDOMNodeList;
+
+    // Helper functions (not in DOM standard):
+    function CreateElement(const tagName: DOMString): THTMLElement;
+    function CreateSubElement: THTMLElement;
+    function CreateSupElement: THTMLElement;
+    function CreateSpanElement: THTMLElement;
+    function CreateBDOElement: THTMLElement;
+    function CreateTTElement: THTMLElement;
+    function CreateIElement: THTMLElement;
+    function CreateBElement: THTMLElement;
+    function CreateUElement: THTMLElement;
+    function CreateSElement: THTMLElement;
+    function CreateStrikeElement: THTMLElement;
+    function CreateBigElement: THTMLElement;
+    function CreateSmallElement: THTMLElement;
+    function CreateEmElement: THTMLElement;
+    function CreateStrongElement: THTMLElement;
+    function CreateDfnElement: THTMLElement;
+    function CreateCodeElement: THTMLElement;
+    function CreateSampElement: THTMLElement;
+    function CreateKbdElement: THTMLElement;
+    function CreateVarElement: THTMLElement;
+    function CreateCiteElement: THTMLElement;
+    function CreateAcronymElement: THTMLElement;
+    function CreateAbbrElement: THTMLElement;
+    function CreateDDElement: THTMLElement;
+    function CreateDTElement: THTMLElement;
+    function CreateNoFramesElement: THTMLElement;
+    function CreateNoScriptElement: THTMLElement;
+    function CreateAddressElement: THTMLElement;
+    function CreateCenterElement: THTMLElement;
+    function CreateHtmlElement: THTMLHtmlElement;
+    function CreateHeadElement: THTMLHeadElement;
+    function CreateLinkElement: THTMLLinkElement;
+{    function CreateTitleElement: THTMLTitleElement;
+    function CreateMetaElement: THTMLMetaElement;
+    function CreateBaseElement: THTMLBaseElement;
+    function CreateIsIndexElement: THTMLIsIndexElement;
+    function CreateStyleElement: THTMLStyleElement;}
+    function CreateBodyElement: THTMLBodyElement;
+{    function CreateFormElement: THTMLFormElement;
+    function CreateSelectElement: THTMLSelectElement;
+    function CreateOptGroupElement: THTMLOptGroupElement;
+    function CreateOptionElement: THTMLOptionElement;
+    function CreateInputElement: THTMLInputElement;
+    function CreateTextAreaElement: THTMLTextAreaElement;
+    function CreateButtonElement: THTMLButtonElement;
+    function CreateLabelElement: THTMLLabelElement;
+    function CreateFieldSetElement: THTMLFieldSetElement;
+    function CreateLegendElement: THTMLLegendElement;}
+    function CreateUListElement: THTMLUListElement;
+    function CreateOListElement: THTMLOListElement;
+    function CreateDListElement: THTMLDListElement;
+{    function CreateDirectoryElement: THTMLDirectoryElement;
+    function CreateMenuElement: THTMLMenuElement;}
+    function CreateLIElement: THTMLLIElement;
+{    function CreateDivElement: THTMLDivElement;}
+    function CreateParagraphElement: THTMLParagraphElement;
+{    function CreateHeadingElement: THTMLHeadingElement;
+    function CreateQuoteElement: THTMLQuoteElement;
+    function CreatePreElement: THTMLPreElement;
+    function CreateBRElement: THTMLBreElement;
+    function CreateBaseFontElement: THTMLBaseFontElement;
+    function CreateFontElement: THTMFontLElement;
+    function CreateHRElement: THTMLHREElement;
+    function CreateModElement: THTMLModElement;
+    function CreateAnchorElement: THTMLAnchorElement;
+    function CreateImageElement: THTMLImageElement;
+    function CreateObjectElement: THTMLObjectElement;
+    function CreateParamElement: THTMLParamElement;
+    function CreateAppletElement: THTMLAppletElement;
+    function CreateMapElement: THTMLMapElement;
+    function CreateAreaElement: THTMLAreaElement;
+    function CreateScriptElement: THTMLScriptElement;
+    function CreateTableElement: THTMLTableElement;
+    function CreateTableCaptionElement: THTMLTableCaptionElement;
+    function CreateTableColElement: THTMLTableColElement;
+    function CreateTableSectionElement: THTMLTableSectionElement;
+    function CreateTableRowElement: THTMLTableRowElement;
+    function CreateTableCellElement: THTMLTableCellElement;
+    function CreateFrameSetElement: THTMLFrameSetElement;
+    function CreateFrameElement: THTMLFrameElement;
+    function CreateIFrameElement: THTMLIFrameElement;}
+  end;
+
+
+implementation
+
+
+function THTMLCollection.Item(Index: Cardinal): TDOMNode;
+begin
+  Result := nil;
+end;
+
+function THTMLCollection.NamedItem(const Index: DOMString): TDOMNode;
+begin
+  Result := nil;
+end;
+
+
+function THTMLOptionsCollection.Item(Index: Cardinal): TDOMNode;
+begin
+  Result := nil;
+end;
+
+function THTMLOptionsCollection.NamedItem(const Index: DOMString): TDOMNode;
+begin
+  Result := nil;
+end;
+
+
+constructor THTMLElement.Create(AOwner: THTMLDocument; const ATagName: DOMString);
+begin
+  inherited Create(AOwner);
+  FNodeName := ATagName;
+end;
+
+function THTMLElement.GetID: DOMString; begin Result := GetAttribute('id') end;
+procedure THTMLElement.SetID(const Value: DOMString); begin SetAttribute('id', Value) end;
+function THTMLElement.GetTitle: DOMString; begin Result := GetAttribute('title') end;
+procedure THTMLElement.SetTitle(const Value: DOMString); begin SetAttribute('title', Value) end;
+function THTMLElement.GetLang: DOMString; begin Result := GetAttribute('lang') end;
+procedure THTMLElement.SetLang(const Value: DOMString); begin SetAttribute('lang', Value) end;
+function THTMLElement.GetDir: DOMString; begin Result := GetAttribute('dir') end;
+procedure THTMLElement.SetDir(const Value: DOMString); begin SetAttribute('dir', Value) end;
+function THTMLElement.GetClassName: DOMString; begin  Result := GetAttribute('class') end;
+procedure THTMLElement.SetClassName(const Value: DOMString); begin SetAttribute('class', Value) end;
+
+
+function THTMLHtmlElement.GetVersion: DOMString; begin  Result := GetAttribute('version') end;
+procedure THTMLHtmlElement.SetVersion(const Value: DOMString); begin SetAttribute('version', Value) end;
+
+
+function THTMLHeadElement.GetProfile: DOMString; begin  Result := GetAttribute('profile') end;
+procedure THTMLHeadElement.SetProfile(const Value: DOMString); begin SetAttribute('profile', Value) end;
+
+
+procedure THTMLSelectElement.Add(Element, Before: THTMLElement);
+begin
+end;
+
+procedure THTMLSelectElement.Remove(Index: Integer);
+begin
+end;
+
+
+function THTMLTableElement.CreateTHead: THTMLElement;
+begin
+  Result := nil;
+end;
+
+procedure THTMLTableElement.DeleteTHead;
+begin
+end;
+
+function THTMLTableElement.CreateTFoot: THTMLElement;
+begin
+  Result := nil;
+end;
+
+procedure THTMLTableElement.DeleteTFoot;
+begin
+end;
+
+function THTMLTableElement.CreateCaption: THTMLElement;
+begin
+  Result := nil;
+end;
+
+procedure THTMLTableElement.DeleteCaption;
+begin
+end;
+
+function THTMLTableElement.InsertRow(Index: Integer): THTMLElement;
+begin
+  Result := nil;
+end;
+
+procedure THTMLTableElement.DeleteRow(Index: Integer);
+begin
+end;
+
+
+function THTMLTableSectionElement.InsertRow(Index: Integer): THTMLElement;
+begin
+  Result := nil;
+end;
+
+procedure THTMLTableSectionElement.DeleteRow(Index: Integer);
+begin
+end;
+
+
+function THTMLTableRowElement.InsertCell(Index: Integer): THTMLElement;
+begin
+  Result := nil;
+end;
+
+procedure THTMLTableRowElement.DeleteCell(Index: Integer);
+begin
+end;
+
+
+function THTMLDocument.GetTitle: DOMString;
+var
+  Node: TDOMNode;
+begin
+  Result := '';
+  if not Assigned(DocumentElement) then
+    exit;
+  Node := DocumentElement.FirstChild;
+  while Assigned(Node) and (Node.NodeName <> 'head') do
+    Node := Node.NextSibling;
+  if not Assigned(Node) then
+    exit;
+  Node := Node.FirstChild;
+  while Assigned(Node) and (Node.NodeName <> 'title') do
+    Node := Node.NextSibling;
+  if not Assigned(Node) then
+    exit;
+  Node := Node.FirstChild;
+  if Assigned(Node) and (Node.NodeType = TEXT_NODE) then
+    Result := Node.NodeValue;
+end;
+
+procedure THTMLDocument.SetTitle(const Value: DOMString);
+var
+  Node: TDOMNode;
+  TitleEl: TDOMElement;
+begin
+  if not Assigned(DocumentElement) then
+    AppendChild(CreateHtmlElement);
+  Node := DocumentElement.FirstChild;
+  while Assigned(Node) and (Node.NodeName <> 'head') do
+    Node := Node.NextSibling;
+  if not Assigned(Node) then
+  begin
+    Node := CreateHeadElement;
+    DocumentElement.InsertBefore(Node, DocumentElement.FirstChild);
+  end;
+  TitleEl := TDOMElement(Node.FirstChild);
+  while Assigned(TitleEl) and (TitleEl.NodeName <> 'title') do
+    TitleEl := TDOMElement(TitleEl.NextSibling);
+  if not Assigned(TitleEl) then
+  begin
+    TitleEl := CreateElement('title');
+    Node.AppendChild(TitleEl);
+  end;
+  while Assigned(TitleEl.FirstChild) do
+    TitleEl.RemoveChild(TitleEl.FirstChild);
+  TitleEl.AppendChild(CreateTextNode(Value));
+end;
+
+procedure THTMLDocument.Write(const AText: DOMString);
+begin
+end;
+
+procedure THTMLDocument.WriteLn(const AText: DOMString);
+begin
+end;
+
+function THTMLDocument.GetElementsByName(const ElementName: DOMString): TDOMNodeList;
+begin
+  Result := nil;
+end;
+
+function THTMLDocument.CreateElement(const tagName: DOMString): THTMLElement;
+begin
+  Result := THTMLElement.Create(Self, tagName);
+end;
+
+function THTMLDocument.CreateSubElement: THTMLElement; begin Result := CreateElement('sub') end;
+function THTMLDocument.CreateSupElement: THTMLElement; begin Result := CreateElement('sup') end;
+function THTMLDocument.CreateSpanElement: THTMLElement; begin Result := CreateElement('span') end;
+function THTMLDocument.CreateBDOElement: THTMLElement; begin Result := CreateElement('bdo') end;
+function THTMLDocument.CreateTTElement: THTMLElement; begin Result := CreateElement('tt') end;
+function THTMLDocument.CreateIElement: THTMLElement; begin Result := CreateElement('i') end;
+function THTMLDocument.CreateBElement: THTMLElement; begin Result := CreateElement('b') end;
+function THTMLDocument.CreateUElement: THTMLElement; begin Result := CreateElement('u') end;
+function THTMLDocument.CreateSElement: THTMLElement; begin Result := CreateElement('s') end;
+function THTMLDocument.CreateStrikeElement: THTMLElement; begin Result := CreateElement('strike') end;
+function THTMLDocument.CreateBigElement: THTMLElement; begin Result := CreateElement('big') end;
+function THTMLDocument.CreateSmallElement: THTMLElement; begin Result := CreateElement('small') end;
+function THTMLDocument.CreateEmElement: THTMLElement; begin Result := CreateElement('em') end;
+function THTMLDocument.CreateStrongElement: THTMLElement; begin Result := CreateElement('strong') end;
+function THTMLDocument.CreateDfnElement: THTMLElement; begin Result := CreateElement('dfn') end;
+function THTMLDocument.CreateCodeElement: THTMLElement; begin Result := CreateElement('code') end;
+function THTMLDocument.CreateSampElement: THTMLElement; begin Result := CreateElement('samp') end;
+function THTMLDocument.CreateKbdElement: THTMLElement; begin Result := CreateElement('kbd') end;
+function THTMLDocument.CreateVarElement: THTMLElement; begin Result := CreateElement('var') end;
+function THTMLDocument.CreateCiteElement: THTMLElement; begin Result := CreateElement('cite') end;
+function THTMLDocument.CreateAcronymElement: THTMLElement; begin Result := CreateElement('acronym') end;
+function THTMLDocument.CreateAbbrElement: THTMLElement; begin Result := CreateElement('abbr') end;
+function THTMLDocument.CreateDDElement: THTMLElement; begin Result := CreateElement('dd') end;
+function THTMLDocument.CreateDTElement: THTMLElement; begin Result := CreateElement('dt') end;
+function THTMLDocument.CreateNoFramesElement: THTMLElement; begin Result := CreateElement('noframes') end;
+function THTMLDocument.CreateNoScriptElement: THTMLElement; begin Result := CreateElement('noscript') end;
+function THTMLDocument.CreateAddressElement: THTMLElement; begin Result := CreateElement('address') end;
+function THTMLDocument.CreateCenterElement: THTMLElement; begin Result := CreateElement('center') end;
+function THTMLDocument.CreateHtmlElement: THTMLHtmlElement; begin Result := THTMLHtmlElement.Create(Self, 'html') end;
+function THTMLDocument.CreateHeadElement: THTMLHeadElement; begin Result := THTMLHeadElement.Create(Self, 'head') end;
+function THTMLDocument.CreateLinkElement: THTMLLinkElement; begin Result := THTMLLinkElement.Create(Self, 'a') end;
+//...
+function THTMLDocument.CreateBodyElement: THTMLBodyElement; begin Result := THTMLBodyElement.Create(Self, 'body') end;
+//...
+function THTMLDocument.CreateUListElement: THTMLUListElement; begin Result := THTMLUListElement.Create(Self, 'ul') end;
+function THTMLDocument.CreateOListElement: THTMLOListElement; begin Result := THTMLOListElement.Create(Self, 'ol') end;
+function THTMLDocument.CreateDListElement: THTMLDListElement; begin Result := THTMLDListElement.Create(Self, 'dl') end;
+// ...
+function THTMLDocument.CreateLIElement: THTMLLIElement; begin Result := THTMLLIElement.Create(Self, 'li') end;
+//...
+function THTMLDocument.CreateParagraphElement: THTMLParagraphElement; begin Result := THTMLParagraphElement.Create(Self, 'p') end;
+
+end.
+
+
+{
+  $Log$
+  Revision 1.1  2002-12-11 21:06:07  sg
+  * Small cleanups
+  * Replaced htmldoc unit with dom_html unit
+  * Added SAX parser framework and SAX HTML parser
+
+}

+ 938 - 0
fcl/xml/sax.pp

@@ -0,0 +1,938 @@
+{
+    $Id$
+    This file is part of the Free Component Library
+
+    SAX 2 (Simple API for XML) implementation
+    Copyright (c) 2000 - 2002 by
+      Areca Systems GmbH / Sebastian Guenther, [email protected]
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+
+unit SAX;
+
+{ $DEFINE UseDynArrays}
+
+{ $IFDEF UseDynArrays}
+  {$MODE objfpc}
+{ $ELSE}
+  { $MODE Delphi}
+{ $ENDIF}
+{$H+}
+
+
+interface
+
+uses SysUtils, Classes;
+
+resourcestring
+  SSAXAttributeIndexError = 'Invalid attribute index %d';
+
+const
+  XMLNS = 'http://www.w3.org/XML/1998/namespace';
+
+type
+
+{$IFDEF ver1_0}
+  SAXString = String;
+  SAXChar = Char;
+{$ELSE}
+  SAXString = WideString;
+  SAXChar = WideChar;
+{$ENDIF}
+  PSAXChar = ^SAXChar;
+
+{ Exceptions }
+
+  ESAXError = class(Exception);
+
+  ESAXAttributeIndexError = class(ESAXError)
+  public
+    constructor Create(Index: Integer);
+  end;
+
+  ESAXParseException = class(ESAXError);
+
+
+{ TSAXInputSource: A single input source for an XML entity }
+
+  TSAXInputSource = class
+  private
+    FStream: TStream;
+    FEncoding: String;
+    FPublicID, FSystemID: SAXString;
+  public
+    constructor Create; overload;
+    constructor Create(AStream: TStream); overload;
+    constructor Create(const ASystemID: SAXString); overload;
+    property Stream: TStream read FStream write FStream;
+    property Encoding: String read FEncoding write FEncoding;
+    property PublicID: SAXString read FPublicID write FPublicID;
+    property SystemID: SAXString read FSystemID write FSystemID;
+  end;
+
+
+{ TSAXAttributes: List of XML attributes }
+
+  TSAXAttributeData = record
+    URI, LocalName, QName, Value: SAXString;
+    AttrType: String;
+  end;
+
+  {$IFNDEF UseDynArrays}
+  PSAXAttributeData = ^TSAXAttributeData;
+  {$ENDIF}
+
+  TSAXAttributes = class
+  protected
+    FLength: Integer;
+    {$IFDEF UseDynArrays}
+    Data: array of TSAXAttributeData;
+    {$ELSE}
+    FData: TList;
+    function GetData(Index: Integer): PSAXAttributeData;
+    property Data[Index:Integer]: PSAXAttributeData read GetData;
+    {$ENDIF}
+    procedure BadIndex(Index: Integer);
+  public
+    constructor Create; overload;
+    constructor Create(Atts: TSAXAttributes); overload;
+    {$IFNDEF UseDynArrays}
+    destructor Destroy; override;
+    {$ENDIF}
+
+    function GetIndex(const QName: SAXString): Integer; overload;
+    function GetIndex(const URI, LocalPart: SAXString): Integer; overload;
+    function GetLength: Integer;
+    function GetLocalName(Index: Integer): SAXString;
+    function GetQName(Index: Integer): SAXString;
+    function GetType(Index: Integer): String; overload;
+    function GetType(const QName: SAXString): String; overload;
+    function GetType(const URI, LocalName: SAXString): String; overload;
+    function GetURI(Index: Integer): SAXString;
+    function GetValue(Index: Integer): SAXString; overload;
+    function GetValue(const QName: SAXString): SAXString; overload;
+    function GetValue(const URI, LocalName: SAXString): SAXString; overload;
+
+    // Manipulation methods:
+    procedure Clear;
+    procedure SetAttributes(Atts: TSAXAttributes);
+    procedure AddAttribute(const AURI, ALocalName, AQName: SAXString;
+      const AType: String; const AValue: SAXString);
+    procedure SetAttribute(Index: Integer;
+      const AURI, ALocalName, AQName: SAXString; const AType: String;
+      const AValue: SAXString);
+    procedure RemoveAttribute(Index: Integer);
+    procedure SetURI(Index: Integer; const AURI: SAXString);
+    procedure SetLocalName(Index: Integer; const ALocalName: SAXString);
+    procedure SetQName(Index: Integer; const AQName: SAXString);
+    procedure SetType(Index: Integer; const AType: String);
+    procedure SetValue(Index: Integer; const AValue: SAXString);
+
+    property Length: Integer read GetLength;
+    property LocalNames[Index: Integer]: SAXString read GetLocalName;
+    property QNames[Index: Integer]: SAXString read GetQName;
+    property Types[Index: Integer]: String read GetType;
+    property URIs[Index: Integer]: SAXString read GetURI;
+    property Values[Index: Integer]: SAXString read GetValue;
+  end;
+
+
+{ TSAXReader: Reading an XML document using callbacks }
+
+  TCharactersEvent = procedure(Sender: TObject; const ch: PSAXChar; AStart, ALength: Integer) of object;
+  TCommentEvent = type TCharactersEvent;
+  TEndElementEvent = procedure(Sender: TObject; const NamespaceURI, LocalName, QName: SAXString) of object;
+  TEndPrefixMappingEvent = procedure(Sender: TObject; const Prefix: SAXString) of object;
+  TIgnorableWhitespaceEvent = procedure(Sender: TObject; const ch: PSAXChar; AStart, ALength: Integer) of object;
+  TProcessingInstructionEvent = procedure(Sender: TObject; const Target, Data: SAXString) of object;
+  TSkippedEntityEvent = procedure(Sender: TObject; const Name: SAXString) of object;
+  TStartElementEvent = procedure(Sender: TObject; const NamespaceURI, LocalName, QName: SAXString; Atts: TSAXAttributes) of object;
+  TStartPrefixMappingEvent = procedure(Sender: TObject; const Prefix, URI: SAXString) of object;
+  TNotationDeclEvent = procedure(Sender: TObject; const Name, PublicID, SystemID: SAXString) of object;
+  TUnparsedEntityDeclEvent = procedure(Sender: TObject; const Name, PublicID, SystemID, NotationName: SAXString) of object;
+  TResolveEntityEvent = function(Sender: TObject; const PublicID, SystemID: SAXString): TSAXInputSource of object;
+  TErrorEvent = procedure(Sender: TObject; AException: ESAXParseException) of object;
+  TFatalErrorEvent = procedure(Sender: TObject; AException: ESAXParseException) of object;
+  TWarningEvent = procedure(Sender: TObject; AException: ESAXParseException) of object;
+
+
+  TSAXReader = class
+  private
+    FOnCharacters: TCharactersEvent;
+    FOnComment: TCommentEvent;
+    FOnEndDocument: TNotifyEvent;
+    FOnEndElement: TEndElementEvent;
+    FOnEndPrefixMapping: TEndPrefixMappingEvent;
+    FOnIgnorableWhitespace: TIgnorableWhitespaceEvent;
+    FOnProcessingInstruction: TProcessingInstructionEvent;
+    FOnSkippedEntity: TSkippedEntityEvent;
+    FOnStartDocument: TNotifyEvent;
+    FOnStartElement: TStartElementEvent;
+    FOnStartPrefixMapping: TStartPrefixMappingEvent;
+    FOnNotationDecl: TNotationDeclEvent;
+    FOnUnparsedEntityDecl: TUnparsedEntityDeclEvent;
+    FOnResolveEntity: TResolveEntityEvent;
+    FOnError: TErrorEvent;
+    FOnFatalError: TFatalErrorEvent;
+    FOnWarning: TWarningEvent;
+  protected
+    FCurColumnNumber, FCurLineNumber: Integer;
+    FCurPublicID, FCurSystemID: SAXString;
+
+    function GetFeature(const Name: String): Boolean; dynamic; abstract;
+    function GetProperty(const Name: String): TObject; dynamic; abstract;
+    procedure SetFeature(const Name: String; Value: Boolean); dynamic; abstract;
+    procedure SetProperty(const Name: String; Value: TObject); dynamic; abstract;
+
+    // Notification of the content of a document
+    procedure DoCharacters(const ch: PSAXChar; AStart, ALength: Integer); dynamic;
+    procedure DoComment(const ch: PSAXChar; AStart, ALength: Integer); dynamic;
+    procedure DoEndDocument; dynamic;
+    procedure DoEndElement(const NamespaceURI, LocalName, QName: SAXString); dynamic;
+    procedure DoEndPrefixMapping(const Prefix: SAXString); dynamic;
+    procedure DoIgnorableWhitespace(const ch: PSAXChar; AStart, ALength: Integer); dynamic;
+    procedure DoProcessingInstruction(const Target, Data: SAXString); dynamic;
+    procedure DoSkippedEntity(const Name: SAXString); dynamic;
+    procedure DoStartDocument; dynamic;
+    procedure DoStartElement(const NamespaceURI, LocalName, QName: SAXString; Atts: TSAXAttributes); dynamic;
+    procedure DoStartPrefixMapping(const Prefix, URI: SAXString); dynamic;
+
+    // Notification of basic DTD-related events
+    procedure DoNotationDecl(const Name, PublicID, SystemID: SAXString); dynamic;
+    procedure DoUnparsedEntityDecl(const Name, PublicID,
+      SystemID, NotationName: SAXString); dynamic;
+
+    // Resolving entities
+    function DoResolveEntity(const PublicID,
+      SystemID: SAXString): TSAXInputSource; dynamic;
+
+    // SAX error handlers
+    procedure DoError(AException: ESAXParseException); dynamic;
+    procedure DoFatalError(AException: ESAXParseException); dynamic;
+    procedure DoWarning(AException: ESAXParseException); dynamic;
+  public
+    procedure Parse(AInput: TSAXInputSource); dynamic; abstract; overload;
+    procedure Parse(const SystemID: SAXString); dynamic; overload;
+    procedure ParseStream(AStream: TStream);
+
+    // Current location
+    property CurColumnNumber: Integer read FCurColumnNumber;
+    property CurLineNumber: Integer read FCurLineNumber;
+    property CurPublicID: SAXString read FCurPublicID;
+    property CurSystemID: SAXString read FCurSystemID;
+
+    property Features[const Name: String]: Boolean read GetFeature write SetFeature;
+    property Properties[const Name: String]: TObject read GetProperty write SetProperty;
+
+    // Content handler callbacks
+    property OnCharacters: TCharactersEvent read FOnCharacters write FOnCharacters;
+    property OnComment: TCommentEvent read FOnComment write FOnComment;
+    property OnEndDocument: TNotifyEvent read FOnEndDocument write FOnEndDocument;
+    property OnEndElement: TEndElementEvent read FOnEndElement write FOnEndElement;
+    property OnEndPrefixMapping: TEndPrefixMappingEvent read FOnEndPrefixMapping write FOnEndPrefixMapping;
+    property OnIgnorableWhitespace: TIgnorableWhitespaceEvent read FOnIgnorableWhitespace write FOnIgnorableWhitespace;
+    property OnProcessingInstruction: TProcessingInstructionEvent read FOnProcessingInstruction write FOnProcessingInstruction;
+    property OnSkippedEntity: TSkippedEntityEvent read FOnSkippedEntity write FOnSkippedEntity;
+    property OnStartDocument: TNotifyEvent read FOnStartDocument write FOnStartDocument;
+    property OnStartElement: TStartElementEvent read FOnStartElement write FOnStartElement;
+    property OnStartPrefixMapping: TStartPrefixMappingEvent read FOnStartPrefixMapping write FOnStartPrefixMapping;
+    // DTD handler callbacks
+    property OnNotationDecl: TNotationDeclEvent read FOnNotationDecl write FOnNotationDecl;
+    property OnUnparsedEntityDecl: TUnparsedEntityDeclEvent read FOnUnparsedEntityDecl write FOnUnparsedEntityDecl;
+    // Entity resolver callbacks
+    property OnResolveEntity: TResolveEntityEvent read FOnResolveEntity write FOnResolveEntity;
+    // Error handler callbacks
+    property OnError: TErrorEvent read FOnError write FOnError;
+    property OnFatalError: TFatalErrorEvent read FOnFatalError write FOnFatalError;
+    property OnWarning: TWarningEvent read FOnWarning write FOnWarning;
+  end;
+
+
+{ TSAXFilter: XML filter }
+
+  TSAXFilter = class(TSAXReader)
+  private
+    FParent: TSAXReader;
+  protected
+    procedure DoCharacters(const ch: PSAXChar; AStart, ALength: Integer); override;
+    procedure DoEndDocument; override;
+    procedure DoEndElement(const NamespaceURI, LocalName, QName: SAXString); override;
+    procedure DoEndPrefixMapping(const Prefix: SAXString); override;
+    procedure DoIgnorableWhitespace(const ch: PSAXChar; AStart, ALength: Integer); override;
+    procedure DoProcessingInstruction(const Target, Data: SAXString); override;
+    procedure DoSkippedEntity(const Name: SAXString); override;
+    procedure DoStartDocument; override;
+    procedure DoStartElement(const NamespaceURI, LocalName, QName: SAXString; Atts: TSAXAttributes); override;
+    procedure DoStartPrefixMapping(const Prefix, URI: SAXString); override;
+    procedure DoNotationDecl(const Name, PublicID, SystemID: SAXString); override;
+    procedure DoUnparsedEntityDecl(const Name, PublicID, SystemID, NotationName: SAXString); override;
+    function DoResolveEntity(const PublicID, SystemID: SAXString): TSAXInputSource; override;
+    procedure DoError(AException: ESAXParseException); override;
+    procedure DoFatalError(AException: ESAXParseException); override;
+    procedure DoWarning(AException: ESAXParseException); override;
+  public
+    property Parent: TSAXReader read FParent write FParent;
+  end;
+
+
+// ===================================================================
+// ===================================================================
+
+implementation
+
+
+constructor ESAXAttributeIndexError.Create(Index: Integer);
+begin
+  inherited CreateFmt(SSAXAttributeIndexError, [Index]);
+end;
+
+
+{ TSAXInputSource }
+
+constructor TSAXInputSource.Create;
+begin
+  inherited Create;
+end;
+
+constructor TSAXInputSource.Create(AStream: TStream);
+begin
+  inherited Create;
+  FStream := AStream;
+end;
+
+constructor TSAXInputSource.Create(const ASystemID: SAXString);
+begin
+  inherited Create;
+  FSystemID := ASystemID;
+end;
+
+
+{ TSAXAttributes }
+
+constructor TSAXAttributes.Create;
+begin
+  inherited Create;
+  {$IFNDEF UseDynArrays}
+  FData := TList.Create;
+  {$ENDIF}
+end;
+
+constructor TSAXAttributes.Create(Atts: TSAXAttributes);
+begin
+  inherited Create;
+  {$IFNDEF UseDynArrays}
+  FData := TList.Create;
+  {$ENDIF}
+  SetAttributes(Atts);
+end;
+
+{$IFNDEF UseDynArrays}
+destructor TSAXAttributes.Destroy;
+begin
+  Clear;
+  FData.Free;
+  inherited Destroy;
+end;
+{$ENDIF}
+
+function TSAXAttributes.GetIndex(const QName: SAXString): Integer;
+begin
+  Result := 0;
+  while Result < FLength do
+  begin
+    if Data[Result]^.QName = QName then
+      exit;
+    Inc(Result);
+  end;
+  Result := -1;
+end;
+
+function TSAXAttributes.GetIndex(const URI, LocalPart: SAXString): Integer;
+begin
+  Result := 0;
+  while Result < FLength do
+  begin
+    if (Data[Result]^.URI = URI) and (Data[Result]^.LocalName = LocalPart) then
+      exit;
+    Inc(Result);
+  end;
+  Result := -1;
+end;
+
+function TSAXAttributes.GetLength: Integer;
+begin
+  Result := FLength;
+end;
+
+function TSAXAttributes.GetLocalName(Index: Integer): SAXString;
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Result := Data[Index]^.LocalName
+  else
+    SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetQName(Index: Integer): SAXString;
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Result := Data[Index]^.QName
+  else
+    SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetType(Index: Integer): String;
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Result := Data[Index]^.AttrType
+  else
+    SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetType(const QName: SAXString): String;
+var
+  i: Integer;
+begin
+  for i := 0 to FLength - 1 do
+    if Data[i]^.QName = QName then
+    begin
+      Result := Data[i]^.AttrType;
+      exit;
+    end;
+  SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetType(const URI, LocalName: SAXString): String;
+var
+  i: Integer;
+begin
+  for i := 0 to FLength - 1 do
+    if (Data[i]^.URI = URI) and (Data[i]^.LocalName = LocalName) then
+    begin
+      Result := Data[i]^.AttrType;
+      exit;
+    end;
+  SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetURI(Index: Integer): SAXString;
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Result := Data[Index * 5]^.URI
+  else
+    SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetValue(Index: Integer): SAXString;
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Result := Data[Index]^.Value
+  else
+    SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetValue(const QName: SAXString): SAXString;
+var
+  i: Integer;
+begin
+  for i := 0 to FLength - 1 do
+    if Data[i]^.QName = QName then
+    begin
+      Result := Data[i]^.Value;
+      exit;
+    end;
+  SetLength(Result, 0);
+end;
+
+function TSAXAttributes.GetValue(const URI, LocalName: SAXString): SAXString;
+var
+  i: Integer;
+begin
+  for i := 0 to FLength - 1 do
+    if (Data[i]^.URI = URI) and (Data[i]^.LocalName = LocalName) then
+    begin
+      Result := Data[i]^.Value;
+      exit;
+    end;
+  SetLength(Result, 0);
+end;
+
+procedure TSAXAttributes.Clear;
+{$IFDEF UseDynArrays}
+begin
+  SetLength(Data, 0);
+end;
+{$ELSE}
+var
+  i: Integer;
+  p: PSAXAttributeData;
+begin
+  for i := 0 to FData.Count - 1 do
+  begin
+    p := PSAXAttributeData(FData[i]);
+    Dispose(p);
+  end;
+end;
+{$ENDIF}
+
+procedure TSAXAttributes.SetAttributes(Atts: TSAXAttributes);
+var
+  i: Integer;
+begin
+  FLength := Atts.Length;
+  {$IFDEF UseDynArrays}
+  SetLength(Data, FLength);
+  {$ELSE}
+  FData.Count := FLength;
+  {$ENDIF}
+  for i := 0 to FLength - 1 do
+    {$IFDEF UseDynArrays}
+    with Data[i] do
+    {$ELSE}
+    with Data[i]^ do
+    {$ENDIF}
+    begin
+      URI := Atts.URIs[i];
+      LocalName := Atts.LocalNames[i];
+      QName := Atts.QNames[i];
+      AttrType := Atts.Types[i];
+      Value := Atts.Values[i];
+    end;
+end;
+
+procedure TSAXAttributes.AddAttribute(const AURI, ALocalName, AQName: SAXString;
+  const AType: String; const AValue: SAXString);
+{$IFNDEF UseDynArrays}
+var
+  p: PSAXAttributeData;
+{$ENDIF}
+begin
+  Inc(FLength);
+  {$IFDEF UseDynArrays}
+  SetLength(Data, FLength);
+  {$ELSE}
+  New(p);
+  FData.Add(p);
+  {$ENDIF}
+  {$IFDEF UseDynArrays}
+  with Data[FLength - 1] do
+  {$ELSE}
+  with Data[FLength - 1]^ do
+  {$ENDIF}
+  begin
+    URI := AURI;
+    LocalName := ALocalName;
+    QName := AQName;
+    AttrType := AType;
+    Value := AValue;
+  end;
+end;
+
+procedure TSAXAttributes.SetAttribute(Index: Integer;
+  const AURI, ALocalName, AQName: SAXString; const AType: String;
+  const AValue: SAXString);
+begin
+  if (Index >= 0) and (Index < FLength) then
+    {$IFDEF UseDynArrays}
+    with Data[Index] do
+    {$ELSE}
+    with Data[Index]^ do
+    {$ENDIF}
+    begin
+      URI := AURI;
+      LocalName := ALocalName;
+      QName := AQName;
+      AttrType := AType;
+      Value := AValue;
+    end
+  else
+    BadIndex(Index);
+end;
+
+procedure TSAXAttributes.RemoveAttribute(Index: Integer);
+{$IFDEF UseDynArrays}
+var
+  i: Integer;
+{$ENDIF}
+begin
+  if (Index >= 0) and (Index < FLength) then
+  begin
+    {$IFDEF UseDynArrays}
+    for i := Index to FLength - 1 do
+      Data[i] := Data[i + 1];
+    Dec(FLength);
+    SetLength(Data, FLength);
+    {$ELSE}
+    FData.Delete(Index);
+    Dec(FLength);
+    {$ENDIF}
+  end else
+    BadIndex(Index);
+end;
+
+procedure TSAXAttributes.SetURI(Index: Integer; const AURI: SAXString);
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Data[Index]^.URI := AURI
+  else
+    BadIndex(Index);
+end;
+
+procedure TSAXAttributes.SetLocalName(Index: Integer;
+  const ALocalName: SAXString);
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Data[Index]^.LocalName := ALocalName
+  else
+    BadIndex(Index);
+end;
+
+procedure TSAXAttributes.SetQName(Index: Integer; const AQName: SAXString);
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Data[Index]^.QName := AQName
+  else
+    BadIndex(Index);
+end;
+
+procedure TSAXAttributes.SetType(Index: Integer; const AType: String);
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Data[Index]^.AttrType := AType
+  else
+    BadIndex(Index);
+end;
+
+procedure TSAXAttributes.SetValue(Index: Integer; const AValue: SAXString);
+begin
+  if (Index >= 0) and (Index < FLength) then
+    Data[Index]^.Value := AValue
+  else
+    BadIndex(Index);
+end;
+
+{$IFNDEF UseDynArrays}
+function TSAXAttributes.GetData(Index: Integer): PSAXAttributeData;
+begin
+  Result := PSAXAttributeData(FData[Index]);
+end;
+{$ENDIF}
+
+procedure TSAXAttributes.BadIndex(Index: Integer);
+begin
+  raise ESAXAttributeIndexError.Create(Index) at get_caller_addr(get_frame);
+end;
+
+
+{ TSAXReader }
+
+procedure TSAXReader.Parse(const SystemID: SAXString);
+var
+  Input: TSAXInputSource;
+begin
+  Input := TSAXInputSource.Create(SystemID);
+  try
+    Input.Stream := TFileStream.Create(SystemID, fmOpenRead);
+    try
+      Parse(Input);
+    finally
+      Input.Stream.Free;
+    end;
+  finally
+    Input.Free;
+  end;
+end;
+
+procedure TSAXReader.ParseStream(AStream: TStream);
+var
+  Input: TSAXInputSource;
+begin
+  Input := TSAXInputSource.Create(AStream);
+  try
+    Parse(Input);
+  finally
+    Input.Free;
+  end;
+end;
+
+function TSAXReader.DoResolveEntity(const PublicID,
+  SystemID: SAXString): TSAXInputSource;
+begin
+  if Assigned(OnResolveEntity) then
+    Result := OnResolveEntity(Self, PublicID, SystemID)
+  else
+    Result := nil;
+end;
+
+procedure TSAXReader.DoNotationDecl(const Name, PublicID, SystemID: SAXString);
+begin
+  if Assigned(OnNotationDecl) then
+    OnNotationDecl(Self, Name, PublicID, SystemID);
+end;
+
+procedure TSAXReader.DoUnparsedEntityDecl(const Name, PublicID,
+  SystemID, NotationName: SAXString);
+begin
+  if Assigned(OnUnparsedEntityDecl) then
+    OnUnparsedEntityDecl(Self, Name, PublicID, SystemID, NotationName);
+end;
+
+procedure TSAXReader.DoCharacters(const ch: PSAXChar;
+  AStart, ALength: Integer);
+begin
+  if Assigned(OnCharacters) then
+    OnCharacters(Self, ch, AStart, ALength);
+end;
+
+procedure TSAXReader.DoComment(const ch: PSAXChar;
+  AStart, ALength: Integer);
+begin
+  if Assigned(OnComment) then
+    OnComment(Self, ch, AStart, ALength);
+end;
+
+procedure TSAXReader.DoEndDocument;
+begin
+  if Assigned(OnEndDocument) then
+    OnEndDocument(Self);
+end;
+
+procedure TSAXReader.DoEndElement(const NamespaceURI,
+  LocalName, QName: SAXString);
+begin
+  if Assigned(OnEndElement) then
+    OnEndElement(Self, NamespaceURI, LocalName, QName);
+end;
+
+procedure TSAXReader.DoEndPrefixMapping(const Prefix: SAXString);
+begin
+  if Assigned(OnEndPrefixMapping) then
+    OnEndPrefixMapping(Self, Prefix);
+end;
+
+procedure TSAXReader.DoIgnorableWhitespace(const ch: PSAXChar;
+  AStart, ALength: Integer);
+begin
+  if Assigned(OnIgnorableWhitespace) then
+    OnIgnorableWhitespace(Self, ch, AStart, ALength);
+end;
+
+procedure TSAXReader.DoProcessingInstruction(const Target,
+  Data: SAXString);
+begin
+  if Assigned(OnProcessingInstruction) then
+    OnProcessingInstruction(Self, Target, Data);
+end;
+
+procedure TSAXReader.DoSkippedEntity(const Name: SAXString);
+begin
+  if Assigned(OnSkippedEntity) then
+    OnSkippedEntity(Self, Name);
+end;
+
+procedure TSAXReader.DoStartDocument;
+begin
+  if Assigned(OnStartDocument) then
+    OnStartDocument(Self);
+end;
+
+procedure TSAXReader.DoStartElement(const NamespaceURI,
+  LocalName, QName: SAXString; Atts: TSAXAttributes);
+begin
+  if Assigned(OnStartElement) then
+    OnStartElement(Self, NamespaceURI, LocalName, QName, Atts);
+end;
+
+procedure TSAXReader.DoStartPrefixMapping(const Prefix, URI: SAXString);
+begin
+  if Assigned(OnStartPrefixMapping) then
+    OnStartPrefixMapping(Self, Prefix, URI);
+end;
+
+procedure TSAXReader.DoError(AException: ESAXParseException);
+begin
+  if Assigned(OnError) then
+    OnError(Self, AException);
+  AException.Free;
+end;
+
+procedure TSAXReader.DoFatalError(AException: ESAXParseException);
+begin
+  if Assigned(OnFatalError) then
+    OnFatalError(Self, AException)
+  else
+    raise AException;
+  AException.Free;
+end;
+
+procedure TSAXReader.DoWarning(AException: ESAXParseException);
+begin
+  if Assigned(OnWarning) then
+    OnWarning(Self, AException);
+  AException.Free;
+end;
+
+
+{ TSAXFilter }
+
+function TSAXFilter.DoResolveEntity(const PublicID,
+  SystemID: SAXString): TSAXInputSource;
+begin
+  if Assigned(OnResolveEntity) then
+    Result := OnResolveEntity(Self, PublicID, SystemID)
+  else if Assigned(Parent) then
+    Result := Parent.DoResolveEntity(PublicID, SystemID)
+  else
+    Result := nil;
+end;
+
+procedure TSAXFilter.DoNotationDecl(const Name, PublicID, SystemID: SAXString);
+begin
+  if Assigned(OnNotationDecl) then
+    OnNotationDecl(Self, Name, PublicID, SystemID)
+  else if Assigned(Parent) then
+    Parent.DoNotationDecl(Name, PublicID, SystemID);
+end;
+
+procedure TSAXFilter.DoUnparsedEntityDecl(const Name, PublicID,
+  SystemID, NotationName: SAXString);
+begin
+  if Assigned(OnUnparsedEntityDecl) then
+    OnUnparsedEntityDecl(Self, Name, PublicID, SystemID, NotationName)
+  else if Assigned(Parent) then
+    Parent.DoUnparsedEntityDecl(Name, PublicID, SystemID, NotationName);
+end;
+
+procedure TSAXFilter.DoCharacters(const ch: PSAXChar;
+  AStart, ALength: Integer);
+begin
+  if Assigned(OnCharacters) then
+    OnCharacters(Self, ch, AStart, ALength)
+  else if Assigned(Parent) then
+    Parent.DoCharacters(ch, AStart, ALength);
+end;
+
+procedure TSAXFilter.DoEndDocument;
+begin
+  if Assigned(OnEndDocument) then
+    OnEndDocument(Self)
+  else if Assigned(Parent) then
+    Parent.DoEndDocument;
+end;
+
+procedure TSAXFilter.DoEndElement(const NamespaceURI,
+  LocalName, QName: SAXString);
+begin
+  if Assigned(OnEndElement) then
+    OnEndElement(Self, NamespaceURI, LocalName, QName)
+  else if Assigned(Parent) then
+    Parent.DoEndElement(NamespaceURI, LocalName, QName);
+end;
+
+procedure TSAXFilter.DoEndPrefixMapping(const Prefix: SAXString);
+begin
+  if Assigned(OnEndPrefixMapping) then
+    OnEndPrefixMapping(Self, Prefix)
+  else if Assigned(Parent) then
+    Parent.DoEndPrefixMapping(Prefix);
+end;
+
+procedure TSAXFilter.DoIgnorableWhitespace(const ch: PSAXChar;
+  AStart, ALength: Integer);
+begin
+  if Assigned(OnIgnorableWhitespace) then
+    OnIgnorableWhitespace(Self, ch, AStart, ALength)
+  else if Assigned(Parent) then
+    Parent.DoIgnorableWhitespace(ch, AStart, ALength);
+end;
+
+procedure TSAXFilter.DoProcessingInstruction(const Target,
+  Data: SAXString);
+begin
+  if Assigned(OnProcessingInstruction) then
+    OnProcessingInstruction(Self, Target, Data)
+  else if Assigned(Parent) then
+    Parent.DoProcessingInstruction(Target, Data);
+end;
+
+procedure TSAXFilter.DoSkippedEntity(const Name: SAXString);
+begin
+  if Assigned(OnSkippedEntity) then
+    OnSkippedEntity(Self, Name)
+  else if Assigned(Parent) then
+    Parent.DoSkippedEntity(Name);
+end;
+
+procedure TSAXFilter.DoStartDocument;
+begin
+  if Assigned(OnStartDocument) then
+    OnStartDocument(Self)
+  else if Assigned(Parent) then
+    Parent.DoStartDocument;
+end;
+
+procedure TSAXFilter.DoStartElement(const NamespaceURI,
+  LocalName, QName: SAXString; Atts: TSAXAttributes);
+begin
+  if Assigned(OnStartElement) then
+    OnStartElement(Self, NamespaceURI, LocalName, QName, Atts)
+  else if Assigned(Parent) then
+    Parent.DoStartElement(NamespaceURI, LocalName, QName, Atts);
+end;
+
+procedure TSAXFilter.DoStartPrefixMapping(const Prefix, URI: SAXString);
+begin
+  if Assigned(OnStartPrefixMapping) then
+    OnStartPrefixMapping(Self, Prefix, URI)
+  else if Assigned(Parent) then
+    Parent.DoStartPrefixMapping(Prefix, URI);
+end;
+
+procedure TSAXFilter.DoError(AException: ESAXParseException);
+begin
+  if Assigned(OnError) then
+    OnError(Self, AException)
+  else if Assigned(Parent) then
+    Parent.DoError(AException);
+  AException.Free;
+end;
+
+procedure TSAXFilter.DoFatalError(AException: ESAXParseException);
+begin
+  if Assigned(OnFatalError) then
+    OnFatalError(Self, AException)
+  else if Assigned(Parent) then
+    Parent.DoFatalError(AException)
+  else
+    raise AException;
+  AException.Free;
+end;
+
+procedure TSAXFilter.DoWarning(AException: ESAXParseException);
+begin
+  if Assigned(OnWarning) then
+    OnWarning(Self, AException)
+  else if Assigned(Parent) then
+    Parent.DoWarning(AException);
+  AException.Free;
+end;
+
+
+end.
+
+
+{
+  $Log$
+  Revision 1.1  2002-12-11 21:06:07  sg
+  * Small cleanups
+  * Replaced htmldoc unit with dom_html unit
+  * Added SAX parser framework and SAX HTML parser
+
+}

+ 555 - 0
fcl/xml/sax_html.pp

@@ -0,0 +1,555 @@
+{
+    $Id$
+    This file is part of the Free Component Library
+
+    HTML parser with SAX-like interface
+    Copyright (c) 2000-2002 by
+      Areca Systems GmbH / Sebastian Guenther, [email protected]
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+
+{
+  Known problems:
+  * The whitespace handling does only work for processing the DOM tree.
+    Storing the DOM tree to a XML file will result in a quite ugly file.
+    (This probably has got much better with recent versions, which do
+    decent whitespace converting, but it's not tested really good.)
+  * Entity references in attribute values don't get parsed.
+}
+
+
+unit SAX_HTML;
+
+interface
+
+uses SysUtils, Classes, SAX, DOM;
+
+type
+
+{ THTMLReader: The HTML reader class }
+
+  THTMLScannerContext = (
+    scUnknown,
+    scWhitespace,	// within whitespace
+    scText,		// within text
+    scEntityReference,	// within entity reference ("&...;")
+    scTag);		// within a start tag or end tag
+
+  THTMLReader = class(TSAXReader)
+  private
+    FStarted: Boolean;
+    FEndOfStream: Boolean;
+    FScannerContext: THTMLScannerContext;
+    FTokenText: String;
+    FCurStringValueDelimiter: Char;
+    FAttrNameRead: Boolean;
+  protected
+    procedure EnterNewScannerContext(NewContext: THTMLScannerContext);
+  public
+    constructor Create;
+    destructor Destroy; override;
+
+    procedure Parse(AInput: TSAXInputSource); override; overload;
+
+    property EndOfStream: Boolean read FEndOfStream;
+    property ScannerContext: THTMLScannerContext read FScannerContext;
+    property TokenText: String read FTokenText;
+  end;
+
+
+{ THTMLToDOMConverter }
+
+  THTMLNodeType = (ntWhitespace, ntText, ntEntityReference, ntTag);
+
+  THTMLNodeInfo = class
+    NodeType: THTMLNodeType;
+    DOMNode: TDOMNode;
+  end;
+
+  THTMLToDOMConverter = class
+  private
+    FReader: THTMLReader;
+    FDocument: TDOMDocument;
+    FElementStack: TList;
+    FNodeBuffer: TList;
+
+    procedure ReaderCharacters(Sender: TObject; const ch: PSAXChar;
+      Start, Count: Integer);
+    procedure ReaderIgnorableWhitespace(Sender: TObject; const ch: PSAXChar;
+      Start, Count: Integer);
+    procedure ReaderSkippedEntity(Sender: TObject; const Name: SAXString);
+    procedure ReaderStartElement(Sender: TObject;
+      const NamespaceURI, LocalName, RawName: SAXString; Attr: TSAXAttributes);
+    procedure ReaderEndElement(Sender: TObject;
+      const NamespaceURI, LocalName, RawName: SAXString);
+
+  public
+    constructor Create(AReader: THTMLReader; ADocument: TDOMDocument);
+    destructor Destroy; override;
+  end;
+
+
+
+implementation
+
+uses HTMLDefs;
+
+const
+  WhitespaceChars = [#9, #10, #13, ' '];
+
+
+constructor THTMLReader.Create;
+begin
+  inherited Create;
+  FScannerContext := scUnknown;
+end;
+
+destructor THTMLReader.Destroy;
+begin
+  if FStarted then
+    DoEndDocument;
+  inherited Destroy;
+end;
+
+procedure THTMLReader.Parse(AInput: TSAXInputSource);
+const
+  MaxBufferSize = 1024;
+var
+  Buffer: array[0..MaxBufferSize - 1] of Char;
+  BufferSize, BufferPos: Integer;
+begin
+  if not FStarted then
+  begin
+    FStarted := True;
+    DoStartDocument;
+  end;
+
+  FEndOfStream := False;
+  while True do
+  begin
+    // Read data into the input buffer
+    BufferSize := AInput.Stream.Read(Buffer, MaxBufferSize);
+    if BufferSize = 0 then
+    begin
+      FEndOfStream := True;
+      break;
+    end;
+
+    BufferPos := 0;
+    while BufferPos < BufferSize do
+      case ScannerContext of
+        scUnknown:
+	  case Buffer[BufferPos] of
+	    #9, #10, #13, ' ':
+	      EnterNewScannerContext(scWhitespace);
+	    '&':
+	      begin
+	        Inc(BufferPos);
+	        EnterNewScannerContext(scEntityReference);
+	      end;
+	    '<':
+	      begin
+	        Inc(BufferPos);
+	        EnterNewScannerContext(scTag);
+	      end;
+	    else
+	      EnterNewScannerContext(scText);
+	  end;
+	scWhitespace:
+	  case Buffer[BufferPos] of
+	    #9, #10, #13, ' ':
+	      begin
+		FTokenText := FTokenText + Buffer[BufferPos];
+	        Inc(BufferPos);
+	      end;
+	    '&':
+	      begin
+	        Inc(BufferPos);
+	        EnterNewScannerContext(scEntityReference);
+	      end;
+	    '<':
+	      begin
+	        Inc(BufferPos);
+		EnterNewScannerContext(scTag);
+	      end;
+	    else
+	      EnterNewScannerContext(scText);
+	  end;
+        scText:
+	  case Buffer[BufferPos] of
+	    #9, #10, #13, ' ':
+	      EnterNewScannerContext(scWhitespace);
+	    '&':
+	      begin
+	        Inc(BufferPos);
+	        EnterNewScannerContext(scEntityReference);
+	      end;
+	    '<':
+	      begin
+	        Inc(BufferPos);
+		EnterNewScannerContext(scTag);
+	      end;
+	    else
+	    begin
+	      FTokenText := FTokenText + Buffer[BufferPos];
+	      Inc(BufferPos);
+	    end;
+	  end;
+	scEntityReference:
+	  if Buffer[BufferPos] = ';' then
+	  begin
+	    Inc(BufferPos);
+	    EnterNewScannerContext(scUnknown);
+	  end else if not (Buffer[BufferPos] in
+	    ['a'..'z', 'A'..'Z', '0'..'9', '#']) then
+	    EnterNewScannerContext(scUnknown)
+	  else
+	  begin
+	    FTokenText := FTokenText + Buffer[BufferPos];
+	    Inc(BufferPos);
+	  end;
+	scTag:
+	  case Buffer[BufferPos] of
+	    '''', '"':
+	      begin
+	        if FAttrNameRead then
+		begin
+	          if FCurStringValueDelimiter = #0 then
+		    FCurStringValueDelimiter := Buffer[BufferPos]
+		  else if FCurStringValueDelimiter = Buffer[BufferPos] then
+		  begin
+		    FCurStringValueDelimiter := #0;
+		    FAttrNameRead := False;
+		  end;
+		end;
+		FTokenText := FTokenText + Buffer[BufferPos];
+		Inc(BufferPos);
+	      end;
+	    '=':
+	      begin
+	        FAttrNameRead := True;
+		FTokenText := FTokenText + Buffer[BufferPos];
+		Inc(BufferPos);
+	      end;
+	    '>':
+	      begin
+	        Inc(BufferPos);
+		if FCurStringValueDelimiter = #0 then
+		  EnterNewScannerContext(scUnknown);
+	      end;
+	    else
+	    begin
+	      FTokenText := FTokenText + Buffer[BufferPos];
+	      Inc(BufferPos);
+	    end;
+	  end;
+      end;
+  end;
+end;
+
+procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
+
+  function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
+  var
+    i, j: Integer;
+    AttrName: String;
+    ValueDelimiter: Char;
+    DoIncJ: Boolean;
+  begin
+    Attr := nil;
+    i := Pos(' ', s);
+    if i <= 0 then
+      Result := LowerCase(s)
+    else
+    begin
+      Result := LowerCase(Copy(s, 1, i - 1));
+      Attr := TSAXAttributes.Create;
+
+      Inc(i);
+
+      while (i <= Length(s)) and (s[i] in WhitespaceChars) do
+        Inc(i);
+
+      SetLength(AttrName, 0);
+      j := i;
+
+      while j <= Length(s) do
+        if s[j] = '=' then
+	begin
+	  AttrName := LowerCase(Copy(s, i, j - i));
+	  Inc(j);
+	  if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
+	  begin
+  	    ValueDelimiter := s[j];
+	    Inc(j);
+	  end else
+	    ValueDelimiter := #0;
+	  i := j;
+	  DoIncJ := False;
+	  while j <= Length(s) do
+	    if ValueDelimiter = #0 then
+	      if s[j] in WhitespaceChars then
+	        break
+	      else
+	        Inc(j)
+	    else if s[j] = ValueDelimiter then
+	    begin
+	      DoIncJ := True;
+	      break
+	    end else
+	      Inc(j);
+
+          Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
+
+	  if DoIncJ then
+	    Inc(j);
+
+          while (j <= Length(s)) and (s[j] in WhitespaceChars) do
+	    Inc(j);
+	  i := j;
+	end
+	else if s[j] in WhitespaceChars then
+	begin
+	  Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
+	  Inc(j);
+          while (j <= Length(s)) and (s[j] in WhitespaceChars) do
+	    Inc(j);
+	  i := j;
+        end else
+	  Inc(j);
+    end;
+  end;
+
+var
+  Attr: TSAXAttributes;
+  EntString, TagName: String;
+  Found: Boolean;
+  Ent: Char;
+  i: Integer;
+begin
+  case ScannerContext of
+    scWhitespace:
+      DoIgnorableWhitespace(PChar(TokenText), 1, Length(TokenText));
+    scText:
+      DoCharacters(PChar(TokenText), 0, Length(TokenText));
+    scEntityReference:
+      begin
+        if ResolveHTMLEntityReference(TokenText, Ent) then
+	begin
+	  EntString := Ent;
+	  DoCharacters(PChar(EntString), 0, 1);
+	end else
+	begin
+	  { Is this a predefined Unicode character entity? We must check this,
+	    as undefined entities must be handled as text, for compatiblity
+	    to popular browsers... }
+	  Found := False;
+	  for i := Low(UnicodeHTMLEntities) to High(UnicodeHTMLEntities) do
+	    if UnicodeHTMLEntities[i] = TokenText then
+	    begin
+	      Found := True;
+	      break;
+	    end;
+	  if Found then
+	    DoSkippedEntity(TokenText)
+	  else
+            DoCharacters(PChar('&' + TokenText), 0, Length(TokenText) + 1);
+	end;
+      end;
+    scTag:
+      if Length(TokenText) > 0 then
+      begin
+        Attr := nil;
+        if TokenText[1] = '/' then
+	begin
+	  DoEndElement('',
+	    SplitTagString(Copy(TokenText, 2, Length(TokenText)), Attr), '');
+	end else if TokenText[1] <> '!' then
+	begin
+	  // Do NOT combine to a single line, as Attr is an output value!
+	  TagName := SplitTagString(TokenText, Attr);
+	  DoStartElement('', TagName, '', Attr);
+	end;
+	if Assigned(Attr) then
+  	  Attr.Free;
+      end;
+  end;
+  FScannerContext := NewContext;
+  SetLength(FTokenText, 0);
+  FCurStringValueDelimiter := #0;
+  FAttrNameRead := False;
+end;
+
+
+{ THTMLToDOMConverter }
+
+constructor THTMLToDOMConverter.Create(AReader: THTMLReader; ADocument: TDOMDocument);
+begin
+  inherited Create;
+  FReader := AReader;
+  FReader.OnCharacters := @ReaderCharacters;
+  FReader.OnIgnorableWhitespace := @ReaderIgnorableWhitespace;
+  FReader.OnSkippedEntity := @ReaderSkippedEntity;
+  FReader.OnStartElement := @ReaderStartElement;
+  FReader.OnEndElement := @ReaderEndElement;
+  FDocument := ADocument;
+  FElementStack := TList.Create;
+  FNodeBuffer := TList.Create;
+end;
+
+destructor THTMLToDOMConverter.Destroy;
+var
+  i: Integer;
+begin
+  // Theoretically, always exactly one item will remain - the root element:
+  for i := 0 to FNodeBuffer.Count - 1 do
+    THTMLNodeInfo(FNodeBuffer[i]).Free;
+  FNodeBuffer.Free;
+
+  FElementStack.Free;
+  inherited Destroy;
+end;
+
+procedure THTMLToDOMConverter.ReaderCharacters(Sender: TObject;
+  const ch: PSAXChar; Start, Count: Integer);
+var
+  s: String;
+  NodeInfo: THTMLNodeInfo;
+begin
+  SetLength(s, Count);
+  Move(ch^, s[1], Count * SizeOf(SAXChar));
+
+  NodeInfo := THTMLNodeInfo.Create;
+  NodeInfo.NodeType := ntText;
+  NodeInfo.DOMNode := FDocument.CreateTextNode(s);
+  FNodeBuffer.Add(NodeInfo);
+end;
+
+procedure THTMLToDOMConverter.ReaderIgnorableWhitespace(Sender: TObject;
+  const ch: PSAXChar; Start, Count: Integer);
+var
+  s: String;
+  NodeInfo: THTMLNodeInfo;
+begin
+  SetLength(s, Count);
+  Move(ch^, s[1], Count * SizeOf(SAXChar));
+
+  NodeInfo := THTMLNodeInfo.Create;
+  NodeInfo.NodeType := ntWhitespace;
+  NodeInfo.DOMNode := FDocument.CreateTextNode(s);
+  FNodeBuffer.Add(NodeInfo);
+end;
+
+procedure THTMLToDOMConverter.ReaderSkippedEntity(Sender: TObject;
+  const Name: SAXString);
+var
+  NodeInfo: THTMLNodeInfo;
+begin
+  NodeInfo := THTMLNodeInfo.Create;
+  NodeInfo.NodeType := ntEntityReference;
+  NodeInfo.DOMNode := FDocument.CreateEntityReference(Name);
+  FNodeBuffer.Add(NodeInfo);
+end;
+
+procedure THTMLToDOMConverter.ReaderStartElement(Sender: TObject;
+  const NamespaceURI, LocalName, RawName: SAXString; Attr: TSAXAttributes);
+var
+  NodeInfo: THTMLNodeInfo;
+  Element: TDOMElement;
+  i: Integer;
+begin
+  Element := FDocument.CreateElement(LocalName);
+  if Assigned(Attr) then
+  begin
+    // WriteLn('Attribute: ', Attr.GetLength);
+    for i := 0 to Attr.GetLength - 1 do
+    begin
+      // WriteLn('#', i, ': LocalName = ', Attr.GetLocalName(i), ', Value = ', Attr.GetValue(i));
+      Element[Attr.GetLocalName(i)] := Attr.GetValue(i);
+    end;
+  end;
+
+  NodeInfo := THTMLNodeInfo.Create;
+  NodeInfo.NodeType := ntTag;
+  NodeInfo.DOMNode := Element;
+  if not Assigned(FDocument.DocumentElement) then
+    FDocument.AppendChild(NodeInfo.DOMNode);
+  FNodeBuffer.Add(NodeInfo);
+end;
+
+procedure THTMLToDOMConverter.ReaderEndElement(Sender: TObject;
+  const NamespaceURI, LocalName, RawName: SAXString);
+var
+  NodeInfo, NodeInfo2: THTMLNodeInfo;
+  i, j: Integer;
+  TagInfo: PHTMLElementProps;
+  IsFirst: Boolean;
+begin
+  // WriteLn('End: ', LocalName);
+  // Find the matching start tag
+  i := FNodeBuffer.Count - 1;
+  while i >= 0 do
+  begin
+    NodeInfo := THTMLNodeInfo(FNodeBuffer.Items[i]);
+    if (NodeInfo.NodeType = ntTag) and
+      (CompareText(NodeInfo.DOMNode.NodeName, LocalName) = 0) then
+    begin
+      // We found the matching start tag
+
+      TagInfo := nil;
+      for j := Low(HTMLElProps) to High(HTMLElProps) do
+        if CompareText(HTMLElProps[j].Name, LocalName) = 0 then
+	begin
+	  TagInfo := @HTMLElProps[j];
+	  break;
+	end;
+
+      Inc(i);
+      IsFirst := True;
+      while i < FNodeBuffer.Count do
+      begin
+        NodeInfo2 := THTMLNodeInfo(FNodeBuffer.Items[i]);
+
+	if (NodeInfo2.NodeType = ntWhitespace) and Assigned(TagInfo) and
+	  (not (efPreserveWhitespace in TagInfo^.Flags)) then
+	  // Handle whitespace, which doesn't need to get preserved...
+	  if not (efPCDATAContent in TagInfo^.Flags) then
+	    // No character data allowed within the current element
+	    NodeInfo2.DOMNode.Free
+	  else
+	  begin
+	    // Character data allowed, so normalize it
+	    NodeInfo2.DOMNode.NodeValue := ' ';
+            NodeInfo.DOMNode.AppendChild(NodeInfo2.DOMNode)
+	  end;
+
+	NodeInfo2.Free;
+	FNodeBuffer.Delete(i);
+	IsFirst := False;
+      end;
+      break;
+    end;
+    Dec(i);
+  end;
+end;
+
+
+end.
+
+
+{
+  $Log$
+  Revision 1.1  2002-12-11 21:06:07  sg
+  * Small cleanups
+  * Replaced htmldoc unit with dom_html unit
+  * Added SAX parser framework and SAX HTML parser
+
+}