| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516 |
- // -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- //
- // internal System.Xml.XmlParser
- //
- // Author:
- // Daniel Weber ([email protected])
- //
- // (C) 2001 Daniel Weber
- //
- using System;
- using System.Collections;
- namespace System.Xml
- {
- internal class XmlParser
- {
- // Private data members
- // Document handed to the constructor.
- private XmlDocument Fdoc;
- // Character source that parse() reads from.
- private XmlInputSource Fsrc;
- // Elements push themselves on start and pop on completion.
- private Stack RefNodes;
- // private classes
- // Classification of a piece of XML markup. parse() uses a value of this type
- // as its scanner state; xmlUnknown means "between pieces".
- // The member order is significant only in that it fixes the numeric values.
- private enum DomPieceType
- {
-     xmlProcessingInstruction,
-     xmlXmlDeclaration,
-     xmlTextDeclaration,
-     xmlComment,
-     xmlCDATA,
-     xmlPCDATA,
-     xmlDoctype,
-     xmlStartTag,
-     xmlEndTag,
-     xmlEmptyElementTag,
-     xmlCharRef,
-     xmlEntityRef,
-     xmlParameterEntityRef,
-     xmlEntityDecl,
-     xmlElementDecl,
-     xmlAttributeDecl,
-     xmlNotationDecl,
-     xmlCondSection,
-     xmlUnknown
- }
- // constants
- private const char CR = (char) 0x0D;
- private const char LF = (char) 0x0A;
- private const char QM = '?';
- private const char AMP = '&';
- // NOTE: these two names are swapped with respect to the usual convention:
- // GTCODE holds '<' and LTCODE holds '>'. parse() is written against these
- // exact values (GTCODE opens markup, LTCODE closes it), so they are kept.
- private const char GTCODE = '<';
- private const char LTCODE = '>';
- // The four constants below used to be written as 0x39, 0x34, 0x35 and 0x47,
- // i.e. the decimal code points 39, 34, 35 and 47 typed as hexadecimal, which
- // made them '9', '4', '5' and 'G'. The values here are the real code points.
- private const char SINGLEQUOTE = (char) 0x27; // '
- private const char DOUBLEQUOTE = (char) 0x22; // "
- private const char NUMBERSIGN = (char) 0x23; // #
- private const char SLASH = (char) 0x2F; // /
- private const string PISTART = "<?";
- private const string PIEND = "?>";
- private const string XMLDECLSTART = "<?xml";
- private const string COMMENTSTART = "<!--";
- private const string CDATASTART = "<![CDATA[";
- private const string DOCTYPESTART = "<!DOCTYPE";
- // private properties
- // Entry on top of the RefNodes stack as an XmlNode, or null when the stack is
- // empty or its top entry is not an XmlNode.
- // (Presumably the element currently being built; confirm against the callers.)
- private XmlNode refNode
- {
-     get
-     {
-         // Stack.Peek() throws InvalidOperationException on an empty stack
-         // instead of returning null, so emptiness has to be tested first.
-         if ( RefNodes.Count == 0 )
-             return null;
-         // "as" already yields null for a null or non-XmlNode entry.
-         return RefNodes.Peek() as XmlNode;
-     }
- }
- // public properties
- // public methods
- // Reads Fsrc one character at a time, groups the characters into markup pieces
- // (character data, entity/character references, tags, processing instructions,
- // comments, CDATA sections, DOCTYPE declarations) and passes each completed
- // piece to the matching write* method.
- //
- // Returns true when the input was consumed without any write* method returning
- // an error and without an exception being raised; false otherwise.
- //
- // The piece buffer ("content") always begins with the opening delimiter of the
- // piece, and all positions below are zero-based. This routine was ported from
- // Delphi, whose strings are one-based; the indexes and Substring offsets here
- // are the zero-based equivalents of the Delphi ones.
- public bool parse()
- {
-     bool retval = true;
-     XmlException parseError = null;
-     bool singleQuoteOpen = false;
-     bool doubleQuoteOpen = false;
-     bool bracketOpen = false;
-     string content = "";
-     DomPieceType pieceType = DomPieceType.xmlUnknown;
-     string subEndMarker = "";
-     string subStartMarker = "";
-     try
-     {
-         while ( !Fsrc.atEOF() )
-         {
-             // Stop as soon as a write* method has reported an error.
-             if ( parseError != null ) break;
-             char c = Fsrc.getNextChar();
-             if ( !XmlNames_1_0.IsXmlChar(c) )
-                 throw new XmlException("'Invalid character error.'", Fsrc);
-             switch (pieceType)
-             {
-                 // Between pieces: the first character decides what is collected next.
-                 case DomPieceType.xmlUnknown:
-                     if ( c == GTCODE )
-                         pieceType = DomPieceType.xmlStartTag;
-                     else if ( c == AMP )
-                         pieceType = DomPieceType.xmlEntityRef;
-                     else
-                         pieceType = DomPieceType.xmlPCDATA;
-                     content += c;
-                     Fsrc.pieceStart();
-                     break;
-                 // Character data runs until the next GTCODE or AMP.
-                 case DomPieceType.xmlPCDATA:
-                     if ( c == GTCODE )
-                     {
-                         parseError = writePCDATA(content);
-                         content = "";
-                         pieceType = DomPieceType.xmlStartTag;
-                         Fsrc.pieceStart();
-                     }
-                     else if ( c == AMP )
-                     {
-                         parseError = writePCDATA(content);
-                         content = "";
-                         pieceType = DomPieceType.xmlEntityRef;
-                         Fsrc.pieceStart();
-                     }
-                     content += c;
-                     break;
-                 // AMP ... ';' : NUMBERSIGN right after AMP makes it a character reference.
-                 case DomPieceType.xmlEntityRef:
-                     content += c;
-                     if ( c == ';' )
-                     {
-                         // content[0] is AMP; content has at least two characters here.
-                         if ( content[1] == NUMBERSIGN )
-                             parseError = writeCharRef(content);
-                         else
-                             parseError = writeEntityRef(content);
-                         content = "";
-                         pieceType = DomPieceType.xmlUnknown;
-                     }
-                     break;
-                 // Anything that started with GTCODE and has not been recognised
-                 // as a more specific construct yet.
-                 case DomPieceType.xmlStartTag:
-                     content += c;
-                     // The opening delimiters have fixed lengths, so each is
-                     // tested exactly once, when the buffer reaches that length.
-                     switch ( content.Length )
-                     {
-                         case 2:
-                             if (content.StartsWith(PISTART))
-                                 pieceType = DomPieceType.xmlProcessingInstruction;
-                             break;
-                         case 4:
-                             if (content.StartsWith(COMMENTSTART))
-                                 pieceType = DomPieceType.xmlComment;
-                             break;
-                         case 9:
-                             if (content.StartsWith(CDATASTART))
-                                 pieceType = DomPieceType.xmlCDATA;
-                             else if (content.StartsWith(DOCTYPESTART))
-                             {
-                                 pieceType = DomPieceType.xmlDoctype;
-                                 subEndMarker = "";
-                                 subStartMarker = "";
-                                 bracketOpen = false;
-                             }
-                             break;
-                     }
-                     // Track quoted attribute values so that an LTCODE inside
-                     // quotes does not end the tag.
-                     if ((c == SINGLEQUOTE) && (! doubleQuoteOpen))
-                         singleQuoteOpen = ! singleQuoteOpen;
-                     else if ((c == DOUBLEQUOTE) && (! singleQuoteOpen))
-                         doubleQuoteOpen = ! doubleQuoteOpen;
-                     else if ((c == LTCODE) && (! doubleQuoteOpen) && (! singleQuoteOpen))
-                     {
-                         // content is GTCODE ... LTCODE and at least two characters long.
-                         int l = content.Length;
-                         if (content[1] == SLASH)
-                         {
-                             // End tag: drop the two leading and the one trailing
-                             // delimiter characters, then any white space that
-                             // follows the tag name.
-                             int offset = 3;
-                             while ((l-offset > 0) && XmlNames_1_0.IsXmlWhiteSpace(content[l-offset+1]))
-                                 offset++;
-                             parseError = writeEndElement(content.Substring(2, l-offset));
-                         }
-                         else if (content[l-2] == SLASH)
-                         {
-                             // Empty-element tag: strip GTCODE in front and SLASH LTCODE at the end.
-                             parseError = writeEmptyElement(content.Substring(1, l-3));
-                         }
-                         else
-                         {
-                             // Start tag: strip the surrounding GTCODE and LTCODE.
-                             parseError = writeStartElement(content.Substring(1, l-2));
-                         }
-                         content = "";
-                         pieceType = DomPieceType.xmlUnknown;
-                     }
-                     break;
-                 //<?PINAME ?>
-                 case DomPieceType.xmlProcessingInstruction:
-                     content += c;
-                     // Ends at QM LTCODE. The length test keeps the QM of the
-                     // opening "<?" from being taken for the closing one ("<?>").
-                     if ( (c == LTCODE) && (content.Length >= 4) &&
-                          (content[content.Length-2] == QM) )
-                     {
-                         // Both receivers get the text between "<?" and "?>".
-                         string piBody = content.Substring(2, content.Length-4);
-                         if ( (content.Length > 5) &&
-                              (XmlNames_1_0.IsXmlWhiteSpace(content[5])) &&
-                              (content.StartsWith(XMLDECLSTART)) )
-                             parseError = writeXmlDeclaration(piBody);
-                         else
-                             parseError = writeProcessingInstruction(piBody);
-                         content = "";
-                         pieceType = DomPieceType.xmlUnknown;
-                     }
-                     break;
-                 case DomPieceType.xmlComment:
-                     content += c;
-                     // Length > 6 ensures the closing "-->" does not overlap the opening "<!--".
-                     if ( (c == LTCODE) && (content.Length > 6) && (content.EndsWith("-->")) )
-                     {
-                         // Text between "<!--" and "-->".
-                         parseError = writeComment(content.Substring(4, content.Length-7));
-                         content = "";
-                         pieceType = DomPieceType.xmlUnknown;
-                     }
-                     break;
-                 case DomPieceType.xmlCDATA:
-                     content += c;
-                     // Ends at "]]" followed by LTCODE; 12 is the length of an empty section.
-                     if ( (c == LTCODE) && (content.Length >= 12) &&
-                          (content[content.Length-2] == ']') &&
-                          (content[content.Length-3] == ']') )
-                     {
-                         // Text between "<![CDATA[" and the closing three characters.
-                         parseError = writeCDATA(content.Substring(9, content.Length-12));
-                         content = "";
-                         pieceType = DomPieceType.xmlUnknown;
-                     }
-                     break;
-                 case DomPieceType.xmlDoctype:
-                     content += c;
-                     if (subEndMarker == "")
-                     {
-                         // Not inside a nested comment or processing instruction.
-                         if ( (c == SINGLEQUOTE) && (! doubleQuoteOpen) )
-                             singleQuoteOpen = ! singleQuoteOpen;
-                         else if ( (c == DOUBLEQUOTE) && (! singleQuoteOpen) )
-                             doubleQuoteOpen = ! doubleQuoteOpen;
-                         if (bracketOpen)
-                         {
-                             // Inside [ ... ]: recognise "<?" and "<!--" one
-                             // character at a time so that their contents are
-                             // skipped up to the matching end marker.
-                             if ( (! singleQuoteOpen) && (! doubleQuoteOpen) )
-                             {
-                                 if (c == GTCODE)
-                                     subStartMarker = "<";
-                                 else if ( (c == '!') && (subStartMarker == "<") )
-                                     // Was "<", which made the "<!-" / "<!--" steps unreachable.
-                                     subStartMarker = "<!";
-                                 else if ( (c == QM) && (subStartMarker == "<") )
-                                 {
-                                     subStartMarker = "";
-                                     subEndMarker = PIEND;
-                                 }
-                                 else if ( (c == '-') && (subStartMarker == "<!") )
-                                     subStartMarker = "<!-";
-                                 else if ( (c == '-') && (subStartMarker == "<!-") )
-                                 {
-                                     subStartMarker = "";
-                                     subEndMarker = "-->";
-                                 }
-                                 else if (subStartMarker != "")
-                                     subStartMarker = "";
-                                 if (c == ']')
-                                     bracketOpen = false;
-                             }
-                         }
-                         else if ( (c == '[') && (! singleQuoteOpen) && (! doubleQuoteOpen) )
-                         {
-                             bracketOpen = true;
-                         }
-                     }
-                     else if (content.EndsWith(subEndMarker))
-                     {
-                         // The nested comment / processing instruction has ended.
-                         subEndMarker = "";
-                     }
-                     // The declaration ends at the first '>' that is outside
-                     // quotes, brackets and nested constructs.
-                     if ( (! doubleQuoteOpen) && (! singleQuoteOpen) && (! bracketOpen) &&
-                          (subEndMarker == "") && (c == '>') )
-                     {
-                         parseError = writeDoctype(content);
-                         content = "";
-                         pieceType = DomPieceType.xmlUnknown;
-                     }
-                     break;
-             } // switch
-         } // while more characters
-         // Whatever is left in the buffer at end of input is flushed as character data.
-         if ( (parseError == null) && (content.Length > 0) )
-             parseError = writePCDATA(content);
-     } // try
-     catch (XmlException e)
-     {
-         // Raised above for an illegal character (or by a callee); keep it
-         // so the failure is reported through the return value.
-         parseError = e;
-     }
-     catch (Exception)
-     {
-         // Previously every exception was swallowed here and parse() still
-         // returned true. The Delphi original converted EConvertError and
-         // EReadError into an 'Invalid character error.' parser error at
-         // this point; until that conversion exists, at least report failure.
-         retval = false;
-     }
-     if (parseError != null)
-     {
-         // Deal with the error, somehow
-         retval = false;
-     }
-     return retval;
- }
- // private methods
- // Receives a run of character data collected by parse().
- // Stub: content is ignored and null is returned, which parse() treats as
- // "no error".
- private XmlException writePCDATA( string content )
- {
-     return null;
- }
- // Receives a completed entity reference from parse(): content starts with AMP
- // and ends with ';'.
- // Returns null ("no error"); the extracted name is not used for anything yet.
- private XmlException writeEntityRef(string content)
- {
-     // Name without the leading AMP and the trailing ';'. The former
-     // Substring(2, Length - 2) was a one-based offset carried over from
-     // Delphi: it skipped the first name character and kept the ';'.
-     string entityName = "";
-     if (content.Length >= 2)
-         entityName = content.Substring(1, content.Length - 2);
-     // TODO: create the entity reference node from entityName.
-     return null;
- }
- // Receives a completed character reference from parse() (an entity-style
- // piece whose marker after AMP is NUMBERSIGN).
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeCharRef( string content )
- {
-     return null;
- }
- // Receives the text parse() extracted from an end tag.
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeEndElement( string content )
- {
-     return null;
- }
- // Receives the text parse() extracted from an empty-element tag.
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeEmptyElement( string content )
- {
-     return null;
- }
- // Receives the text parse() extracted from a start tag.
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeStartElement( string content )
- {
-     return null;
- }
- // Receives the text parse() extracted from a comment.
- // Stub: content is ignored and null ("no error") is returned. The block
- // comment in the body is the Delphi code this method is meant to be ported
- // from, kept as a reference: two builders that create a comment node and
- // append it to the reference node, and a well-formedness handler that rejects
- // "--" inside the data, a trailing "-", and non-XML characters.
- private XmlException writeComment( string content )
- {
-     /*
-     procedure TdomStandardIntSubsetBuilder.comment(const sender: TdomCustomParser;
-     const locator: TdomStandardLocator;
-     const data: wideString);
-     var
-     newComment: TdomCMComment;
-     begin
-     if not assigned(FRefNode) then exit;
-     try
-     newComment:= FRefNode.OwnerCMObject.CreateCMComment(data);
-     try
-     FRefNode.appendChild(newComment);
-     except
-     if assigned(newComment.ParentNode)
-     then newComment.ParentNode.RemoveChild(newComment);
-     FRefNode.OwnerCMObject.FreeAllCMNodes(TdomCMNode(newComment));
-     raise;
-     end; {try ...}
-     except
-     raise EParserInvalidCharacter_Err.create('Invalid character error.');
-     end; {try ...}
-     end;
-     function TXmlDocBuilder.comment(const sender: TXmlCustomProcessorAgent;
-     const locator: TdomStandardLocator;
-     data: wideString): TXmlParserError;
-     var
-     newComment: TdomComment;
-     begin
-     if assigned(FOnComment) then FOnComment(sender,locator,data);
-     result:= nil;
-     if assigned(FRefNode) then begin
-     try
-     newComment:= FRefNode.OwnerDocument.CreateComment(data);
-     try
-     FRefNode.appendChild(newComment);
-     except
-     if assigned(newComment.ParentNode)
-     then newComment.ParentNode.RemoveChild(newComment);
-     FRefNode.OwnerDocument.FreeAllNodes(TdomNode(newComment));
-     raise;
-     end; {try ...}
-     except
-     result:= parserErrorFactory(sender,locator,
-     EParserInvalidComment_Err.create('Invalid comment error.'),
-     data);
-     end; {try ...}
-     end; {if assigned(FRefNode) ...}
-     if not assigned(result)
-     then if assigned(nextHandler)
-     then result:= nextHandler.comment(sender,locator,data);
-     end;
-     function TXmlWFTestContentHandler.comment(const sender: TXmlCustomProcessorAgent;
-     const locator: TdomStandardLocator;
-     data: wideString): TXmlParserError;
-     var
-     dataLength: integer;
-     begin
-     if assigned(FOnComment) then FOnComment(sender,locator,data);
-     if not FIsActive
-     then raise EParserException.Create('TXmlWFTestContentHandler not active.');
-     result:= nil;
-     FXMLDeclarationAllowed:= false;
-     if pos('--',data) > 0
-     then result:= parserErrorFactory(sender,locator,
-     EParserInvalidComment_Err.create('Invalid comment error.'),
-     '--');
-     dataLength:= length(data);
-     if dataLength > 0
-     then if WideChar(data[dataLength]) = '-'
-     then if not assigned(result)
-     then result:= parserErrorFactory(sender,locator,
-     EParserInvalidComment_Err.create('Invalid comment error.'),
-     '-');
-     if not IsXmlChars(data)
-     then if not assigned(result)
-     then result:= parserErrorFactory(sender,locator,
-     EParserInvalidCharacter_Err.create('Invalid character error.'),
-     data);
-     if not assigned(result)
-     then if assigned(nextHandler)
-     then result:= nextHandler.comment(sender,locator,data);
-     end;
-     */
-     return null;
- }
- // Receives the text parse() extracted from a processing instruction that
- // starts with XMLDECLSTART followed by white space.
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeXmlDeclaration( string content )
- {
-     return null;
- }
- // Receives the text parse() extracted from a processing instruction that is
- // not an XML declaration.
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeProcessingInstruction( string content )
- {
-     return null;
- }
- // Receives the text parse() extracted from a CDATA section.
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeCDATA( string content )
- {
-     return null;
- }
- // Receives a complete DOCTYPE declaration from parse(); unlike the other
- // pieces it is passed with its delimiters still attached.
- // Stub: content is ignored and null ("no error") is returned.
- private XmlException writeDoctype( string content )
- {
-     return null;
- }
- // Empty placeholder; nothing in the visible part of this class calls it.
- private void mainLoop()
- {
-     // intentionally empty
- }
- // Constructors
- // Stores the input source and the document, and starts with an empty
- // reference-node stack.
- // src: character source that parse() will read.
- // doc: document kept in Fdoc (presumably the parse target; it is not used
- //      anywhere else in the visible code).
- public XmlParser( XmlInputSource src, XmlDocument doc )
- {
-     this.RefNodes = new Stack();
-     this.Fdoc = doc;
-     this.Fsrc = src;
- }
- }
- }
|