| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521 |
- //
- // System.Xml.XPath.Tokenizer.cs / Mono.Xml.Xsl/PatternTokenizer.cs
- //
- // Author:
- // Piers Haken ([email protected])
- // Atsushi Enomoto ([email protected])
- //
- // (C) 2002 Piers Haken
- // (C) 2005 Novell Inc,
- //
- // IMPORTANT:
- //
- // Do not edit PatternTokenizer.cs. It is autogenerated.
- //
- //
- // Permission is hereby granted, free of charge, to any person obtaining
- // a copy of this software and associated documentation files (the
- // "Software"), to deal in the Software without restriction, including
- // without limitation the rights to use, copy, modify, merge, publish,
- // distribute, sublicense, and/or sell copies of the Software, and to
- // permit persons to whom the Software is furnished to do so, subject to
- // the following conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- //
- using System;
- using System.Globalization;
- using System.IO;
- using System.Text;
- using System.Collections;
- using System.Xml;
- using System.Xml.XPath;
- using Mono.Xml.XPath;
- #if XSLT_PATTERN
- namespace Mono.Xml.Xsl
- #else
- namespace Mono.Xml.XPath
- #endif
- {
- internal class Tokenizer : yyParser.yyInput
- {
// Expression text being tokenized, the read cursor into it, and its length.
private string m_rgchInput;
private int m_ich;
private int m_cch;

// Token produced by the latest advance() and the value attached to it.
private int m_iToken;
private object m_objToken;

// Token recorded by the previous advance(); starts out as Token.EOF,
// which is what IsFirstToken tests for.
private int m_iTokenPrev = Token.EOF;

// Whether the previous / current token counts as an "operator" when
// deciding how a following name or '*' is classified.
private bool m_fPrevWasOperator;
private bool m_fThisIsOperator;
// Keyword text -> token id lookup; filled once by the static constructor.
private static readonly Hashtable s_mapTokens = new Hashtable ();

// Flat list of (token id, keyword text) pairs used to seed s_mapTokens.
private static readonly object [] s_rgTokenMap =
{
	// operator names
	Token.AND, "and",
	Token.OR, "or",
	Token.DIV, "div",
	Token.MOD, "mod",

	// axis names
	Token.ANCESTOR, "ancestor",
	Token.ANCESTOR_OR_SELF, "ancestor-or-self",
	Token.ATTRIBUTE, "attribute",
	Token.CHILD, "child",
	Token.DESCENDANT, "descendant",
	Token.DESCENDANT_OR_SELF, "descendant-or-self",
	Token.FOLLOWING, "following",
	Token.FOLLOWING_SIBLING, "following-sibling",
	Token.NAMESPACE, "namespace",
	Token.PARENT, "parent",
	Token.PRECEDING, "preceding",
	Token.PRECEDING_SIBLING, "preceding-sibling",
	Token.SELF, "self",

	// node types
	Token.COMMENT, "comment",
	Token.TEXT, "text",
	Token.PROCESSING_INSTRUCTION, "processing-instruction",
	Token.NODE, "node",
};

// Sentinel returned by Peek and GetChar once the input is exhausted.
private const char EOL = '\0';

// Inverts s_rgTokenMap into s_mapTokens so the keyword text becomes the key.
static Tokenizer ()
{
	int iEntry = 0;
	while (iEntry < s_rgTokenMap.Length)
	{
		object objKeyword = s_rgTokenMap [iEntry + 1];
		object objTokenId = s_rgTokenMap [iEntry];
		s_mapTokens.Add (objKeyword, objTokenId);
		iEntry += 2;
	}
}
// Creates a tokenizer over strInput and positions it on the first
// character that is not whitespace.
//
// strInput: the expression text to tokenize; must not be null.
// Throws ArgumentNullException when strInput is null, instead of the
// NullReferenceException that reading strInput.Length would raise.
public Tokenizer (string strInput)
{
	if (strInput == null)
		throw new ArgumentNullException ("strInput");

	m_rgchInput = strInput;
	m_ich = 0;
	m_cch = strInput.Length;
	SkipWhitespace ();
}
// Returns the character iOffset positions past the cursor without
// consuming anything, or EOL when that position is at or beyond the
// end of the input.
private char Peek (int iOffset)
{
	int ichTarget = m_ich + iOffset;
	return (ichTarget < m_cch) ? m_rgchInput [ichTarget] : EOL;
}
// Returns the character under the cursor without consuming it, or EOL
// when the cursor has reached the end of the input.
private char Peek ()
{
	return (m_ich < m_cch) ? m_rgchInput [m_ich] : EOL;
}
// Consumes and returns the character under the cursor. At the end of
// the input it returns EOL and leaves the cursor where it is.
private char GetChar ()
{
	if (m_ich < m_cch)
		return m_rgchInput [m_ich++];
	return EOL;
}
// Moves the cursor back by one character and returns the character it
// now rests on. Throws XPathException when the cursor is already at the
// start of the input.
private char PutBack ()
{
	if (m_ich != 0)
	{
		m_ich -= 1;
		return m_rgchInput [m_ich];
	}
	throw new XPathException ("XPath parser returned an error status: invalid tokenizer state.");
}
// Consumes every whitespace character under the cursor.
// Returns true if any whitespace was skipped.
private bool SkipWhitespace ()
{
	int ichBefore = m_ich;
	while (IsWhitespace (Peek ()))
		GetChar ();
	// The cursor only moves when a whitespace character was consumed.
	return m_ich != ichBefore;
}
// Reads a number (digits, optionally followed by '.' and more digits)
// starting at the cursor, stores it in m_objToken as a double parsed
// with invariant formatting, and returns Token.NUMBER.
private int ParseNumber ()
{
	int ichStart = m_ich;

	// integer part (empty when the number starts with '.')
	while (IsDigit (Peek ()))
		GetChar ();

	// A trailing '.' with no digits after it, as in "3.", is accepted
	// on purpose: it is not an error (XPath 3.7 syntax [30]).
	if (Peek () == '.')
	{
		GetChar ();
		while (IsDigit (Peek ()))
			GetChar ();
	}

	string strNumber = m_rgchInput.Substring (ichStart, m_ich - ichStart);
	m_objToken = Double.Parse (strNumber, NumberFormatInfo.InvariantInfo);
	return Token.NUMBER;
}
// Reads a quoted literal. The character under the cursor is taken as
// the quote, and everything up to the next occurrence of that same
// character becomes the string stored in m_objToken (quotes excluded).
// Returns Token.LITERAL; throws XPathException when the input ends
// before the closing quote is found.
private int ParseLiteral ()
{
	char chQuote = GetChar ();
	int ichStart = m_ich;

	for (char chNext = Peek (); chNext != chQuote; chNext = Peek ())
	{
		if (chNext == EOL)
			throw new XPathException ("unmatched "+chQuote+" in expression");
		GetChar ();
	}

	string strLiteral = m_rgchInput.Substring (ichStart, m_ich - ichStart);
	GetChar (); // step over the closing quote
	m_objToken = strLiteral;
	return Token.LITERAL;
}
// Reads a name at the cursor: a letter or '_' followed by any run of
// letters, digits, '_', '-' and '.'. Whitespace after the name is
// consumed as well. Returns the name, or null (consuming nothing) when
// the cursor is not on a letter or '_'.
private string ReadIdentifier ()
{
	char chFirst = Peek ();
	if (chFirst != '_' && !Char.IsLetter (chFirst))
		return null;

	int ichStart = m_ich;
	GetChar ();
	for (;;)
	{
		char chNext = Peek ();
		bool fNameChar = chNext == '_' || chNext == '-' || chNext == '.'
			|| Char.IsLetterOrDigit (chNext);
		if (!fNameChar)
			break;
		GetChar ();
	}

	string strName = m_rgchInput.Substring (ichStart, m_ich - ichStart);
	SkipWhitespace ();
	return strName;
}
// Reads a name at the cursor and classifies it from its context:
//   name '::'        -> the axis token (must be a known axis name)
//   prefix ':' '*'   -> Token.QName, value XmlQualifiedName ("", prefix)
//   prefix ':' local -> Token.FUNCTION_NAME when '(' follows, else Token.QName
//   operator slot    -> the operator-name token (and / or / div / mod)
//   name '('         -> a node-type token, or Token.FUNCTION_NAME
//   anything else    -> Token.QName
// The token's value is left in m_objToken. Throws XPathException for a
// name that is not valid in the position where it appears.
private int ParseIdentifier ()
{
	string strName = ReadIdentifier ();
	object objKeyword = s_mapTokens [strName];
	bool fKeyword = (objKeyword != null);
	int iToken = fKeyword ? (int) objKeyword : Token.QName;
	m_objToken = strName;

	char chNext = Peek ();
	if (chNext == ':')
	{
		if (Peek (1) == ':')
		{
			// If the two characters following an NCName (possibly
			// after intervening ExprWhitespace) are ::, then the
			// token must be recognized as an AxisName.
			if (fKeyword && IsAxisName (iToken))
				return iToken;
			throw new XPathException ("invalid axis name: '"+strName+"'");
		}

		// A single ':' : strName is a prefix; step over the colon.
		GetChar ();
		SkipWhitespace ();
		chNext = Peek ();
		if (chNext == '*')
		{
			GetChar ();
			m_objToken = new XmlQualifiedName ("", strName);
			return Token.QName;
		}

		string strLocal = ReadIdentifier ();
		if (strLocal == null)
			throw new XPathException ("invalid QName: "+strName+":"+(char)chNext);
		m_objToken = new XmlQualifiedName (strLocal, strName);
		return (Peek () == '(') ? Token.FUNCTION_NAME : Token.QName;
	}

	// If there is a preceding token and the preceding
	// token is not one of @, ::, (, [, , or an Operator,
	// then a * must be recognized as a MultiplyOperator
	// and an NCName must be recognized as an OperatorName.
	bool fOperatorExpected = !IsFirstToken && !m_fPrevWasOperator;
	if (fOperatorExpected)
	{
		if (fKeyword && IsOperatorName (iToken))
			return iToken;
		throw new XPathException ("invalid operator name: '"+strName+"'");
	}

	if (chNext == '(')
	{
		// If the character following an NCName (possibly
		// after intervening ExprWhitespace) is (, then the
		// token must be recognized as a NodeType or a FunctionName.
		if (!fKeyword)
		{
			m_objToken = new XmlQualifiedName (strName, "");
			return Token.FUNCTION_NAME;
		}
		if (IsNodeType (iToken))
			return iToken;
		throw new XPathException ("invalid function name: '"+strName+"'");
	}

	m_objToken = new XmlQualifiedName (strName, "");
	return Token.QName;
}
// True for space, tab, line feed and carriage return only; this is a
// fixed four-character set rather than a call to Char.IsWhiteSpace.
private static bool IsWhitespace (char ch)
{
	switch (ch)
	{
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
// True only for the ASCII digits '0' through '9'; this is a plain
// range test rather than a call to Char.IsDigit.
private static bool IsDigit (char ch)
{
	bool fOutsideRange = ch < '0' || ch > '9';
	return !fOutsideRange;
}
// Reads one token starting at the cursor and returns its token id.
// m_fThisIsOperator is set for every token after which a name or '*'
// is read as an operand rather than as an operator. Numbers, literals
// and names leave their value in m_objToken.
// Throws XPathException for a character that cannot start a token.
int ParseToken ()
{
	char chFirst = Peek ();
	switch (chFirst)
	{
	case EOL:
		return Token.EOF;

	case '/':
		GetChar ();
		m_fThisIsOperator = true;
		if (Peek () != '/')
			return Token.SLASH;
		GetChar ();
		return Token.SLASH2;

	case '.':
		GetChar ();
		if (Peek () == '.')
		{
			GetChar ();
			return Token.DOT2;
		}
		if (!IsDigit (Peek ()))
			return Token.DOT;
		// ".5" style number: hand the '.' back so ParseNumber sees it.
		PutBack ();
		return ParseNumber ();

	case ':':
		GetChar ();
		if (Peek () != ':')
			return Token.ERROR; // a lone ':' is not a token
		GetChar ();
		m_fThisIsOperator = true;
		return Token.COLON2;

	case ',':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.COMMA;

	case '@':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.AT;

	case '[':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.BRACKET_OPEN;

	case ']':
		GetChar ();
		return Token.BRACKET_CLOSE;

	case '(':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.PAREN_OPEN;

	case ')':
		GetChar ();
		return Token.PAREN_CLOSE;

	case '+':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.PLUS;

	case '-':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.MINUS;

	case '*':
		GetChar ();
		// At the start, or right after an operator, '*' is a name test;
		// anywhere else it is the multiply operator.
		if (IsFirstToken || m_fPrevWasOperator)
			return Token.ASTERISK;
		m_fThisIsOperator = true;
		return Token.MULTIPLY;

	case '$':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.DOLLAR;

	case '|':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.BAR;

	case '=':
		GetChar ();
		m_fThisIsOperator = true;
		return Token.EQ;

	case '!':
		GetChar ();
		if (Peek () != '=')
			break; // '!' alone is rejected below
		GetChar ();
		m_fThisIsOperator = true;
		return Token.NE;

	case '>':
		GetChar ();
		m_fThisIsOperator = true;
		if (Peek () != '=')
			return Token.GT;
		GetChar ();
		return Token.GE;

	case '<':
		GetChar ();
		m_fThisIsOperator = true;
		if (Peek () != '=')
			return Token.LT;
		GetChar ();
		return Token.LE;

	case '\'':
	case '\"':
		return ParseLiteral ();

	default:
		if (IsDigit (chFirst))
			return ParseNumber ();
		if (Char.IsLetter (chFirst) || chFirst == '_') // NCName
		{
			int iToken = ParseIdentifier ();
			m_fThisIsOperator |= IsOperatorName (iToken);
			return iToken;
		}
		break;
	}
	throw new XPathException ("invalid token: '"+chFirst+"'");
}
- ///////////////////////////
- // yyParser.yyInput methods
- ///////////////////////////
/** reads the next token and makes it the current one.
    Whitespace following the token is consumed, and the token is
    remembered as the "previous" token for the next call.
@return false once the end of the input has been reached.
@throws XPathException when the input cannot be tokenized.
*/
public bool advance ()
{
	m_objToken = null;
	m_fThisIsOperator = false;

	int iNext = ParseToken ();
	m_iToken = iNext;
	SkipWhitespace ();

	m_fPrevWasOperator = m_fThisIsOperator;
	m_iTokenPrev = iNext;
	return iNext != Token.EOF;
}
/** id of the current token.
    Should not be called if advance() returned false.
@return the token id stored by the latest advance().
*/
public int token ()
{
	return this.m_iToken;
}
/** value attached to the current token.
    Should not be called if advance() returned false.
@return the object stored for token(); null when the token carries none.
*/
public Object value ()
{
	return this.m_objToken;
}
// True while the previously recorded token is Token.EOF, which is the
// value m_iTokenPrev starts out with before advance() has stored a token.
private bool IsFirstToken
{
	get
	{
		return Token.EOF == m_iTokenPrev;
	}
}
// True when iToken is one of the node-type tokens:
// comment, text, processing-instruction or node.
private bool IsNodeType (int iToken)
{
	return iToken == Token.COMMENT
		|| iToken == Token.TEXT
		|| iToken == Token.PROCESSING_INSTRUCTION
		|| iToken == Token.NODE;
}
// True when iToken is one of the operator-name tokens:
// and, or, mod or div.
private bool IsOperatorName (int iToken)
{
	if (iToken == Token.AND || iToken == Token.OR)
		return true;
	if (iToken == Token.MOD || iToken == Token.DIV)
		return true;
	return false;
}
// True when iToken is one of the thirteen axis-name tokens.
private bool IsAxisName (int iToken)
{
	bool fAxis;
	switch (iToken)
	{
	// axes towards or including the context node
	case Token.SELF:
	case Token.PARENT:
	case Token.ANCESTOR:
	case Token.ANCESTOR_OR_SELF:
	// axes below the context node
	case Token.CHILD:
	case Token.DESCENDANT:
	case Token.DESCENDANT_OR_SELF:
	// axes before or after the context node
	case Token.FOLLOWING:
	case Token.FOLLOWING_SIBLING:
	case Token.PRECEDING:
	case Token.PRECEDING_SIBLING:
	// attribute and namespace axes
	case Token.ATTRIBUTE:
	case Token.NAMESPACE:
		fAxis = true;
		break;
	default:
		fAxis = false;
		break;
	}
	return fAxis;
}
- }
- }
|