| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338 |
- //
- // System.Web.Compilation.AspTokenizer
- //
- // Authors:
- // Gonzalo Paniagua Javier ([email protected])
- //
- // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
- //
- //
- // Permission is hereby granted, free of charge, to any person obtaining
- // a copy of this software and associated documentation files (the
- // "Software"), to deal in the Software without restriction, including
- // without limitation the rights to use, copy, modify, merge, publish,
- // distribute, sublicense, and/or sell copies of the Software, and to
- // permit persons to whom the Software is furnished to do so, subject to
- // the following conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- //
- using System;
- using System.Collections;
- using System.IO;
- using System.Text;
- namespace System.Web.Compilation
- {
// Token codes for lexemes that are not a single character.
//
// AspTokenizer.NextToken returns the character code itself for
// single-character tokens, so these codes start at 0x0200000, which is
// larger than any value a char can take when cast to int.
class Token
{
	// First code of the range; every other code is an offset from it.
	const int Base = 0x0200000;

	public const int EOF = Base;
	public const int IDENTIFIER = Base + 1;
	public const int DIRECTIVE = Base + 2;
	public const int ATTVALUE = Base + 3;
	public const int TEXT = Base + 4;
	public const int DOUBLEDASH = Base + 5;
	public const int CLOSING = Base + 6;
}
// Tokenizer for ASP.NET markup read from a TextReader.
//
// get_token () returns either one of the Token.* codes or, for
// single-character tokens, the character code itself. The text of the
// most recent token is available through Value.
//
// Lexing is context sensitive: the result for a character depends on
// whether we are inside a tag (InTag), on the previously returned token
// and on the ExpectAttrValue / Verbatim switches set by the caller.
class AspTokenizer
{
	// Input being tokenized.
	TextReader sr;
	// Last token produced by NextToken (); consulted when lexing the next one.
	int current_token;
	// sb holds the text of the current token, odds the whitespace that
	// was skipped inside a tag before it.
	StringBuilder sb, odds;
	// Location after the last character consumed.
	int col, line;
	// Location recorded by get_token () before it starts a new token.
	int begcol, begline;
	// Running count of consumed characters (adjusted by put_back/ungetc).
	int position;
	// True between a '<' and the matching '>'.
	bool inTag;
	// Set by the caller: a quote inside a tag starts an attribute value.
	bool expectAttrValue;
	// Result of the last ReadAttValue (), see AlternatingQuotes.
	bool alternatingQuotes;
	// True while the current token has been pushed back with put_back ().
	bool hasPutBack;
	// When true every character is returned as its own token.
	bool verbatim;
	// True once 'val' caches sb.ToString () for the current token.
	bool have_value;
	// One character of push-back for read_char ().
	bool have_unget;
	int unget_value;
	// Cached text of the current token.
	string val;

	// Creates a tokenizer that reads from 'reader'. Line and column
	// counting start at 1.
	//
	// Throws ArgumentNullException if 'reader' is null.
	public AspTokenizer (TextReader reader)
	{
		if (reader == null)
			throw new ArgumentNullException ("reader");

		this.sr = reader;
		sb = new StringBuilder ();
		odds = new StringBuilder ();
		col = line = 1;
		hasPutBack = inTag = false;
	}

	// When set, NextToken () stops interpreting the input: each character
	// is returned as a single-character token and InTag is cleared.
	public bool Verbatim
	{
		get { return verbatim; }
		set { verbatim = value; }
	}

	// Pushes the current token back so that the next get_token () returns
	// it again. Only one token can be pushed back at a time.
	//
	// Throws HttpException if a token is already pushed back.
	public void put_back ()
	{
		if (hasPutBack)
			throw new HttpException ("put_back called twice!");

		hasPutBack = true;
		position -= Value.Length;
	}

	// Returns the next token: a Token.* code or the character code of a
	// single-character token. If a token was pushed back it is returned
	// again without reading any input.
	public int get_token ()
	{
		if (hasPutBack) {
			hasPutBack = false;
			position += Value.Length;
			return current_token;
		}

		// Remember where the new token starts and drop the cached Value.
		begline = line;
		begcol = col;
		have_value = false;
		current_token = NextToken ();
		return current_token;
	}

	// True if 'c' may start an identifier: a letter or '_'.
	bool is_identifier_start_character (char c)
	{
		return (Char.IsLetter (c) || c == '_');
	}

	// True if 'c' may continue an identifier: a letter, a digit, '_' or '-'.
	bool is_identifier_part_character (char c)
	{
		return (Char.IsLetterOrDigit (c) || c == '_' || c == '-');
	}

	// Pushes one character back so that the next read_char () returns it,
	// undoing the position/column advance made when it was read.
	void ungetc (int value)
	{
		have_unget = true;
		unget_value = value;

		// Only '/' passes through here now.
		// If we ever let \n here, update 'line'
		position--;
		col--;
	}

	// Reads one character (the pushed-back one first, if any) and updates
	// line, col and position. Returns -1 at end of input.
	int read_char ()
	{
		int c;
		if (have_unget) {
			c = unget_value;
			have_unget = false;
		} else {
			c = sr.Read ();
		}

		// "\r\n" is reported as a single '\n'; the extra increment
		// accounts for the '\r' that is dropped here.
		if (c == '\r' && sr.Peek () == '\n') {
			c = sr.Read ();
			position++;
		}

		if (c == '\n') {
			// -1 so that the increment below leaves col at 0.
			// NOTE(review): the constructor starts col at 1, so columns
			// on later lines are counted from a different origin than
			// on the first line -- confirm whether callers rely on it.
			col = -1;
			line++;
		}

		if (c != -1) {
			col++;
			position++;
		}

		return c;
	}

	// Reads an attribute value into sb and returns Token.ATTVALUE.
	//
	// 'start' is the character that has already been consumed: either the
	// opening quote (" or ') of a quoted value, or the first character of
	// an unquoted one.
	//
	// A quoted value ends at the matching quote unless that quote is
	// preceded by a backslash; the quotes are not part of the value.
	// An unquoted value ends before whitespace, '>' or "/>".
	// Inside a <% ... %> block none of the terminators apply.
	//
	// AlternatingQuotes is set to true on entry and cleared if the quote
	// character of a quoted value shows up inside a <% ... %> block.
	int ReadAttValue (int start)
	{
		int quoteChar = 0;
		bool quoted = false;

		if (start == '"' || start == '\'') {
			quoteChar = start;
			quoted = true;
		} else {
			sb.Append ((char) start);
		}

		int c;
		// Previously appended character; used to spot "<%", "%>" and "\q".
		int last = 0;
		bool inServerTag = false;
		alternatingQuotes = true;

		while ((c = sr.Peek ()) != -1) {
			if (c == '%' && last == '<') {
				inServerTag = true;
			} else if (inServerTag && c == '>' && last == '%') {
				inServerTag = false;
			} else if (!inServerTag) {
				if (!quoted && c == '/') {
					// Consume the slash and look at what follows it.
					read_char ();
					if (sr.Peek () == '>') {
						// "/>" closes the tag: give the slash back so
						// that it is returned as a token of its own.
						ungetc ('/');
						break;
					}

					// Any other slash, including one at the end of the
					// input, belongs to the value. Keep it and let the
					// next character go through the regular checks
					// instead of appending it unconditionally.
					sb.Append ('/');
					last = '/';
					continue;
				} else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
					break;
				} else if (quoted && c == quoteChar && last != '\\') {
					// Closing quote: consume it but keep it out of the value.
					read_char ();
					break;
				}
			} else if (quoted && c == quoteChar) {
				alternatingQuotes = false;
			}

			sb.Append ((char) c);
			read_char ();
			last = c;
		}

		return Token.ATTVALUE;
	}

	// Lexes the next token from the input. The token text is left in sb
	// and whitespace skipped inside a tag is collected in odds.
	// Returns Token.EOF when the input is exhausted.
	int NextToken ()
	{
		int c;

		sb.Length = 0;
		odds.Length = 0;
		while ((c = read_char ()) != -1) {
			// Verbatim mode: every character is a token.
			if (verbatim) {
				inTag = false;
				sb.Append ((char) c);
				return c;
			}

			// The caller announced an attribute value and we hit a quote.
			if (inTag && expectAttrValue && (c == '"' || c == '\''))
				return ReadAttValue (c);

			if (c == '<') {
				inTag = true;
				sb.Append ((char) c);
				return c;
			}

			if (c == '>') {
				inTag = false;
				sb.Append ((char) c);
				return c;
			}

			// Second character of "<%", "</" and "<!".
			if (current_token == '<' && "%/!".IndexOf ((char) c) != -1) {
				sb.Append ((char) c);
				return c;
			}

			// Third character of "<%@", "<%#" and "<%=".
			if (inTag && current_token == '%' && "@#=".IndexOf ((char) c) != -1) {
				sb.Append ((char) c);
				return c;
			}

			if (inTag && c == '-' && sr.Peek () == '-') {
				sb.Append ("--");
				read_char ();
				return Token.DOUBLEDASH;
			}

			// Outside a tag everything up to the next '<' is plain text.
			if (!inTag) {
				sb.Append ((char) c);
				while ((c = sr.Peek ()) != -1 && c != '<')
					sb.Append ((char) read_char ());

				return (c != -1 || sb.Length > 0) ? Token.TEXT : Token.EOF;
			}

			// A non-blank character right after '=' starts an attribute value.
			if (inTag && current_token == '=' && !Char.IsWhiteSpace ((char) c))
				return ReadAttValue (c);

			if (inTag && is_identifier_start_character ((char) c)) {
				sb.Append ((char) c);
				// ':' is accepted as well so that prefixed names stay
				// in one token.
				while ((c = sr.Peek ()) != -1) {
					if (!is_identifier_part_character ((char) c) && c != ':')
						break;
					sb.Append ((char) read_char ());
				}

				if (current_token == '@' && Directive.IsDirective (sb.ToString ()))
					return Token.DIRECTIVE;

				return Token.IDENTIFIER;
			}

			// Any other non-blank character is a token by itself.
			if (!Char.IsWhiteSpace ((char) c)) {
				sb.Append ((char) c);
				return c;
			}

			// keep otherwise discarded characters in case we need.
			odds.Append ((char) c);
		}

		return Token.EOF;
	}

	// Text of the current token. The string is built once per token and
	// cached until get_token () reads a new one.
	public string Value {
		get {
			if (have_value)
				return val;

			have_value = true;
			val = sb.ToString ();
			return val;
		}
	}

	// Whitespace that was skipped inside a tag before the current token.
	public string Odds {
		get {
			return odds.ToString ();
		}
	}

	// True while the tokenizer is between '<' and '>'. Callers may
	// override the state.
	public bool InTag {
		get { return inTag; }
		set { inTag = value; }
	}

	// Hack for preventing confusion with VB comments (see bug #63451)
	public bool ExpectAttrValue {
		get { return expectAttrValue; }
		set { expectAttrValue = value; }
	}

	// False if the last attribute value read was quoted and contained its
	// own quote character inside a <% ... %> block; true otherwise.
	public bool AlternatingQuotes {
		get { return alternatingQuotes; }
	}

	// Line recorded when get_token () started the current token.
	public int BeginLine {
		get { return begline; }
	}

	// Column recorded when get_token () started the current token.
	public int BeginColumn {
		get { return begcol; }
	}

	// Line after the last character consumed.
	public int EndLine {
		get { return line; }
	}

	// Column after the last character consumed.
	public int EndColumn {
		get { return col; }
	}

	// Number of characters consumed so far, adjusted for push-back.
	public int Position {
		get { return position; }
	}
}
- }
|