| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432 |
- //
- // System.Web.Compilation.AspTokenizer
- //
- // Authors:
- // Gonzalo Paniagua Javier ([email protected])
- // Marek Habersack <[email protected]>
- //
- // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
- // (C) 2003-2009 Novell, Inc (http://novell.com)
- //
- //
- // Permission is hereby granted, free of charge, to any person obtaining
- // a copy of this software and associated documentation files (the
- // "Software"), to deal in the Software without restriction, including
- // without limitation the rights to use, copy, modify, merge, publish,
- // distribute, sublicense, and/or sell copies of the Software, and to
- // permit persons to whom the Software is furnished to do so, subject to
- // the following conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- //
- using System;
- using System.Collections;
- using System.IO;
- using System.Text;
- using System.Security.Cryptography;
- namespace System.Web.Compilation
- {
// Codes for the multi-character tokens produced by AspTokenizer.
// Single-character tokens are reported as the character's own value, so
// these codes start at 0x0200000, well above anything a char can hold.
class Token
{
	// First code of the range; the remaining codes follow consecutively.
	const int FIRST = 0x0200000;

	public const int EOF = FIRST;
	public const int IDENTIFIER = FIRST + 1;
	public const int DIRECTIVE = FIRST + 2;
	public const int ATTVALUE = FIRST + 3;
	public const int TEXT = FIRST + 4;
	public const int DOUBLEDASH = FIRST + 5;
	public const int CLOSING = FIRST + 6;
}
- class AspTokenizer
- {
// Capacity, in chars, of the buffer that collects input for the checksum.
const int CHECKSUM_BUF_SIZE = 8 * 1024;
// Immutable record of one token that was handed back to the tokenizer:
// its text, the input position, the token code and the in-tag flag.
class PutBackItem
{
	public readonly string Value;
	public readonly int Position;
	public readonly int CurrentToken;
	public readonly bool InTag;

	public PutBackItem (string value, int position, int currentToken, bool inTag)
	{
		this.Value = value;
		this.Position = position;
		this.CurrentToken = currentToken;
		this.InTag = inTag;
	}
}
-
// Line-break characters looked for in the skipped whitespace ("odds").
static char [] lfcr = new char [] { '\n', '\r' };

// Input being tokenized.
TextReader sr;

// Code of the token most recently returned by get_token ().
int current_token;

// Text of the current token.
StringBuilder sb;
// Whitespace skipped inside a tag while looking for the current token.
StringBuilder odds;

// Current column and line in the input.
int col;
int line;

// Column and line recorded when the scan of the current token started.
int begcol;
int begline;

// Number of characters consumed so far.
int position;

// True while scanning between '<' and '>'.
bool inTag;
// When set, a quote character inside a tag starts an attribute value.
bool expectAttrValue;
// Result of the last attribute-value scan; see ReadAttValue ().
bool alternatingQuotes;

// True while putBackBuffer holds at least one token.
bool hasPutBack;
// When set, every character is returned as its own token.
bool verbatim;

// Cache for the Value property.
bool have_value;
string val;

// One-character pushback used by read_char ().
bool have_unget;
int unget_value;

// Tokens handed back through put_back (); created on first use.
Stack putBackBuffer;

// Running hash of the input; created on first use.
MD5 checksum;
// Characters waiting to be hashed, and the index of the last used slot
// (-1 when the buffer is empty).
char[] checksum_buf = new char [CHECKSUM_BUF_SIZE];
int checksum_buf_pos = -1;
-
// Hash object fed with the characters read so far; null until the first
// block has been hashed.
public MD5 Checksum
{
	get
	{
		return this.checksum;
	}
}
-
// Creates a tokenizer over 'reader', positioned at line 1, column 1,
// outside of any tag and with nothing put back.
public AspTokenizer (TextReader reader)
{
	sr = reader;
	sb = new StringBuilder ();
	odds = new StringBuilder ();

	line = 1;
	col = 1;

	inTag = false;
	hasPutBack = false;
}
// When true, the tokenizer returns every character as a token of its own.
public bool Verbatim
{
	get
	{
		return this.verbatim;
	}
	set
	{
		this.verbatim = value;
	}
}
// Hands the current token back so that the next get_token () call returns
// it again. Outside of a tag only one token may be pending at a time;
// a second call there throws HttpException.
public void put_back ()
{
	if (hasPutBack && !inTag)
		throw new HttpException ("put_back called twice!");

	hasPutBack = true;
	if (putBackBuffer == null)
		putBackBuffer = new Stack ();

	// Save the state as it is now, then rewind the position by the token length.
	string text = Value;
	PutBackItem saved = new PutBackItem (text, position, current_token, inTag);
	putBackBuffer.Push (saved);
	position -= text.Length;
}
-
// Returns the code of the next token. Tokens handed back with put_back ()
// are replayed first (most recent first); otherwise the input is scanned.
public int get_token ()
{
	if (!hasPutBack) {
		// Nothing pending: note where the token starts and scan the input.
		begline = line;
		begcol = col;
		have_value = false;
		current_token = NextToken ();
		return current_token;
	}

	PutBackItem item = putBackBuffer.Pop () as PutBackItem;
	if (verbatim) {
		// In verbatim mode a replayed token is reported as its first
		// character and is no longer considered to be inside a tag. An
		// empty value keeps the saved item (and its token code) untouched.
		string text = item.Value;
		if (text.Length == 1)
			item = new PutBackItem (String.Empty, item.Position, (int) text [0], false);
		else if (text.Length > 1)
			item = new PutBackItem (text, item.Position, (int) text [0], false);
	}

	// Restore the tokenizer state saved with the item.
	hasPutBack = putBackBuffer.Count > 0;
	position = item.Position;
	have_value = false;
	val = null;
	sb = new StringBuilder (item.Value);
	current_token = item.CurrentToken;
	inTag = item.InTag;
	return current_token;
}
// An identifier may begin with an underscore or any letter.
bool is_identifier_start_character (char c)
{
	if (c == '_')
		return true;
	return Char.IsLetter (c);
}
// After its first character an identifier may continue with letters,
// digits, underscores and dashes.
bool is_identifier_part_character (char c)
{
	if (c == '_' || c == '-')
		return true;
	return Char.IsLetterOrDigit (c);
}
// Pushes one character back so that the next read_char () returns it,
// and rewinds the position and column by one.
void ungetc (int value)
{
	unget_value = value;
	have_unget = true;

	// Only '/' is ever pushed back at the moment. Should '\n' be allowed
	// through here one day, 'line' has to be adjusted as well.
	col--;
	position--;
}
// Feeds the first 'count' buffered characters, encoded as UTF-8, to the
// checksum and empties the buffer. 'final' selects the closing block of
// the hash. The MD5 instance is created on first use.
void TransformNextBlock (int count, bool final)
{
	byte[] encoded = Encoding.UTF8.GetBytes (checksum_buf, 0, count);
	if (checksum == null)
		checksum = MD5.Create ();

	int length = encoded.Length;
	if (!final)
		checksum.TransformBlock (encoded, 0, length, encoded, 0);
	else
		checksum.TransformFinalBlock (encoded, 0, length);

	// Mark the buffer as empty.
	checksum_buf_pos = -1;
}
-
// Set once the end of input has been hashed; later calls are ignored.
bool checksum_finalized;

// Adds one character read from the input to the checksum. Characters are
// buffered and hashed a block at a time; c == -1 (end of input) hashes
// whatever is still buffered as the final block.
//
// read_char () forwards every sr.Read () result, so -1 can arrive more
// than once (for example when get_token () is called again after EOF).
// The hash must be finalized only once: finalizing it a second time would
// replace or invalidate the value computed for the real input.
void UpdateChecksum (int c)
{
	if (checksum_finalized)
		return;

	if (c == -1) {
		checksum_finalized = true;
		TransformNextBlock (checksum_buf_pos + 1, true);
		return;
	}

	// Flush the buffer first when it has no room for another character.
	if (checksum_buf_pos + 1 >= CHECKSUM_BUF_SIZE)
		TransformNextBlock (checksum_buf_pos + 1, false);
	checksum_buf [++checksum_buf_pos] = (char) c;
}
// Returns the next input character, or -1 at end of input. A pushed-back
// character is returned first. A CR immediately followed by LF is
// collapsed into the LF. Line, column and position are kept up to date,
// and everything read from the reader is added to the checksum.
int read_char ()
{
	int ch;
	if (!have_unget) {
		ch = sr.Read ();
		UpdateChecksum (ch);
	} else {
		have_unget = false;
		ch = unget_value;
	}

	// CR LF: drop the CR (still counting it in 'position') and report the LF.
	if (ch == '\r' && sr.Peek () == '\n') {
		ch = sr.Read ();
		UpdateChecksum (ch);
		position++;
	}

	if (ch == '\n') {
		line++;
		// The increment below brings the column to 0 for the new line.
		col = -1;
	}

	if (ch == -1)
		return ch;

	col++;
	position++;
	return ch;
}
// Reads an attribute value into 'sb' and returns Token.ATTVALUE.
//
// 'start' is the first character of the value, already consumed by the
// caller. If it is a single or double quote, the value runs up to the
// matching quote (a quote preceded by a backslash does not end it) and the
// quotes are not stored. Otherwise the value is unquoted: 'start' belongs
// to it and it ends before whitespace, '>' or a "/>" sequence.
//
// Text between "<%" and "%>" is copied without looking for terminators.
// AlternatingQuotes is cleared when the delimiting quote character shows
// up inside such a server tag.
int ReadAttValue (int start)
{
	int quoteChar = 0;
	bool quoted = (start == '"' || start == '\'');
	if (quoted)
		quoteChar = start;
	else
		sb.Append ((char) start);

	int c;
	int last = 0;
	bool inServerTag = false;
	alternatingQuotes = true;

	while ((c = sr.Peek ()) != -1) {
		if (c == '%' && last == '<') {
			inServerTag = true;
		} else if (inServerTag && c == '>' && last == '%') {
			inServerTag = false;
		} else if (!inServerTag) {
			if (!quoted && c == '/') {
				read_char ();
				if (sr.Peek () == '>') {
					// "/>" closes the tag: hand the slash back and stop.
					ungetc ('/');
					break;
				}
				// Any other slash is part of the value. Keep it, and let
				// the character after it go through the checks above
				// instead of being appended blindly.
				sb.Append ('/');
				last = '/';
				continue;
			} else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
				break;
			} else if (quoted && c == quoteChar && last != '\\') {
				// Closing quote: consume it, but do not store it.
				read_char ();
				break;
			}
		} else if (quoted && c == quoteChar) {
			alternatingQuotes = false;
		}

		sb.Append ((char) c);
		read_char ();
		last = c;
	}

	return Token.ATTVALUE;
}
// Scans the input for the next token, leaving its text in 'sb' and any
// whitespace skipped inside a tag in 'odds'. Returns either the character
// itself (for single-character tokens) or one of the Token codes.
int NextToken ()
{
	sb.Length = 0;
	odds.Length = 0;

	int ch;
	while ((ch = read_char ()) != -1) {
		char symbol = (char) ch;

		// Verbatim mode: every character is a token of its own.
		if (verbatim) {
			inTag = false;
			sb.Append (symbol);
			return ch;
		}

		if (inTag && expectAttrValue && (ch == '"' || ch == '\''))
			return ReadAttValue (ch);

		// '<' opens a tag, '>' closes it.
		if (ch == '<' || ch == '>') {
			inTag = (ch == '<');
			sb.Append (symbol);
			return ch;
		}

		// Character directly following '<'.
		if (current_token == '<' && "%/!".IndexOf (symbol) != -1) {
			sb.Append (symbol);
			return ch;
		}

		// Character following '%' inside a tag. It is only a token when no
		// line break was skipped before it; otherwise it is kept in the
		// token text and scanning goes on.
		if (inTag && current_token == '%' && "@#=".IndexOf (symbol) != -1) {
			bool noLineBreakSkipped = odds.Length == 0 || odds.ToString ().IndexOfAny (lfcr) < 0;
			sb.Append (symbol);
			if (noLineBreakSkipped)
				return ch;
			continue;
		}

		if (inTag && ch == '-' && sr.Peek () == '-') {
			sb.Append ("--");
			read_char ();
			return Token.DOUBLEDASH;
		}

		// Outside of a tag everything up to the next '<' is plain text.
		if (!inTag) {
			sb.Append (symbol);
			int upcoming;
			while ((upcoming = sr.Peek ()) != -1 && upcoming != '<')
				sb.Append ((char) read_char ());
			return (upcoming != -1 || sb.Length > 0) ? Token.TEXT : Token.EOF;
		}

		// From here on the tokenizer is known to be inside a tag.
		if (current_token == '=' && !Char.IsWhiteSpace (symbol))
			return ReadAttValue (ch);

		if (is_identifier_start_character (symbol)) {
			sb.Append (symbol);
			int part;
			while ((part = sr.Peek ()) != -1 &&
			       (part == ':' || is_identifier_part_character ((char) part)))
				sb.Append ((char) read_char ());

			if (current_token == '@' && Directive.IsDirective (sb.ToString ()))
				return Token.DIRECTIVE;

			return Token.IDENTIFIER;
		}

		if (!Char.IsWhiteSpace (symbol)) {
			sb.Append (symbol);
			return ch;
		}

		// Whitespace inside a tag: not a token, but kept in case it is needed.
		odds.Append (symbol);
	}

	return Token.EOF;
}
// Text of the current token. The string is built from 'sb' on first
// access and cached until the next token is fetched.
public string Value
{
	get
	{
		if (!have_value) {
			have_value = true;
			val = sb.ToString ();
		}
		return val;
	}
}
// Characters that were skipped inside a tag while scanning the current token.
public string Odds
{
	get
	{
		string skipped = odds.ToString ();
		return skipped;
	}
}
// Whether the tokenizer currently considers itself to be inside a tag.
public bool InTag
{
	get
	{
		return this.inTag;
	}
	set
	{
		this.inTag = value;
	}
}
// Hack that prevents confusion with VB comments (see bug #63451): while
// this is set, a quote character inside a tag starts an attribute value.
public bool ExpectAttrValue
{
	get
	{
		return this.expectAttrValue;
	}
	set
	{
		this.expectAttrValue = value;
	}
}
-
// False when the last quoted attribute value contained its own quote
// character inside a <% %> block; true otherwise.
public bool AlternatingQuotes
{
	get
	{
		return this.alternatingQuotes;
	}
}
-
// Line on which the scan of the current token started.
public int BeginLine
{
	get
	{
		return this.begline;
	}
}
// Column at which the scan of the current token started.
public int BeginColumn
{
	get
	{
		return this.begcol;
	}
}
// Line the tokenizer has currently reached.
public int EndLine
{
	get
	{
		return this.line;
	}
}
// Column the tokenizer has currently reached.
public int EndColumn
{
	get
	{
		return this.col;
	}
}
// Number of characters consumed so far, adjusted for tokens put back.
public int Position
{
	get
	{
		return this.position;
	}
}
- }
- }
|