Tokenizer.cs 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. //
  // Tokenizer.cs
  3. //
  4. // Author:
  5. // Juraj Skripsky ([email protected])
  6. //
  7. // (C) 2004 HotFeet GmbH (http://www.hotfeet.ch)
  8. //
  9. //
  10. // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
  11. //
  12. // Permission is hereby granted, free of charge, to any person obtaining
  13. // a copy of this software and associated documentation files (the
  14. // "Software"), to deal in the Software without restriction, including
  15. // without limitation the rights to use, copy, modify, merge, publish,
  16. // distribute, sublicense, and/or sell copies of the Software, and to
  17. // permit persons to whom the Software is furnished to do so, subject to
  18. // the following conditions:
  19. //
  20. // The above copyright notice and this permission notice shall be
  21. // included in all copies or substantial portions of the Software.
  22. //
  23. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  27. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  28. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  29. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  30. //
  31. using System;
  32. using System.Data;
  33. using System.IO;
  34. using System.Text;
  35. using System.Collections;
  namespace Mono.Data.SqlExpressions {
      using System.Globalization;

      /// <summary>
      /// Splits an expression string into tokens and hands them out one at a
      /// time through the <c>yyParser.yyInput</c> members
      /// (<see cref="advance"/>, <see cref="token"/> and <see cref="value"/>).
      /// </summary>
      /// <remarks>
      /// All literals (numbers, dates) and keywords are interpreted using the
      /// invariant culture, so the same expression tokenizes identically
      /// regardless of the culture of the calling thread.
      /// </remarks>
      internal class Tokenizer : yyParser.yyInput {
          // Lower-case keyword text -> Token constant; filled once by the
          // static constructor from the 'tokens' table below.
          private static readonly IDictionary tokenMap = new Hashtable ();

          // Flat table of (token constant, keyword text) pairs.
          private static readonly Object [] tokens = {
              Token.AND, "and",
              Token.OR, "or",
              Token.NOT, "not",
              Token.TRUE, "true",
              Token.FALSE, "false",
              Token.NULL, "null",
              Token.PARENT, "parent",
              Token.CHILD, "child",
              Token.IS, "is",
              Token.IN, "in",
              Token.LIKE, "like",
              Token.COUNT, "count",
              Token.SUM, "sum",
              Token.AVG, "avg",
              Token.MAX, "max",
              Token.MIN, "min",
              Token.STDEV, "stdev",
              Token.VAR, "var",
              Token.IIF, "iif",
              Token.SUBSTRING, "substring",
              Token.ISNULL, "isnull",
              Token.LEN, "len",
              Token.TRIM, "trim",
              Token.CONVERT, "convert"
          };

          // Characters of the expression being tokenized.
          private readonly char[] input;
          // Index of the character currently under the cursor.
          private int pos;
          // Token code produced by the last advance (), or -1 if none.
          private int tok;
          // Value attached to the last token (identifier text, literal value) or null.
          private object val;

          static Tokenizer ()
          {
              // The table stores the token first and the keyword second;
              // the map is keyed by the keyword.
              for (int i = 0; i < tokens.Length; i += 2)
                  tokenMap.Add (tokens [i + 1], tokens [i]);
          }

          /// <summary>Creates a tokenizer positioned at the start of <paramref name="strInput"/>.</summary>
          /// <param name="strInput">The expression text to tokenize.</param>
          /// <exception cref="ArgumentNullException"><paramref name="strInput"/> is null.</exception>
          public Tokenizer (string strInput)
          {
              if (strInput == null)
                  throw new ArgumentNullException ("strInput");

              input = strInput.ToCharArray ();
              pos = 0;
          }

          /// <summary>Returns the character under the cursor. The cursor must be inside the input.</summary>
          private char Current ()
          {
              return input [pos];
          }

          /// <summary>
          /// Returns the character following the cursor without moving,
          /// or '\0' when the cursor is on (or past) the last character.
          /// </summary>
          private char Next ()
          {
              if (pos + 1 >= input.Length)
                  return (char) 0;
              return input [pos + 1];
          }

          /// <summary>Moves the cursor one character forward.</summary>
          private void MoveNext ()
          {
              pos++;
          }

          /// <summary>
          /// Moves the cursor to the next non-whitespace character, or to the
          /// end of the input if only whitespace remains.
          /// </summary>
          private void SkipWhiteSpace ()
          {
              while (pos < input.Length && Char.IsWhiteSpace (input [pos]))
                  pos++;
          }

          /// <summary>
          /// Reads a run of digits and dots starting at the cursor and leaves
          /// the cursor on its last character.
          /// </summary>
          /// <returns>
          /// A boxed <see cref="Int64"/> when the text contains no '.',
          /// otherwise a boxed <see cref="Double"/>.
          /// </returns>
          private object ReadNumber ()
          {
              StringBuilder sb = new StringBuilder ();
              sb.Append (Current ());

              char next;
              while (Char.IsDigit (next = Next ()) || next == '.') {
                  sb.Append (next);
                  MoveNext ();
              }

              string str = sb.ToString ();

              // Parse with the invariant culture: under a culture that uses
              // '.' as a group separator, "1.5" would otherwise become 15.
              if (str.IndexOf ('.') == -1)
                  return Int64.Parse (str, CultureInfo.InvariantCulture);
              return double.Parse (str, CultureInfo.InvariantCulture);
          }

          /// <summary>
          /// Translates a backslash escape. <paramref name="c"/> is the
          /// character following the cursor; if it is not a backslash it is
          /// returned unchanged and the cursor does not move.
          /// </summary>
          /// <remarks>
          /// For an escape the cursor is moved onto the backslash, so that the
          /// caller's usual MoveNext () lands on the escape letter.
          /// </remarks>
          /// <exception cref="SyntaxErrorException">
          /// The backslash is not followed by one of n, r, t or a backslash.
          /// </exception>
          private char ProcessEscapes (char c)
          {
              if (c != '\\')
                  return c;

              MoveNext ();
              c = Next ();
              switch (c) {
              case 'n':
                  return '\n';
              case 'r':
                  return '\r';
              case 't':
                  return '\t';
              case '\\':
                  return '\\';
              default:
                  throw new SyntaxErrorException (String.Format ("Invalid escape sequence: '\\{0}'.", c));
              }
          }

          /// <summary>
          /// Reads the text between the opening delimiter under the cursor and
          /// the next <paramref name="terminator"/>, processing backslash
          /// escapes. Leaves the cursor on the terminator.
          /// </summary>
          /// <exception cref="SyntaxErrorException">
          /// The input ends before <paramref name="terminator"/> is found, or
          /// the text contains an invalid escape sequence.
          /// </exception>
          private string ReadString (char terminator)
          {
              StringBuilder sb = new StringBuilder ();

              for (;;) {
                  // Next () reports end of input as '\0', which never equals
                  // the terminator; without this positional check an
                  // unterminated literal would loop forever.
                  if (pos + 1 >= input.Length)
                      throw new SyntaxErrorException (String.Format ("Unterminated literal: closing '{0}' expected.", terminator));

                  char next = Next ();
                  if (next == terminator)
                      break;

                  sb.Append (ProcessEscapes (next));
                  MoveNext ();
              }

              // Step onto the terminator itself.
              MoveNext ();
              return sb.ToString ();
          }

          /// <summary>
          /// Reads an identifier starting at the cursor: letters, digits,
          /// underscores and backslash escapes. Leaves the cursor on the last
          /// character of the identifier.
          /// </summary>
          private string ReadIdentifier ()
          {
              StringBuilder sb = new StringBuilder ();
              sb.Append (Current ());

              char next;
              while ((next = Next ()) == '_' || Char.IsLetterOrDigit (next) || next == '\\') {
                  sb.Append (ProcessEscapes (next));
                  MoveNext ();
              }
              return sb.ToString ();
          }

          /// <summary>
          /// Reads an identifier and classifies it: a keyword from the token
          /// table yields its token constant; anything else yields
          /// <c>Token.Identifier</c> with the identifier text stored as the
          /// token value.
          /// </summary>
          private int ParseIdentifier ()
          {
              string strToken = ReadIdentifier ();

              // Keywords are matched case-insensitively. Lower-casing must use
              // the invariant culture: e.g. under Turkish rules "IN" becomes
              // "\u0131n" (dotless i) and would not be found in the map.
              object tokenObj = tokenMap [strToken.ToLower (CultureInfo.InvariantCulture)];
              if (tokenObj != null)
                  return (int) tokenObj;

              val = strToken;
              return Token.Identifier;
          }

          /// <summary>
          /// Recognises the token starting at the cursor, stores its value (if
          /// any) and leaves the cursor on the token's last character.
          /// </summary>
          /// <returns>The token constant.</returns>
          /// <exception cref="SyntaxErrorException">
          /// The character under the cursor does not start any known token.
          /// </exception>
          private int ParseToken ()
          {
              char cur = Current ();
              switch (cur) {
              case '(':
                  return Token.PAROPEN;
              case ')':
                  return Token.PARCLOSE;
              case '.':
                  return Token.DOT;
              case ',':
                  return Token.COMMA;
              case '+':
                  return Token.PLUS;
              case '-':
                  return Token.MINUS;
              case '*':
                  return Token.MUL;
              case '/':
                  return Token.DIV;
              case '%':
                  return Token.MOD;
              case '=':
                  return Token.EQ;
              case '<':
                  return Token.LT;
              case '>':
                  return Token.GT;
              case '[':
                  // [bracketed name] is an identifier that may contain any characters.
                  val = ReadString (']');
                  return Token.Identifier;
              case '#':
                  // #date literal#, always read with the invariant culture.
                  string date = ReadString ('#');
                  val = DateTime.Parse (date, CultureInfo.InvariantCulture);
                  return Token.DateLiteral;
              case '\'':
              case '\"':
                  // The closing quote must be the same character as the opening one.
                  val = ReadString (cur);
                  return Token.StringLiteral;
              default:
                  if (Char.IsDigit (cur)) {
                      val = ReadNumber ();
                      return Token.NumberLiteral;
                  }
                  if (Char.IsLetter (cur) || cur == '_')
                      return ParseIdentifier ();
                  break;
              }

              throw new SyntaxErrorException ("invalid token: '" + cur + "'");
          }

          ///////////////////////////
          // yyParser.yyInput methods
          ///////////////////////////

          /// <summary>Moves on to the next token.</summary>
          /// <returns>
          /// <c>true</c> if a token was read; <c>false</c> if only whitespace
          /// (or nothing) remained in the input.
          /// </returns>
          public bool advance ()
          {
              val = null;
              tok = -1;

              SkipWhiteSpace ();
              if (pos >= input.Length)
                  return false;

              tok = ParseToken ();
              // ParseToken leaves the cursor on the token's last character.
              MoveNext ();
              return true;
          }

          /// <summary>
          /// Classifies the current token.
          /// Should not be called if <see cref="advance"/> returned false.
          /// </summary>
          /// <returns>The current token constant.</returns>
          public int token ()
          {
              return tok;
          }

          /// <summary>
          /// Returns the value associated with the current token.
          /// Should not be called if <see cref="advance"/> returned false.
          /// </summary>
          /// <returns>The value for <see cref="token"/>, or null if the token carries none.</returns>
          public Object value ()
          {
              return val;
          }
      }
  }