Tokenizer.cs 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. //
  2. // SqlWhereClauseTokenizer.cs
  3. //
  4. // Author:
  5. // Juraj Skripsky ([email protected])
  6. //
  7. // (C) 2004 HotFeet GmbH (http://www.hotfeet.ch)
  8. //
  9. using System;
  10. using System.Data;
  11. using System.IO;
  12. using System.Text;
  13. using System.Collections;
  14. namespace Mono.Data.SqlExpressions {
  /// <summary>
  /// Hand-written lexer that turns an expression string into the token
  /// stream consumed through the yyParser.yyInput interface.
  /// </summary>
  /// <remarks>
  /// Keywords are matched case-insensitively, and numeric and date literals
  /// are parsed with the invariant culture, so the result of tokenizing a
  /// given string does not depend on the culture of the current thread.
  /// </remarks>
  internal class Tokenizer : yyParser.yyInput {
      // Lower-case keyword text -> token code; filled by the static constructor.
      private static readonly IDictionary tokenMap = new Hashtable ();

      // Flat list of (token code, keyword text) pairs used to build tokenMap.
      private static readonly Object [] tokens = {
          Token.AND, "and",
          Token.OR, "or",
          Token.NOT, "not",
          Token.TRUE, "true",
          Token.FALSE, "false",
          Token.NULL, "null",
          Token.PARENT, "parent",
          Token.CHILD, "child",
          Token.IS, "is",
          Token.IN, "in",
          Token.LIKE, "like",
          Token.COUNT, "count",
          Token.SUM, "sum",
          Token.AVG, "avg",
          Token.MAX, "max",
          Token.MIN, "min",
          Token.STDEV, "stdev",
          Token.VAR, "var",
          Token.IIF, "iif",
          Token.SUBSTRING, "substring",
          Token.ISNULL, "isnull",
          Token.LEN, "len",
          Token.TRIM, "trim",
          Token.CONVERT, "convert"
      };

      // Used for keyword folding and for parsing numeric/date literals.
      private static readonly System.Globalization.CultureInfo invariant =
          System.Globalization.CultureInfo.InvariantCulture;

      private char[] input;   // the characters being tokenized
      private int pos;        // index of the character currently looked at
      private int tok;        // code of the token found by the last advance()
      private object val;     // value attached to that token, or null

      static Tokenizer ()
      {
          // tokens holds pairs: [i] is the token code, [i + 1] its keyword.
          for (int i = 0; i < tokens.Length; i += 2)
              tokenMap.Add (tokens [i + 1], tokens [i]);
      }

      /// <summary>Creates a tokenizer positioned at the start of <paramref name="strInput"/>.</summary>
      /// <param name="strInput">The expression text to tokenize.</param>
      /// <exception cref="ArgumentNullException"><paramref name="strInput"/> is null.</exception>
      public Tokenizer (string strInput)
      {
          if (strInput == null)
              throw new ArgumentNullException ("strInput");

          input = strInput.ToCharArray ();
          pos = 0;
      }

      /// <summary>Returns the character at the current position.</summary>
      private char Current ()
      {
          return input [pos];
      }

      /// <summary>
      /// Returns the character after the current one without consuming it,
      /// or (char)0 when there is none.
      /// </summary>
      private char Next ()
      {
          if (pos + 1 >= input.Length)
              return (char)0;
          return input [pos + 1];
      }

      /// <summary>Moves the current position forward by one character.</summary>
      private void MoveNext ()
      {
          pos++;
      }

      /// <summary>
      /// Advances past white space; stops at the first other character or
      /// at the end of the input.
      /// </summary>
      private void SkipWhiteSpace ()
      {
          while (pos < input.Length && Char.IsWhiteSpace (input [pos]))
              pos++;
      }

      /// <summary>
      /// Appends the run of digits that follows the current position to
      /// <paramref name="sb"/>, leaving the position on the last digit appended.
      /// </summary>
      private void AppendDigits (StringBuilder sb)
      {
          char next;
          while (Char.IsDigit (next = Next ())) {
              sb.Append (next);
              MoveNext ();
          }
      }

      /// <summary>
      /// Reads a numeric literal starting at the current character.
      /// </summary>
      /// <returns>
      /// A boxed double when the literal contains a '.', otherwise a boxed int.
      /// </returns>
      private object ReadNumber ()
      {
          StringBuilder sb = new StringBuilder ();
          sb.Append (Current ());
          AppendDigits (sb);

          if (Next () != '.')
              return int.Parse (sb.ToString (), invariant);

          sb.Append ('.');
          // Step onto the '.' so that the fraction digits become visible to
          // Next(); without this the fraction was left behind and "1.5" was
          // tokenized as 1, DOT, 5.
          MoveNext ();
          AppendDigits (sb);
          return double.Parse (sb.ToString (), invariant);
      }

      /// <summary>
      /// Translates a backslash escape. When <paramref name="c"/> (the
      /// character returned by Next()) is a backslash, the backslash is
      /// consumed and the character it introduces is decoded; any other
      /// character is returned unchanged.
      /// </summary>
      /// <exception cref="SyntaxErrorException">
      /// The escape is not one of \n, \r, \t or \\.
      /// </exception>
      private char ProcessEscapes (char c)
      {
          if (c != '\\')
              return c;

          MoveNext ();        // now positioned on the backslash
          char code = Next (); // the character that follows it
          switch (code) {
          case 'n':
              return '\n';
          case 'r':
              return '\r';
          case 't':
              return '\t';
          case '\\':
              return '\\';
          default:
              throw new SyntaxErrorException (String.Format ("Invalid escape sequence: '\\{0}'.", code));
          }
      }

      /// <summary>
      /// Reads the text between the current (opening) delimiter and
      /// <paramref name="terminator"/>, decoding backslash escapes, and
      /// leaves the position on the terminator.
      /// </summary>
      /// <exception cref="SyntaxErrorException">
      /// The input ends before <paramref name="terminator"/> is found, or an
      /// invalid escape sequence is encountered.
      /// </exception>
      private string ReadString (char terminator)
      {
          StringBuilder sb = new StringBuilder ();

          while (true) {
              // Next() reports end of input as (char)0, which never equals a
              // delimiter; check the bound explicitly so an unterminated
              // literal fails instead of looping forever.
              if (pos + 1 >= input.Length)
                  throw new SyntaxErrorException (String.Format ("Unterminated literal: missing closing '{0}'.", terminator));

              char next = Next ();
              if (next == terminator)
                  break;

              sb.Append (ProcessEscapes (next));
              MoveNext ();
          }

          MoveNext ();   // step onto the terminator
          return sb.ToString ();
      }

      /// <summary>
      /// Reads an identifier starting at the current character: letters,
      /// digits, underscores and backslash escapes.
      /// </summary>
      private string ReadIdentifier ()
      {
          StringBuilder sb = new StringBuilder ();
          sb.Append (Current ());

          char next;
          while ((next = Next ()) == '_' || Char.IsLetterOrDigit (next) || next == '\\') {
              sb.Append (ProcessEscapes (next));
              MoveNext ();
          }
          return sb.ToString ();
      }

      /// <summary>
      /// Reads a word and classifies it: the keyword's token code when the
      /// word is in tokenMap, otherwise Token.Identifier with the word
      /// stored as the token value.
      /// </summary>
      private int ParseIdentifier ()
      {
          string strToken = ReadIdentifier ();

          // Fold case with the invariant culture: a culture-sensitive
          // ToLower() maps 'I' to a dotless i under Turkish cultures, which
          // would stop keywords such as IN or IS from being recognised.
          object tokenObj = tokenMap [strToken.ToLower (invariant)];
          if (tokenObj != null)
              return (int)tokenObj;

          val = strToken;
          return Token.Identifier;
      }

      /// <summary>
      /// Recognises the token that starts at the current character and
      /// leaves the position on the token's last character.
      /// </summary>
      /// <returns>The token code; the token value, if any, is stored in val.</returns>
      /// <exception cref="SyntaxErrorException">
      /// The current character cannot start a token.
      /// </exception>
      private int ParseToken ()
      {
          char cur = Current ();
          switch (cur) {
          case '(':
              return Token.PAROPEN;
          case ')':
              return Token.PARCLOSE;
          case '.':
              return Token.DOT;
          case ',':
              return Token.COMMA;
          case '+':
              return Token.PLUS;
          case '-':
              return Token.MINUS;
          case '*':
              return Token.MUL;
          case '/':
              return Token.DIV;
          case '%':
              return Token.MOD;
          case '=':
              return Token.EQ;
          case '<':
              return Token.LT;
          case '>':
              return Token.GT;
          case '[':
              // Bracket-quoted identifier.
              val = ReadString (']');
              return Token.Identifier;
          case '#':
              // Date literal delimited by '#'.
              string date = ReadString ('#');
              val = DateTime.Parse (date, invariant);
              return Token.DateLiteral;
          case '\'':
          case '\"':
              // String literal closed by the same quote that opened it.
              val = ReadString (cur);
              return Token.StringLiteral;
          }

          if (Char.IsDigit (cur)) {
              val = ReadNumber ();
              return Token.NumberLiteral;
          }
          if (Char.IsLetter (cur) || cur == '_')
              return ParseIdentifier ();

          // Same exception type as ProcessEscapes; still an Exception
          // subclass, so existing catch blocks keep working.
          throw new SyntaxErrorException ("invalid token: '" + cur + "'");
      }

      ///////////////////////////
      // yyParser.yyInput methods
      ///////////////////////////

      /// <summary>Moves on to the next token.</summary>
      /// <returns>
      /// true when a token was read; false when only white space (or
      /// nothing) remained in the input.
      /// </returns>
      /// <exception cref="SyntaxErrorException">
      /// The input contains an invalid token, an invalid escape sequence or
      /// an unterminated literal.
      /// </exception>
      public bool advance ()
      {
          val = null;
          tok = -1;

          SkipWhiteSpace ();
          if (pos >= input.Length)
              return false;

          tok = ParseToken ();
          MoveNext ();   // step past the token's last character
          return true;
      }

      /// <summary>
      /// Returns the code of the current token.
      /// Should not be called if advance() returned false.
      /// </summary>
      public int token ()
      {
          return tok;
      }

      /// <summary>
      /// Returns the value associated with the current token (null when the
      /// token carries none).
      /// Should not be called if advance() returned false.
      /// </summary>
      public Object value ()
      {
          return val;
      }
  }
  233. }