Tokenizer.cs 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. //
  2. // SqlWhereClauseTokenizer.cs
  3. //
  4. // Author:
  5. // Juraj Skripsky ([email protected])
  6. //
  7. // (C) 2004 HotFeet GmbH (http://www.hotfeet.ch)
  8. //
  9. //
  10. // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
  11. //
  12. // Permission is hereby granted, free of charge, to any person obtaining
  13. // a copy of this software and associated documentation files (the
  14. // "Software"), to deal in the Software without restriction, including
  15. // without limitation the rights to use, copy, modify, merge, publish,
  16. // distribute, sublicense, and/or sell copies of the Software, and to
  17. // permit persons to whom the Software is furnished to do so, subject to
  18. // the following conditions:
  19. //
  20. // The above copyright notice and this permission notice shall be
  21. // included in all copies or substantial portions of the Software.
  22. //
  23. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  27. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  28. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  29. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  30. //
  31. using System;
  32. using System.Data;
  33. using System.IO;
  34. using System.Text;
  35. using System.Collections;
  36. namespace Mono.Data.SqlExpressions {
  /// <summary>
  /// Splits an expression string into tokens. Implements the
  /// <c>yyParser.yyInput</c> contract (<c>advance</c> / <c>token</c> /
  /// <c>value</c>) so a parser can pull one token at a time.
  /// </summary>
  internal class Tokenizer : yyParser.yyInput {
    // Keyword text (lower case) -> token code. Filled once by the static constructor.
    private static readonly IDictionary tokenMap = new Hashtable ();

    // Flat list of (token code, keyword text) pairs used to populate tokenMap.
    private static readonly Object [] tokens = {
      Token.AND, "and",
      Token.OR, "or",
      Token.NOT, "not",
      Token.TRUE, "true",
      Token.FALSE, "false",
      Token.NULL, "null",
      Token.PARENT, "parent",
      Token.CHILD, "child",
      Token.IS, "is",
      Token.IN, "in",
      Token.LIKE, "like",
      Token.COUNT, "count",
      Token.SUM, "sum",
      Token.AVG, "avg",
      Token.MAX, "max",
      Token.MIN, "min",
      Token.STDEV, "stdev",
      Token.VAR, "var",
      Token.IIF, "iif",
      Token.SUBSTRING, "substring",
      Token.ISNULL, "isnull",
      Token.LEN, "len",
      Token.TRIM, "trim",
      Token.CONVERT, "convert"
    };

    // Characters of the expression being scanned.
    private char[] input;
    // Index into input of the character currently being examined.
    private int pos;
    // Code of the token found by the most recent successful advance().
    private int tok;
    // Value of the most recent identifier/literal token. It is only assigned
    // for those token kinds, so it keeps its previous content when the
    // current token is an operator, punctuation or a keyword.
    private object val;

    /// <summary>Builds the keyword lookup table from the flat <c>tokens</c> array.</summary>
    static Tokenizer ()
    {
      // Entries come in pairs: tokens[i] is the code, tokens[i + 1] the keyword.
      for (int i = 0; i < tokens.Length; i += 2)
        tokenMap.Add (tokens [i + 1], tokens [i]);
    }

    /// <summary>Creates a tokenizer positioned at the first character of <paramref name="strInput"/>.</summary>
    /// <param name="strInput">Expression text to scan; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="strInput"/> is null.</exception>
    public Tokenizer (string strInput)
    {
      if (strInput == null)
        throw new ArgumentNullException ("strInput");

      input = strInput.ToCharArray ();
      pos = 0;
    }

    /// <summary>Returns the character at <c>pos</c>. The caller must ensure <c>pos</c> is in range.</summary>
    private char Current ()
    {
      return input [pos];
    }

    /// <summary>Peeks at the character after <c>pos</c> without moving; returns '\0' at end of input.</summary>
    private char Next ()
    {
      if (pos + 1 >= input.Length)
        return (char)0;
      return input [pos + 1];
    }

    /// <summary>Advances <c>pos</c> by one and reports whether it still points inside the input.</summary>
    private bool MoveNext ()
    {
      pos++;
      return pos < input.Length;
    }

    /// <summary>
    /// Moves <c>pos</c> forward past any whitespace.
    /// </summary>
    /// <returns>true if a non-whitespace character is now current; false if the input is exhausted.</returns>
    private bool SkipWhiteSpace ()
    {
      if (pos >= input.Length)
        return false;

      while (Char.IsWhiteSpace (Current ())) {
        if (!MoveNext ())
          return false;
      }
      return true;
    }

    /// <summary>
    /// Reads a run of digits and '.' characters starting at the current character.
    /// On return <c>pos</c> is on the last character consumed.
    /// </summary>
    /// <returns>A boxed <c>Int64</c> when the text has no '.', otherwise a boxed <c>double</c>.</returns>
    private object ReadNumber ()
    {
      StringBuilder sb = new StringBuilder ();
      sb.Append (Current ());

      char next;
      while (Char.IsDigit (next = Next ()) || next == '.') {
        sb.Append (next);
        if (!MoveNext ())
          break;
      }

      string str = sb.ToString ();

      // Expression literals always use '.' as the decimal separator, so the
      // text must be parsed with the invariant culture rather than the
      // culture of the current thread.
      System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
      if (str.IndexOf ('.') == -1)
        return Int64.Parse (str, invariant);
      return double.Parse (str, invariant);
    }

    /// <summary>
    /// Translates a backslash escape. If <paramref name="c"/> is a backslash the
    /// following input character is consumed and mapped (\n, \r, \t, \\);
    /// any other character is returned unchanged.
    /// <c>pos</c> must be on the backslash when this is called, because the
    /// escape code is read by advancing <c>pos</c>.
    /// </summary>
    /// <exception cref="SyntaxErrorException">Unknown escape code, or the backslash is the last input character.</exception>
    private char ProcessEscapes (char c)
    {
      if (c != '\\')
        return c;

      // '\0' stands in for "nothing follows the backslash" and falls into the error branch.
      char code = MoveNext () ? Current () : '\0';
      switch (code) {
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case '\\':
        return '\\';
      default:
        throw new SyntaxErrorException (String.Format ("Invalid escape sequence: '\\{0}'.", code));
      }
    }

    /// <summary>Reads a delimited string; equivalent to <c>ReadString (terminator, false)</c>.</summary>
    private string ReadString (char terminator)
    {
      return ReadString (terminator, false /* canEscape */);
    }

    /// <summary>
    /// Reads the text between the current (opening) delimiter and the next
    /// unescaped <paramref name="terminator"/>. Two consecutive terminators
    /// yield one literal terminator character; backslash escapes are decoded
    /// by <c>ProcessEscapes</c>. On return <c>pos</c> is on the closing terminator.
    /// </summary>
    /// <param name="terminator">Closing delimiter.</param>
    /// <param name="canEscape">
    /// Intended to say whether a doubled terminator is an escape.
    /// NOTE(review): the flag is not consulted; doubling is always honoured,
    /// including for callers that pass false. Kept as-is because existing
    /// expressions may depend on it.
    /// </param>
    /// <exception cref="SyntaxErrorException">The input ends before a closing terminator is found.</exception>
    private string ReadString (char terminator,
                               bool canEscape // twice the terminator is not a terminator
                               )
    {
      bool terminated = false;
      StringBuilder sb = new StringBuilder ();

      while (MoveNext ()) {
        if (Current () == terminator) {
          if (Next () == terminator) {
            // Doubled terminator: keep one copy and skip the second.
            sb.Append (ProcessEscapes (Current ()));
            MoveNext ();
            continue;
          }
          terminated = true;
          break;
        }
        sb.Append (ProcessEscapes (Current ()));
      }

      if (!terminated)
        throw new SyntaxErrorException (String.Format ("invalid string at {0}{1}<--",
                                                       terminator,
                                                       sb.ToString ()));
      return sb.ToString ();
    }

    /// <summary>
    /// Reads an identifier: the current character followed by any run of
    /// letters, digits, underscores and backslash escapes.
    /// On return <c>pos</c> is on the last character consumed.
    /// </summary>
    /// <exception cref="SyntaxErrorException">The identifier contains an invalid or incomplete escape sequence.</exception>
    private string ReadIdentifier ()
    {
      StringBuilder sb = new StringBuilder ();
      sb.Append (Current ());

      char next;
      while ((next = Next ()) == '_' || Char.IsLetterOrDigit (next) || next == '\\') {
        // Step onto the character before decoding it. ProcessEscapes reads the
        // escape code relative to pos, so pos has to be on the backslash;
        // decoding the look-ahead character instead would treat the backslash
        // as its own escape code and drop the character that follows it.
        // Next() returned a real character, so this MoveNext cannot run off the end.
        MoveNext ();
        sb.Append (ProcessEscapes (Current ()));
      }
      return sb.ToString ();
    }

    /// <summary>
    /// Reads an identifier and classifies it: returns the keyword's token code
    /// when the text matches a keyword (ignoring case), otherwise stores the
    /// text in <c>val</c> and returns <c>Token.Identifier</c>.
    /// </summary>
    private int ParseIdentifier ()
    {
      string strToken = ReadIdentifier ();

      // Keywords are ASCII; lower-case with the invariant culture so that
      // e.g. "IN" or "LIKE" still match under cultures with special casing
      // rules for 'I' (Turkish, Azeri).
      string key = strToken.ToLower (System.Globalization.CultureInfo.InvariantCulture);
      object tokenObj = tokenMap [key];
      if (tokenObj != null)
        return (int)tokenObj;

      val = strToken;
      return Token.Identifier;
    }

    /// <summary>
    /// Recognises the token that starts at the current character and returns
    /// its code, storing the associated value in <c>val</c> for identifiers
    /// and literals. On return <c>pos</c> is on the token's last character.
    /// </summary>
    /// <exception cref="SyntaxErrorException">The current character cannot start any token.</exception>
    private int ParseToken ()
    {
      char cur;
      switch (cur = Current ()) {
      case '(':
        return Token.PAROPEN;
      case ')':
        return Token.PARCLOSE;
      case '.':
        return Token.DOT;
      case ',':
        return Token.COMMA;
      case '+':
        return Token.PLUS;
      case '-':
        return Token.MINUS;
      case '*':
        return Token.MUL;
      case '/':
        return Token.DIV;
      case '%':
        return Token.MOD;
      case '=':
        return Token.EQ;
      case '<':
        return Token.LT;
      case '>':
        return Token.GT;
      case '[':
        // Bracketed name: everything up to the closing ']' is the identifier.
        val = ReadString (']');
        return Token.Identifier;
      case '#':
        // Date literals are written in a fixed, culture-independent form,
        // so parse them with the invariant culture.
        string date = ReadString ('#');
        val = DateTime.Parse (date, System.Globalization.CultureInfo.InvariantCulture);
        return Token.DateLiteral;
      case '\'':
      case '\"':
        val = ReadString (cur, true);
        return Token.StringLiteral;
      default:
        if (Char.IsDigit (cur)) {
          val = ReadNumber ();
          return Token.NumberLiteral;
        } else if (Char.IsLetter (cur) || cur == '_')
          return ParseIdentifier ();
        break;
      }
      throw new SyntaxErrorException ("invalid token: '" + cur + "'");
    }

    ///////////////////////////
    // yyParser.yyInput methods
    ///////////////////////////

    /// <summary>Moves on to the next token.</summary>
    /// <returns>false if only whitespace (or nothing) is left in the input; true if a token was read.</returns>
    /// <exception cref="SyntaxErrorException">The input contains an invalid token, string or escape sequence.</exception>
    public bool advance ()
    {
      if (!SkipWhiteSpace ())
        return false;

      tok = ParseToken ();
      // ParseToken leaves pos on the token's last character; step past it.
      MoveNext ();
      return true;
    }

    /// <summary>
    /// Returns the code of the current token.
    /// Should not be called if <c>advance</c> returned false.
    /// </summary>
    public int token ()
    {
      return tok;
    }

    /// <summary>
    /// Returns the value associated with the current token.
    /// Should not be called if <c>advance</c> returned false.
    /// </summary>
    public Object value ()
    {
      return val;
    }
  }
  276. }