sqlexer.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include "sqtable.h"
  8. #include "sqstring.h"
  9. #include "sqcompiler.h"
  10. #include "sqlexer.h"
// The character the lexer is currently looking at (last value stored by Next()).
#define CUR_CHAR (_currdata)
// Shifts the current token into _prevtoken, records t as the current token and
// returns t from the enclosing function.
#define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}
// True when the current character compares at or below the end-of-buffer marker.
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
// Reads the next character and bumps the column counter.
#define NEXT() {Next();_currentcolumn++;}
// Scratch-buffer helpers: _longstr accumulates the text of the token being read.
#define INIT_TEMP_STRING() { _longstr.resize(0);}
#define APPEND_CHAR(c) { _longstr.push_back(c);}
#define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}
// Adds a slot to _keywords mapping the spelling of `key` to the token id `id`.
// Expands to code that uses a variable named `ss`, so one must be in scope.
#define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
  19. SQLexer::SQLexer(){}
  20. SQLexer::~SQLexer()
  21. {
  22. _keywords->Release();
  23. }
  24. void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
  25. {
  26. _errfunc = efunc;
  27. _errtarget = ed;
  28. _sharedstate = ss;
  29. _keywords = SQTable::Create(ss, 26);
  30. ADD_KEYWORD(while, TK_WHILE);
  31. ADD_KEYWORD(do, TK_DO);
  32. ADD_KEYWORD(if, TK_IF);
  33. ADD_KEYWORD(else, TK_ELSE);
  34. ADD_KEYWORD(break, TK_BREAK);
  35. ADD_KEYWORD(continue, TK_CONTINUE);
  36. ADD_KEYWORD(return, TK_RETURN);
  37. ADD_KEYWORD(null, TK_NULL);
  38. ADD_KEYWORD(function, TK_FUNCTION);
  39. ADD_KEYWORD(local, TK_LOCAL);
  40. ADD_KEYWORD(for, TK_FOR);
  41. ADD_KEYWORD(foreach, TK_FOREACH);
  42. ADD_KEYWORD(in, TK_IN);
  43. ADD_KEYWORD(typeof, TK_TYPEOF);
  44. ADD_KEYWORD(base, TK_BASE);
  45. ADD_KEYWORD(delete, TK_DELETE);
  46. ADD_KEYWORD(try, TK_TRY);
  47. ADD_KEYWORD(catch, TK_CATCH);
  48. ADD_KEYWORD(throw, TK_THROW);
  49. ADD_KEYWORD(clone, TK_CLONE);
  50. ADD_KEYWORD(yield, TK_YIELD);
  51. ADD_KEYWORD(resume, TK_RESUME);
  52. ADD_KEYWORD(switch, TK_SWITCH);
  53. ADD_KEYWORD(case, TK_CASE);
  54. ADD_KEYWORD(default, TK_DEFAULT);
  55. ADD_KEYWORD(this, TK_THIS);
  56. ADD_KEYWORD(class,TK_CLASS);
  57. ADD_KEYWORD(extends,TK_EXTENDS);
  58. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  59. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  60. ADD_KEYWORD(true,TK_TRUE);
  61. ADD_KEYWORD(false,TK_FALSE);
  62. ADD_KEYWORD(static,TK_STATIC);
  63. ADD_KEYWORD(enum,TK_ENUM);
  64. ADD_KEYWORD(const,TK_CONST);
  65. _readf = rg;
  66. _up = up;
  67. _lasttokenline = _currentline = 1;
  68. _currentcolumn = 0;
  69. _prevtoken = -1;
  70. _reached_eof = SQFalse;
  71. Next();
  72. }
// Reports a lexing error by forwarding the message to the error callback that
// was registered through Init(), together with the opaque target pointer.
// NOTE(review): several callers keep scanning right after calling this, so the
// callback presumably does not return normally -- confirm against the handler
// the compiler installs.
void SQLexer::Error(const SQChar *err)
{
    _errfunc(_errtarget,err);
}
  77. void SQLexer::Next()
  78. {
  79. SQInteger t = _readf(_up);
  80. if(t > MAX_CHAR) Error(_SC("Invalid character"));
  81. if(t != 0) {
  82. _currdata = (LexChar)t;
  83. return;
  84. }
  85. _currdata = SQUIRREL_EOB;
  86. _reached_eof = SQTrue;
  87. }
  88. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  89. {
  90. SQObjectPtr itr, key, val;
  91. SQInteger nitr;
  92. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  93. itr = (SQInteger)nitr;
  94. if(((SQInteger)_integer(val)) == tok)
  95. return _stringval(key);
  96. }
  97. return NULL;
  98. }
  99. void SQLexer::LexBlockComment()
  100. {
  101. bool done = false;
  102. while(!done) {
  103. switch(CUR_CHAR) {
  104. case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
  105. case _SC('\n'): _currentline++; NEXT(); continue;
  106. case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
  107. default: NEXT();
  108. }
  109. }
  110. }
  111. void SQLexer::LexLineComment()
  112. {
  113. do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
  114. }
// Scans forward and returns the next token id, or 0 once the end-of-buffer marker
// is reached. Whitespace and comments are skipped. Single-character tokens are
// returned as their character code, multi-character operators, literals, keywords
// and identifiers as TK_* ids. Literal values are left in _nvalue / _fvalue /
// _svalue by the Read* helpers.
// Every RETURN_TOKEN(...) below returns from this function, which is why most
// cases have no trailing break.
SQInteger SQLexer::Lex()
{
    // Remember the line on which this token scan started.
    _lasttokenline = _currentline;
    while(CUR_CHAR != SQUIRREL_EOB) {
        switch(CUR_CHAR){
        // Blanks are skipped.
        case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
        case _SC('\n'):
            // A newline is recorded as the current token but is not returned;
            // scanning continues on the next line.
            _currentline++;
            _prevtoken=_curtoken;
            _curtoken=_SC('\n');
            NEXT();
            _currentcolumn=1;
            continue;
        // '#' starts a line comment.
        case _SC('#'): LexLineComment(); continue;
        case _SC('/'):
            NEXT();
            // '/' may open a comment or be part of "/=", "/>" or plain division.
            // The `continue` statements in this inner switch resume the outer while loop.
            switch(CUR_CHAR){
            case _SC('*'):
                NEXT();
                LexBlockComment();
                continue;
            case _SC('/'):
                LexLineComment();
                continue;
            case _SC('='):
                NEXT();
                RETURN_TOKEN(TK_DIVEQ);
                continue;
            case _SC('>'):
                NEXT();
                RETURN_TOKEN(TK_ATTR_CLOSE);
                continue;
            default:
                RETURN_TOKEN('/');
            }
        case _SC('='):
            // "=" or "=="
            NEXT();
            if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
            else { NEXT(); RETURN_TOKEN(TK_EQ); }
        case _SC('<'):
            // "<", "<=", "<=>", "<-", "<<" or "</"
            NEXT();
            switch(CUR_CHAR) {
            case _SC('='):
                NEXT();
                if(CUR_CHAR == _SC('>')) {
                    NEXT();
                    RETURN_TOKEN(TK_3WAYSCMP);
                }
                RETURN_TOKEN(TK_LE)
                break;
            case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
            case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
            case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
            }
            RETURN_TOKEN('<');
        case _SC('>'):
            // ">", ">=", ">>" or ">>>"
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
            else if(CUR_CHAR == _SC('>')){
                NEXT();
                if(CUR_CHAR == _SC('>')){
                    NEXT();
                    RETURN_TOKEN(TK_USHIFTR);
                }
                RETURN_TOKEN(TK_SHIFTR);
            }
            else { RETURN_TOKEN('>') }
        case _SC('!'):
            // "!" or "!="
            NEXT();
            if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
            else { NEXT(); RETURN_TOKEN(TK_NE); }
        case _SC('@'): {
            // '@' directly followed by '"' starts a verbatim string; otherwise
            // '@' is returned as a token of its own.
            SQInteger stype;
            NEXT();
            if(CUR_CHAR != _SC('"')) {
                RETURN_TOKEN('@');
            }
            if((stype=ReadString('"',true))!=-1) {
                RETURN_TOKEN(stype);
            }
            // NOTE(review): if Error() were to return, control would fall through
            // into the next case -- presumably Error() does not return; confirm.
            Error(_SC("error parsing the string"));
            }
        case _SC('"'):
        case _SC('\''): {
            // Regular string or character literal; the opening quote is the delimiter.
            SQInteger stype;
            if((stype=ReadString(CUR_CHAR,false))!=-1){
                RETURN_TOKEN(stype);
            }
            Error(_SC("error parsing the string"));
            }
        // Punctuation returned as its own character code.
        case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
        case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
            {SQInteger ret = CUR_CHAR;
            NEXT(); RETURN_TOKEN(ret); }
        case _SC('.'):
            // "." or "..."; exactly two dots is reported as an error.
            NEXT();
            if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
            NEXT();
            if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
            NEXT();
            RETURN_TOKEN(TK_VARPARAMS);
        case _SC('&'):
            // "&" or "&&"
            NEXT();
            if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
            else { NEXT(); RETURN_TOKEN(TK_AND); }
        case _SC('|'):
            // "|" or "||"
            NEXT();
            if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
            else { NEXT(); RETURN_TOKEN(TK_OR); }
        case _SC(':'):
            // ":" or "::"
            NEXT();
            if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
            else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
        case _SC('*'):
            // "*" or "*="
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
            else RETURN_TOKEN('*');
        case _SC('%'):
            // "%" or "%="
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
            else RETURN_TOKEN('%');
        case _SC('-'):
            // "-", "-=" or "--"
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
            else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
            else RETURN_TOKEN('-');
        case _SC('+'):
            // "+", "+=" or "++"
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
            else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
            else RETURN_TOKEN('+');
        case SQUIRREL_EOB:
            return 0;
        default:{
                // Numbers, identifiers/keywords, or any other single character.
                if (scisdigit(CUR_CHAR)) {
                    SQInteger ret = ReadNumber();
                    RETURN_TOKEN(ret);
                }
                else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
                    SQInteger t = ReadID();
                    RETURN_TOKEN(t);
                }
                else {
                    // Control characters are reported; anything else is returned
                    // as its own character code.
                    SQInteger c = CUR_CHAR;
                    if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
                    NEXT();
                    RETURN_TOKEN(c);
                }
                // Not reached: every branch above returns.
                RETURN_TOKEN(0);
            }
        }
    }
    return 0;
}
  269. SQInteger SQLexer::GetIDType(SQChar *s)
  270. {
  271. SQObjectPtr t;
  272. if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
  273. return SQInteger(_integer(t));
  274. }
  275. return TK_IDENTIFIER;
  276. }
  277. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  278. {
  279. INIT_TEMP_STRING();
  280. NEXT();
  281. if(IS_EOB()) return -1;
  282. for(;;) {
  283. while(CUR_CHAR != ndelim) {
  284. switch(CUR_CHAR) {
  285. case SQUIRREL_EOB:
  286. Error(_SC("unfinished string"));
  287. return -1;
  288. case _SC('\n'):
  289. if(!verbatim) Error(_SC("newline in a constant"));
  290. APPEND_CHAR(CUR_CHAR); NEXT();
  291. _currentline++;
  292. break;
  293. case _SC('\\'):
  294. if(verbatim) {
  295. APPEND_CHAR('\\'); NEXT();
  296. }
  297. else {
  298. NEXT();
  299. switch(CUR_CHAR) {
  300. case _SC('x'): NEXT(); {
  301. if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
  302. const SQInteger maxdigits = 4;
  303. SQChar temp[maxdigits+1];
  304. SQInteger n = 0;
  305. while(isxdigit(CUR_CHAR) && n < maxdigits) {
  306. temp[n] = CUR_CHAR;
  307. n++;
  308. NEXT();
  309. }
  310. temp[n] = 0;
  311. SQChar *sTemp;
  312. APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
  313. }
  314. break;
  315. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  316. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  317. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  318. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  319. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  320. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  321. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  322. case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
  323. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  324. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  325. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  326. default:
  327. Error(_SC("unrecognised escaper char"));
  328. break;
  329. }
  330. }
  331. break;
  332. default:
  333. APPEND_CHAR(CUR_CHAR);
  334. NEXT();
  335. }
  336. }
  337. NEXT();
  338. if(verbatim && CUR_CHAR == '"') { //double quotation
  339. APPEND_CHAR(CUR_CHAR);
  340. NEXT();
  341. }
  342. else {
  343. break;
  344. }
  345. }
  346. TERMINATE_BUFFER();
  347. SQInteger len = _longstr.size()-1;
  348. if(ndelim == _SC('\'')) {
  349. if(len == 0) Error(_SC("empty constant"));
  350. if(len > 1) Error(_SC("constant too long"));
  351. _nvalue = _longstr[0];
  352. return TK_INTEGER;
  353. }
  354. _svalue = &_longstr[0];
  355. return TK_STRING_LITERAL;
  356. }
  357. void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  358. {
  359. *res = 0;
  360. while(*s != 0)
  361. {
  362. if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
  363. else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
  364. else { assert(0); }
  365. }
  366. }
  367. void LexInteger(const SQChar *s,SQUnsignedInteger *res)
  368. {
  369. *res = 0;
  370. while(*s != 0)
  371. {
  372. *res = (*res)*10+((*s++)-'0');
  373. }
  374. }
  375. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  376. void LexOctal(const SQChar *s,SQUnsignedInteger *res)
  377. {
  378. *res = 0;
  379. while(*s != 0)
  380. {
  381. if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
  382. else { assert(0); }
  383. }
  384. }
  385. SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
  386. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  387. SQInteger SQLexer::ReadNumber()
  388. {
  389. #define TINT 1
  390. #define TFLOAT 2
  391. #define THEX 3
  392. #define TSCIENTIFIC 4
  393. #define TOCTAL 5
  394. SQInteger type = TINT, firstchar = CUR_CHAR;
  395. SQChar *sTemp;
  396. INIT_TEMP_STRING();
  397. NEXT();
  398. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  399. if(scisodigit(CUR_CHAR)) {
  400. type = TOCTAL;
  401. while(scisodigit(CUR_CHAR)) {
  402. APPEND_CHAR(CUR_CHAR);
  403. NEXT();
  404. }
  405. if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
  406. }
  407. else {
  408. NEXT();
  409. type = THEX;
  410. while(isxdigit(CUR_CHAR)) {
  411. APPEND_CHAR(CUR_CHAR);
  412. NEXT();
  413. }
  414. if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
  415. }
  416. }
  417. else {
  418. APPEND_CHAR((int)firstchar);
  419. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  420. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  421. if(isexponent(CUR_CHAR)) {
  422. if(type != TFLOAT) Error(_SC("invalid numeric format"));
  423. type = TSCIENTIFIC;
  424. APPEND_CHAR(CUR_CHAR);
  425. NEXT();
  426. if(CUR_CHAR == '+' || CUR_CHAR == '-'){
  427. APPEND_CHAR(CUR_CHAR);
  428. NEXT();
  429. }
  430. if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
  431. }
  432. APPEND_CHAR(CUR_CHAR);
  433. NEXT();
  434. }
  435. }
  436. TERMINATE_BUFFER();
  437. switch(type) {
  438. case TSCIENTIFIC:
  439. case TFLOAT:
  440. _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
  441. return TK_FLOAT;
  442. case TINT:
  443. LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  444. return TK_INTEGER;
  445. case THEX:
  446. LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  447. return TK_INTEGER;
  448. case TOCTAL:
  449. LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  450. return TK_INTEGER;
  451. }
  452. return 0;
  453. }
  454. SQInteger SQLexer::ReadID()
  455. {
  456. SQInteger res;
  457. INIT_TEMP_STRING();
  458. do {
  459. APPEND_CHAR(CUR_CHAR);
  460. NEXT();
  461. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  462. TERMINATE_BUFFER();
  463. res = GetIDType(&_longstr[0]);
  464. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
  465. _svalue = &_longstr[0];
  466. }
  467. return res;
  468. }