sqlexer.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include "sqtable.h"
  8. #include "sqstring.h"
  9. #include "sqcompiler.h"
  10. #include "sqlexer.h"
  // Shorthand used by the SQLexer methods in this file.

  // The character the lexer is currently positioned on.
  #define CUR_CHAR (_currdata)
  // Saves the outgoing token in _prevtoken, stores t as the current token and
  // returns t from the enclosing function. t is evaluated twice.
  #define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}
  // True when the current character compares <= SQUIRREL_EOB, the value Next()
  // stores once the reader has no more input.
  #define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
  // Fetches the next character and bumps the column counter.
  #define NEXT() {Next();_currentcolumn++;}
  // Empties the scratch buffer used to accumulate literals and identifiers.
  #define INIT_TEMP_STRING() { _longstr.resize(0);}
  // Appends one character to the scratch buffer.
  #define APPEND_CHAR(c) { _longstr.push_back(c);}
  // Appends the terminating NUL to the scratch buffer.
  #define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}
  // Registers the spelling of `key` in the keyword table with token id `id`.
  // Expects a variable named `ss` (the shared state) to be in scope.
  #define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
  19. SQLexer::SQLexer(){}
  20. SQLexer::~SQLexer()
  21. {
  22. _keywords->Release();
  23. }
  24. void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
  25. {
  26. _errfunc = efunc;
  27. _errtarget = ed;
  28. _sharedstate = ss;
  29. _keywords = SQTable::Create(ss, 26);
  30. ADD_KEYWORD(while, TK_WHILE);
  31. ADD_KEYWORD(do, TK_DO);
  32. ADD_KEYWORD(if, TK_IF);
  33. ADD_KEYWORD(else, TK_ELSE);
  34. ADD_KEYWORD(break, TK_BREAK);
  35. ADD_KEYWORD(continue, TK_CONTINUE);
  36. ADD_KEYWORD(return, TK_RETURN);
  37. ADD_KEYWORD(null, TK_NULL);
  38. ADD_KEYWORD(function, TK_FUNCTION);
  39. ADD_KEYWORD(local, TK_LOCAL);
  40. ADD_KEYWORD(var, TK_LOCAL);
  41. ADD_KEYWORD(for, TK_FOR);
  42. ADD_KEYWORD(foreach, TK_FOREACH);
  43. ADD_KEYWORD(in, TK_IN);
  44. ADD_KEYWORD(typeof, TK_TYPEOF);
  45. ADD_KEYWORD(base, TK_BASE);
  46. ADD_KEYWORD(delete, TK_DELETE);
  47. ADD_KEYWORD(try, TK_TRY);
  48. ADD_KEYWORD(catch, TK_CATCH);
  49. ADD_KEYWORD(throw, TK_THROW);
  50. ADD_KEYWORD(clone, TK_CLONE);
  51. ADD_KEYWORD(yield, TK_YIELD);
  52. ADD_KEYWORD(resume, TK_RESUME);
  53. ADD_KEYWORD(switch, TK_SWITCH);
  54. ADD_KEYWORD(case, TK_CASE);
  55. ADD_KEYWORD(default, TK_DEFAULT);
  56. ADD_KEYWORD(this, TK_THIS);
  57. ADD_KEYWORD(class,TK_CLASS);
  58. ADD_KEYWORD(extends,TK_EXTENDS);
  59. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  60. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  61. ADD_KEYWORD(true,TK_TRUE);
  62. ADD_KEYWORD(false,TK_FALSE);
  63. ADD_KEYWORD(static,TK_STATIC);
  64. ADD_KEYWORD(enum,TK_ENUM);
  65. ADD_KEYWORD(const,TK_CONST);
  66. ADD_KEYWORD(__LINE__,TK___LINE__);
  67. ADD_KEYWORD(__FILE__,TK___FILE__);
  68. ADD_KEYWORD(new,TK_IGNORE);
  69. _readf = rg;
  70. _up = up;
  71. _lasttokenline = _currentline = 1;
  72. _currentcolumn = 0;
  73. _prevtoken = -1;
  74. _reached_eof = SQFalse;
  75. Next();
  76. }
  77. void SQLexer::Error(const SQChar *err)
  78. {
  79. _errfunc(_errtarget,err);
  80. }
  81. void SQLexer::Next()
  82. {
  83. SQInteger t = _readf(_up);
  84. if(t > MAX_CHAR) Error(_SC("Invalid character"));
  85. if(t != 0) {
  86. _currdata = (LexChar)t;
  87. return;
  88. }
  89. _currdata = SQUIRREL_EOB;
  90. _reached_eof = SQTrue;
  91. }
  92. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  93. {
  94. SQObjectPtr itr, key, val;
  95. SQInteger nitr;
  96. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  97. itr = (SQInteger)nitr;
  98. if(((SQInteger)_integer(val)) == tok)
  99. return _stringval(key);
  100. }
  101. return NULL;
  102. }
  103. void SQLexer::LexBlockComment()
  104. {
  105. bool done = false;
  106. while(!done) {
  107. switch(CUR_CHAR) {
  108. case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
  109. case _SC('\n'): _currentline++; NEXT(); continue;
  110. case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
  111. default: NEXT();
  112. }
  113. }
  114. }
  115. void SQLexer::LexLineComment()
  116. {
  117. do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
  118. }
  119. SQInteger SQLexer::Lex()
  120. {
  121. _lasttokenline = _currentline;
  122. while(CUR_CHAR != SQUIRREL_EOB) {
  123. switch(CUR_CHAR){
  124. case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
  125. case _SC('\n'):
  126. _currentline++;
  127. _prevtoken=_curtoken;
  128. _curtoken=_SC('\n');
  129. NEXT();
  130. _currentcolumn=1;
  131. continue;
  132. case _SC('#'): LexLineComment(); continue;
  133. case _SC('/'):
  134. NEXT();
  135. switch(CUR_CHAR){
  136. case _SC('*'):
  137. NEXT();
  138. LexBlockComment();
  139. continue;
  140. case _SC('/'):
  141. LexLineComment();
  142. continue;
  143. case _SC('='):
  144. NEXT();
  145. RETURN_TOKEN(TK_DIVEQ);
  146. continue;
  147. case _SC('>'):
  148. NEXT();
  149. RETURN_TOKEN(TK_ATTR_CLOSE);
  150. continue;
  151. default:
  152. RETURN_TOKEN('/');
  153. }
  154. case _SC('='):
  155. NEXT();
  156. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
  157. else { NEXT(); RETURN_TOKEN(TK_EQ); }
  158. case _SC('<'):
  159. NEXT();
  160. switch(CUR_CHAR) {
  161. case _SC('='):
  162. NEXT();
  163. if(CUR_CHAR == _SC('>')) {
  164. NEXT();
  165. RETURN_TOKEN(TK_3WAYSCMP);
  166. }
  167. RETURN_TOKEN(TK_LE)
  168. break;
  169. case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
  170. case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
  171. case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
  172. }
  173. RETURN_TOKEN('<');
  174. case _SC('>'):
  175. NEXT();
  176. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
  177. else if(CUR_CHAR == _SC('>')){
  178. NEXT();
  179. if(CUR_CHAR == _SC('>')){
  180. NEXT();
  181. RETURN_TOKEN(TK_USHIFTR);
  182. }
  183. RETURN_TOKEN(TK_SHIFTR);
  184. }
  185. else { RETURN_TOKEN('>') }
  186. case _SC('!'):
  187. NEXT();
  188. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
  189. else { NEXT(); RETURN_TOKEN(TK_NE); }
  190. case _SC('@'): {
  191. SQInteger stype;
  192. NEXT();
  193. if(CUR_CHAR != _SC('"')) {
  194. RETURN_TOKEN('@');
  195. }
  196. if((stype=ReadString('"',true))!=-1) {
  197. RETURN_TOKEN(stype);
  198. }
  199. Error(_SC("error parsing the string"));
  200. }
  201. case _SC('"'):
  202. case _SC('\''): {
  203. SQInteger stype;
  204. if((stype=ReadString(CUR_CHAR,false))!=-1){
  205. RETURN_TOKEN(stype);
  206. }
  207. Error(_SC("error parsing the string"));
  208. }
  209. case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
  210. case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
  211. {SQInteger ret = CUR_CHAR;
  212. NEXT(); RETURN_TOKEN(ret); }
  213. case _SC('.'):
  214. NEXT();
  215. if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
  216. NEXT();
  217. if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
  218. NEXT();
  219. RETURN_TOKEN(TK_VARPARAMS);
  220. case _SC('&'):
  221. NEXT();
  222. if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
  223. else { NEXT(); RETURN_TOKEN(TK_AND); }
  224. case _SC('|'):
  225. NEXT();
  226. if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
  227. else { NEXT(); RETURN_TOKEN(TK_OR); }
  228. case _SC(':'):
  229. NEXT();
  230. if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
  231. else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
  232. case _SC('*'):
  233. NEXT();
  234. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
  235. else RETURN_TOKEN('*');
  236. case _SC('%'):
  237. NEXT();
  238. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
  239. else RETURN_TOKEN('%');
  240. case _SC('-'):
  241. NEXT();
  242. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
  243. else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
  244. else if (CUR_CHAR == _SC('>')){ NEXT(); RETURN_TOKEN('.');} //accept C/C++ like pointers
  245. else RETURN_TOKEN('-');
  246. case _SC('+'):
  247. NEXT();
  248. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
  249. else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
  250. else RETURN_TOKEN('+');
  251. case SQUIRREL_EOB:
  252. return 0;
  253. default:{
  254. if (scisdigit(CUR_CHAR)) {
  255. SQInteger ret = ReadNumber();
  256. RETURN_TOKEN(ret);
  257. }
  258. else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
  259. SQInteger t = ReadID();
  260. RETURN_TOKEN(t);
  261. }
  262. else {
  263. SQInteger c = CUR_CHAR;
  264. if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
  265. NEXT();
  266. RETURN_TOKEN(c);
  267. }
  268. RETURN_TOKEN(0);
  269. }
  270. }
  271. }
  272. return 0;
  273. }
  274. SQInteger SQLexer::GetIDType(SQChar *s)
  275. {
  276. SQObjectPtr t;
  277. if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
  278. return SQInteger(_integer(t));
  279. }
  280. return TK_IDENTIFIER;
  281. }
  282. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  283. {
  284. INIT_TEMP_STRING();
  285. NEXT();
  286. if(IS_EOB()) return -1;
  287. for(;;) {
  288. while(CUR_CHAR != ndelim) {
  289. switch(CUR_CHAR) {
  290. case SQUIRREL_EOB:
  291. Error(_SC("unfinished string"));
  292. return -1;
  293. case _SC('\n'):
  294. if(!verbatim) Error(_SC("newline in a constant"));
  295. APPEND_CHAR(CUR_CHAR); NEXT();
  296. _currentline++;
  297. break;
  298. case _SC('\\'):
  299. if(verbatim) {
  300. APPEND_CHAR('\\'); NEXT();
  301. }
  302. else {
  303. NEXT();
  304. switch(CUR_CHAR) {
  305. case _SC('x'): NEXT(); {
  306. if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
  307. const SQInteger maxdigits = 4;
  308. SQChar temp[maxdigits+1];
  309. SQInteger n = 0;
  310. while(isxdigit(CUR_CHAR) && n < maxdigits) {
  311. temp[n] = CUR_CHAR;
  312. n++;
  313. NEXT();
  314. }
  315. temp[n] = 0;
  316. SQChar *sTemp;
  317. APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
  318. }
  319. break;
  320. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  321. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  322. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  323. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  324. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  325. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  326. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  327. case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
  328. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  329. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  330. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  331. default:
  332. Error(_SC("unrecognised escaper char"));
  333. break;
  334. }
  335. }
  336. break;
  337. default:
  338. APPEND_CHAR(CUR_CHAR);
  339. NEXT();
  340. }
  341. }
  342. NEXT();
  343. if(verbatim && CUR_CHAR == '"') { //double quotation
  344. APPEND_CHAR(CUR_CHAR);
  345. NEXT();
  346. }
  347. else {
  348. break;
  349. }
  350. }
  351. TERMINATE_BUFFER();
  352. SQInteger len = _longstr.size()-1;
  353. if(ndelim == _SC('\'')) {
  354. if(len == 0) Error(_SC("empty constant"));
  355. if(len > 1) Error(_SC("constant too long"));
  356. _nvalue = _longstr[0];
  357. return TK_INTEGER;
  358. }
  359. _svalue = &_longstr[0];
  360. return TK_STRING_LITERAL;
  361. }
  362. void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  363. {
  364. *res = 0;
  365. while(*s != 0)
  366. {
  367. if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
  368. else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
  369. else { assert(0); }
  370. }
  371. }
  372. void LexInteger(const SQChar *s,SQUnsignedInteger *res)
  373. {
  374. *res = 0;
  375. while(*s != 0)
  376. {
  377. *res = (*res)*10+((*s++)-'0');
  378. }
  379. }
  380. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  381. void LexOctal(const SQChar *s,SQUnsignedInteger *res)
  382. {
  383. *res = 0;
  384. while(*s != 0)
  385. {
  386. if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
  387. else { assert(0); }
  388. }
  389. }
  390. SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
  391. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  392. SQInteger SQLexer::ReadNumber()
  393. {
  394. #define TINT 1
  395. #define TFLOAT 2
  396. #define THEX 3
  397. #define TSCIENTIFIC 4
  398. #define TOCTAL 5
  399. SQInteger type = TINT, firstchar = CUR_CHAR;
  400. SQChar *sTemp;
  401. INIT_TEMP_STRING();
  402. NEXT();
  403. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  404. if(scisodigit(CUR_CHAR)) {
  405. type = TOCTAL;
  406. while(scisodigit(CUR_CHAR)) {
  407. APPEND_CHAR(CUR_CHAR);
  408. NEXT();
  409. }
  410. if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
  411. }
  412. else {
  413. NEXT();
  414. type = THEX;
  415. while(isxdigit(CUR_CHAR)) {
  416. APPEND_CHAR(CUR_CHAR);
  417. NEXT();
  418. }
  419. if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
  420. }
  421. }
  422. else {
  423. APPEND_CHAR((int)firstchar);
  424. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  425. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  426. if(isexponent(CUR_CHAR)) {
  427. if(type != TFLOAT) Error(_SC("invalid numeric format"));
  428. type = TSCIENTIFIC;
  429. APPEND_CHAR(CUR_CHAR);
  430. NEXT();
  431. if(CUR_CHAR == '+' || CUR_CHAR == '-'){
  432. APPEND_CHAR(CUR_CHAR);
  433. NEXT();
  434. }
  435. if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
  436. }
  437. APPEND_CHAR(CUR_CHAR);
  438. NEXT();
  439. }
  440. }
  441. TERMINATE_BUFFER();
  442. switch(type) {
  443. case TSCIENTIFIC:
  444. case TFLOAT:
  445. _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
  446. return TK_FLOAT;
  447. case TINT:
  448. LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  449. return TK_INTEGER;
  450. case THEX:
  451. LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  452. return TK_INTEGER;
  453. case TOCTAL:
  454. LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  455. return TK_INTEGER;
  456. }
  457. return 0;
  458. }
  459. SQInteger SQLexer::ReadID()
  460. {
  461. SQInteger res;
  462. INIT_TEMP_STRING();
  463. do {
  464. APPEND_CHAR(CUR_CHAR);
  465. NEXT();
  466. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  467. TERMINATE_BUFFER();
  468. res = GetIDType(&_longstr[0]);
  469. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
  470. _svalue = &_longstr[0];
  471. }
  472. return res;
  473. }