sqlexer.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include "sqtable.h"
  8. #include "sqstring.h"
  9. #include "sqcompiler.h"
  10. #include "sqlexer.h"
// The lexer's current character (the _currdata member).
#define CUR_CHAR (_currdata)
// Shifts the current token into _prevtoken, stores t as the current token and
// returns t from the enclosing function. Expands to a brace block, so some call
// sites omit the trailing semicolon. Note that t is evaluated twice.
#define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}
// True when the current character compares <= SQUIRREL_EOB.
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
// Advances to the next input character via Next() and bumps the column counter.
#define NEXT() {Next();_currentcolumn++;}
// Empties the scratch buffer (_longstr) used to accumulate the token text.
#define INIT_TEMP_STRING() { _longstr.resize(0);}
// Appends one character to the scratch buffer.
#define APPEND_CHAR(c) { _longstr.push_back(c);}
// Appends the terminating NUL to the scratch buffer.
#define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}
// Registers a keyword: `key` is stringified into the slot name and `id` is stored
// as its integer value. Requires a variable named `ss` in scope at the call site.
#define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
  19. SQLexer::SQLexer(){}
  20. SQLexer::~SQLexer()
  21. {
  22. _keywords->Release();
  23. }
  24. void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
  25. {
  26. _errfunc = efunc;
  27. _errtarget = ed;
  28. _sharedstate = ss;
  29. _keywords = SQTable::Create(ss, 26);
  30. ADD_KEYWORD(while, TK_WHILE);
  31. ADD_KEYWORD(do, TK_DO);
  32. ADD_KEYWORD(if, TK_IF);
  33. ADD_KEYWORD(else, TK_ELSE);
  34. ADD_KEYWORD(break, TK_BREAK);
  35. ADD_KEYWORD(continue, TK_CONTINUE);
  36. ADD_KEYWORD(return, TK_RETURN);
  37. ADD_KEYWORD(null, TK_NULL);
  38. ADD_KEYWORD(function, TK_FUNCTION);
  39. ADD_KEYWORD(local, TK_LOCAL);
  40. ADD_KEYWORD(var, TK_LOCAL);
  41. ADD_KEYWORD(for, TK_FOR);
  42. ADD_KEYWORD(foreach, TK_FOREACH);
  43. ADD_KEYWORD(in, TK_IN);
  44. ADD_KEYWORD(typeof, TK_TYPEOF);
  45. ADD_KEYWORD(base, TK_BASE);
  46. ADD_KEYWORD(delete, TK_DELETE);
  47. ADD_KEYWORD(try, TK_TRY);
  48. ADD_KEYWORD(catch, TK_CATCH);
  49. ADD_KEYWORD(throw, TK_THROW);
  50. ADD_KEYWORD(clone, TK_CLONE);
  51. ADD_KEYWORD(yield, TK_YIELD);
  52. ADD_KEYWORD(resume, TK_RESUME);
  53. ADD_KEYWORD(switch, TK_SWITCH);
  54. ADD_KEYWORD(case, TK_CASE);
  55. ADD_KEYWORD(default, TK_DEFAULT);
  56. ADD_KEYWORD(this, TK_THIS);
  57. ADD_KEYWORD(class,TK_CLASS);
  58. ADD_KEYWORD(extends,TK_EXTENDS);
  59. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  60. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  61. ADD_KEYWORD(true,TK_TRUE);
  62. ADD_KEYWORD(false,TK_FALSE);
  63. ADD_KEYWORD(static,TK_STATIC);
  64. ADD_KEYWORD(enum,TK_ENUM);
  65. ADD_KEYWORD(const,TK_CONST);
  66. ADD_KEYWORD(__LINE__,TK___LINE__);
  67. ADD_KEYWORD(__FILE__,TK___FILE__);
  68. ADD_KEYWORD(new,TK_IGNORE);
  69. _readf = rg;
  70. _up = up;
  71. _lasttokenline = _currentline = 1;
  72. _currentcolumn = 0;
  73. _prevtoken = -1;
  74. _reached_eof = SQFalse;
  75. Next();
  76. }
  77. void SQLexer::Error(const SQChar *err)
  78. {
  79. _errfunc(_errtarget,err);
  80. }
  81. void SQLexer::Next()
  82. {
  83. SQInteger t = _readf(_up);
  84. if(t > MAX_CHAR) Error(_SC("Invalid character"));
  85. if(t != 0) {
  86. _currdata = (LexChar)t;
  87. return;
  88. }
  89. _currdata = SQUIRREL_EOB;
  90. _reached_eof = SQTrue;
  91. }
  92. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  93. {
  94. SQObjectPtr itr, key, val;
  95. SQInteger nitr;
  96. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  97. itr = (SQInteger)nitr;
  98. if(((SQInteger)_integer(val)) == tok)
  99. return _stringval(key);
  100. }
  101. return NULL;
  102. }
  103. void SQLexer::LexBlockComment()
  104. {
  105. bool done = false;
  106. while(!done) {
  107. switch(CUR_CHAR) {
  108. case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
  109. case _SC('\n'): _currentline++; NEXT(); continue;
  110. case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
  111. default: NEXT();
  112. }
  113. }
  114. }
  115. void SQLexer::LexLineComment()
  116. {
  117. do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
  118. }
  119. SQInteger SQLexer::Lex()
  120. {
  121. _lasttokenline = _currentline;
  122. while(CUR_CHAR != SQUIRREL_EOB) {
  123. switch(CUR_CHAR){
  124. case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
  125. case _SC('\n'):
  126. _currentline++;
  127. _prevtoken=_curtoken;
  128. _curtoken=_SC('\n');
  129. NEXT();
  130. _currentcolumn=1;
  131. continue;
  132. case _SC('#'): LexLineComment(); continue;
  133. case _SC('/'):
  134. NEXT();
  135. switch(CUR_CHAR){
  136. case _SC('*'):
  137. NEXT();
  138. LexBlockComment();
  139. continue;
  140. case _SC('/'):
  141. LexLineComment();
  142. continue;
  143. case _SC('='):
  144. NEXT();
  145. RETURN_TOKEN(TK_DIVEQ);
  146. continue;
  147. case _SC('>'):
  148. NEXT();
  149. RETURN_TOKEN(TK_ATTR_CLOSE);
  150. continue;
  151. default:
  152. RETURN_TOKEN('/');
  153. }
  154. case _SC('='):
  155. NEXT();
  156. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
  157. else { NEXT(); RETURN_TOKEN(TK_EQ); }
  158. case _SC('<'):
  159. NEXT();
  160. switch(CUR_CHAR) {
  161. case _SC('='):
  162. NEXT();
  163. if(CUR_CHAR == _SC('>')) {
  164. NEXT();
  165. RETURN_TOKEN(TK_3WAYSCMP);
  166. }
  167. RETURN_TOKEN(TK_LE)
  168. break;
  169. case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
  170. case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
  171. case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
  172. }
  173. RETURN_TOKEN('<');
  174. case _SC('>'):
  175. NEXT();
  176. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
  177. else if(CUR_CHAR == _SC('>')){
  178. NEXT();
  179. if(CUR_CHAR == _SC('>')){
  180. NEXT();
  181. RETURN_TOKEN(TK_USHIFTR);
  182. }
  183. RETURN_TOKEN(TK_SHIFTR);
  184. }
  185. else { RETURN_TOKEN('>') }
  186. case _SC('!'):
  187. NEXT();
  188. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
  189. else { NEXT(); RETURN_TOKEN(TK_NE); }
  190. case _SC('@'): {
  191. SQInteger stype;
  192. NEXT();
  193. if(CUR_CHAR != _SC('"')) {
  194. RETURN_TOKEN('@');
  195. }
  196. if((stype=ReadString('"',true))!=-1) {
  197. RETURN_TOKEN(stype);
  198. }
  199. Error(_SC("error parsing the string"));
  200. }
  201. case _SC('"'):
  202. case _SC('\''): {
  203. SQInteger stype;
  204. if((stype=ReadString(CUR_CHAR,false))!=-1){
  205. RETURN_TOKEN(stype);
  206. }
  207. Error(_SC("error parsing the string"));
  208. }
  209. case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
  210. case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
  211. {
  212. SQInteger ret = CUR_CHAR;
  213. NEXT();
  214. if(ret == _SC('[') && CUR_CHAR == _SC('=')){
  215. //lets try lua literal delimiters
  216. SQInteger stype;
  217. if((stype=ReadString(CUR_CHAR,true))!=-1){
  218. RETURN_TOKEN(stype);
  219. }
  220. Error(_SC("error parsing the string"));
  221. }
  222. else RETURN_TOKEN(ret);
  223. }
  224. case _SC('.'):
  225. NEXT();
  226. if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
  227. NEXT();
  228. if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
  229. NEXT();
  230. RETURN_TOKEN(TK_VARPARAMS);
  231. case _SC('&'):
  232. NEXT();
  233. if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
  234. else { NEXT(); RETURN_TOKEN(TK_AND); }
  235. case _SC('|'):
  236. NEXT();
  237. if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
  238. else { NEXT(); RETURN_TOKEN(TK_OR); }
  239. case _SC(':'):
  240. NEXT();
  241. if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
  242. else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
  243. case _SC('*'):
  244. NEXT();
  245. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
  246. else RETURN_TOKEN('*');
  247. case _SC('%'):
  248. NEXT();
  249. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
  250. else RETURN_TOKEN('%');
  251. case _SC('-'):
  252. NEXT();
  253. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
  254. else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
  255. else if (CUR_CHAR == _SC('>')){ NEXT(); RETURN_TOKEN('.');} //accept C/C++ like pointers
  256. else RETURN_TOKEN('-');
  257. case _SC('+'):
  258. NEXT();
  259. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
  260. else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
  261. else RETURN_TOKEN('+');
  262. case SQUIRREL_EOB:
  263. return 0;
  264. default:{
  265. if (scisdigit(CUR_CHAR)) {
  266. SQInteger ret = ReadNumber();
  267. RETURN_TOKEN(ret);
  268. }
  269. else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
  270. SQInteger t = ReadID();
  271. RETURN_TOKEN(t);
  272. }
  273. else {
  274. SQInteger c = CUR_CHAR;
  275. if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
  276. NEXT();
  277. RETURN_TOKEN(c);
  278. }
  279. RETURN_TOKEN(0);
  280. }
  281. }
  282. }
  283. return 0;
  284. }
  285. SQInteger SQLexer::GetIDType(SQChar *s)
  286. {
  287. SQObjectPtr t;
  288. if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
  289. return SQInteger(_integer(t));
  290. }
  291. return TK_IDENTIFIER;
  292. }
  293. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  294. {
  295. INIT_TEMP_STRING();
  296. SQInteger start_equals = 0;
  297. if(ndelim == _SC('=')){
  298. //lua like literal
  299. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  300. ++start_equals;
  301. NEXT();
  302. }
  303. if(CUR_CHAR != _SC('[')){
  304. //it's not a lua literal delimiter
  305. Error(_SC("expect '[' on literal delimiter"));
  306. return -1;
  307. }
  308. ndelim = _SC(']');
  309. }
  310. NEXT();
  311. if(IS_EOB()) return -1;
  312. if(start_equals && CUR_CHAR == _SC('\n')) {
  313. NEXT(); //if a new line follows the start of delimiter drop it
  314. if(IS_EOB()) return -1;
  315. }
  316. for(;;) {
  317. while(CUR_CHAR != ndelim) {
  318. switch(CUR_CHAR) {
  319. case SQUIRREL_EOB:
  320. Error(_SC("unfinished string"));
  321. return -1;
  322. case _SC('\n'):
  323. if(!verbatim) Error(_SC("newline in a constant"));
  324. APPEND_CHAR(CUR_CHAR); NEXT();
  325. _currentline++;
  326. break;
  327. case _SC('\\'):
  328. if(verbatim) {
  329. APPEND_CHAR('\\'); NEXT();
  330. }
  331. else {
  332. NEXT();
  333. switch(CUR_CHAR) {
  334. case _SC('x'): NEXT(); {
  335. if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
  336. const SQInteger maxdigits = 4;
  337. SQChar temp[maxdigits+1];
  338. SQInteger n = 0;
  339. while(isxdigit(CUR_CHAR) && n < maxdigits) {
  340. temp[n] = CUR_CHAR;
  341. n++;
  342. NEXT();
  343. }
  344. temp[n] = 0;
  345. SQChar *sTemp;
  346. APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
  347. }
  348. break;
  349. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  350. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  351. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  352. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  353. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  354. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  355. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  356. case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
  357. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  358. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  359. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  360. default:
  361. Error(_SC("unrecognised escaper char"));
  362. break;
  363. }
  364. }
  365. break;
  366. default:
  367. APPEND_CHAR(CUR_CHAR);
  368. NEXT();
  369. }
  370. }
  371. NEXT();
  372. if(start_equals){
  373. if(CUR_CHAR == _SC('=')){
  374. SQInteger end_equals = start_equals;
  375. NEXT();
  376. if(CUR_CHAR == _SC('=') || CUR_CHAR == _SC(']')){
  377. --end_equals;
  378. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  379. --end_equals;
  380. NEXT();
  381. }
  382. if(end_equals) Error(_SC("expect same number of '=' on literal delimiter"));
  383. if(CUR_CHAR != _SC(']')) Error(_SC("expect ']' to close literal delimiter"));
  384. NEXT();
  385. break;
  386. }
  387. APPEND_CHAR(_SC('='));
  388. }
  389. APPEND_CHAR(CUR_CHAR);
  390. NEXT();
  391. }
  392. else if(verbatim && CUR_CHAR == '"') { //double quotation
  393. APPEND_CHAR(CUR_CHAR);
  394. NEXT();
  395. }
  396. else {
  397. break;
  398. }
  399. }
  400. TERMINATE_BUFFER();
  401. SQInteger len = _longstr.size()-1;
  402. if(ndelim == _SC('\'')) {
  403. if(len == 0) Error(_SC("empty constant"));
  404. if(len > 1) Error(_SC("constant too long"));
  405. _nvalue = _longstr[0];
  406. return TK_INTEGER;
  407. }
  408. _svalue = &_longstr[0];
  409. return TK_STRING_LITERAL;
  410. }
  411. void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  412. {
  413. *res = 0;
  414. while(*s != 0)
  415. {
  416. if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
  417. else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
  418. else { assert(0); }
  419. }
  420. }
  421. void LexInteger(const SQChar *s,SQUnsignedInteger *res)
  422. {
  423. *res = 0;
  424. while(*s != 0)
  425. {
  426. *res = (*res)*10+((*s++)-'0');
  427. }
  428. }
  429. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  430. void LexOctal(const SQChar *s,SQUnsignedInteger *res)
  431. {
  432. *res = 0;
  433. while(*s != 0)
  434. {
  435. if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
  436. else { assert(0); }
  437. }
  438. }
  439. SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
  440. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  441. SQInteger SQLexer::ReadNumber()
  442. {
  443. #define TINT 1
  444. #define TFLOAT 2
  445. #define THEX 3
  446. #define TSCIENTIFIC 4
  447. #define TOCTAL 5
  448. SQInteger type = TINT, firstchar = CUR_CHAR;
  449. SQChar *sTemp;
  450. INIT_TEMP_STRING();
  451. NEXT();
  452. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  453. if(scisodigit(CUR_CHAR)) {
  454. type = TOCTAL;
  455. while(scisodigit(CUR_CHAR)) {
  456. APPEND_CHAR(CUR_CHAR);
  457. NEXT();
  458. }
  459. if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
  460. }
  461. else {
  462. NEXT();
  463. type = THEX;
  464. while(isxdigit(CUR_CHAR)) {
  465. APPEND_CHAR(CUR_CHAR);
  466. NEXT();
  467. }
  468. if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
  469. }
  470. }
  471. else {
  472. APPEND_CHAR((int)firstchar);
  473. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  474. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  475. if(isexponent(CUR_CHAR)) {
  476. if(type != TFLOAT) Error(_SC("invalid numeric format"));
  477. type = TSCIENTIFIC;
  478. APPEND_CHAR(CUR_CHAR);
  479. NEXT();
  480. if(CUR_CHAR == '+' || CUR_CHAR == '-'){
  481. APPEND_CHAR(CUR_CHAR);
  482. NEXT();
  483. }
  484. if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
  485. }
  486. APPEND_CHAR(CUR_CHAR);
  487. NEXT();
  488. }
  489. }
  490. TERMINATE_BUFFER();
  491. switch(type) {
  492. case TSCIENTIFIC:
  493. case TFLOAT:
  494. _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
  495. return TK_FLOAT;
  496. case TINT:
  497. LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  498. return TK_INTEGER;
  499. case THEX:
  500. LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  501. return TK_INTEGER;
  502. case TOCTAL:
  503. LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  504. return TK_INTEGER;
  505. }
  506. return 0;
  507. }
  508. SQInteger SQLexer::ReadID()
  509. {
  510. SQInteger res;
  511. INIT_TEMP_STRING();
  512. do {
  513. APPEND_CHAR(CUR_CHAR);
  514. NEXT();
  515. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  516. TERMINATE_BUFFER();
  517. res = GetIDType(&_longstr[0]);
  518. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
  519. _svalue = &_longstr[0];
  520. }
  521. return res;
  522. }