sqlexer.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include "sqtable.h"
  8. #include "sqstring.h"
  9. #include "sqcompiler.h"
  10. #include "sqlexer.h"
  11. #define CUR_CHAR (_currdata)
  12. #define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}
  13. #define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
  14. #define NEXT() {Next();_currentcolumn++;}
  15. #define INIT_TEMP_STRING() { _longstr.resize(0);}
  16. #define APPEND_CHAR(c) { _longstr.push_back(c);}
  17. #define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}
  18. #define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
  19. SQLexer::SQLexer(){}
  20. SQLexer::~SQLexer()
  21. {
  22. _keywords->Release();
  23. }
  24. void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
  25. {
  26. _errfunc = efunc;
  27. _errtarget = ed;
  28. _sharedstate = ss;
  29. _keywords = SQTable::Create(ss, 26);
  30. ADD_KEYWORD(while, TK_WHILE);
  31. ADD_KEYWORD(do, TK_DO);
  32. ADD_KEYWORD(if, TK_IF);
  33. ADD_KEYWORD(else, TK_ELSE);
  34. ADD_KEYWORD(break, TK_BREAK);
  35. ADD_KEYWORD(continue, TK_CONTINUE);
  36. ADD_KEYWORD(return, TK_RETURN);
  37. ADD_KEYWORD(null, TK_NULL);
  38. ADD_KEYWORD(function, TK_FUNCTION);
  39. ADD_KEYWORD(local, TK_LOCAL);
  40. ADD_KEYWORD(var, TK_LOCAL);
  41. ADD_KEYWORD(for, TK_FOR);
  42. ADD_KEYWORD(foreach, TK_FOREACH);
  43. ADD_KEYWORD(in, TK_IN);
  44. ADD_KEYWORD(typeof, TK_TYPEOF);
  45. ADD_KEYWORD(base, TK_BASE);
  46. ADD_KEYWORD(delete, TK_DELETE);
  47. ADD_KEYWORD(try, TK_TRY);
  48. ADD_KEYWORD(catch, TK_CATCH);
  49. ADD_KEYWORD(throw, TK_THROW);
  50. ADD_KEYWORD(clone, TK_CLONE);
  51. ADD_KEYWORD(yield, TK_YIELD);
  52. ADD_KEYWORD(resume, TK_RESUME);
  53. ADD_KEYWORD(switch, TK_SWITCH);
  54. ADD_KEYWORD(case, TK_CASE);
  55. ADD_KEYWORD(default, TK_DEFAULT);
  56. ADD_KEYWORD(this, TK_THIS);
  57. ADD_KEYWORD(class,TK_CLASS);
  58. ADD_KEYWORD(extends,TK_EXTENDS);
  59. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  60. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  61. ADD_KEYWORD(true,TK_TRUE);
  62. ADD_KEYWORD(false,TK_FALSE);
  63. ADD_KEYWORD(static,TK_STATIC);
  64. ADD_KEYWORD(enum,TK_ENUM);
  65. ADD_KEYWORD(const,TK_CONST);
  66. ADD_KEYWORD(__LINE__,TK___LINE__);
  67. ADD_KEYWORD(__FILE__,TK___FILE__);
  68. _readf = rg;
  69. _up = up;
  70. _lasttokenline = _currentline = 1;
  71. _currentcolumn = 0;
  72. _prevtoken = -1;
  73. _reached_eof = SQFalse;
  74. Next();
  75. }
  76. void SQLexer::Error(const SQChar *err)
  77. {
  78. _errfunc(_errtarget,err);
  79. }
  80. void SQLexer::Next()
  81. {
  82. SQInteger t = _readf(_up);
  83. if(t > MAX_CHAR) Error(_SC("Invalid character"));
  84. if(t != 0) {
  85. _currdata = (LexChar)t;
  86. return;
  87. }
  88. _currdata = SQUIRREL_EOB;
  89. _reached_eof = SQTrue;
  90. }
  91. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  92. {
  93. SQObjectPtr itr, key, val;
  94. SQInteger nitr;
  95. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  96. itr = (SQInteger)nitr;
  97. if(((SQInteger)_integer(val)) == tok)
  98. return _stringval(key);
  99. }
  100. return NULL;
  101. }
  102. void SQLexer::LexBlockComment()
  103. {
  104. bool done = false;
  105. while(!done) {
  106. switch(CUR_CHAR) {
  107. case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
  108. case _SC('\n'): _currentline++; NEXT(); continue;
  109. case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
  110. default: NEXT();
  111. }
  112. }
  113. }
  114. void SQLexer::LexLineComment()
  115. {
  116. do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
  117. }
  118. SQInteger SQLexer::Lex()
  119. {
  120. _lasttokenline = _currentline;
  121. while(CUR_CHAR != SQUIRREL_EOB) {
  122. switch(CUR_CHAR){
  123. case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
  124. case _SC('\n'):
  125. _currentline++;
  126. _prevtoken=_curtoken;
  127. _curtoken=_SC('\n');
  128. NEXT();
  129. _currentcolumn=1;
  130. continue;
  131. case _SC('#'): LexLineComment(); continue;
  132. case _SC('/'):
  133. NEXT();
  134. switch(CUR_CHAR){
  135. case _SC('*'):
  136. NEXT();
  137. LexBlockComment();
  138. continue;
  139. case _SC('/'):
  140. LexLineComment();
  141. continue;
  142. case _SC('='):
  143. NEXT();
  144. RETURN_TOKEN(TK_DIVEQ);
  145. continue;
  146. case _SC('>'):
  147. NEXT();
  148. RETURN_TOKEN(TK_ATTR_CLOSE);
  149. continue;
  150. default:
  151. RETURN_TOKEN('/');
  152. }
  153. case _SC('='):
  154. NEXT();
  155. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
  156. else { NEXT(); RETURN_TOKEN(TK_EQ); }
  157. case _SC('<'):
  158. NEXT();
  159. switch(CUR_CHAR) {
  160. case _SC('='):
  161. NEXT();
  162. if(CUR_CHAR == _SC('>')) {
  163. NEXT();
  164. RETURN_TOKEN(TK_3WAYSCMP);
  165. }
  166. RETURN_TOKEN(TK_LE)
  167. break;
  168. case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
  169. case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
  170. case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
  171. }
  172. RETURN_TOKEN('<');
  173. case _SC('>'):
  174. NEXT();
  175. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
  176. else if(CUR_CHAR == _SC('>')){
  177. NEXT();
  178. if(CUR_CHAR == _SC('>')){
  179. NEXT();
  180. RETURN_TOKEN(TK_USHIFTR);
  181. }
  182. RETURN_TOKEN(TK_SHIFTR);
  183. }
  184. else { RETURN_TOKEN('>') }
  185. case _SC('!'):
  186. NEXT();
  187. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
  188. else { NEXT(); RETURN_TOKEN(TK_NE); }
  189. case _SC('@'): {
  190. SQInteger stype;
  191. NEXT();
  192. if(CUR_CHAR != _SC('"')) {
  193. RETURN_TOKEN('@');
  194. }
  195. if((stype=ReadString('"',true))!=-1) {
  196. RETURN_TOKEN(stype);
  197. }
  198. Error(_SC("error parsing the string"));
  199. }
  200. case _SC('"'):
  201. case _SC('\''): {
  202. SQInteger stype;
  203. if((stype=ReadString(CUR_CHAR,false))!=-1){
  204. RETURN_TOKEN(stype);
  205. }
  206. Error(_SC("error parsing the string"));
  207. }
  208. case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
  209. case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
  210. {SQInteger ret = CUR_CHAR;
  211. NEXT(); RETURN_TOKEN(ret); }
  212. case _SC('.'):
  213. NEXT();
  214. if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
  215. NEXT();
  216. if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
  217. NEXT();
  218. RETURN_TOKEN(TK_VARPARAMS);
  219. case _SC('&'):
  220. NEXT();
  221. if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
  222. else { NEXT(); RETURN_TOKEN(TK_AND); }
  223. case _SC('|'):
  224. NEXT();
  225. if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
  226. else { NEXT(); RETURN_TOKEN(TK_OR); }
  227. case _SC(':'):
  228. NEXT();
  229. if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
  230. else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
  231. case _SC('*'):
  232. NEXT();
  233. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
  234. else RETURN_TOKEN('*');
  235. case _SC('%'):
  236. NEXT();
  237. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
  238. else RETURN_TOKEN('%');
  239. case _SC('-'):
  240. NEXT();
  241. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
  242. else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
  243. else RETURN_TOKEN('-');
  244. case _SC('+'):
  245. NEXT();
  246. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
  247. else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
  248. else RETURN_TOKEN('+');
  249. case SQUIRREL_EOB:
  250. return 0;
  251. default:{
  252. if (scisdigit(CUR_CHAR)) {
  253. SQInteger ret = ReadNumber();
  254. RETURN_TOKEN(ret);
  255. }
  256. else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
  257. SQInteger t = ReadID();
  258. RETURN_TOKEN(t);
  259. }
  260. else {
  261. SQInteger c = CUR_CHAR;
  262. if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
  263. NEXT();
  264. RETURN_TOKEN(c);
  265. }
  266. RETURN_TOKEN(0);
  267. }
  268. }
  269. }
  270. return 0;
  271. }
  272. SQInteger SQLexer::GetIDType(SQChar *s)
  273. {
  274. SQObjectPtr t;
  275. if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
  276. return SQInteger(_integer(t));
  277. }
  278. return TK_IDENTIFIER;
  279. }
  280. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  281. {
  282. INIT_TEMP_STRING();
  283. NEXT();
  284. if(IS_EOB()) return -1;
  285. for(;;) {
  286. while(CUR_CHAR != ndelim) {
  287. switch(CUR_CHAR) {
  288. case SQUIRREL_EOB:
  289. Error(_SC("unfinished string"));
  290. return -1;
  291. case _SC('\n'):
  292. if(!verbatim) Error(_SC("newline in a constant"));
  293. APPEND_CHAR(CUR_CHAR); NEXT();
  294. _currentline++;
  295. break;
  296. case _SC('\\'):
  297. if(verbatim) {
  298. APPEND_CHAR('\\'); NEXT();
  299. }
  300. else {
  301. NEXT();
  302. switch(CUR_CHAR) {
  303. case _SC('x'): NEXT(); {
  304. if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
  305. const SQInteger maxdigits = 4;
  306. SQChar temp[maxdigits+1];
  307. SQInteger n = 0;
  308. while(isxdigit(CUR_CHAR) && n < maxdigits) {
  309. temp[n] = CUR_CHAR;
  310. n++;
  311. NEXT();
  312. }
  313. temp[n] = 0;
  314. SQChar *sTemp;
  315. APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
  316. }
  317. break;
  318. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  319. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  320. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  321. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  322. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  323. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  324. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  325. case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
  326. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  327. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  328. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  329. default:
  330. Error(_SC("unrecognised escaper char"));
  331. break;
  332. }
  333. }
  334. break;
  335. default:
  336. APPEND_CHAR(CUR_CHAR);
  337. NEXT();
  338. }
  339. }
  340. NEXT();
  341. if(verbatim && CUR_CHAR == '"') { //double quotation
  342. APPEND_CHAR(CUR_CHAR);
  343. NEXT();
  344. }
  345. else {
  346. break;
  347. }
  348. }
  349. TERMINATE_BUFFER();
  350. SQInteger len = _longstr.size()-1;
  351. if(ndelim == _SC('\'')) {
  352. if(len == 0) Error(_SC("empty constant"));
  353. if(len > 1) Error(_SC("constant too long"));
  354. _nvalue = _longstr[0];
  355. return TK_INTEGER;
  356. }
  357. _svalue = &_longstr[0];
  358. return TK_STRING_LITERAL;
  359. }
  360. void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  361. {
  362. *res = 0;
  363. while(*s != 0)
  364. {
  365. if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
  366. else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
  367. else { assert(0); }
  368. }
  369. }
  370. void LexInteger(const SQChar *s,SQUnsignedInteger *res)
  371. {
  372. *res = 0;
  373. while(*s != 0)
  374. {
  375. *res = (*res)*10+((*s++)-'0');
  376. }
  377. }
  378. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  379. void LexOctal(const SQChar *s,SQUnsignedInteger *res)
  380. {
  381. *res = 0;
  382. while(*s != 0)
  383. {
  384. if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
  385. else { assert(0); }
  386. }
  387. }
  388. SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
  389. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  390. SQInteger SQLexer::ReadNumber()
  391. {
  392. #define TINT 1
  393. #define TFLOAT 2
  394. #define THEX 3
  395. #define TSCIENTIFIC 4
  396. #define TOCTAL 5
  397. SQInteger type = TINT, firstchar = CUR_CHAR;
  398. SQChar *sTemp;
  399. INIT_TEMP_STRING();
  400. NEXT();
  401. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  402. if(scisodigit(CUR_CHAR)) {
  403. type = TOCTAL;
  404. while(scisodigit(CUR_CHAR)) {
  405. APPEND_CHAR(CUR_CHAR);
  406. NEXT();
  407. }
  408. if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
  409. }
  410. else {
  411. NEXT();
  412. type = THEX;
  413. while(isxdigit(CUR_CHAR)) {
  414. APPEND_CHAR(CUR_CHAR);
  415. NEXT();
  416. }
  417. if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
  418. }
  419. }
  420. else {
  421. APPEND_CHAR((int)firstchar);
  422. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  423. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  424. if(isexponent(CUR_CHAR)) {
  425. if(type != TFLOAT) Error(_SC("invalid numeric format"));
  426. type = TSCIENTIFIC;
  427. APPEND_CHAR(CUR_CHAR);
  428. NEXT();
  429. if(CUR_CHAR == '+' || CUR_CHAR == '-'){
  430. APPEND_CHAR(CUR_CHAR);
  431. NEXT();
  432. }
  433. if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
  434. }
  435. APPEND_CHAR(CUR_CHAR);
  436. NEXT();
  437. }
  438. }
  439. TERMINATE_BUFFER();
  440. switch(type) {
  441. case TSCIENTIFIC:
  442. case TFLOAT:
  443. _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
  444. return TK_FLOAT;
  445. case TINT:
  446. LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  447. return TK_INTEGER;
  448. case THEX:
  449. LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  450. return TK_INTEGER;
  451. case TOCTAL:
  452. LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  453. return TK_INTEGER;
  454. }
  455. return 0;
  456. }
  457. SQInteger SQLexer::ReadID()
  458. {
  459. SQInteger res;
  460. INIT_TEMP_STRING();
  461. do {
  462. APPEND_CHAR(CUR_CHAR);
  463. NEXT();
  464. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  465. TERMINATE_BUFFER();
  466. res = GetIDType(&_longstr[0]);
  467. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
  468. _svalue = &_longstr[0];
  469. }
  470. return res;
  471. }