sqlexer.nut 37 KB


  1. //__max_print_stack_str_size <- 256;
  2. #include "sq_lexer_tokens.nut"
  3. #include "sqtypedefs.nut"
  // Integer stand-ins for booleans, used for the SQBool fields/parameters below.
  4. const SQFalse = 0;
  5. const SQTrue = 1;
  // Value Next() stores in CUR_CHAR once the reader has no more input.
  6. const SQUIRREL_EOB = -1;
  // Returned by fgetc() below when the cursor has reached the end of the data.
  7. const EOF = -1;
  // Largest value Next() accepts from the reader before reporting "Invalid character".
  8. const MAX_CHAR = 0xFF;
  // Maximum number of digits ReadNumber() accepts in a hexadecimal literal.
  9. const MAX_HEX_DIGITS = 12;
  // Kinds of numeric literal that ReadNumber() distinguishes while scanning.
  10. const TINT =1;
  11. const TFLOAT = 2;
  12. const THEX = 3;
  13. const TSCIENTIFIC = 4;
  14. const TOCTAL = 5;
  // Tells whether `c` is an exponent marker of a numeric literal ('e' or 'E').
  SQInteger isexponent(SQInteger c)
  {
      if(c == 'e') return true;
      return c == 'E';
  }
  // Tells whether `c` is an ASCII decimal digit ('0'..'9').
  SQInteger scisdigit(SQInteger c)
  {
      if(c < '0') return false;
      return c <= '9';
  }
  // Converts the decimal digit character `c` to its numeric value (0..9).
  // Any other character trips the assert; 0 is returned if execution continues.
  SQInteger scdigitvalue(SQInteger c)
  {
      if(c < '0' || c > '9')
      {
          assert(0);
          return 0;
      }
      return c - '0';
  }
  // Tells whether `c` is an ASCII octal digit ('0'..'7').
  SQInteger scisodigit(SQInteger c)
  {
      if(c < '0') return false;
      return c <= '7';
  }
  // Converts the octal digit character `c` to its numeric value (0..7).
  // Any other character trips the assert; 0 is returned if execution continues.
  SQInteger scodigitvalue(SQInteger c)
  {
      if(c < '0' || c > '7')
      {
          assert(0);
          return 0;
      }
      return c - '0';
  }
  // Tells whether `c` is an ASCII hexadecimal digit (0-9, a-f or A-F).
  SQInteger scisxdigit(SQInteger c)
  {
      if(c >= '0' && c <= '9') return true;
      if(c >= 'a' && c <= 'f') return true;
      return c >= 'A' && c <= 'F';
  }
  // Converts the hexadecimal digit character `c` (either case) to its value
  // (0..15). Any other character trips the assert; 0 is returned if execution
  // continues.
  SQInteger scxdigitvalue(SQInteger c)
  {
      SQInteger value = -1;
      if(c >= '0' && c <= '9') value = c - '0';
      else if(c >= 'a' && c <= 'f') value = (c - 'a') + 10;
      else if(c >= 'A' && c <= 'F') value = (c - 'A') + 10;
      if(value < 0)
      {
          assert(0);
          return 0;
      }
      return value;
  }
  // Tells whether `c` is an ASCII letter (a-z or A-Z).
  SQInteger scisalpha(SQInteger c)
  {
      if(c >= 'a' && c <= 'z') return true;
      return c >= 'A' && c <= 'Z';
  }
  // Tells whether `c` is an ASCII letter or decimal digit.
  SQInteger scisalnum(SQInteger c)
  {
      if(c >= 'a' && c <= 'z') return true;
      if(c >= 'A' && c <= 'Z') return true;
      return c >= '0' && c <= '9';
  }
  // Returns the upper-case form of an ASCII lower-case letter; every other
  // value is returned unchanged.
  SQInteger sctoupper(SQInteger c)
  {
      return (c >= 'a' && c <= 'z') ? c - ('a'-'A') : c;
  }
  // Accumulates the hexadecimal digits of `s`, most significant first, into an
  // integer. `sz` is accepted but not used: the whole of `s` is consumed.
  SQUnsignedInteger LexHexadecimal(const SQChar_ptr_t s, SQInteger sz)
  {
      SQUnsignedInteger acc = 0;
      foreach(digit in s)
      {
          acc = acc * 16 + scxdigitvalue(digit);
      }
      return acc;
  }
  // Accumulates the decimal digits of `s`, most significant first, into an
  // integer. `sz` is accepted but not used: the whole of `s` is consumed.
  SQUnsignedInteger LexInteger(const SQChar_ptr_t s, SQInteger sz)
  {
      SQUnsignedInteger acc = 0;
      foreach(digit in s)
      {
          acc = acc * 10 + scdigitvalue(digit);
      }
      return acc;
  }
  // Accumulates the octal digits of `s`, most significant first, into an
  // integer. `sz` is accepted but not used: the whole of `s` is consumed.
  SQUnsignedInteger LexOctal(const SQChar_ptr_t s, SQInteger sz)
  {
      SQUnsignedInteger acc = 0;
      foreach(digit in s)
      {
          acc = acc * 8 + scodigitvalue(digit);
      }
      return acc;
  }
  94. //template<typename T>
  // Empty placeholder type: instances are only passed through to
  // SQString.Create / SQTable.Create, which do not read anything from them.
  95. class SQSharedState
  96. {
  97. };
  // In-memory replacement for a C stream: holds the whole text plus a read
  // cursor that fgetc() advances.
  struct FILE
  {
      string_t data;  // full text being read
      int_t cursor;   // index of the next character to hand out
      int_t size;     // length of `data`, captured at construction

      // Starts reading `dt` from its first character.
      FILE(string_t dt)
      {
          cursor = 0;
          data = dt;
          size = data.size();
      }
  };
  typedef class_ptr_t FILE_ptr_t;
  // Returns the character under the cursor of `fp` and advances the cursor,
  // or EOF once the cursor has reached the end of the data.
  SQInteger fgetc(FILE_ptr_t fp)
  {
      if(fp->cursor >= fp->size) return EOF;
      return fp->data[fp->cursor++];
  }
  // Reader callback handed to the lexer: fetches one character from `fp`
  // through fgetc() and reports end of input as 0 instead of EOF.
  static SQInteger compilerReadFunc(SQUserPointer fp)
  {
      SQInteger ch = fgetc(fp);
      return (ch == EOF) ? 0 : ch;
  }
  // Stand-in for the VM string type: "creating" a string just hands back the
  // native string it was given.
  struct SQString
  {
      // Returns `s` unchanged; `ss` is accepted but not used.
      static SQString_ptr_t Create(SQSharedState_ptr_t ss, string_t s)
      {
          return s;
      }
  };
  // Stand-in for the VM table type: a thin wrapper around a native table that
  // offers only the operations the lexer uses.
  struct SQTable
  {
      table_t _tbl; // backing storage for every slot

      // Starts with an empty backing table.
      SQTable()
      {
          _tbl = {};
      }

      // Factory returning a fresh table; `ss` and `n` are accepted but not used.
      static SQTable_ptr_t Create(SQSharedState_ptr_t ss, SQInteger n)
      {
          return SQTable();
      }

      // Lifetime hook invoked by SQLexerNut::Init() and ~SQLexerNut() on the
      // keyword table. This wrapper owns nothing that needs explicit freeing,
      // so the hook is a no-op; it exists so that those calls resolve.
      void Release()
      {
      }

      // Stores `val` under `key` with a raw set and always reports success.
      bool NewSlot(const SQObjectPtr &key,const SQObjectPtr &val)
      {
          table_rawset(_tbl, key, val);
          return true;
      }

      // Raw lookup of `key`; `keylen` is accepted but not used.
      // The looked-up value (NULL when absent) is what gets returned, so
      // callers test the result for truthiness.
      // NOTE(review): `val` is declared as an out-parameter; confirm whether
      // this dialect actually propagates the assignment back to the caller.
      inline bool GetStr(const SQChar_ptr_t key,SQInteger keylen,SQObjectPtr &val)
      {
          val = table_rawget(_tbl, key, NULL);
          return val;
      }

      // Iteration is not implemented in this stand-in: always returns -1.
      SQInteger Next(bool getweakrefs,const SQObjectPtr &refpos, SQObjectPtr &outkey, SQObjectPtr &outval)
      {
          return -1;
      }
  };
  160. struct SQLexerNut
  161. {
  162. SQInteger CUR_CHAR;
  // Creates a lexer with no keyword table yet (Init() builds it) and an empty
  // scratch buffer for the text of the token being scanned.
  SQLexerNut()
  {
      _longstr = blob(0, 8192);
      _keywords = 0;
  }
  // Releases the keyword table, if one was ever built.
  // The constructor leaves _keywords at 0 until Init() runs, so the release
  // must be guarded exactly as Init() guards it.
  virtual ~SQLexerNut()
  {
      if(_keywords) _keywords->Release();
  }
  // Empties the scratch buffer before a new token's text is collected.
  void INIT_TEMP_STRING()
  {
      _longstr.clear();
  }

  // Intentionally empty: the scratch buffer is read back through tostring(),
  // so no terminator character is appended here.
  void TERMINATE_BUFFER()
  {
  }

  // Appends the single character `c` to the scratch buffer.
  void APPEND_CHAR(SQInteger c)
  {
      _longstr.writen(c, 'c');
  }
  // Advances one character through Next() and bumps the column counter.
  // Returns the negative code from Next() on failure, otherwise the (non
  // negative) result of Next(). Previously the success path fell off the end
  // of the function without returning anything.
  // NOTE(review): Next() already increments _currentcolumn when it reads a
  // character, so this helper counts the column twice - confirm whether the
  // extra increment is still wanted before calling it from new code.
  SQInteger NEXT()
  {
      SQInteger rc = Next();
      if(rc < 0) return rc;
      _currentcolumn++;
      return rc;
  }
  // Records `t` as the current token (shifting the old one into _prevtoken)
  // and returns it, so callers can write `return RETURN_TOKEN(x)`.
  SQInteger RETURN_TOKEN(SQInteger t)
  {
      _prevtoken = _curtoken;
      _curtoken = t;
      return t;
  }

  // True once the current character is the end-of-buffer marker (or below it).
  bool IS_EOB()
  {
      return CUR_CHAR <= SQUIRREL_EOB;
  }
  // Prepares the lexer for a new input.
  //   ss            - shared state, handed on when the keyword table is built
  //   rg, up        - reader callback and the opaque value passed to it
  //   efunc, ed     - error callback and the target passed to it by Error()
  //   want_comments - when set, comments are captured and returned as tokens
  // Any previous keyword table is released and rebuilt, then the reader is
  // reset at line 1. Returns the result of ResetReader().
  SQInteger Init(SQSharedState_ptr_t ss,SQLEXREADFUNC rg,SQUserPointer up,
      CompilerErrorFunc efunc,void_ptr_t ed, SQBool want_comments=SQFalse)
  {
      _sharedstate = ss;
      _errfunc = efunc;
      _errtarget = ed;
      _want_comments = want_comments;
      _svalue = NULL;

      if(_keywords)
      {
          _keywords->Release();
      }
      _keywords = GetKeywords();

      return ResetReader(rg, up, 1);
  }
  // Points the lexer at a new reader and rewinds its position bookkeeping:
  // both line counters start at `line`, both column counters at 0, there is no
  // previous token and end of input has not been seen. Finally the first
  // character is fetched; the result of that Next() call is returned.
  SQInteger ResetReader(SQLEXREADFUNC rg, SQUserPointer up, SQInteger line)
  {
      _up = up;
      _readf = rg;

      _currentline = line;
      _lasttokenline = line;
      _currentcolumn = 0;
      _lasttokencolumn = 0;

      _reached_eof = SQFalse;
      _prevtoken = -1;

      return Next();
  }
  // Registers keyword text `key` with token id `id` in table `tbl`.
  void ADD_KEYWORD(SQTable_ptr_t tbl, string_t &key, SQInteger id)
  {
      SQString_ptr_t name = SQString.Create(_sharedstate, key);
      tbl->NewSlot(name, id);
  }
  // Builds the keyword table: every reserved word mapped to its token id.
  // Several spellings deliberately share one id (auto/let/local/var all map to
  // TK_LOCAL, null/NULL to TK_NULL). Entries are registered in list order.
  SQTable_ptr_t GetKeywords()
  {
      SQTable_ptr_t tbl = SQTable.Create(_sharedstate, (TK_LAST_ENUM_TOKEN - TK_FIRST_ENUM_TOKEN - 1) /*26*/);
      array_t keyword_list = [
          ["any_t", TK_LOCAL_ANY_T],
          ["array_t", TK_LOCAL_ARRAY_T],
          ["as", TK_AS],
          ["auto", TK_LOCAL],
          ["base", TK_BASE],
          ["bool_t", TK_LOCAL_BOOL_T],
          ["break", TK_BREAK],
          ["case", TK_CASE],
          ["catch", TK_CATCH],
          ["char_t", TK_LOCAL_CHAR_T],
          ["class", TK_CLASS],
          ["clone", TK_CLONE],
          ["constructor", TK_CONSTRUCTOR],
          ["const", TK_CONST],
          ["constexpr", TK_CONSTEXPR],
          ["continue", TK_CONTINUE],
          ["declare", TK_DECLARE],
          ["default", TK_DEFAULT],
          ["delete", TK_DELETE],
          ["destructor", TK_DESTRUCTOR],
          ["do", TK_DO],
          ["double_t", TK_LOCAL_DOUBLE_T],
          ["else", TK_ELSE],
          ["enum", TK_ENUM],
          ["extends", TK_EXTENDS],
          ["extern", TK_EXTERN],
          ["false", TK_FALSE],
          ["__FILE__", TK___FILE__],
          ["float_t", TK_LOCAL_FLOAT_T],
          ["foreach", TK_FOREACH],
          ["for", TK_FOR],
          ["friend", TK_FRIEND],
          ["function", TK_FUNCTION],
          ["__FUNCTION__", TK___FUNCTION__],
          ["if", TK_IF],
          ["instanceof", TK_INSTANCEOF],
          ["int16_t", TK_LOCAL_INT16_T],
          ["int32_t", TK_LOCAL_INT32_T],
          ["int64_t", TK_LOCAL_INT64_T],
          ["int8_t", TK_LOCAL_INT8_T],
          ["in", TK_IN],
          ["inline", TK_INLINE],
          ["int_t", TK_LOCAL_INT_T],
          ["let", TK_LOCAL],
          ["__LINE__", TK___LINE__],
          ["local", TK_LOCAL],
          ["long_double_t", TK_LOCAL_LONG_DOUBLE_T],
          ["new", TK_IGNORE],
          ["noexcept", TK_NOEXCEPT],
          ["number_t", TK_LOCAL_NUMBER_T],
          ["null", TK_NULL],
          ["NULL", TK_NULL],
          ["private", TK_PRIVATE],
          ["public", TK_PUBLIC],
          ["resume", TK_RESUME],
          ["return", TK_RETURN],
          ["size_t", TK_LOCAL_SIZE_T],
          ["ssize_t", TK_LOCAL_SSIZE_T],
          ["static", TK_STATIC],
          ["string_t", TK_LOCAL_STRING_T],
          ["struct", TK_STRUCT],
          ["switch", TK_SWITCH],
          ["table_t", TK_LOCAL_TABLE_T],
          ["template", TK_TEMPLATE],
          ["this", TK_THIS],
          ["throw", TK_THROW],
          ["typedef", TK_TYPEDEF],
          ["true", TK_TRUE],
          ["try", TK_TRY],
          ["typeof", TK_TYPEOF],
          ["uint16_t", TK_LOCAL_UINT16_T],
          ["uint32_t", TK_LOCAL_UINT32_T],
          ["uint64_t", TK_LOCAL_UINT64_T],
          ["uint8_t", TK_LOCAL_UINT8_T],
          ["uint_t", TK_LOCAL_UINT_T],
          ["unsafe", TK_UNSAFE],
          ["using", TK_USING],
          ["var", TK_LOCAL],
          ["virtual", TK_VIRTUAL],
          ["void_ptr_t", TK_LOCAL_VOIDPTR_T],
          ["void", TK_VOID],
          ["volatile", TK_VOLATILE],
          ["wchar_t", TK_LOCAL_WCHAR_T],
          ["weakref_t", TK_LOCAL_WEAKREF_T],
          ["while", TK_WHILE],
          ["yield", TK_YIELD]
      ];
      foreach(entry in keyword_list)
      {
          ADD_KEYWORD(tbl, entry[0], entry[1]);
      }
      return tbl;
  }
  // Records `err` as the last error, forwards it to the error callback (when
  // one is installed) together with the error target, and returns -1 so that
  // callers can write `return Error(...)`.
  // The extra arguments are accepted for call-site compatibility but are not
  // formatted into the message: `err` is stored and reported verbatim.
  // The former `if(0)` branch (a throw plus va_list formatting through an
  // undeclared `fmt`) could never run and has been removed.
  SQInteger Error(const SQChar_ptr_t err, ...)
  {
      _lasterror = err;
      if(_errfunc) _errfunc(_errtarget,_lasterror);
      return -1;
  }
  // Scans forward from CUR_CHAR and returns the id of the next token.
  // Single-character tokens are returned as their own character code, the
  // others as TK_* ids. Returns 0 once the end of the buffer is reached and -1
  // when Next() or one of the sub-lexers (comments, strings, numbers,
  // identifiers) reports a failure. The position at entry is saved in
  // _lasttokenline / _lasttokencolumn, and every returned token goes through
  // RETURN_TOKEN so _prevtoken / _curtoken stay up to date.
  326. SQInteger Lex()
  327. {
  328. //print("Lex", __LINE__, _currentline, _currentcolumn, CUR_CHAR);
  329. _lasttokenline = _currentline;
  330. _lasttokencolumn = _currentcolumn;
  331. while(CUR_CHAR != SQUIRREL_EOB)
  332. {
  333. switch(CUR_CHAR)
  334. {
  // Blanks are skipped without producing a token.
  335. case '\t':
  336. case '\r':
  337. case ' ':
  338. if(Next()) return -1;
  339. continue;
  // A newline is not returned to the caller, but it is recorded as the
  // current token, bumps the line counter and resets the column to 1.
  340. case '\n':
  341. _currentline++;
  342. _prevtoken=_curtoken;
  343. _curtoken='\n';
  344. if(Next()) return -1;
  345. _currentcolumn=1;
  346. continue;
  // "#!" is consumed as a line comment; any other '#' yields TK_PRAGMA.
  347. case '#':
  348. if(Next()) return -1;
  349. if(CUR_CHAR == '!') //shell shebang
  350. {
  351. if(LexLineComment()) return -1;
  352. if(_want_comments) return RETURN_TOKEN(TK_COMMENT_LINE)
  353. continue;
  354. }
  355. return RETURN_TOKEN(TK_PRAGMA);
  356. continue;
  // '/' starts a block comment, a line comment, "/=", "/>" or plain division.
  // Comments are only returned as tokens when _want_comments is set.
  357. case '/':
  358. if(Next()) return -1;
  359. switch(CUR_CHAR)
  360. {
  361. case '*':
  362. if(LexBlockComment()) return -1;
  363. if(_want_comments) return RETURN_TOKEN(TK_COMMENT_BLOCK)
  364. continue;
  365. case '/':
  366. if(LexLineComment()) return -1;
  367. if(_want_comments) return RETURN_TOKEN(TK_COMMENT_LINE)
  368. continue;
  369. case '=':
  370. if(Next()) return -1;
  371. return RETURN_TOKEN(TK_DIVEQ);
  372. continue;
  373. case '>':
  374. if(Next()) return -1;
  375. return RETURN_TOKEN(TK_ATTR_CLOSE);
  376. continue;
  377. default:
  378. return RETURN_TOKEN('/');
  379. }
  // "=", "==" and "===".
  380. case '=':
  381. if(Next()) return -1;
  382. if (CUR_CHAR != '=')
  383. {
  384. return RETURN_TOKEN('=')
  385. }
  386. else
  387. {
  388. if(Next()) return -1;
  389. if (CUR_CHAR == '=')
  390. {
  391. if(Next()) return -1;
  392. return RETURN_TOKEN(TK_EQ_IDENTITY)
  393. }
  394. else
  395. {
  396. return RETURN_TOKEN(TK_EQ);
  397. }
  398. }
  // "<", "<=", "<=>", "<-", "<<" and "</".
  399. case '<':
  400. if(Next()) return -1;
  401. switch(CUR_CHAR)
  402. {
  403. case '=':
  404. if(Next()) return -1;
  405. if(CUR_CHAR == '>')
  406. {
  407. if(Next()) return -1;
  408. return RETURN_TOKEN(TK_3WAYSCMP);
  409. }
  410. return RETURN_TOKEN(TK_LE)
  411. break;
  412. case '-':
  413. if(Next()) return -1;
  414. return RETURN_TOKEN(TK_NEWSLOT);
  415. break;
  416. case '<':
  417. if(Next()) return -1;
  418. return RETURN_TOKEN(TK_SHIFTL);
  419. break;
  420. case '/':
  421. if(Next()) return -1;
  422. return RETURN_TOKEN(TK_ATTR_OPEN);
  423. break;
  424. }
  425. return RETURN_TOKEN('<');
  // ">", ">=", ">>" and ">>>".
  426. case '>':
  427. if(Next()) return -1;
  428. if (CUR_CHAR == '=')
  429. {
  430. if(Next()) return -1;
  431. return RETURN_TOKEN(TK_GE);
  432. }
  433. else if(CUR_CHAR == '>')
  434. {
  435. if(Next()) return -1;
  436. if(CUR_CHAR == '>')
  437. {
  438. if(Next()) return -1;
  439. return RETURN_TOKEN(TK_USHIFTR);
  440. }
  441. return RETURN_TOKEN(TK_SHIFTR);
  442. }
  443. else
  444. {
  445. return RETURN_TOKEN('>')
  446. }
  // "!", "!=" and "!==".
  447. case '!':
  448. if(Next()) return -1;
  449. if (CUR_CHAR != '=')
  450. {
  451. return RETURN_TOKEN('!')
  452. }
  453. else
  454. {
  455. if(Next()) return -1;
  456. if (CUR_CHAR == '=')
  457. {
  458. if(Next()) return -1;
  459. return RETURN_TOKEN(TK_NE_IDENTITY)
  460. }
  461. else
  462. {
  463. return RETURN_TOKEN(TK_NE);
  464. }
  465. }
  // '@' directly followed by '"' starts a verbatim string; otherwise it is a
  // plain '@' token.
  466. case '@':
  467. {
  468. SQInteger stype;
  469. if(Next()) return -1;
  470. if(CUR_CHAR != '"')
  471. {
  472. return RETURN_TOKEN('@');
  473. }
  474. if((stype=ReadString('"',true))!=-1)
  475. {
  476. return RETURN_TOKEN(stype);
  477. }
  478. return Error("error parsing the string");
  479. }
  // Ordinary string literal or character constant.
  480. case '"':
  481. case '\'':
  482. {
  483. SQInteger stype;
  484. if((stype=ReadString(CUR_CHAR,false))!=-1)
  485. {
  486. return RETURN_TOKEN(stype);
  487. }
  488. return Error("error parsing the string");
  489. }
  // Punctuation returned as itself, except that an opening bracket directly
  // followed by '=' is handed to ReadString as a long-string delimiter.
  490. case '{':
  491. case '}':
  492. case '(':
  493. case ')':
  494. case '[':
  495. case ']':
  496. case ';':
  497. case ',':
  498. case '?':
  499. case '~':
  500. {
  501. SQInteger ret = CUR_CHAR;
  502. if(Next()) return -1;
  503. if((ret == '[' || ret == '{' || ret == '(') && CUR_CHAR == '=')
  504. {
  505. //lets try lua literal delimiters
  506. SQInteger stype;
  507. if((stype=ReadString(ret,true))!=-1)
  508. {
  509. return RETURN_TOKEN(stype);
  510. }
  511. return Error("error parsing the string");
  512. }
  513. else return RETURN_TOKEN(ret);
  514. }
  // "." and "..."; exactly two dots is an error.
  515. case '.':
  516. if(Next()) return -1;
  517. if (CUR_CHAR != '.')
  518. {
  519. return RETURN_TOKEN('.')
  520. }
  521. if(Next()) return -1;
  522. if (CUR_CHAR != '.')
  523. {
  524. return Error("invalid token '..'");
  525. }
  526. if(Next()) return -1;
  527. return RETURN_TOKEN(TK_VARPARAMS);
  528. case '^':
  529. if(Next()) return -1;
  530. //if (CUR_CHAR == '='){ if(Next()) return -1; return RETURN_TOKEN(TK_BIT_XOR_EQ);}
  531. return RETURN_TOKEN('^');
  532. case '&':
  533. if(Next()) return -1;
  534. //if (CUR_CHAR == '='){ if(Next()) return -1; return RETURN_TOKEN(TK_BIT_AND_EQ);}
  535. if (CUR_CHAR != '&')
  536. {
  537. return RETURN_TOKEN('&')
  538. }
  539. else
  540. {
  541. if(Next()) return -1;
  542. return RETURN_TOKEN(TK_AND);
  543. }
  544. case '|':
  545. if(Next()) return -1;
  546. //if (CUR_CHAR == '='){ if(Next()) return -1; return RETURN_TOKEN(TK_BIT_OR_EQ);}
  547. if (CUR_CHAR != '|')
  548. {
  549. return RETURN_TOKEN('|')
  550. }
  551. else
  552. {
  553. if(Next()) return -1;
  554. return RETURN_TOKEN(TK_OR);
  555. }
  556. case ':':
  557. if(Next()) return -1;
  558. if (CUR_CHAR != ':')
  559. {
  560. return RETURN_TOKEN(':')
  561. }
  562. else
  563. {
  564. if(Next()) return -1;
  565. return RETURN_TOKEN(TK_DOUBLE_COLON);
  566. }
  567. case '*':
  568. if(Next()) return -1;
  569. if (CUR_CHAR == '=')
  570. {
  571. if(Next()) return -1;
  572. return RETURN_TOKEN(TK_MULEQ);
  573. }
  574. else return RETURN_TOKEN('*');
  575. case '%':
  576. if(Next()) return -1;
  577. if (CUR_CHAR == '=')
  578. {
  579. if(Next()) return -1;
  580. return RETURN_TOKEN(TK_MODEQ);
  581. }
  582. else return RETURN_TOKEN('%');
  // "-", "-=", "--"; "->" is reported as a plain '.' token.
  583. case '-':
  584. if(Next()) return -1;
  585. if (CUR_CHAR == '=')
  586. {
  587. if(Next()) return -1;
  588. return RETURN_TOKEN(TK_MINUSEQ);
  589. }
  590. else if (CUR_CHAR == '-')
  591. {
  592. if(Next()) return -1;
  593. return RETURN_TOKEN(TK_MINUSMINUS);
  594. }
  595. else if (CUR_CHAR == '>')
  596. {
  597. if(Next()) return -1; //accept C/C++ like pointers
  598. return RETURN_TOKEN('.');
  599. }
  600. else return RETURN_TOKEN('-');
  601. case '+':
  602. if(Next()) return -1;
  603. if (CUR_CHAR == '=')
  604. {
  605. if(Next()) return -1;
  606. return RETURN_TOKEN(TK_PLUSEQ);
  607. }
  608. else if (CUR_CHAR == '+')
  609. {
  610. if(Next()) return -1;
  611. return RETURN_TOKEN(TK_PLUSPLUS);
  612. }
  613. else return RETURN_TOKEN('+');
  614. case SQUIRREL_EOB:
  615. return 0;
  // Everything else: numbers, identifiers/keywords, or a single character
  // returned as its own code (control characters are rejected).
  616. default:
  617. {
  618. if (scisdigit(CUR_CHAR))
  619. {
  620. SQInteger ret = ReadNumber();
  621. if(ret < 0) return -1;
  622. return RETURN_TOKEN(ret);
  623. }
  624. else if (scisalpha(CUR_CHAR) || CUR_CHAR == '_')
  625. {
  626. SQInteger t = ReadID();
  627. if(t < 0) return -1;
  628. return RETURN_TOKEN(t);
  629. }
  630. else
  631. {
  632. SQInteger c = CUR_CHAR;
  633. if (sciscntrl((int)c)) return Error("unexpected character(control)");
  634. if(Next()) return -1;
  635. return RETURN_TOKEN(c);
  636. }
  637. return RETURN_TOKEN(0);
  638. }
  639. }
  640. }
  641. return 0;
  642. }
  // Maps a keyword token id back to keyword text.
  // Returns the first key met while iterating the keyword table whose value
  // equals `tok`, or NULL when no keyword carries that id. Ids shared by
  // several spellings (e.g. TK_LOCAL) therefore yield whichever spelling the
  // iteration reaches first.
  // The former `if(0)` fallback, which iterated through SQTable::Next(), could
  // never run and has been removed.
  const SQChar_ptr_t Tok2Str(SQInteger tok)
  {
      foreach(k,v in _keywords->_tbl)
      {
          if(v == tok) return k;
      }
      return NULL;
  }
  // Looks `tk_code` up among the values of the constant table and returns the
  // name of the first constant that matches; "()" when none does.
  const SQChar_ptr_t GetTokenName(int tk_code)
  {
      foreach(const_name, const_value in getconsttable())
      {
          if(const_value == tk_code)
          {
              return const_name;
          }
      }
      return "()";
  }
  679. //private:
  // Classifies the word `s`: returns the keyword's token id when `s` is a
  // keyword, TK_IDENTIFIER otherwise. The keyword table is scanned first and
  // then queried through GetStr(); `len` is only forwarded to GetStr().
  SQInteger GetIDType(const SQChar_ptr_t s,SQInteger len)
  {
      foreach(word, token_id in _keywords->_tbl)
      {
          if(word == s)
          {
              return token_id;
          }
      }

      SQObjectPtr found;
      if(_keywords->GetStr(s, len, found))
      {
          return found;
      }
      return TK_IDENTIFIER;
  }
  // Reads a string literal or character constant into the scratch buffer.
  //   ndelim   - the opening delimiter: '"', '\'' or, for long strings, the
  //              bracket ('[', '{' or '(') that preceded the '=' signs
  //   verbatim - when true, backslashes are copied literally, newlines are
  //              allowed, and a doubled '"' stands for one quote
  // On entry CUR_CHAR is either the opening quote or, for long strings, the
  // first '=' after the bracket. Long strings are closed by the matching
  // bracket, the same number of '=' signs, and the bracket again.
  // Returns TK_STRING_LITERAL (text in _svalue), TK_INTEGER for a character
  // constant (character code in _nvalue), or -1 on error.
  //
  // Fixes relative to the previous version:
  //  * the digit limit for \u / \U tested a copy of the character taken before
  //    the backslash was consumed, so it was always 8; it now tests the escape
  //    letter itself (4 digits for \u, 8 for \U);
  //  * a character constant stored the whole one-character string in _nvalue;
  //    it now stores the character code, as TK_INTEGER implies.
  SQInteger ReadString(SQInteger ndelim,bool verbatim)
  {
      INIT_TEMP_STRING();
      SQInteger start_equals = 0;
      SQChar cdelim1, cdelim2;
      if(ndelim == '{')
      {
          cdelim1 = '{';
          cdelim2 = '}';
      }
      else if(ndelim == '(')
      {
          cdelim1 = '(';
          cdelim2 = ')';
      }
      else
      {
          cdelim1 = '[';
          cdelim2 = ']';
      }
      if(CUR_CHAR == '=')
      {
          //lua like literal: count the '=' signs of the opening delimiter
          while(!IS_EOB() && CUR_CHAR == '=')
          {
              ++start_equals;
              if(Next()) return -1;
          }
          if(CUR_CHAR != cdelim1)
          {
              //it's not a lua literal delimiter
              return Error("expect '%c' on literal delimiter", cdelim1);
          }
          ndelim = cdelim2;
      }
      if(Next()) return -1; //step over the opening quote / bracket
      if(IS_EOB()) return -1;
      if(start_equals)
      {
          int cr_nl = CUR_CHAR == '\r';
          if(cr_nl) if(Next()) return -1;
          cr_nl = CUR_CHAR == '\n';
          if(cr_nl) if(Next()) return -1;
          if(cr_nl) //if a new line follows the start of delimiter drop it
          {
              ++_currentline;
              if(IS_EOB())
              {
                  return Error("unfinished string");
              }
          }
      }
      for(;;)
      {
          while(CUR_CHAR != ndelim)
          {
              switch(CUR_CHAR)
              {
              case SQUIRREL_EOB:
                  return Error("unfinished string");
              case '\n':
                  if(!verbatim) return Error("newline in a constant");
                  APPEND_CHAR(CUR_CHAR);
                  if(Next()) return -1;
                  _currentline++;
                  break;
              case '\\':
                  if(verbatim)
                  {
                      APPEND_CHAR('\\');
                      if(Next()) return -1;
                  }
                  else
                  {
                      if(Next()) return -1;
                      switch(CUR_CHAR)
                      {
                      case 'x':
                      {
                          const SQInteger maxdigits = sizeof(SQChar) * 2;
                          SQChar temp; //[maxdigits + 1];
                          if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
                          SQChar_ptr_t stemp;
                          APPEND_CHAR((SQChar)scstrtoul(temp, /*&*/stemp, 16));
                      }
                      break;
                      case 'U':
                      case 'u':
                      {
                          // CUR_CHAR is the escape letter here: \u takes up
                          // to 4 hex digits, \U up to 8.
                          const SQInteger maxdigits = CUR_CHAR == 'u' ? 4 : 8;
                          SQChar temp; //[8 + 1];
                          if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
                          SQChar_ptr_t stemp;
                          AddUTF8(scstrtoul(temp, /*&*/stemp, 16));
                      }
                      break;
                      case 't':
                          APPEND_CHAR('\t');
                          if(Next()) return -1;
                          break;
                      case 'a':
                          APPEND_CHAR('\a');
                          if(Next()) return -1;
                          break;
                      case 'b':
                          APPEND_CHAR('\b');
                          if(Next()) return -1;
                          break;
                      case 'n':
                          APPEND_CHAR('\n');
                          if(Next()) return -1;
                          break;
                      case 'r':
                          APPEND_CHAR('\r');
                          if(Next()) return -1;
                          break;
                      case 'v':
                          APPEND_CHAR('\v');
                          if(Next()) return -1;
                          break;
                      case 'f':
                          APPEND_CHAR('\f');
                          if(Next()) return -1;
                          break;
                      case '0':
                          APPEND_CHAR('\0');
                          if(Next()) return -1;
                          break;
                      case '\\':
                          APPEND_CHAR('\\');
                          if(Next()) return -1;
                          break;
                      case '"':
                          APPEND_CHAR('"');
                          if(Next()) return -1;
                          break;
                      case '\'':
                          APPEND_CHAR('\'');
                          if(Next()) return -1;
                          break;
                      default:
                          return Error("unrecognised escaper char");
                      }
                  }
                  break;
              default:
                  APPEND_CHAR(CUR_CHAR);
                  if(Next()) return -1;
              }
          }
          if(Next()) return -1; //step over the (candidate) closing delimiter
          if(start_equals)
          {
              // Long string: the bracket just consumed only closes the literal
              // when it is followed by the right number of '=' and the bracket
              // again; otherwise it is ordinary text and scanning goes on.
              bool lastBraceAdded = false;
              if(CUR_CHAR == '=')
              {
                  SQInteger end_equals = start_equals;
                  if(Next()) return -1;
                  if(CUR_CHAR == '=' || CUR_CHAR == cdelim2)
                  {
                      --end_equals;
                      while(!IS_EOB() && CUR_CHAR == '=')
                      {
                          --end_equals;
                          if(Next()) return -1;
                      }
                      if(end_equals) return Error("expect same number of '=' on literal delimiter");
                      if(CUR_CHAR != cdelim2) return Error("expect '%c' to close literal delimiter", cdelim2);
                      if(Next()) return -1;
                      break;
                  }
                  APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
                  APPEND_CHAR('=');
                  lastBraceAdded = true;
              }
              if(!lastBraceAdded) APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
              APPEND_CHAR(CUR_CHAR);
              if(Next()) return -1;
          }
          else if(verbatim && CUR_CHAR == '"') //double quotation
          {
              APPEND_CHAR(CUR_CHAR);
              if(Next()) return -1;
          }
          else
          {
              break;
          }
      }
      TERMINATE_BUFFER();
      SQInteger len = _longstr.size()/*-1*/;
      if(ndelim == '\'')
      {
          if(len == 0) return Error("empty constant");
          if(len > 1) return Error("constant too long");
          // Exactly one character: its code is the constant's integer value.
          _nvalue = _longstr.tostring()[0];
          return TK_INTEGER;
      }
      _svalue = /*&*/_longstr.tostring()/*[0]*/;
      return TK_STRING_LITERAL;
  }
  906. //#define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  // Reads a numeric literal starting at CUR_CHAR (a decimal digit).
  // A leading '0' followed by an octal digit starts an octal literal, a
  // leading "0x"/"0X" a hexadecimal one; anything else is decimal and becomes
  // a float as soon as a '.' or an exponent is seen.
  // Returns TK_INTEGER (value in _nvalue), TK_FLOAT (value in _fvalue) or a
  // negative value on error.
  SQInteger ReadNumber()
  {
      SQInteger kind = TINT;
      SQInteger leading = CUR_CHAR;
      SQUnsignedInteger parsed = 0;
      SQChar_ptr_t endptr;

      INIT_TEMP_STRING();
      if(Next()) return -1;

      if(leading == '0' && scisodigit(CUR_CHAR))
      {
          // Octal: collect digits 0-7; a trailing 8 or 9 is an error.
          kind = TOCTAL;
          while(scisodigit(CUR_CHAR))
          {
              APPEND_CHAR(CUR_CHAR);
              if(Next()) return -1;
          }
          if(scisdigit(CUR_CHAR)) return Error("invalid octal number");
      }
      else if(leading == '0' && sctoupper(CUR_CHAR) == 'X')
      {
          // Hexadecimal: skip the 'x' and collect the digits after it.
          if(Next()) return -1;
          kind = THEX;
          while(scisxdigit(CUR_CHAR))
          {
              APPEND_CHAR(CUR_CHAR);
              if(Next()) return -1;
          }
          if(_longstr.size() > MAX_HEX_DIGITS) return Error("too many digits for an Hex number");
      }
      else
      {
          // Decimal integer, or float with optional exponent.
          APPEND_CHAR(leading);
          while (CUR_CHAR == '.' || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR))
          {
              if(CUR_CHAR == '.' || isexponent(CUR_CHAR)) kind = TFLOAT;
              if(isexponent(CUR_CHAR))
              {
                  if(kind != TFLOAT) return Error("invalid numeric format");
                  kind = TSCIENTIFIC;
                  APPEND_CHAR(CUR_CHAR);
                  if(Next()) return -1;
                  if(CUR_CHAR == '+' || CUR_CHAR == '-')
                  {
                      APPEND_CHAR(CUR_CHAR);
                      if(Next()) return -1;
                  }
                  if(!scisdigit(CUR_CHAR)) return Error("exponent expected");
              }
              APPEND_CHAR(CUR_CHAR);
              if(Next()) return -1;
          }
      }
      TERMINATE_BUFFER();

      switch(kind)
      {
      case TSCIENTIFIC:
      case TFLOAT:
          _fvalue = scstrtod(_longstr.tostring(), endptr);
          return TK_FLOAT;
      case TINT:
          parsed = LexInteger(_longstr.tostring(), parsed);
          break;
      case THEX:
          parsed = LexHexadecimal(_longstr.tostring(), parsed);
          break;
      case TOCTAL:
          parsed = LexOctal(_longstr.tostring(), parsed);
          break;
      default:
          return 0;
      }
      // No range check on purpose, so that 64 bit integers are accepted.
      _nvalue = parsed;
      return TK_INTEGER;
  }
  // Consumes a block comment. On entry CUR_CHAR is the '*' of the opening
  // "/*"; on success the lexer is positioned just after the closing "*/".
  // Line breaks inside the comment advance _currentline. When comments are
  // wanted, the text between the delimiters is collected and left in _svalue.
  // Returns 0 on success, -1 on a read error or when the input ends before
  // the comment is closed.
  //
  // Fixes relative to the previous version:
  //  * after a '*' that is not followed by '/', the following character is
  //    now re-examined instead of being consumed blindly, so "**/" closes the
  //    comment and a newline right after a '*' is still counted;
  //  * such '*' characters are kept in the captured text instead of being
  //    dropped;
  //  * the captured text is no longer shortened by one character: no
  //    terminator is appended by TERMINATE_BUFFER() and the closing '*' is
  //    never stored, so that truncation was removing a real character.
  SQInteger LexBlockComment()
  {
      bool done = false;
      if(_want_comments) INIT_TEMP_STRING();
      if(Next()) return -1; //remove the comment token '*'
      while(!done)
      {
          switch(CUR_CHAR)
          {
          case '*':
              if(Next()) return -1;
              if(CUR_CHAR == '/')
              {
                  done = true;
                  if(Next()) return -1;
              }
              else if(_want_comments)
              {
                  APPEND_CHAR('*'); //a star that is part of the comment text
              }
              continue; //look at the character after the '*' afresh
          case '\n':
              _currentline++;
              break;
          case SQUIRREL_EOB:
              return Error("missing \"*/\" in comment");
          }
          if(_want_comments) APPEND_CHAR(CUR_CHAR);
          if(Next()) return -1;
      }
      if(_want_comments)
      {
          TERMINATE_BUFFER();
          _svalue = /*&*/_longstr.tostring()/*[0]*/;
      }
      return 0;
  }
  // Consumes a line comment up to, but not including, the next newline (or
  // the end of the buffer). On entry CUR_CHAR is the last character of the
  // comment opener. When comments are wanted, the text after the opener is
  // collected and left in _svalue. Returns 0 on success, -1 on a read error.
  SQInteger LexLineComment()
  {
      if(_want_comments) INIT_TEMP_STRING();
      if(Next()) return -1; //remove the comment token
      for(;;)
      {
          if(CUR_CHAR == '\n' || IS_EOB()) break;
          if(_want_comments) APPEND_CHAR(CUR_CHAR);
          if(Next()) return -1;
      }
      if(!_want_comments) return 0;
      TERMINATE_BUFFER();
      _svalue = _longstr.tostring();
      return 0;
  }
  // Reads an identifier or keyword starting at CUR_CHAR: the first character
  // is taken as is, then letters, digits and '_' are collected.
  // Returns the keyword's token id, or TK_IDENTIFIER; -1 on a read error.
  // The spelling is left in _svalue for identifiers and for the
  // constructor/destructor keywords.
  //
  // Fix: the length handed to GetIDType() used to be size() - 1, a leftover
  // from when the buffer carried a terminator. TERMINATE_BUFFER() appends
  // nothing, so the length is now the full size(), matching ReadString().
  SQInteger ReadID()
  {
      SQInteger res;
      INIT_TEMP_STRING();
      do
      {
          APPEND_CHAR(CUR_CHAR);
          if(Next()) return -1;
      }
      while(scisalnum(CUR_CHAR) || CUR_CHAR == '_');
      TERMINATE_BUFFER();
      res = GetIDType(/*&*/_longstr.tostring()/*[0]*/,_longstr.size());
      if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR || res == TK_DESTRUCTOR)
      {
          _svalue = /*&*/_longstr.tostring()/*[0]*/;
      }
      return res;
  }
  // Fetches the next character from the reader into CUR_CHAR / _currdata.
  // A reader result of 0 means end of input: CUR_CHAR becomes SQUIRREL_EOB
  // and _reached_eof is set. Otherwise the column counter advances by one.
  // Returns 0, or the result of Error() when the reader hands back a value
  // above MAX_CHAR.
  SQInteger Next()
  {
      SQInteger ch = _readf(_up);
      if(ch > MAX_CHAR) return Error("Invalid character");
      if(ch == 0)
      {
          CUR_CHAR = _currdata = SQUIRREL_EOB;
          _reached_eof = SQTrue;
          return 0;
      }
      CUR_CHAR = _currdata = ch;
      ++_currentcolumn;
      return 0;
  }
  1089. /*
  1090. #ifdef SQUNICODE
  1091. #if WCHAR_SIZE == 2
  1092. SQInteger AddUTF16(SQUnsignedInteger ch);
  1093. #endif
  1094. #else
  1095. SQInteger AddUTF8(SQUnsignedInteger ch);
  1096. #endif
  1097. */
  // Appends code point `ch` to the scratch buffer as a UTF-8 sequence and
  // returns the number of bytes written (1 to 4). Values of 0x110000 and above
  // write nothing and return 0.
  SQInteger AddUTF8(SQUnsignedInteger ch)
  {
      if (ch < 0x80)
      {
          // one byte: plain ASCII
          APPEND_CHAR((char)ch);
          return 1;
      }
      else if (ch < 0x800)
      {
          // two bytes: 110xxxxx 10xxxxxx
          APPEND_CHAR((SQChar)((ch >> 6) | 0xC0));
          APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
          return 2;
      }
      else if (ch < 0x10000)
      {
          // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
          APPEND_CHAR((SQChar)((ch >> 12) | 0xE0));
          APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
          APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
          return 3;
      }
      else if (ch < 0x110000)
      {
          // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          APPEND_CHAR((SQChar)((ch >> 18) | 0xF0));
          APPEND_CHAR((SQChar)(((ch >> 12) & 0x3F) | 0x80));
          APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
          APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
          return 4;
      }
      return 0;
  }
  // Reads the hexadecimal digits of a \x, \u or \U escape.
  // On entry CUR_CHAR is the escape letter. Up to `maxdigits` hex digits are
  // copied into `dest`, followed by a 0 terminator; the lexer is left on the
  // first character after them. Returns the number of digits read, or a
  // negative value on a read error or when no hex digit follows the letter.
  //
  // Fix: digits are now recognised with this file's own scisxdigit() rather
  // than `isxdigit`, which this file does not define.
  // NOTE(review): `dest` is written through an index; confirm that callers
  // pass something indexable and that the written digits reach the caller.
  SQInteger ProcessStringHexEscape(SQChar_ptr_t dest, SQInteger maxdigits)
  {
      if(Next()) return -1;
      if (!scisxdigit(CUR_CHAR)) return Error("hexadecimal number expected");
      SQInteger n = 0;
      while (scisxdigit(CUR_CHAR) && n < maxdigits)
      {
          dest[n] = CUR_CHAR;
          n++;
          if(Next()) return -1;
      }
      dest[n] = 0;
      return n;
  }
  // Token most recently recorded by RETURN_TOKEN() (or '\n' set directly by Lex()).
  1142. SQInteger _curtoken;
  // Keyword text -> token id table built by GetKeywords() during Init().
  1143. SQTable_ptr_t _keywords;
  // Set to SQTrue by Next() once the reader reports end of input.
  1144. SQBool _reached_eof;
  1145. //public:
  // Token that was current before the latest one; -1 after ResetReader().
  1146. SQInteger _prevtoken;
  // Line counter, advanced whenever a newline is consumed.
  1147. SQInteger _currentline;
  // Line and column captured at the start of the latest Lex() call.
  1148. SQInteger _lasttokenline;
  1149. SQInteger _lasttokencolumn;
  // Column counter, advanced by Next() for every character read.
  1150. SQInteger _currentcolumn;
  // Text of the latest string literal, identifier or captured comment.
  1151. const SQChar_ptr_t _svalue;
  // Value of the latest integer literal or character constant.
  1152. SQInteger _nvalue;
  // Value of the latest floating point literal.
  1153. SQFloat _fvalue;
  // Reader callback and the opaque value passed to it on every call.
  1154. SQLEXREADFUNC _readf;
  1155. SQUserPointer _up;
  // Copy of the current character, kept in step with CUR_CHAR by Next().
  1156. LexChar _currdata;
  // Shared state handed to SQString.Create / SQTable.Create.
  1157. SQSharedState_ptr_t _sharedstate;
  // Scratch buffer collecting the characters of the token being scanned.
  1158. sqvector_SQChar _longstr;
  // Error callback and the target passed to it by Error().
  1159. CompilerErrorFunc _errfunc;
  1160. void_ptr_t _errtarget;
  // Message most recently passed to Error().
  1161. SQChar_ptr_t _lasterror/*[256]*/;
  // When set, comments are captured and returned as tokens.
  1162. SQBool _want_comments;
  1163. };
  // Self-test driver: the lexer tokenises this very source file and prints one
  // line per token, followed by the elapsed time.
  // `up`, `efunc` and `ed` are declared without a value; since `efunc` is
  // never assigned, Error() has no callback to report to.
  1164. SQSharedState ss;
  1165. SQUserPointer up;
  1166. CompilerErrorFunc efunc;
  1167. void_ptr_t ed;
  1168. string_t cpp_code = readfile(__FILE__);
  1169. print(__FILE__, cpp_code.len());
  1170. //cpp_code = "int main(){return 0;}";
  1171. FILE source = FILE(cpp_code);
  1172. SQLexerNut lex = SQLexerNut();
  // Comments are requested (SQTrue), so they show up as tokens in the output.
  1173. lex.Init(ss, compilerReadFunc, source, efunc, ed, SQTrue);
  1174. int_t tok;
  1175. double_t start_time = os.clock();
  // Lex() returns 0 at end of input and -1 on error; both end the loop.
  1176. while((tok = lex.Lex()) > 0)
  1177. {
  1178. //printf("Token = %d\n", tok);
  1179. string_t tkstr;
  // Ids inside the TK_* enum range are turned back into keyword text; any
  // other id is printed as the character it stands for.
  1180. if(tok > TK_FIRST_ENUM_TOKEN && tok < TK_LAST_ENUM_TOKEN) tkstr = lex.Tok2Str(tok);
  1181. else tkstr = tok.tochar();
  1182. print(
  1183. tok,
  1184. tkstr || "",
  1185. lex.GetTokenName(tok),
  1186. lex._svalue || "",
  1187. lex._nvalue,
  1188. //lex._lasttokenline,
  1189. //lex._lasttokencolumn,
  1190. lex._currentline,
  1191. lex._currentcolumn
  1192. );
  1193. }
  1194. print("Time spent", os.clock() - start_time);
  1192. );
  1193. }
  1194. print("Time spent", os.clock() - start_time);