// sqlexer.cpp (20 KB)
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include <limits.h>
  8. #include "sqtable.h"
  9. #include "sqstring.h"
  10. #include "sqcompiler.h"
  11. #include "sqlexer.h"
  // Shorthand used by the SQLexer member functions in this file. RETURN_TOKEN
  // and NEXT expand to a braced block that may `return` from the enclosing
  // function, so they are only usable where returning an SQInteger is valid.

  // The character currently under the read cursor.
  #define CUR_CHAR (_currdata)

  // Shift the current token into _prevtoken, record `t` as current, return it.
  #define RETURN_TOKEN(t) \
      { \
          _prevtoken = _curtoken; \
          _curtoken = t; \
          return t; \
      }

  // True when the cursor sits at (or below) the end-of-buffer marker.
  #define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)

  // Advance one character; propagate a negative result from Next() to the
  // caller, otherwise bump the column counter.
  #define NEXT() \
      { \
          SQInteger rc = Next(); \
          if(rc < 0) return rc; \
          _currentcolumn++; \
      }

  // Scratch-buffer helpers operating on _longstr.
  #define INIT_TEMP_STRING() \
      { \
          _longstr.resize(0); \
      }
  #define APPEND_CHAR(c) \
      { \
          _longstr.push_back(c); \
      }
  #define TERMINATE_BUFFER() \
      { \
          _longstr.push_back(_SC('\0')); \
      }

  // Register keyword `key` (stringified) with token `id` in the local table `tbl`.
  #define ADD_KEYWORD(key,id) \
      tbl->NewSlot( SQString::Create(_sharedstate, _SC(#key)) ,SQInteger(id))
  20. SQLexer::SQLexer(){_keywords=0;}
  21. SQLexer::~SQLexer()
  22. {
  23. _keywords->Release();
  24. }
  25. SQInteger SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
  26. {
  27. _lasterror[0] = '\0';
  28. _svalue = NULL;
  29. _errfunc = efunc;
  30. _errtarget = ed;
  31. _sharedstate = ss;
  32. if(_keywords) _keywords->Release();
  33. _keywords = GetKeywords();
  34. return ResetReader(rg, up, 1);
  35. }
  36. SQInteger SQLexer::ResetReader(SQLEXREADFUNC rg, SQUserPointer up, SQInteger line)
  37. {
  38. _readf = rg;
  39. _up = up;
  40. _lasttokenline = _currentline = line;
  41. _currentcolumn = 0;
  42. _prevtoken = -1;
  43. _reached_eof = SQFalse;
  44. return Next();
  45. }
  46. SQTable * SQLexer::GetKeywords()
  47. {
  48. SQTable *tbl = SQTable::Create(_sharedstate, (TK_LAST_ENUM_TOKEN - TK_FIRST_ENUM_TOKEN - 1) /*26*/);
  49. ADD_KEYWORD(any_t, TK_LOCAL_ANY_T);
  50. ADD_KEYWORD(array_t, TK_LOCAL_ARRAY_T);
  51. ADD_KEYWORD(auto, TK_LOCAL);
  52. ADD_KEYWORD(base, TK_BASE);
  53. ADD_KEYWORD(bool_t, TK_LOCAL_BOOL_T);
  54. ADD_KEYWORD(break, TK_BREAK);
  55. ADD_KEYWORD(case, TK_CASE);
  56. ADD_KEYWORD(catch, TK_CATCH);
  57. ADD_KEYWORD(char_t, TK_LOCAL_CHAR_T);
  58. ADD_KEYWORD(class,TK_CLASS);
  59. ADD_KEYWORD(clone, TK_CLONE);
  60. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  61. ADD_KEYWORD(const,TK_CONST);
  62. ADD_KEYWORD(continue, TK_CONTINUE);
  63. ADD_KEYWORD(default, TK_DEFAULT);
  64. ADD_KEYWORD(delete, TK_DELETE);
  65. ADD_KEYWORD(destructor,TK_DESTRUCTOR);
  66. ADD_KEYWORD(do, TK_DO);
  67. ADD_KEYWORD(double_t, TK_LOCAL_DOUBLE_T);
  68. ADD_KEYWORD(else, TK_ELSE);
  69. ADD_KEYWORD(enum,TK_ENUM);
  70. ADD_KEYWORD(extends,TK_EXTENDS);
  71. ADD_KEYWORD(extern,TK_EXTERN);
  72. ADD_KEYWORD(false,TK_FALSE);
  73. ADD_KEYWORD(__FILE__,TK___FILE__);
  74. ADD_KEYWORD(float_t, TK_LOCAL_FLOAT_T);
  75. ADD_KEYWORD(foreach, TK_FOREACH);
  76. ADD_KEYWORD(for, TK_FOR);
  77. ADD_KEYWORD(function, TK_FUNCTION);
  78. ADD_KEYWORD(__FUNCTION__,TK___FUNCTION__);
  79. ADD_KEYWORD(if, TK_IF);
  80. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  81. ADD_KEYWORD(int16_t, TK_LOCAL_INT16_T);
  82. ADD_KEYWORD(int32_t, TK_LOCAL_INT32_T);
  83. ADD_KEYWORD(int64_t, TK_LOCAL_INT64_T);
  84. ADD_KEYWORD(int8_t, TK_LOCAL_INT8_T);
  85. ADD_KEYWORD(in, TK_IN);
  86. ADD_KEYWORD(int_t, TK_LOCAL_INT_T);
  87. ADD_KEYWORD(let, TK_LOCAL);
  88. ADD_KEYWORD(__LINE__,TK___LINE__);
  89. ADD_KEYWORD(local, TK_LOCAL);
  90. ADD_KEYWORD(long_double_t, TK_LOCAL_LONG_DOUBLE_T);
  91. ADD_KEYWORD(new,TK_IGNORE);
  92. ADD_KEYWORD(number_t, TK_LOCAL_NUMBER_T);
  93. ADD_KEYWORD(null, TK_NULL);
  94. ADD_KEYWORD(NULL, TK_NULL);
  95. ADD_KEYWORD(private,TK_PRIVATE);
  96. ADD_KEYWORD(public,TK_PUBLIC);
  97. ADD_KEYWORD(resume, TK_RESUME);
  98. ADD_KEYWORD(return, TK_RETURN);
  99. ADD_KEYWORD(static,TK_STATIC);
  100. ADD_KEYWORD(string_t, TK_LOCAL_STRING_T);
  101. ADD_KEYWORD(struct,TK_STRUCT);
  102. ADD_KEYWORD(switch, TK_SWITCH);
  103. ADD_KEYWORD(table_t, TK_LOCAL_TABLE_T);
  104. ADD_KEYWORD(this, TK_THIS);
  105. ADD_KEYWORD(throw, TK_THROW);
  106. ADD_KEYWORD(true,TK_TRUE);
  107. ADD_KEYWORD(try, TK_TRY);
  108. ADD_KEYWORD(typeof, TK_TYPEOF);
  109. ADD_KEYWORD(uint16_t, TK_LOCAL_UINT16_T);
  110. ADD_KEYWORD(uint32_t, TK_LOCAL_UINT32_T);
  111. ADD_KEYWORD(uint64_t, TK_LOCAL_UINT64_T);
  112. ADD_KEYWORD(uint8_t, TK_LOCAL_UINT8_T);
  113. ADD_KEYWORD(uint_t, TK_LOCAL_UINT_T);
  114. ADD_KEYWORD(var, TK_LOCAL);
  115. ADD_KEYWORD(virtual, TK_VIRTUAL);
  116. ADD_KEYWORD(void_ptr_t, TK_LOCAL_VOIDPTR_T);
  117. ADD_KEYWORD(void, TK_VOID);
  118. ADD_KEYWORD(volatile, TK_VOLATILE);
  119. ADD_KEYWORD(wchar_t, TK_LOCAL_WCHAR_T);
  120. ADD_KEYWORD(weakref_t, TK_LOCAL_WEAKREF_T);
  121. ADD_KEYWORD(while, TK_WHILE);
  122. ADD_KEYWORD(yield, TK_YIELD);
  123. return tbl;
  124. }
  125. SQInteger SQLexer::Error(const SQChar *fmt, ...)
  126. {
  127. va_list vl;
  128. va_start(vl, fmt);
  129. scvsprintf(_lasterror, sizeof(_lasterror), fmt, vl);
  130. va_end(vl);
  131. if(_errfunc) _errfunc(_errtarget,_lasterror);
  132. return -1;
  133. }
  134. SQInteger SQLexer::Next()
  135. {
  136. SQInteger t = _readf(_up);
  137. if(t > MAX_CHAR) return Error(_SC("Invalid character"));
  138. if(t != 0) {
  139. _currdata = (LexChar)t;
  140. return 0;
  141. }
  142. _currdata = SQUIRREL_EOB;
  143. _reached_eof = SQTrue;
  144. return 0;
  145. }
  146. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  147. {
  148. SQObjectPtr itr, key, val;
  149. SQInteger nitr;
  150. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  151. itr = (SQInteger)nitr;
  152. if(((SQInteger)_integer(val)) == tok)
  153. return _stringval(key);
  154. }
  155. return NULL;
  156. }
  157. const SQChar *SQLexer::GetTokenName(int tk_code) {
  158. const SQChar *str_tk;
  159. switch(tk_code){
  160. #define ENUM_TK(a) case TK_##a: str_tk = _SC("TK_" #a); break;
  161. SQ_KEYWORDS_LIST()
  162. #undef ENUM_TK
  163. default:
  164. str_tk = _SC("???");
  165. }
  166. return str_tk;
  167. }
  168. SQInteger SQLexer::LexBlockComment()
  169. {
  170. /*
  171. if(CUR_CHAR == _SC('*'))
  172. {
  173. NEXT();
  174. if(CUR_CHAR != _SC('*')){ //document comment
  175. printf("Doument comment found at line %d\n", _currentline);
  176. }
  177. }
  178. */
  179. bool done = false;
  180. while(!done) {
  181. switch(CUR_CHAR) {
  182. case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
  183. case _SC('\n'): _currentline++; NEXT(); continue;
  184. case SQUIRREL_EOB: return Error(_SC("missing \"*/\" in comment"));
  185. default: NEXT();
  186. }
  187. }
  188. return 0;
  189. }
  190. SQInteger SQLexer::LexLineComment()
  191. {
  192. do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
  193. return 0;
  194. }
  195. SQInteger SQLexer::Lex()
  196. {
  197. _lasttokenline = _currentline;
  198. while(CUR_CHAR != SQUIRREL_EOB) {
  199. switch(CUR_CHAR){
  200. case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
  201. case _SC('\n'):
  202. _currentline++;
  203. _prevtoken=_curtoken;
  204. _curtoken=_SC('\n');
  205. NEXT();
  206. _currentcolumn=1;
  207. continue;
  208. case _SC('#'):
  209. NEXT();
  210. if(CUR_CHAR == '!') //shell shebang
  211. {
  212. if(LexLineComment()) return -1;
  213. continue;
  214. }
  215. RETURN_TOKEN(TK_PRAGMA);
  216. continue;
  217. case _SC('/'):
  218. NEXT();
  219. switch(CUR_CHAR){
  220. case _SC('*'):
  221. NEXT();
  222. if(LexBlockComment()) return -1;
  223. continue;
  224. case _SC('/'):
  225. if(LexLineComment()) return -1;
  226. continue;
  227. case _SC('='):
  228. NEXT();
  229. RETURN_TOKEN(TK_DIVEQ);
  230. continue;
  231. case _SC('>'):
  232. NEXT();
  233. RETURN_TOKEN(TK_ATTR_CLOSE);
  234. continue;
  235. default:
  236. RETURN_TOKEN('/');
  237. }
  238. case _SC('='):
  239. NEXT();
  240. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
  241. else {
  242. NEXT();
  243. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_EQ_IDENTITY) }
  244. else { RETURN_TOKEN(TK_EQ); }
  245. }
  246. case _SC('<'):
  247. NEXT();
  248. switch(CUR_CHAR) {
  249. case _SC('='):
  250. NEXT();
  251. if(CUR_CHAR == _SC('>')) {
  252. NEXT();
  253. RETURN_TOKEN(TK_3WAYSCMP);
  254. }
  255. RETURN_TOKEN(TK_LE)
  256. break;
  257. case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
  258. case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
  259. case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
  260. }
  261. RETURN_TOKEN('<');
  262. case _SC('>'):
  263. NEXT();
  264. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
  265. else if(CUR_CHAR == _SC('>')){
  266. NEXT();
  267. if(CUR_CHAR == _SC('>')){
  268. NEXT();
  269. RETURN_TOKEN(TK_USHIFTR);
  270. }
  271. RETURN_TOKEN(TK_SHIFTR);
  272. }
  273. else { RETURN_TOKEN('>') }
  274. case _SC('!'):
  275. NEXT();
  276. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
  277. else {
  278. NEXT();
  279. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_NE_IDENTITY)}
  280. else { RETURN_TOKEN(TK_NE); }
  281. }
  282. case _SC('@'): {
  283. SQInteger stype;
  284. NEXT();
  285. if(CUR_CHAR != _SC('"')) {
  286. RETURN_TOKEN('@');
  287. }
  288. if((stype=ReadString('"',true))!=-1) {
  289. RETURN_TOKEN(stype);
  290. }
  291. return Error(_SC("error parsing the string"));
  292. }
  293. case _SC('"'):
  294. case _SC('\''): {
  295. SQInteger stype;
  296. if((stype=ReadString(CUR_CHAR,false))!=-1){
  297. RETURN_TOKEN(stype);
  298. }
  299. return Error(_SC("error parsing the string"));
  300. }
  301. case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
  302. case _SC(';'): case _SC(','): case _SC('?'): case _SC('~'):
  303. {
  304. SQInteger ret = CUR_CHAR;
  305. NEXT();
  306. if((ret == _SC('[') || ret == _SC('{') || ret == _SC('(')) && CUR_CHAR == _SC('=')){
  307. //lets try lua literal delimiters
  308. SQInteger stype;
  309. if((stype=ReadString(ret,true))!=-1){
  310. RETURN_TOKEN(stype);
  311. }
  312. return Error(_SC("error parsing the string"));
  313. }
  314. else RETURN_TOKEN(ret);
  315. }
  316. case _SC('.'):
  317. NEXT();
  318. if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
  319. NEXT();
  320. if (CUR_CHAR != _SC('.')){ return Error(_SC("invalid token '..'")); }
  321. NEXT();
  322. RETURN_TOKEN(TK_VARPARAMS);
  323. case _SC('^'):
  324. NEXT();
  325. //if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_XOR_EQ);}
  326. RETURN_TOKEN('^');
  327. case _SC('&'):
  328. NEXT();
  329. //if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_AND_EQ);}
  330. if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
  331. else { NEXT(); RETURN_TOKEN(TK_AND); }
  332. case _SC('|'):
  333. NEXT();
  334. //if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_OR_EQ);}
  335. if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
  336. else { NEXT(); RETURN_TOKEN(TK_OR); }
  337. case _SC(':'):
  338. NEXT();
  339. if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
  340. else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
  341. case _SC('*'):
  342. NEXT();
  343. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
  344. else RETURN_TOKEN('*');
  345. case _SC('%'):
  346. NEXT();
  347. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
  348. else RETURN_TOKEN('%');
  349. case _SC('-'):
  350. NEXT();
  351. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
  352. else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
  353. else if (CUR_CHAR == _SC('>')){ NEXT(); RETURN_TOKEN('.');} //accept C/C++ like pointers
  354. else RETURN_TOKEN('-');
  355. case _SC('+'):
  356. NEXT();
  357. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
  358. else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
  359. else RETURN_TOKEN('+');
  360. case SQUIRREL_EOB:
  361. return 0;
  362. default:{
  363. if (scisdigit(CUR_CHAR)) {
  364. SQInteger ret = ReadNumber();
  365. if(ret < 0) return -1;
  366. RETURN_TOKEN(ret);
  367. }
  368. else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
  369. SQInteger t = ReadID();
  370. if(t < 0) return -1;
  371. RETURN_TOKEN(t);
  372. }
  373. else {
  374. SQInteger c = CUR_CHAR;
  375. if (sciscntrl((int)c)) return Error(_SC("unexpected character(control)"));
  376. NEXT();
  377. RETURN_TOKEN(c);
  378. }
  379. RETURN_TOKEN(0);
  380. }
  381. }
  382. }
  383. return 0;
  384. }
  385. SQInteger SQLexer::GetIDType(const SQChar *s,SQInteger len)
  386. {
  387. SQObjectPtr t;
  388. if(_keywords->GetStr(s,len, t)) {
  389. return SQInteger(_integer(t));
  390. }
  391. return TK_IDENTIFIER;
  392. }
  393. #ifdef SQUNICODE
  394. #if WCHAR_SIZE == 2
  395. SQInteger SQLexer::AddUTF16(SQUnsignedInteger ch)
  396. {
  397. if (ch >= 0x10000)
  398. {
  399. SQUnsignedInteger code = (ch - 0x10000);
  400. APPEND_CHAR((SQChar)(0xD800 | (code >> 10)));
  401. APPEND_CHAR((SQChar)(0xDC00 | (code & 0x3FF)));
  402. return 2;
  403. }
  404. else {
  405. APPEND_CHAR((SQChar)ch);
  406. return 1;
  407. }
  408. }
  409. #endif
  410. #else
  411. SQInteger SQLexer::AddUTF8(SQUnsignedInteger ch)
  412. {
  413. if (ch < 0x80) {
  414. APPEND_CHAR((char)ch);
  415. return 1;
  416. }
  417. if (ch < 0x800) {
  418. APPEND_CHAR((SQChar)((ch >> 6) | 0xC0));
  419. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  420. return 2;
  421. }
  422. if (ch < 0x10000) {
  423. APPEND_CHAR((SQChar)((ch >> 12) | 0xE0));
  424. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  425. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  426. return 3;
  427. }
  428. if (ch < 0x110000) {
  429. APPEND_CHAR((SQChar)((ch >> 18) | 0xF0));
  430. APPEND_CHAR((SQChar)(((ch >> 12) & 0x3F) | 0x80));
  431. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  432. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  433. return 4;
  434. }
  435. return 0;
  436. }
  437. #endif
  438. SQInteger SQLexer::ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits)
  439. {
  440. NEXT();
  441. if (!isxdigit(CUR_CHAR)) return Error(_SC("hexadecimal number expected"));
  442. SQInteger n = 0;
  443. while (isxdigit(CUR_CHAR) && n < maxdigits) {
  444. dest[n] = CUR_CHAR;
  445. n++;
  446. NEXT();
  447. }
  448. dest[n] = 0;
  449. return n;
  450. }
  451. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  452. {
  453. INIT_TEMP_STRING();
  454. SQInteger start_equals = 0;
  455. SQChar cdelim1, cdelim2;
  456. if(ndelim == _SC('{')){
  457. cdelim1 = _SC('{');
  458. cdelim2 = _SC('}');
  459. }
  460. else if(ndelim == _SC('(')){
  461. cdelim1 = _SC('(');
  462. cdelim2 = _SC(')');
  463. }
  464. else {
  465. cdelim1 = _SC('[');
  466. cdelim2 = _SC(']');
  467. }
  468. if(CUR_CHAR == _SC('=')){
  469. //lua like literal
  470. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  471. ++start_equals;
  472. NEXT();
  473. }
  474. if(CUR_CHAR != cdelim1){
  475. //it's not a lua literal delimiter
  476. return Error(_SC("expect '%c' on literal delimiter"), cdelim1);
  477. }
  478. ndelim = cdelim2;
  479. }
  480. NEXT();
  481. if(IS_EOB()) return -1;
  482. if(start_equals) {
  483. int cr_nl = CUR_CHAR == _SC('\r');
  484. if(cr_nl) NEXT();
  485. cr_nl = CUR_CHAR == _SC('\n');
  486. if(cr_nl) NEXT();
  487. if(cr_nl) {//if a new line follows the start of delimiter drop it
  488. ++_currentline;
  489. if(IS_EOB())
  490. {
  491. return Error(_SC("unfinished string"));
  492. }
  493. }
  494. }
  495. for(;;) {
  496. while(CUR_CHAR != ndelim) {
  497. SQInteger x = CUR_CHAR;
  498. switch(x) {
  499. case SQUIRREL_EOB:
  500. return Error(_SC("unfinished string"));
  501. case _SC('\n'):
  502. if(!verbatim) return Error(_SC("newline in a constant"));
  503. APPEND_CHAR(CUR_CHAR); NEXT();
  504. _currentline++;
  505. break;
  506. case _SC('\\'):
  507. if(verbatim) {
  508. APPEND_CHAR('\\'); NEXT();
  509. }
  510. else {
  511. NEXT();
  512. switch(CUR_CHAR) {
  513. case _SC('x'): {
  514. const SQInteger maxdigits = sizeof(SQChar) * 2;
  515. SQChar temp[maxdigits + 1];
  516. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  517. SQChar *stemp;
  518. APPEND_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  519. }
  520. break;
  521. case _SC('U'):
  522. case _SC('u'): {
  523. const SQInteger maxdigits = x == 'u' ? 4 : 8;
  524. SQChar temp[8 + 1];
  525. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  526. SQChar *stemp;
  527. #ifdef SQUNICODE
  528. #if WCHAR_SIZE == 2
  529. AddUTF16(scstrtoul(temp, &stemp, 16));
  530. #else
  531. ADD_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  532. #endif
  533. #else
  534. AddUTF8(scstrtoul(temp, &stemp, 16));
  535. #endif
  536. }
  537. break;
  538. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  539. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  540. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  541. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  542. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  543. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  544. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  545. case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
  546. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  547. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  548. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  549. default:
  550. return Error(_SC("unrecognised escaper char"));
  551. break;
  552. }
  553. }
  554. break;
  555. default:
  556. APPEND_CHAR(CUR_CHAR);
  557. NEXT();
  558. }
  559. }
  560. NEXT();
  561. if(start_equals){
  562. bool lastBraceAdded = false;
  563. if(CUR_CHAR == _SC('=')){
  564. SQInteger end_equals = start_equals;
  565. NEXT();
  566. if(CUR_CHAR == _SC('=') || CUR_CHAR == cdelim2){
  567. --end_equals;
  568. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  569. --end_equals;
  570. NEXT();
  571. }
  572. if(end_equals) return Error(_SC("expect same number of '=' on literal delimiter"));
  573. if(CUR_CHAR != cdelim2) return Error(_SC("expect '%c' to close literal delimiter"), cdelim2);
  574. NEXT();
  575. break;
  576. }
  577. APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  578. APPEND_CHAR(_SC('='));
  579. lastBraceAdded = true;
  580. }
  581. if(!lastBraceAdded) APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  582. APPEND_CHAR(CUR_CHAR);
  583. NEXT();
  584. }
  585. else if(verbatim && CUR_CHAR == '"') { //double quotation
  586. APPEND_CHAR(CUR_CHAR);
  587. NEXT();
  588. }
  589. else {
  590. break;
  591. }
  592. }
  593. TERMINATE_BUFFER();
  594. SQInteger len = _longstr.size()-1;
  595. if(ndelim == _SC('\'')) {
  596. if(len == 0) return Error(_SC("empty constant"));
  597. if(len > 1) return Error(_SC("constant too long"));
  598. _nvalue = _longstr[0];
  599. return TK_INTEGER;
  600. }
  601. _svalue = &_longstr[0];
  602. return TK_STRING_LITERAL;
  603. }
  604. void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  605. {
  606. *res = 0;
  607. while(*s != 0)
  608. {
  609. if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
  610. else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
  611. else { assert(0); }
  612. }
  613. }
  614. void LexInteger(const SQChar *s,SQUnsignedInteger *res)
  615. {
  616. *res = 0;
  617. while(*s != 0)
  618. {
  619. *res = (*res)*10+((*s++)-'0');
  620. }
  621. }
  622. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  623. void LexOctal(const SQChar *s,SQUnsignedInteger *res)
  624. {
  625. *res = 0;
  626. while(*s != 0)
  627. {
  628. if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
  629. else { assert(0); }
  630. }
  631. }
  632. SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
  633. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  634. SQInteger SQLexer::ReadNumber()
  635. {
  636. #define TINT 1
  637. #define TFLOAT 2
  638. #define THEX 3
  639. #define TSCIENTIFIC 4
  640. #define TOCTAL 5
  641. SQInteger type = TINT, firstchar = CUR_CHAR;
  642. SQUnsignedInteger itmp=0;
  643. SQChar *sTemp;
  644. INIT_TEMP_STRING();
  645. NEXT();
  646. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  647. if(scisodigit(CUR_CHAR)) {
  648. type = TOCTAL;
  649. while(scisodigit(CUR_CHAR)) {
  650. APPEND_CHAR(CUR_CHAR);
  651. NEXT();
  652. }
  653. if(scisdigit(CUR_CHAR)) return Error(_SC("invalid octal number"));
  654. }
  655. else {
  656. NEXT();
  657. type = THEX;
  658. while(isxdigit(CUR_CHAR)) {
  659. APPEND_CHAR(CUR_CHAR);
  660. NEXT();
  661. }
  662. if(_longstr.size() > MAX_HEX_DIGITS) return Error(_SC("too many digits for an Hex number"));
  663. }
  664. }
  665. else {
  666. APPEND_CHAR((int)firstchar);
  667. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  668. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  669. if(isexponent(CUR_CHAR)) {
  670. if(type != TFLOAT) return Error(_SC("invalid numeric format"));
  671. type = TSCIENTIFIC;
  672. APPEND_CHAR(CUR_CHAR);
  673. NEXT();
  674. if(CUR_CHAR == '+' || CUR_CHAR == '-'){
  675. APPEND_CHAR(CUR_CHAR);
  676. NEXT();
  677. }
  678. if(!scisdigit(CUR_CHAR)) return Error(_SC("exponent expected"));
  679. }
  680. APPEND_CHAR(CUR_CHAR);
  681. NEXT();
  682. }
  683. }
  684. TERMINATE_BUFFER();
  685. switch(type) {
  686. case TSCIENTIFIC:
  687. case TFLOAT:
  688. _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
  689. return TK_FLOAT;
  690. case TINT:
  691. LexInteger(&_longstr[0],&itmp);
  692. break;
  693. case THEX:
  694. LexHexadecimal(&_longstr[0],&itmp);
  695. break;
  696. case TOCTAL:
  697. LexOctal(&_longstr[0],&itmp);
  698. break;
  699. }
  700. switch(type) {
  701. case TINT:
  702. case THEX:
  703. case TOCTAL:
  704. //to allow 64 bits integers comment bellow
  705. //if(itmp > INT_MAX) return Error(_SC("integer overflow %ulld %d"));
  706. _nvalue = (SQInteger) itmp;
  707. return TK_INTEGER;
  708. }
  709. return 0;
  710. }
  711. SQInteger SQLexer::ReadID()
  712. {
  713. SQInteger res;
  714. INIT_TEMP_STRING();
  715. do {
  716. APPEND_CHAR(CUR_CHAR);
  717. NEXT();
  718. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  719. TERMINATE_BUFFER();
  720. res = GetIDType(&_longstr[0],_longstr.size() - 1);
  721. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR || res == TK_DESTRUCTOR) {
  722. _svalue = &_longstr[0];
  723. }
  724. return res;
  725. }