sqlexer.cpp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include <limits.h>
  8. #include "sqtable.h"
  9. #include "sqstring.h"
  10. #include "sqcompiler.h"
  11. #include "sqlexer.h"
// Character currently under the read cursor of the active lexer state (`data`).
#define CUR_CHAR (data->currdata)
// Shifts the current token into prevtoken, records `t` as the current token and
// returns `t` from the enclosing function.
#define RETURN_TOKEN(t) { data->prevtoken = data->curtoken; data->curtoken = t; return t;}
// True when the current character is at or below the end-of-buffer sentinel.
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
//#define NEXT() {SQInteger rc = Next(); if(rc < 0) return rc; data->currentcolumn++;}
// Advances one character; makes the enclosing function return -1 when Next() reports a failure.
#define NEXT() {if(Next()) return -1;}
// Empties the scratch buffer used to accumulate the text of the token being read.
#define INIT_TEMP_STRING() { data->longstr.resize(0);}
// Appends one character to the scratch buffer.
#define APPEND_CHAR(c) { data->longstr.push_back(c);}
// Appends the terminating NUL to the scratch buffer.
#define TERMINATE_BUFFER() {data->longstr.push_back(_SC('\0'));}
// Registers the spelling `key` (stringized) in the local table `tbl` with token id `id`.
// Relies on `tbl` and `_sharedstate` being in scope at the point of use.
#define ADD_KEYWORD(key,id) tbl->NewSlot( SQString::Create(_sharedstate, _SC(#key)) ,SQInteger(id))
  21. SQLexer::SQLexer(){_keywords=0;}
  22. SQLexer::~SQLexer()
  23. {
  24. if(_keywords) _keywords->Release();
  25. }
  26. SQInteger SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg,
  27. SQUserPointer up,CompilerErrorFunc efunc,void *ed,
  28. SQBool want_comments, SQBool want_stringSingleAndDoubleQuotes)
  29. {
  30. _want_comments = want_comments;
  31. _want_stringSingleAndDoubleQuotes = want_stringSingleAndDoubleQuotes;
  32. data = &_data;
  33. _data_lookahead.currentline = -1;
  34. _errfunc = efunc;
  35. _errtarget = ed;
  36. _sharedstate = ss;
  37. if(_keywords) _keywords->Release();
  38. _keywords = GetKeywords();
  39. return ResetReader(rg, up, 1);
  40. }
  41. SQInteger SQLexer::ResetReader(SQLEXREADFUNC rg, SQUserPointer up, SQInteger line)
  42. {
  43. _readf = rg;
  44. _up = up;
  45. data->lasttokenline = data->currentline = line;
  46. data->lasttokencolumn = 0;
  47. data->currentcolumn = 0;
  48. data->prevtoken = -1;
  49. data->readcount = 0;
  50. data->reached_eof = SQFalse;
  51. data->isCharacter = SQFalse;
  52. return Next();
  53. }
// Builds and returns a new table mapping every reserved word to its token id.
// The table is created on _sharedstate; the caller (Init) stores it in
// _keywords and releases it later.
// Several spellings deliberately share one token: auto/let/local/var map to
// TK_LOCAL, null/NULL map to TK_NULL, and `new` maps to TK_IGNORE.
SQTable * SQLexer::GetKeywords()
{
    // Initial size derived from the span of the token enum.
    SQTable *tbl = SQTable::Create(_sharedstate, (TK_LAST_ENUM_TOKEN - TK_FIRST_ENUM_TOKEN - 1) /*26*/);
    ADD_KEYWORD(any_t, TK_LOCAL_ANY_T);
    ADD_KEYWORD(array_t, TK_LOCAL_ARRAY_T);
    ADD_KEYWORD(as, TK_AS);
    ADD_KEYWORD(auto, TK_LOCAL);
    ADD_KEYWORD(base, TK_BASE);
    ADD_KEYWORD(bool_t, TK_LOCAL_BOOL_T);
    ADD_KEYWORD(break, TK_BREAK);
    ADD_KEYWORD(case, TK_CASE);
    ADD_KEYWORD(catch, TK_CATCH);
    ADD_KEYWORD(char_t, TK_LOCAL_CHAR_T);
    ADD_KEYWORD(class,TK_CLASS);
    ADD_KEYWORD(clone, TK_CLONE);
    ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
    ADD_KEYWORD(const,TK_CONST);
    ADD_KEYWORD(constexpr,TK_CONSTEXPR);
    ADD_KEYWORD(continue, TK_CONTINUE);
    ADD_KEYWORD(declare, TK_DECLARE);
    ADD_KEYWORD(default, TK_DEFAULT);
    ADD_KEYWORD(delete, TK_DELETE);
    ADD_KEYWORD(destructor,TK_DESTRUCTOR);
    ADD_KEYWORD(do, TK_DO);
    ADD_KEYWORD(double_t, TK_LOCAL_DOUBLE_T);
    ADD_KEYWORD(else, TK_ELSE);
    ADD_KEYWORD(enum,TK_ENUM);
    ADD_KEYWORD(extends,TK_EXTENDS);
    ADD_KEYWORD(extern,TK_EXTERN);
    ADD_KEYWORD(false,TK_FALSE);
    ADD_KEYWORD(__FILE__,TK___FILE__);
    ADD_KEYWORD(float_t, TK_LOCAL_FLOAT_T);
    ADD_KEYWORD(foreach, TK_FOREACH);
    ADD_KEYWORD(for, TK_FOR);
    ADD_KEYWORD(friend, TK_FRIEND);
    ADD_KEYWORD(function, TK_FUNCTION);
    ADD_KEYWORD(__FUNCTION__,TK___FUNCTION__);
    ADD_KEYWORD(goto, TK_GOTO);
    ADD_KEYWORD(if, TK_IF);
    ADD_KEYWORD(instanceof,TK_INSTANCEOF);
    ADD_KEYWORD(int16_t, TK_LOCAL_INT16_T);
    ADD_KEYWORD(int32_t, TK_LOCAL_INT32_T);
    ADD_KEYWORD(int64_t, TK_LOCAL_INT64_T);
    ADD_KEYWORD(int8_t, TK_LOCAL_INT8_T);
    ADD_KEYWORD(in, TK_IN);
    ADD_KEYWORD(inline, TK_INLINE);
    ADD_KEYWORD(interface, TK_INTERFACE);
    ADD_KEYWORD(int_t, TK_LOCAL_INT_T);
    ADD_KEYWORD(let, TK_LOCAL);
    ADD_KEYWORD(__LINE__,TK___LINE__);
    ADD_KEYWORD(local, TK_LOCAL);
    ADD_KEYWORD(long_double_t, TK_LOCAL_LONG_DOUBLE_T);
    ADD_KEYWORD(new,TK_IGNORE);
    ADD_KEYWORD(noexcept,TK_NOEXCEPT);
    ADD_KEYWORD(number_t, TK_LOCAL_NUMBER_T);
    ADD_KEYWORD(null, TK_NULL);
    ADD_KEYWORD(NULL, TK_NULL);
    ADD_KEYWORD(private,TK_PRIVATE);
    ADD_KEYWORD(protected,TK_PROTECTED);
    ADD_KEYWORD(public,TK_PUBLIC);
    ADD_KEYWORD(rawcall, TK_RAWCALL);
    ADD_KEYWORD(resume, TK_RESUME);
    ADD_KEYWORD(return, TK_RETURN);
    ADD_KEYWORD(size_t,TK_LOCAL_SIZE_T);
    ADD_KEYWORD(ssize_t,TK_LOCAL_SSIZE_T);
    ADD_KEYWORD(static,TK_STATIC);
    ADD_KEYWORD(string_t, TK_LOCAL_STRING_T);
    ADD_KEYWORD(struct,TK_STRUCT);
    ADD_KEYWORD(switch, TK_SWITCH);
    ADD_KEYWORD(table_t, TK_LOCAL_TABLE_T);
    ADD_KEYWORD(template, TK_TEMPLATE);
    ADD_KEYWORD(this, TK_THIS);
    ADD_KEYWORD(throw, TK_THROW);
    ADD_KEYWORD(typedef, TK_TYPEDEF);
    ADD_KEYWORD(true,TK_TRUE);
    ADD_KEYWORD(try, TK_TRY);
    ADD_KEYWORD(typeof, TK_TYPEOF);
    ADD_KEYWORD(uint16_t, TK_LOCAL_UINT16_T);
    ADD_KEYWORD(uint32_t, TK_LOCAL_UINT32_T);
    ADD_KEYWORD(uint64_t, TK_LOCAL_UINT64_T);
    ADD_KEYWORD(uint8_t, TK_LOCAL_UINT8_T);
    ADD_KEYWORD(uint_t, TK_LOCAL_UINT_T);
    ADD_KEYWORD(unsafe, TK_UNSAFE);
    ADD_KEYWORD(using, TK_USING);
    ADD_KEYWORD(var, TK_LOCAL);
    ADD_KEYWORD(virtual, TK_VIRTUAL);
    ADD_KEYWORD(void_ptr_t, TK_LOCAL_VOIDPTR_T);
    ADD_KEYWORD(void, TK_VOID);
    ADD_KEYWORD(volatile, TK_VOLATILE);
    ADD_KEYWORD(wchar_t, TK_LOCAL_WCHAR_T);
    ADD_KEYWORD(weakref_t, TK_LOCAL_WEAKREF_T);
    ADD_KEYWORD(while, TK_WHILE);
    ADD_KEYWORD(yield, TK_YIELD);
    return tbl;
}
  149. SQInteger SQLexer::Error(const SQChar *fmt, ...)
  150. {
  151. va_list vl;
  152. va_start(vl, fmt);
  153. scvsprintf(data->lasterror, sizeof(data->lasterror), fmt, vl);
  154. va_end(vl);
  155. if(_errfunc) _errfunc(_errtarget,data->lasterror);
  156. return -1;
  157. }
  158. SQInteger SQLexer::Next()
  159. {
  160. SQInteger t = _readf(_up);
  161. if(t > MAX_CHAR) return Error(_SC("Invalid character"));
  162. if(t != 0) {
  163. data->currdata = (LexChar)t;
  164. ++data->currentcolumn;
  165. ++data->readcount;
  166. return 0;
  167. }
  168. data->currdata = SQUIRREL_EOB;
  169. data->reached_eof = SQTrue;
  170. return 0;
  171. }
  172. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  173. {
  174. SQObjectPtr itr, key, val;
  175. SQInteger nitr;
  176. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  177. itr = (SQInteger)nitr;
  178. if(((SQInteger)_integer(val)) == tok)
  179. return _stringval(key);
  180. }
  181. return NULL;
  182. }
  183. const SQChar *SQLexer::GetTokenName(SQInteger tk_code) {
  184. const SQChar *str_tk;
  185. switch(tk_code){
  186. #define ENUM_TK(a) case TK_##a: str_tk = _SC("TK_" #a); break;
  187. SQ_KEYWORDS_LIST()
  188. #undef ENUM_TK
  189. default:
  190. str_tk = _SC("()");
  191. }
  192. return str_tk;
  193. }
// Consumes a block comment. Called with the cursor on the '*' of the opening
// "/*". Nested "/* ... */" pairs are tracked with a depth counter, and every
// newline bumps the current line number. When _want_comments is set the
// comment text is accumulated in longstr and exposed through svalue.
// Returns 0 on success, -1 (via Error or a failed NEXT) otherwise.
SQInteger SQLexer::LexBlockComment()
{
    /*
    if(CUR_CHAR == _SC('*'))
    {
    NEXT();
    if(CUR_CHAR != _SC('*')){ //document comment
    printf("Doument comment found at line %d\n", data->currentline);
    }
    }
    */
    bool done = false;
    SQInteger nested = 0; // depth of nested "/*" openers seen so far
    if(_want_comments) INIT_TEMP_STRING();
    NEXT(); //skip the '*' of the opening comment token
    while(!done) {
        switch(CUR_CHAR) {
        case _SC('/'): {
            if(_want_comments) APPEND_CHAR(CUR_CHAR);
            NEXT();
            if(CUR_CHAR == _SC('*')) ++nested; // "/*" opens a nested comment
            else continue; //re-evaluate this character; a '\n' is counted by its own case below
        }
        break;
        case _SC('*'): {
            NEXT();
            if(CUR_CHAR == _SC('/')) {
                if(nested) {
                    // "*/" closes a nested comment; keep its text
                    --nested;
                    if(_want_comments) APPEND_CHAR(_SC('*'));
                    break;
                }
                else
                {
                    // "*/" closes the outermost comment; it is not stored
                    done = true; NEXT(); continue;
                }
            }
            else if(_want_comments) APPEND_CHAR(_SC('*'));
            continue; //re-evaluate this character; a '\n' is counted by its own case below
        }
        break;
        case _SC('\n'): data->currentline++; break;
        case SQUIRREL_EOB: return Error(_SC("missing \"*/\" in comment"));
        }
        if(_want_comments) APPEND_CHAR(CUR_CHAR);
        NEXT();
    }
    if(_want_comments)
    {
        TERMINATE_BUFFER();
        // This pops the element pushed by TERMINATE_BUFFER() just above, so
        // longstr.size() ends up equal to the comment length.
        // NOTE(review): svalue then depends on the popped '\0' still being
        // present in the buffer storage - confirm against the vector's pop_back.
        if(data->longstr.size() > 0) data->longstr.pop_back();
        data->svalue = &data->longstr[0];
    }
    return 0;
}
  249. SQInteger SQLexer::LexLineComment()
  250. {
  251. if(_want_comments) INIT_TEMP_STRING();
  252. NEXT(); //remove the comment token
  253. while (CUR_CHAR != _SC('\n') && (!IS_EOB())) {if(_want_comments) APPEND_CHAR(CUR_CHAR); NEXT();}
  254. if(_want_comments)
  255. {
  256. TERMINATE_BUFFER();
  257. data->svalue = &data->longstr[0];
  258. }
  259. return 0;
  260. }
  261. SQInteger SQLexer::LookaheadLex()
  262. {
  263. if(CUR_CHAR == SQUIRREL_EOB) return 0;
  264. if(_data_lookahead.currentline >= 0)
  265. {
  266. return Error(_SC("lex lookahead already done"));
  267. }
  268. _data_lookahead.copy(&_data);
  269. data = &_data_lookahead;
  270. Lex();
  271. data = &_data;
  272. return _data_lookahead.curtoken;
  273. }
// Returns the next token from the input, or 0 at end of input and -1 on error.
// Whitespace is skipped; newlines update the line counter and are recorded as
// the current token without being returned. Comments are skipped unless
// _want_comments is set, in which case they are returned as comment tokens.
// If a LookaheadLex() result is pending it is adopted and returned instead of
// reading further input.
// dontThrowIntegerOverflow: for when in 32 bits parsing 64-bit integers inside an excluded ifdef
SQInteger SQLexer::Lex(bool dontThrowIntegerOverflow)
{
    if(_data_lookahead.currentline >= 0 && data != &_data_lookahead)
    {
        //we did a lookahead before, reuse it now
        _data.copy(&_data_lookahead);
        _data_lookahead.currentline = -1;
        return _data.curtoken;
    }
    // Remember where this token starts.
    data->lasttokenline = data->currentline;
    data->lasttokencolumn = data->currentcolumn;
    data->svalue = NULL;
    while(CUR_CHAR != SQUIRREL_EOB) {
        switch(CUR_CHAR){
        case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
        case _SC('\n'):
            // A newline is not returned, but it is recorded as the current token.
            data->currentline++;
            data->prevtoken=data->curtoken;
            data->curtoken=_SC('\n');
            NEXT();
            data->currentcolumn=1;
            continue;
        case _SC('#'):
            NEXT();
            if(CUR_CHAR == _SC('!')) //shell shebang, handled like a line comment
            {
                if(LexLineComment()) return -1;
                if(_want_comments) RETURN_TOKEN(TK_COMMENT_LINE)
                continue;
            }
            RETURN_TOKEN(TK_PRAGMA);
        case _SC('/'):
            NEXT();
            switch(CUR_CHAR){
            case _SC('*'):
                if(LexBlockComment()) return -1;
                if(_want_comments) RETURN_TOKEN(TK_COMMENT_BLOCK)
                continue;
            case _SC('/'):
                if(LexLineComment()) return -1;
                if(_want_comments) RETURN_TOKEN(TK_COMMENT_LINE)
                continue;
            case _SC('='):
                NEXT();
                RETURN_TOKEN(TK_DIVEQ);
            case _SC('>'):
                NEXT();
                RETURN_TOKEN(TK_ATTR_CLOSE);
            default:
                RETURN_TOKEN('/');
            }
        case _SC('='):
            // '=', '==' or '==='
            NEXT();
            if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
            else {
                NEXT();
                if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_EQ_IDENTITY) }
                else { RETURN_TOKEN(TK_EQ); }
            }
        case _SC('<'):
            // '<', '<=', '<=>', '<-', '<<', '<<=' or '</'
            NEXT();
            switch(CUR_CHAR) {
            case _SC('='):
                NEXT();
                if(CUR_CHAR == _SC('>')) {
                    NEXT();
                    RETURN_TOKEN(TK_3WAYSCMP);
                }
                RETURN_TOKEN(TK_LE)
            case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
            case _SC('<'):
                NEXT();
                if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_SHIFT_LEFT_EQ);}
                RETURN_TOKEN(TK_SHIFTL); break;
            case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
            }
            RETURN_TOKEN('<');
        case _SC('>'):
            // '>', '>=', '>>', '>>>' or '>>='
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
            else if(CUR_CHAR == _SC('>')){
                NEXT();
                if(CUR_CHAR == _SC('>')){
                    NEXT();
                    RETURN_TOKEN(TK_USHIFTR);
                } else if (CUR_CHAR == _SC('=')){
                    NEXT();
                    RETURN_TOKEN(TK_BIT_SHIFT_RIGHT_EQ);
                }
                RETURN_TOKEN(TK_SHIFTR);
            }
            else { RETURN_TOKEN('>') }
        case _SC('!'):
            // '!', '!=' or '!=='
            NEXT();
            if (CUR_CHAR != _SC('=')){ RETURN_TOKEN(_SC('!'))}
            else {
                NEXT();
                if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_NE_IDENTITY)}
                else { RETURN_TOKEN(TK_NE); }
            }
        case _SC('@'): {
            // '@' immediately followed by '"' starts a verbatim string; otherwise it is a plain '@' token
            SQInteger stype;
            NEXT();
            if(CUR_CHAR != _SC('"')) {
                RETURN_TOKEN(_SC('@'));
            }
            if((stype=ReadString(_SC('"'),true))!=-1) {
                RETURN_TOKEN(stype);
            }
            return Error(_SC("error parsing the string"));
        }
        case _SC('"'):
        case _SC('\''): {
            SQInteger stype;
            if((stype=ReadString(CUR_CHAR,false))!=-1){
                RETURN_TOKEN(stype);
            }
            return Error(_SC("error parsing the string"));
        }
        case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
        case _SC(';'): case _SC(','): case _SC('?'): case _SC('~'):
        {
            SQInteger ret = CUR_CHAR;
            NEXT();
            if((ret == _SC('[') || ret == _SC('{') || ret == _SC('(')) && CUR_CHAR == _SC('=')){
                //an opening bracket followed by '=' : try Lua-style literal delimiters
                SQInteger stype;
                if((stype=ReadString(ret,true))!=-1){
                    RETURN_TOKEN(stype);
                }
                return Error(_SC("error parsing the string"));
            }
            else RETURN_TOKEN(ret);
        }
        case _SC('.'):
            // '.', a number starting with '.', or '...'
            NEXT();
            if (CUR_CHAR != _SC('.')){
                if (scisdigit(CUR_CHAR)) {
                    SQInteger ret = ReadNumber(_SC('.'), dontThrowIntegerOverflow);
                    if(ret < 0) return -1;
                    RETURN_TOKEN(ret);
                }
                RETURN_TOKEN(_SC('.'));
            }
            NEXT();
            if (CUR_CHAR != _SC('.')){ return Error(_SC("invalid token '..'")); }
            NEXT();
            RETURN_TOKEN(TK_VARPARAMS);
        case _SC('^'):
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_XOR_EQ);}
            RETURN_TOKEN(_SC('^'));
        case _SC('&'):
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_AND_EQ);}
            if (CUR_CHAR != _SC('&')){ RETURN_TOKEN(_SC('&')) }
            else { NEXT(); RETURN_TOKEN(TK_AND); }
        case _SC('|'):
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_OR_EQ);}
            if (CUR_CHAR != _SC('|')){ RETURN_TOKEN(_SC('|')) }
            else { NEXT(); RETURN_TOKEN(TK_OR); }
        case _SC(':'):
            NEXT();
            if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(_SC(':')) }
            else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
        case _SC('*'):
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
            else RETURN_TOKEN(_SC('*'));
        case _SC('%'):
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
            else RETURN_TOKEN(_SC('%'));
        case _SC('-'):
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
            else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
            else if (CUR_CHAR == _SC('>')){ NEXT(); RETURN_TOKEN(_SC('.'));} //accept C/C++ like pointers: "->" is returned as '.'
            else RETURN_TOKEN(_SC('-'));
        case _SC('+'):
            NEXT();
            if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
            else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
            else RETURN_TOKEN(_SC('+'));
        case SQUIRREL_EOB:
            return 0;
        default:{
            if (scisdigit(CUR_CHAR)) {
                SQInteger ret = ReadNumber(0, dontThrowIntegerOverflow);
                if(ret < 0) return -1;
                RETURN_TOKEN(ret);
            }
            else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
                SQInteger t = ReadID();
                if(t < 0) return -1;
                RETURN_TOKEN(t);
            }
            else {
                // Any other non-control character is returned as its own token.
                SQInteger c = CUR_CHAR;
                if (sciscntrl((int)c)) return Error(_SC("unexpected character(control)"));
                NEXT();
                RETURN_TOKEN(c);
            }
            // Not reached: every branch above returns.
            RETURN_TOKEN(0);
        }
        }
    }
    return 0;
}
  485. SQInteger SQLexer::GetIDType(const SQChar *s,SQInteger len)
  486. {
  487. SQObjectPtr t;
  488. if(_keywords->GetStr(s,len, t)) {
  489. return SQInteger(_integer(t));
  490. }
  491. return TK_IDENTIFIER;
  492. }
  493. #ifdef SQUNICODE
  494. #if WCHAR_SIZE == 2
  495. SQInteger SQLexer::AddUTF16(SQUnsignedInteger ch)
  496. {
  497. if (ch >= 0x10000)
  498. {
  499. SQUnsignedInteger code = (ch - 0x10000);
  500. APPEND_CHAR((SQChar)(0xD800 | (code >> 10)));
  501. APPEND_CHAR((SQChar)(0xDC00 | (code & 0x3FF)));
  502. return 2;
  503. }
  504. else {
  505. APPEND_CHAR((SQChar)ch);
  506. return 1;
  507. }
  508. }
  509. #endif
  510. #else
  511. SQInteger SQLexer::AddUTF8(SQUnsignedInteger ch)
  512. {
  513. if (ch < 0x80) {
  514. APPEND_CHAR((char)ch);
  515. return 1;
  516. }
  517. if (ch < 0x800) {
  518. APPEND_CHAR((SQChar)((ch >> 6) | 0xC0));
  519. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  520. return 2;
  521. }
  522. if (ch < 0x10000) {
  523. APPEND_CHAR((SQChar)((ch >> 12) | 0xE0));
  524. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  525. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  526. return 3;
  527. }
  528. if (ch < 0x110000) {
  529. APPEND_CHAR((SQChar)((ch >> 18) | 0xF0));
  530. APPEND_CHAR((SQChar)(((ch >> 12) & 0x3F) | 0x80));
  531. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  532. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  533. return 4;
  534. }
  535. return 0;
  536. }
  537. #endif
  538. SQInteger SQLexer::ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits)
  539. {
  540. NEXT();
  541. if (!isxdigit(CUR_CHAR)) return Error(_SC("hexadecimal number expected"));
  542. SQInteger n = 0;
  543. while (isxdigit(CUR_CHAR) && n < maxdigits) {
  544. dest[n] = CUR_CHAR;
  545. n++;
  546. NEXT();
  547. }
  548. dest[n] = 0;
  549. return n;
  550. }
  551. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  552. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  553. {
  554. INIT_TEMP_STRING();
  555. SQInteger start_equals = 0;
  556. SQChar cpp_delimin[32], cdelim1, cdelim2, saved_ndelim = ndelim;
  557. if(ndelim == _SC('{')){
  558. cdelim1 = _SC('{');
  559. cdelim2 = _SC('}');
  560. }
  561. else if(ndelim == _SC('(')){
  562. cdelim1 = _SC('(');
  563. cdelim2 = _SC(')');
  564. }
  565. else if(ndelim == _SC('[')){
  566. cdelim1 = _SC('[');
  567. cdelim2 = _SC(']');
  568. }
  569. else if(ndelim == _SC('R')){
  570. cdelim1 = _SC('(');
  571. cdelim2 = _SC(')');
  572. ndelim = cdelim2;
  573. if(CUR_CHAR != _SC('"'))
  574. {
  575. return Error(_SC("expect '\"' on literal delimiter"));
  576. }
  577. NEXT();
  578. size_t i=0;
  579. for(;(i < sizeof(cpp_delimin)-1) && (CUR_CHAR != _SC('(')); ++i)
  580. {
  581. cpp_delimin[i] = CUR_CHAR;
  582. NEXT();
  583. }
  584. cpp_delimin[i] = _SC('\0');
  585. }
  586. else
  587. {
  588. cdelim1 = cdelim2 = _SC('\0');
  589. }
  590. if((cdelim1 == saved_ndelim) && (CUR_CHAR == _SC('='))){
  591. //lua like literal
  592. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  593. ++start_equals;
  594. NEXT();
  595. }
  596. if(CUR_CHAR != cdelim1){
  597. //it's not a lua literal delimiter
  598. return Error(_SC("expect '%c' on literal delimiter"), cdelim1);
  599. }
  600. ndelim = cdelim2;
  601. }
  602. NEXT();
  603. if(IS_EOB()) return -1;
  604. if(start_equals) {
  605. int cr_nl = CUR_CHAR == _SC('\r');
  606. if(cr_nl) NEXT();
  607. cr_nl = CUR_CHAR == _SC('\n');
  608. if(cr_nl) NEXT();
  609. if(cr_nl) {//if a new line follows the start of delimiter drop it
  610. ++data->currentline;
  611. if(IS_EOB())
  612. {
  613. return Error(_SC("unfinished string"));
  614. }
  615. }
  616. }
  617. for(;;) {
  618. try_again:
  619. while(CUR_CHAR != ndelim) {
  620. SQInteger x = CUR_CHAR;
  621. switch(x) {
  622. case SQUIRREL_EOB:
  623. return Error(_SC("unfinished string"));
  624. case _SC('\n'):
  625. if(!verbatim) return Error(_SC("newline in a constant"));
  626. APPEND_CHAR(CUR_CHAR); NEXT();
  627. data->currentline++;
  628. break;
  629. case _SC('\\'):
  630. if(verbatim) {
  631. APPEND_CHAR(_SC('\\')); NEXT();
  632. }
  633. else {
  634. NEXT();
  635. switch(CUR_CHAR) {
  636. case _SC('x'): {
  637. const SQInteger maxdigits = sizeof(SQChar) * 2;
  638. SQChar temp[maxdigits + 1];
  639. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  640. SQChar *stemp;
  641. APPEND_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  642. }
  643. break;
  644. case _SC('U'):
  645. case _SC('u'): {
  646. const SQInteger maxdigits = CUR_CHAR == _SC('u') ? 4 : 8;
  647. SQChar temp[8 + 1];
  648. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  649. SQChar *stemp;
  650. #ifdef SQUNICODE
  651. #if WCHAR_SIZE == 2
  652. AddUTF16(scstrtoul(temp, &stemp, 16));
  653. #else
  654. ADD_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  655. #endif
  656. #else
  657. AddUTF8(scstrtoul(temp, &stemp, 16));
  658. #endif
  659. }
  660. break;
  661. //end of string continuation
  662. case _SC('\n'):
  663. if(ndelim == _SC('"')){
  664. data->currentline++;
  665. NEXT();
  666. break;
  667. }
  668. //falthrough
  669. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  670. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  671. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  672. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  673. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  674. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  675. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  676. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  677. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  678. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  679. case _SC('0'):
  680. case _SC('1'):
  681. case _SC('2'):
  682. case _SC('3'):
  683. case _SC('4'):
  684. case _SC('5'):
  685. case _SC('6'):
  686. case _SC('7'):
  687. {
  688. int ndigits = 0;
  689. int octal_char = 0;
  690. while(scisodigit(CUR_CHAR)) {
  691. octal_char = (octal_char)*8+(CUR_CHAR-_SC('0'));
  692. NEXT();
  693. if(++ndigits == 3) break;
  694. }
  695. if(octal_char > 0xff) return Error(_SC("max value of embedded octal digits is \377"));
  696. APPEND_CHAR(octal_char);
  697. goto try_again;
  698. }
  699. break;
  700. case _SC('/'): APPEND_CHAR(CUR_CHAR); NEXT();
  701. break;
  702. default:
  703. return Error(_SC("unrecognised escaper char"));
  704. }
  705. }
  706. break;
  707. default:
  708. APPEND_CHAR(CUR_CHAR);
  709. NEXT();
  710. }
  711. }
  712. NEXT();
  713. if(start_equals){
  714. bool lastBraceAdded = false;
  715. if(CUR_CHAR == _SC('=')){
  716. SQInteger end_equals = start_equals;
  717. NEXT();
  718. if(CUR_CHAR == _SC('=') || CUR_CHAR == cdelim2){
  719. --end_equals;
  720. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  721. --end_equals;
  722. NEXT();
  723. }
  724. if(end_equals) return Error(_SC("expect same number of '=' on literal delimiter"));
  725. if(CUR_CHAR != cdelim2) return Error(_SC("expect '%c' to close literal delimiter"), cdelim2);
  726. NEXT();
  727. break;
  728. }
  729. APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  730. APPEND_CHAR(_SC('='));
  731. lastBraceAdded = true;
  732. }
  733. if(!lastBraceAdded) APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  734. APPEND_CHAR(CUR_CHAR);
  735. NEXT();
  736. }
  737. else if(saved_ndelim == _SC('R')) {
  738. if(CUR_CHAR == ndelim)
  739. {
  740. APPEND_CHAR(ndelim);
  741. goto try_again;
  742. }
  743. size_t i = 0;
  744. for(;(i < sizeof(cpp_delimin)-1) && (CUR_CHAR != _SC('"')) && cpp_delimin[i]; ++i)
  745. {
  746. if(CUR_CHAR != cpp_delimin[i])
  747. {
  748. //false positive append all chars till here and continue
  749. APPEND_CHAR(ndelim);
  750. for(size_t j=0; j < i; ++j) APPEND_CHAR(cpp_delimin[j]); //recover already eaten chars from buffer
  751. APPEND_CHAR(CUR_CHAR); //append the last one that mismatch
  752. if(CUR_CHAR == _SC('\n')) data->currentline++;
  753. NEXT();
  754. goto try_again;
  755. //return Error(_SC("expect \"%s\" to close literal delimiter"), cpp_delimin);
  756. }
  757. NEXT();
  758. }
  759. if(CUR_CHAR != _SC('"')) return Error(_SC("expect '\"' to close literal delimiter"));
  760. NEXT(); //eat last '"'
  761. break;
  762. }
  763. else if(verbatim && CUR_CHAR == _SC('"')) { //double quotation
  764. APPEND_CHAR(CUR_CHAR);
  765. NEXT();
  766. }
  767. else {
  768. break;
  769. }
  770. }
  771. TERMINATE_BUFFER();
  772. SQInteger len = data->longstr.size()-1;
  773. if(ndelim == _SC('\'') && !_want_stringSingleAndDoubleQuotes) {
  774. if(len == 0) return Error(_SC("empty constant"));
  775. if(len > 1) return Error(_SC("constant too long"));
  776. data->nvalue = data->longstr[0];
  777. data->isCharacter = SQTrue;
  778. return TK_INTEGER;
  779. }
  780. data->svalue = &data->longstr[0];
  781. return TK_STRING_LITERAL;
  782. }
// Overflow thresholds for digit accumulation: SQ_INT_MAX divided by ten, and
// the last decimal digit of SQ_INT_MAX. Both are used by ADD_CHECK_DIGIT.
#define MAXBY10 (SQUnsignedInteger)(SQ_INT_MAX / 10)
#define MAXLASTD (SQUnsignedInteger)(SQ_INT_MAX % 10)
  785. static int isneg (const SQChar **s) {
  786. if (**s == _SC('-')) { (*s)++; return 1; }
  787. else if (**s == _SC('+')) (*s)++;
  788. return 0;
  789. }
// Accumulates one digit into the local `a` (a = a*base + dig), first making
// the enclosing function return false if the overflow test trips.
// Requires locals named `a`, `d` and `neg` at the point of use.
// NOTE(review): the thresholds (MAXBY10 / MAXLASTD) are derived for base 10
// but the macro is also invoked with base 8 and 16 - confirm that is intended.
#define ADD_CHECK_DIGIT(dig, base) \
    if (a >= MAXBY10 && (a > MAXBY10 || d > ((int)MAXLASTD + neg))) /* overflow? */ \
    return false; /* do not accept it (as integer) */ \
    a = a*base+dig;
  794. static bool LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  795. {
  796. SQUnsignedInteger a = 0;
  797. int d = 0, neg = isneg(&s);
  798. while(*s != 0)
  799. {
  800. if(scisdigit(*s)) d = (*s++)-_SC('0');
  801. else if(scisxdigit(*s)) d = toupper(*s++)-_SC('A')+10;
  802. else { assert(0); }
  803. ADD_CHECK_DIGIT(d, 16);
  804. }
  805. *res = a;
  806. return true;
  807. }
  808. static bool LexInteger(const SQChar *s,SQUnsignedInteger *res)
  809. {
  810. SQUnsignedInteger a = 0;
  811. int d = 0, neg = isneg(&s);
  812. while(*s != 0)
  813. {
  814. d = (*s++)-_SC('0');
  815. ADD_CHECK_DIGIT(d, 10);
  816. }
  817. *res = neg ? (((SQUnsignedInteger)0)-a) : a;
  818. return true;
  819. }
  820. static bool LexOctal(const SQChar *s,SQUnsignedInteger *res)
  821. {
  822. SQUnsignedInteger a = 0;
  823. int d = 0, neg = isneg(&s);
  824. while(*s != 0)
  825. {
  826. if(scisodigit(*s)) d = (*s++)-_SC('0');
  827. else { assert(0); }
  828. ADD_CHECK_DIGIT(d, 8);
  829. }
  830. *res = neg ? (((SQUnsignedInteger)0)-a) : a;
  831. return true;
  832. }
  833. static SQInteger isexponent(SQInteger c) { return c == _SC('e') || c==_SC('E'); }
  834. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  835. //dontThrowIntegerOverflow for when in 32bits parsing 64bits integer inside excluded ifdef
  836. SQInteger SQLexer::ReadNumber(SQInteger startChar, bool dontThrowIntegerOverflow)
  837. {
  838. #define TINT 1
  839. #define TFLOAT 2
  840. #define THEX 3
  841. #define TSCIENTIFIC 4
  842. #define TOCTAL 5
  843. SQInteger rtype, type = TINT, firstchar = startChar ? startChar : CUR_CHAR;
  844. SQUnsignedInteger itmp=0;
  845. SQChar *sTemp;
  846. INIT_TEMP_STRING();
  847. if(!startChar) NEXT();
  848. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  849. if(scisodigit(CUR_CHAR)) {
  850. type = TOCTAL;
  851. while(scisodigit(CUR_CHAR)) {
  852. APPEND_CHAR(CUR_CHAR);
  853. NEXT();
  854. }
  855. if(scisdigit(CUR_CHAR)) return Error(_SC("invalid octal number"));
  856. }
  857. else {
  858. NEXT();
  859. type = THEX;
  860. while(isxdigit(CUR_CHAR)) {
  861. APPEND_CHAR(CUR_CHAR);
  862. NEXT();
  863. }
  864. if((data->longstr.size() > MAX_HEX_DIGITS) && !dontThrowIntegerOverflow)
  865. return Error(_SC("too many digits for an Hex number"));
  866. }
  867. }
  868. else {
  869. APPEND_CHAR((int)firstchar);
  870. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  871. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  872. if(isexponent(CUR_CHAR)) {
  873. if(type != TFLOAT) return Error(_SC("invalid numeric format"));
  874. type = TSCIENTIFIC;
  875. APPEND_CHAR(CUR_CHAR);
  876. NEXT();
  877. if(CUR_CHAR == _SC('+') || CUR_CHAR == _SC('-')){
  878. APPEND_CHAR(CUR_CHAR);
  879. NEXT();
  880. }
  881. if(!scisdigit(CUR_CHAR)) return Error(_SC("exponent expected"));
  882. }
  883. APPEND_CHAR(CUR_CHAR);
  884. NEXT();
  885. }
  886. }
  887. TERMINATE_BUFFER();
  888. bool okNumber = true;
  889. switch(type) {
  890. case TINT:
  891. okNumber = LexInteger(&data->longstr[0],&itmp);
  892. if(okNumber) break;
  893. //fallthrough
  894. case TSCIENTIFIC:
  895. case TFLOAT:
  896. data->fvalue = (SQFloat)scstrtod(&data->longstr[0],&sTemp);
  897. if(CUR_CHAR == _SC('f')) NEXT(); //0.0f C/C++ notation
  898. return TK_FLOAT;
  899. case THEX:
  900. okNumber = LexHexadecimal(&data->longstr[0],&itmp);
  901. break;
  902. case TOCTAL:
  903. okNumber = LexOctal(&data->longstr[0],&itmp);
  904. break;
  905. }
  906. if(!okNumber && !dontThrowIntegerOverflow) Error(_SC("integer overflow %s"), &data->longstr[0]);
  907. rtype = TK_INTEGER;
  908. data->isCharacter = SQFalse;
  909. switch(type) {
  910. case TINT:
  911. switch(CUR_CHAR)
  912. {
  913. case _SC('u'):
  914. case _SC('U'):
  915. rtype = TK_UNSIGNED_INTEGER;
  916. NEXT();
  917. break;
  918. }
  919. switch(CUR_CHAR)
  920. {
  921. case _SC('s'):
  922. case _SC('S'):
  923. rtype = (rtype == TK_UNSIGNED_INTEGER) ? TK_UNSIGNED_SHORT_INTEGER : TK_SHORT_INTEGER;
  924. NEXT();
  925. break;
  926. case _SC('l'):
  927. case _SC('L'):
  928. rtype = (rtype == TK_UNSIGNED_INTEGER) ? TK_UNSIGNED_LONG_INTEGER : TK_LONG_INTEGER;
  929. NEXT();
  930. if((CUR_CHAR == _SC('l')) || (CUR_CHAR == _SC('L')) )
  931. {
  932. switch(rtype)
  933. {
  934. case TK_UNSIGNED_INTEGER:
  935. case TK_UNSIGNED_LONG_INTEGER:
  936. rtype = TK_UNSIGNED_LONG_LONG_INTEGER;
  937. break;
  938. default:
  939. rtype = TK_LONG_LONG_INTEGER;
  940. }
  941. NEXT();
  942. }
  943. break;
  944. }
  945. case THEX:
  946. case TOCTAL:
  947. //to allow 64 bits integers comment bellow
  948. //if(itmp > INT_MAX) return Error(_SC("integer overflow %ulld %d"));
  949. data->nvalue = (SQInteger) itmp;
  950. return rtype;
  951. }
  952. return 0;
  953. }
  954. SQInteger SQLexer::ReadID()
  955. {
  956. SQInteger res;
  957. INIT_TEMP_STRING();
  958. do {
  959. APPEND_CHAR(CUR_CHAR);
  960. NEXT();
  961. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  962. TERMINATE_BUFFER();
  963. if((CUR_CHAR == _SC('"')) && (data->longstr[0] == _SC('R')) && (data->longstr.size() == 2))
  964. {
  965. //C++ multiline string
  966. return ReadString(_SC('R'),true);
  967. }
  968. res = GetIDType(&data->longstr[0],data->longstr.size() - 1);
  969. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR || res == TK_DESTRUCTOR) {
  970. data->svalue = &data->longstr[0];
  971. }
  972. return res;
  973. }