sqlexer.cpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include <limits.h>
  8. #include "sqtable.h"
  9. #include "sqstring.h"
  10. #include "sqcompiler.h"
  11. #include "sqlexer.h"
  // Helper macros used by the SQLexer member functions below. They all
  // operate on the lexer's active state object through the `data` pointer.
  // The character currently under the lexer cursor.
  12. #define CUR_CHAR (data->currdata)
  // Shifts the current token into prevtoken, stores t as the current token
  // and returns t from the enclosing function.
  13. #define RETURN_TOKEN(t) { data->prevtoken = data->curtoken; data->curtoken = t; return t;}
  // True when the current character compares <= SQUIRREL_EOB.
  14. #define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
  15. //#define NEXT() {SQInteger rc = Next(); if(rc < 0) return rc; data->currentcolumn++;}
  // Advances to the next character; makes the enclosing function return -1
  // when Next() reports a non-zero result.
  16. #define NEXT() {if(Next()) return -1;}
  // Empties the scratch buffer data->longstr.
  17. #define INIT_TEMP_STRING() { data->longstr.resize(0);}
  // Appends one character to the scratch buffer.
  18. #define APPEND_CHAR(c) { data->longstr.push_back(c);}
  // Appends the terminating '\0' to the scratch buffer.
  19. #define TERMINATE_BUFFER() {data->longstr.push_back(_SC('\0'));}
  // Inserts the stringified `key` with value `id` into a table named `tbl`;
  // expects `tbl` and `_sharedstate` to be in scope at the point of use.
  20. #define ADD_KEYWORD(key,id) tbl->NewSlot( SQString::Create(_sharedstate, _SC(#key)) ,SQInteger(id))
  21. SQLexer::SQLexer(){_keywords=0;}
  22. SQLexer::~SQLexer()
  23. {
  24. if(_keywords) _keywords->Release();
  25. }
  26. SQInteger SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg,
  27. SQUserPointer up,CompilerErrorFunc efunc,void *ed,
  28. SQBool want_comments, SQBool want_stringSingleAndDoubleQuotes)
  29. {
  30. _want_comments = want_comments;
  31. _want_stringSingleAndDoubleQuotes = want_stringSingleAndDoubleQuotes;
  32. data = &_data;
  33. _data_lookahead.currentline = -1;
  34. _errfunc = efunc;
  35. _errtarget = ed;
  36. _sharedstate = ss;
  37. if(_keywords) _keywords->Release();
  38. _keywords = GetKeywords();
  39. return ResetReader(rg, up, 1);
  40. }
  41. SQInteger SQLexer::ResetReader(SQLEXREADFUNC rg, SQUserPointer up, SQInteger line)
  42. {
  43. _readf = rg;
  44. _up = up;
  45. data->lasttokenline = data->currentline = line;
  46. data->lasttokencolumn = 0;
  47. data->currentcolumn = 0;
  48. data->prevtoken = -1;
  49. data->readcount = 0;
  50. data->reached_eof = SQFalse;
  51. data->isCharacter = SQFalse;
  52. return Next();
  53. }
  54. SQTable * SQLexer::GetKeywords()
  55. {
  56. SQTable *tbl = SQTable::Create(_sharedstate, (TK_LAST_ENUM_TOKEN - TK_FIRST_ENUM_TOKEN - 1) /*26*/);
  57. ADD_KEYWORD(any_t, TK_LOCAL_ANY_T);
  58. ADD_KEYWORD(array_t, TK_LOCAL_ARRAY_T);
  59. ADD_KEYWORD(as, TK_AS);
  60. ADD_KEYWORD(auto, TK_LOCAL);
  61. ADD_KEYWORD(base, TK_BASE);
  62. ADD_KEYWORD(bool_t, TK_LOCAL_BOOL_T);
  63. ADD_KEYWORD(break, TK_BREAK);
  64. ADD_KEYWORD(case, TK_CASE);
  65. ADD_KEYWORD(catch, TK_CATCH);
  66. ADD_KEYWORD(char_t, TK_LOCAL_CHAR_T);
  67. ADD_KEYWORD(class,TK_CLASS);
  68. ADD_KEYWORD(clone, TK_CLONE);
  69. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  70. ADD_KEYWORD(const,TK_CONST);
  71. ADD_KEYWORD(constexpr,TK_CONSTEXPR);
  72. ADD_KEYWORD(continue, TK_CONTINUE);
  73. ADD_KEYWORD(declare, TK_DECLARE);
  74. ADD_KEYWORD(default, TK_DEFAULT);
  75. ADD_KEYWORD(delete, TK_DELETE);
  76. ADD_KEYWORD(destructor,TK_DESTRUCTOR);
  77. ADD_KEYWORD(do, TK_DO);
  78. ADD_KEYWORD(double_t, TK_LOCAL_DOUBLE_T);
  79. ADD_KEYWORD(else, TK_ELSE);
  80. ADD_KEYWORD(enum,TK_ENUM);
  81. ADD_KEYWORD(extends,TK_EXTENDS);
  82. ADD_KEYWORD(extern,TK_EXTERN);
  83. ADD_KEYWORD(false,TK_FALSE);
  84. ADD_KEYWORD(__FILE__,TK___FILE__);
  85. ADD_KEYWORD(float_t, TK_LOCAL_FLOAT_T);
  86. ADD_KEYWORD(foreach, TK_FOREACH);
  87. ADD_KEYWORD(for, TK_FOR);
  88. ADD_KEYWORD(friend, TK_FRIEND);
  89. ADD_KEYWORD(function, TK_FUNCTION);
  90. ADD_KEYWORD(__FUNCTION__,TK___FUNCTION__);
  91. ADD_KEYWORD(goto, TK_GOTO);
  92. ADD_KEYWORD(if, TK_IF);
  93. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  94. ADD_KEYWORD(int16_t, TK_LOCAL_INT16_T);
  95. ADD_KEYWORD(int32_t, TK_LOCAL_INT32_T);
  96. ADD_KEYWORD(int64_t, TK_LOCAL_INT64_T);
  97. ADD_KEYWORD(int8_t, TK_LOCAL_INT8_T);
  98. ADD_KEYWORD(in, TK_IN);
  99. ADD_KEYWORD(inline, TK_INLINE);
  100. ADD_KEYWORD(interface, TK_INTERFACE);
  101. ADD_KEYWORD(int_t, TK_LOCAL_INT_T);
  102. ADD_KEYWORD(let, TK_LOCAL);
  103. ADD_KEYWORD(__LINE__,TK___LINE__);
  104. ADD_KEYWORD(local, TK_LOCAL);
  105. ADD_KEYWORD(long_double_t, TK_LOCAL_LONG_DOUBLE_T);
  106. ADD_KEYWORD(new,TK_IGNORE);
  107. ADD_KEYWORD(noexcept,TK_NOEXCEPT);
  108. ADD_KEYWORD(number_t, TK_LOCAL_NUMBER_T);
  109. ADD_KEYWORD(null, TK_NULL);
  110. ADD_KEYWORD(NULL, TK_NULL);
  111. ADD_KEYWORD(private,TK_PRIVATE);
  112. ADD_KEYWORD(protected,TK_PROTECTED);
  113. ADD_KEYWORD(public,TK_PUBLIC);
  114. ADD_KEYWORD(rawcall, TK_RAWCALL);
  115. ADD_KEYWORD(resume, TK_RESUME);
  116. ADD_KEYWORD(return, TK_RETURN);
  117. ADD_KEYWORD(size_t,TK_LOCAL_SIZE_T);
  118. ADD_KEYWORD(ssize_t,TK_LOCAL_SSIZE_T);
  119. ADD_KEYWORD(static,TK_STATIC);
  120. ADD_KEYWORD(string_t, TK_LOCAL_STRING_T);
  121. ADD_KEYWORD(struct,TK_STRUCT);
  122. ADD_KEYWORD(switch, TK_SWITCH);
  123. ADD_KEYWORD(table_t, TK_LOCAL_TABLE_T);
  124. ADD_KEYWORD(template, TK_TEMPLATE);
  125. ADD_KEYWORD(this, TK_THIS);
  126. ADD_KEYWORD(throw, TK_THROW);
  127. ADD_KEYWORD(typedef, TK_TYPEDEF);
  128. ADD_KEYWORD(true,TK_TRUE);
  129. ADD_KEYWORD(try, TK_TRY);
  130. ADD_KEYWORD(typeof, TK_TYPEOF);
  131. ADD_KEYWORD(uint16_t, TK_LOCAL_UINT16_T);
  132. ADD_KEYWORD(uint32_t, TK_LOCAL_UINT32_T);
  133. ADD_KEYWORD(uint64_t, TK_LOCAL_UINT64_T);
  134. ADD_KEYWORD(uint8_t, TK_LOCAL_UINT8_T);
  135. ADD_KEYWORD(uint_t, TK_LOCAL_UINT_T);
  136. ADD_KEYWORD(unsafe, TK_UNSAFE);
  137. ADD_KEYWORD(using, TK_USING);
  138. ADD_KEYWORD(var, TK_LOCAL);
  139. ADD_KEYWORD(virtual, TK_VIRTUAL);
  140. ADD_KEYWORD(void_ptr_t, TK_LOCAL_VOIDPTR_T);
  141. ADD_KEYWORD(void, TK_VOID);
  142. ADD_KEYWORD(volatile, TK_VOLATILE);
  143. ADD_KEYWORD(wchar_t, TK_LOCAL_WCHAR_T);
  144. ADD_KEYWORD(weakref_t, TK_LOCAL_WEAKREF_T);
  145. ADD_KEYWORD(while, TK_WHILE);
  146. ADD_KEYWORD(yield, TK_YIELD);
  147. return tbl;
  148. }
  149. SQInteger SQLexer::Error(const SQChar *fmt, ...)
  150. {
  151. va_list vl;
  152. va_start(vl, fmt);
  153. scvsprintf(data->lasterror, sizeof(data->lasterror), fmt, vl);
  154. va_end(vl);
  155. if(_errfunc) _errfunc(_errtarget,data->lasterror);
  156. return -1;
  157. }
  158. SQInteger SQLexer::Next()
  159. {
  160. SQInteger t = _readf(_up);
  161. if(t > MAX_CHAR) return Error(_SC("Invalid character"));
  162. if(t != 0) {
  163. data->currdata = (LexChar)t;
  164. ++data->currentcolumn;
  165. ++data->readcount;
  166. return 0;
  167. }
  168. data->currdata = SQUIRREL_EOB;
  169. data->reached_eof = SQTrue;
  170. return 0;
  171. }
  172. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  173. {
  174. SQObjectPtr itr, key, val;
  175. SQInteger nitr;
  176. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  177. itr = (SQInteger)nitr;
  178. if(((SQInteger)_integer(val)) == tok)
  179. return _stringval(key);
  180. }
  181. return NULL;
  182. }
  183. const SQChar *SQLexer::GetTokenName(SQInteger tk_code) {
  184. const SQChar *str_tk;
  185. switch(tk_code){
  186. #define ENUM_TK(a) case TK_##a: str_tk = _SC("TK_" #a); break;
  187. SQ_KEYWORDS_LIST()
  188. #undef ENUM_TK
  189. default:
  190. str_tk = _SC("()");
  191. }
  192. return str_tk;
  193. }
  // Consumes a block comment; called with the cursor on the '*' that follows
  // the opening '/'. Nested "/*" ... "*/" pairs are tracked with a depth
  // counter. When _want_comments is set the comment text is accumulated in
  // data->longstr and exposed through data->svalue.
  // Returns 0 on success, or the Error() result (-1) when the input ends
  // before the closing "*/"; NEXT() also returns -1 on a read failure.
  194. SQInteger SQLexer::LexBlockComment()
  195. {
  196. /*
  197. if(CUR_CHAR == _SC('*'))
  198. {
  199. NEXT();
  200. if(CUR_CHAR != _SC('*')){ //document comment
  201. printf("Doument comment found at line %d\n", data->currentline);
  202. }
  203. }
  204. */
  205. bool done = false;
  // Depth of nested "/*" openers seen inside this comment.
  206. SQInteger nested = 0;
  207. if(_want_comments) INIT_TEMP_STRING();
  208. NEXT(); //remove the comment token '*'
  209. while(!done) {
  210. switch(CUR_CHAR) {
  // '/' may start a nested comment: keep it, then look at what follows.
  211. case _SC('/'): {
  212. if(_want_comments) APPEND_CHAR(CUR_CHAR);
  213. NEXT();
  214. if(CUR_CHAR == _SC('*')) ++nested;
  215. else continue; //reevaluate, when it's a \n it'll be incremented line bellow
  216. }
  217. break;
  // '*' may start a closer: "*/" either pops one nesting level or ends the comment.
  218. case _SC('*'): {
  219. NEXT();
  220. if(CUR_CHAR == _SC('/')) {
  221. if(nested) {
  222. --nested;
  223. if(_want_comments) APPEND_CHAR(_SC('*'));
  224. break;
  225. }
  226. else
  227. {
  // Outermost closer: the '*' is not stored and the '/' is skipped.
  228. done = true; NEXT(); continue;
  229. }
  230. }
  231. else if(_want_comments) APPEND_CHAR(_SC('*'));
  232. continue; //reevaluate, when it's a \n it'll be incremented line bellow
  233. }
  234. break;
  235. case _SC('\n'): data->currentline++; break;
  236. case SQUIRREL_EOB: return Error(_SC("missing \"*/\" in comment"));
  237. }
  // Reached by `break` only: store the current character and move on.
  238. if(_want_comments) APPEND_CHAR(CUR_CHAR);
  239. NEXT();
  240. }
  241. if(_want_comments)
  242. {
  243. TERMINATE_BUFFER();
  // NOTE(review): this pop_back() runs right after TERMINATE_BUFFER(), so the
  // element it removes is the '\0' just pushed rather than a '*' (the final
  // '*' is never appended on the closing path above) — confirm this is intended.
  244. if(data->longstr.size() > 0) data->longstr.pop_back(); //remove the last '*'
  245. data->svalue = &data->longstr[0];
  246. }
  247. return 0;
  248. }
  249. SQInteger SQLexer::LexLineComment()
  250. {
  251. if(_want_comments) INIT_TEMP_STRING();
  252. NEXT(); //remove the comment token
  253. while (CUR_CHAR != _SC('\n') && (!IS_EOB())) {if(_want_comments) APPEND_CHAR(CUR_CHAR); NEXT();}
  254. if(_want_comments)
  255. {
  256. TERMINATE_BUFFER();
  257. data->svalue = &data->longstr[0];
  258. }
  259. return 0;
  260. }
  261. SQInteger SQLexer::LookaheadLex()
  262. {
  263. if(CUR_CHAR == SQUIRREL_EOB) return 0;
  264. if(_data_lookahead.currentline >= 0)
  265. {
  266. return Error(_SC("lex lookahead already done"));
  267. }
  268. _data_lookahead.copy(&_data);
  269. data = &_data_lookahead;
  270. Lex();
  271. data = &_data;
  272. return _data_lookahead.curtoken;
  273. }
  // Scans and returns the next token. If LookaheadLex() already produced a
  // token, that buffered state is copied into the primary state and its token
  // is returned without scanning. Otherwise whitespace and (unless
  // _want_comments is set) comments are skipped and one token is recognised.
  // Returns 0 at end of input and -1 when a lexical error is reported.
  // dontThrowIntegerOverflow is forwarded to ReadNumber().
  274. //dontThrowIntegerOverflow for when in 32bits parsing 64bits integer inside excluded ifdef
  275. SQInteger SQLexer::Lex(bool dontThrowIntegerOverflow)
  276. {
  277. if(_data_lookahead.currentline >= 0 && data != &_data_lookahead)
  278. {
  279. //we did a lookahead before, reuse it now
  280. _data.copy(&_data_lookahead);
  281. _data_lookahead.currentline = -1;
  282. return _data.curtoken;
  283. }
  // Remember where scanning for this token started.
  284. data->lasttokenline = data->currentline;
  285. data->lasttokencolumn = data->currentcolumn;
  286. data->svalue = NULL;
  287. while(CUR_CHAR != SQUIRREL_EOB) {
  288. switch(CUR_CHAR){
  // Horizontal whitespace is skipped.
  289. case _SC('\t'): case _SC('\r'): case _SC(' '):
  290. case _SC('\f'): case _SC('\v'): NEXT(); continue;
  // A newline is not returned, but it is recorded as the current token.
  291. case _SC('\n'):
  292. data->currentline++;
  293. data->prevtoken=data->curtoken;
  294. data->curtoken=_SC('\n');
  295. NEXT();
  296. data->currentcolumn=1;
  297. continue;
  // "#!" starts a line comment; any other '#' yields TK_PRAGMA.
  298. case _SC('#'):
  299. NEXT();
  300. if(CUR_CHAR == _SC('!')) //shell shebang
  301. {
  302. if(LexLineComment()) return -1;
  303. if(_want_comments) RETURN_TOKEN(TK_COMMENT_LINE)
  304. continue;
  305. }
  306. RETURN_TOKEN(TK_PRAGMA);
  // '/' starts a comment, "/=", "/>" or the plain division operator.
  307. case _SC('/'):
  308. NEXT();
  309. switch(CUR_CHAR){
  310. case _SC('*'):
  311. if(LexBlockComment()) return -1;
  312. if(_want_comments) RETURN_TOKEN(TK_COMMENT_BLOCK)
  313. continue;
  314. case _SC('/'):
  315. if(LexLineComment()) return -1;
  316. if(_want_comments) RETURN_TOKEN(TK_COMMENT_LINE)
  317. continue;
  318. case _SC('='):
  319. NEXT();
  320. RETURN_TOKEN(TK_DIVEQ);
  321. case _SC('>'):
  322. NEXT();
  323. RETURN_TOKEN(TK_ATTR_CLOSE);
  324. default:
  325. RETURN_TOKEN('/');
  326. }
  // "=", "==" or "===".
  327. case _SC('='):
  328. NEXT();
  329. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
  330. else {
  331. NEXT();
  332. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_EQ_IDENTITY) }
  333. else { RETURN_TOKEN(TK_EQ); }
  334. }
  // "<", "<=", "<=>", "<-", "<<", "<<=" or "</".
  335. case _SC('<'):
  336. NEXT();
  337. switch(CUR_CHAR) {
  338. case _SC('='):
  339. NEXT();
  340. if(CUR_CHAR == _SC('>')) {
  341. NEXT();
  342. RETURN_TOKEN(TK_3WAYSCMP);
  343. }
  344. RETURN_TOKEN(TK_LE)
  345. case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
  346. case _SC('<'):
  347. NEXT();
  348. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_SHIFT_LEFT_EQ);}
  349. RETURN_TOKEN(TK_SHIFTL); break;
  350. case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
  351. }
  352. RETURN_TOKEN('<');
  // ">", ">=", ">>", ">>=" or ">>>".
  353. case _SC('>'):
  354. NEXT();
  355. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
  356. else if(CUR_CHAR == _SC('>')){
  357. NEXT();
  358. if(CUR_CHAR == _SC('>')){
  359. NEXT();
  360. RETURN_TOKEN(TK_USHIFTR);
  361. } else if (CUR_CHAR == _SC('=')){
  362. NEXT();
  363. RETURN_TOKEN(TK_BIT_SHIFT_RIGHT_EQ);
  364. }
  365. RETURN_TOKEN(TK_SHIFTR);
  366. }
  367. else { RETURN_TOKEN('>') }
  // "!", "!=" or "!==".
  368. case _SC('!'):
  369. NEXT();
  370. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN(_SC('!'))}
  371. else {
  372. NEXT();
  373. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_NE_IDENTITY)}
  374. else { RETURN_TOKEN(TK_NE); }
  375. }
  // '@' directly followed by '"' introduces a verbatim string; otherwise
  // '@' is returned as a token of its own.
  376. case _SC('@'): {
  377. SQInteger stype;
  378. NEXT();
  379. if(CUR_CHAR != _SC('"')) {
  380. RETURN_TOKEN(_SC('@'));
  381. }
  382. if((stype=ReadString(_SC('"'),true))!=-1) {
  383. RETURN_TOKEN(stype);
  384. }
  385. return Error(_SC("error parsing the string"));
  386. }
  // Ordinary quoted literal; the quote character itself is the delimiter.
  387. case _SC('"'):
  388. case _SC('\''): {
  389. SQInteger stype;
  390. if((stype=ReadString(CUR_CHAR,false))!=-1){
  391. RETURN_TOKEN(stype);
  392. }
  393. return Error(_SC("error parsing the string"));
  394. }
  // Single-character punctuation. An opening bracket immediately followed
  // by '=' is handed to ReadString() as a Lua-style long literal.
  395. case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
  396. case _SC(';'): case _SC(','): case _SC('?'): case _SC('~'):
  397. {
  398. SQInteger ret = CUR_CHAR;
  399. NEXT();
  400. if((ret == _SC('[') || ret == _SC('{') || ret == _SC('(')) && CUR_CHAR == _SC('=')){
  401. //lets try lua literal delimiters
  402. SQInteger stype;
  403. if((stype=ReadString(ret,true))!=-1){
  404. RETURN_TOKEN(stype);
  405. }
  406. return Error(_SC("error parsing the string"));
  407. }
  408. else RETURN_TOKEN(ret);
  409. }
  // '.', a number that starts with '.', or "..." (TK_VARPARAMS); ".." alone
  // is an error.
  410. case _SC('.'):
  411. NEXT();
  412. if (CUR_CHAR != _SC('.')){
  413. if (scisdigit(CUR_CHAR)) {
  414. SQInteger ret = ReadNumber(_SC('.'), dontThrowIntegerOverflow);
  415. if(ret < 0) return -1;
  416. RETURN_TOKEN(ret);
  417. }
  418. RETURN_TOKEN(_SC('.'));
  419. }
  420. NEXT();
  421. if (CUR_CHAR != _SC('.')){ return Error(_SC("invalid token '..'")); }
  422. NEXT();
  423. RETURN_TOKEN(TK_VARPARAMS);
  424. case _SC('^'):
  425. NEXT();
  426. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_XOR_EQ);}
  427. RETURN_TOKEN(_SC('^'));
  428. case _SC('&'):
  429. NEXT();
  430. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_AND_EQ);}
  431. if (CUR_CHAR != _SC('&')){ RETURN_TOKEN(_SC('&')) }
  432. else { NEXT(); RETURN_TOKEN(TK_AND); }
  433. case _SC('|'):
  434. NEXT();
  435. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_OR_EQ);}
  436. if (CUR_CHAR != _SC('|')){ RETURN_TOKEN(_SC('|')) }
  437. else { NEXT(); RETURN_TOKEN(TK_OR); }
  438. case _SC(':'):
  439. NEXT();
  440. if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(_SC(':')) }
  441. else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
  442. case _SC('*'):
  443. NEXT();
  444. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
  445. else RETURN_TOKEN(_SC('*'));
  446. case _SC('%'):
  447. NEXT();
  448. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
  449. else RETURN_TOKEN(_SC('%'));
  // "-", "-=", "--", and "->" which is reported as a plain '.' token.
  450. case _SC('-'):
  451. NEXT();
  452. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
  453. else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
  454. else if (CUR_CHAR == _SC('>')){ NEXT(); RETURN_TOKEN(_SC('.'));} //accept C/C++ like pointers
  455. else RETURN_TOKEN(_SC('-'));
  456. case _SC('+'):
  457. NEXT();
  458. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
  459. else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
  460. else RETURN_TOKEN(_SC('+'));
  461. case SQUIRREL_EOB:
  462. return 0;
  // Everything else: numbers, identifiers/keywords, or a single character
  // returned as its own token (control characters are rejected).
  463. default:{
  464. if (scisdigit(CUR_CHAR)) {
  465. SQInteger ret = ReadNumber(0, dontThrowIntegerOverflow);
  466. if(ret < 0) return -1;
  467. RETURN_TOKEN(ret);
  468. }
  469. else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
  470. SQInteger t = ReadID();
  471. if(t < 0) return -1;
  472. RETURN_TOKEN(t);
  473. }
  474. else {
  475. SQInteger c = CUR_CHAR;
  476. if (sciscntrl((int)c)) return Error(_SC("unexpected character(control)"));
  477. NEXT();
  478. RETURN_TOKEN(c);
  479. }
  // Every branch above returns, so this statement is not reached.
  480. RETURN_TOKEN(0);
  481. }
  482. }
  483. }
  // End of input: 0 is returned without going through RETURN_TOKEN, so
  // prevtoken/curtoken keep their previous values.
  484. return 0;
  485. }
  486. SQInteger SQLexer::GetIDType(const SQChar *s,SQInteger len)
  487. {
  488. SQObjectPtr t;
  489. if(_keywords->GetStr(s,len, t)) {
  490. return SQInteger(_integer(t));
  491. }
  492. return TK_IDENTIFIER;
  493. }
  494. #ifdef SQUNICODE
  495. #if WCHAR_SIZE == 2
  496. SQInteger SQLexer::AddUTF16(SQUnsignedInteger ch)
  497. {
  498. if (ch >= 0x10000)
  499. {
  500. SQUnsignedInteger code = (ch - 0x10000);
  501. APPEND_CHAR((SQChar)(0xD800 | (code >> 10)));
  502. APPEND_CHAR((SQChar)(0xDC00 | (code & 0x3FF)));
  503. return 2;
  504. }
  505. else {
  506. APPEND_CHAR((SQChar)ch);
  507. return 1;
  508. }
  509. }
  510. #endif
  511. #else
  512. SQInteger SQLexer::AddUTF8(SQUnsignedInteger ch)
  513. {
  514. if (ch < 0x80) {
  515. APPEND_CHAR((char)ch);
  516. return 1;
  517. }
  518. if (ch < 0x800) {
  519. APPEND_CHAR((SQChar)((ch >> 6) | 0xC0));
  520. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  521. return 2;
  522. }
  523. if (ch < 0x10000) {
  524. APPEND_CHAR((SQChar)((ch >> 12) | 0xE0));
  525. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  526. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  527. return 3;
  528. }
  529. if (ch < 0x110000) {
  530. APPEND_CHAR((SQChar)((ch >> 18) | 0xF0));
  531. APPEND_CHAR((SQChar)(((ch >> 12) & 0x3F) | 0x80));
  532. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  533. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  534. return 4;
  535. }
  536. return 0;
  537. }
  538. #endif
  539. SQInteger SQLexer::ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits)
  540. {
  541. NEXT();
  542. if (!isxdigit(CUR_CHAR)) return Error(_SC("hexadecimal number expected"));
  543. SQInteger n = 0;
  544. while (isxdigit(CUR_CHAR) && n < maxdigits) {
  545. dest[n] = CUR_CHAR;
  546. n++;
  547. NEXT();
  548. }
  549. dest[n] = 0;
  550. return n;
  551. }
  552. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  553. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  554. {
  555. INIT_TEMP_STRING();
  556. SQInteger start_equals = 0, utf_len = 0; SQUnsignedInteger utf_value = 0;
  557. SQChar cpp_delimin[32], cdelim1, cdelim2, saved_ndelim = ndelim;
  558. if(ndelim == _SC('{')){
  559. cdelim1 = _SC('{');
  560. cdelim2 = _SC('}');
  561. }
  562. else if(ndelim == _SC('(')){
  563. cdelim1 = _SC('(');
  564. cdelim2 = _SC(')');
  565. }
  566. else if(ndelim == _SC('[')){
  567. cdelim1 = _SC('[');
  568. cdelim2 = _SC(']');
  569. }
  570. else if(ndelim == _SC('R')){
  571. cdelim1 = _SC('(');
  572. cdelim2 = _SC(')');
  573. ndelim = cdelim2;
  574. if(CUR_CHAR != _SC('"'))
  575. {
  576. return Error(_SC("expect '\"' on literal delimiter"));
  577. }
  578. NEXT();
  579. size_t i=0;
  580. for(;(i < sizeof(cpp_delimin)-1) && (CUR_CHAR != _SC('(')); ++i)
  581. {
  582. cpp_delimin[i] = CUR_CHAR;
  583. NEXT();
  584. }
  585. cpp_delimin[i] = _SC('\0');
  586. }
  587. else
  588. {
  589. cdelim1 = cdelim2 = _SC('\0');
  590. }
  591. if((cdelim1 == saved_ndelim) && (CUR_CHAR == _SC('='))){
  592. //lua like literal
  593. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  594. ++start_equals;
  595. NEXT();
  596. }
  597. if(CUR_CHAR != cdelim1){
  598. //it's not a lua literal delimiter
  599. return Error(_SC("expect '%c' on literal delimiter"), cdelim1);
  600. }
  601. ndelim = cdelim2;
  602. }
  603. NEXT();
  604. if(IS_EOB()) return -1;
  605. if(start_equals) {
  606. int cr_nl = CUR_CHAR == _SC('\r');
  607. if(cr_nl) NEXT();
  608. cr_nl = CUR_CHAR == _SC('\n');
  609. if(cr_nl) NEXT();
  610. if(cr_nl) {//if a new line follows the start of delimiter drop it
  611. ++data->currentline;
  612. if(IS_EOB())
  613. {
  614. return Error(_SC("unfinished string"));
  615. }
  616. }
  617. }
  618. for(;;) {
  619. try_again:
  620. while(CUR_CHAR != ndelim) {
  621. SQInteger x = CUR_CHAR;
  622. switch(x) {
  623. case SQUIRREL_EOB:
  624. return Error(_SC("unfinished string"));
  625. case _SC('\n'):
  626. if(!verbatim) return Error(_SC("newline in a constant"));
  627. APPEND_CHAR(CUR_CHAR); NEXT();
  628. data->currentline++;
  629. break;
  630. case _SC('\\'):
  631. if(verbatim) {
  632. APPEND_CHAR(_SC('\\')); NEXT();
  633. }
  634. else {
  635. NEXT();
  636. switch(CUR_CHAR) {
  637. case _SC('x'): {
  638. const SQInteger maxdigits = sizeof(SQChar) * 2;
  639. SQChar temp[maxdigits + 1];
  640. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  641. SQChar *stemp;
  642. APPEND_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  643. }
  644. break;
  645. case _SC('U'):
  646. case _SC('u'): {
  647. const SQInteger maxdigits = CUR_CHAR == _SC('u') ? 4 : 8;
  648. SQChar temp[8 + 1];
  649. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  650. SQChar *stemp;
  651. #ifdef SQUNICODE
  652. #if WCHAR_SIZE == 2
  653. utf_value = scstrtoul(temp, &stemp, 16);
  654. utf_len = AddUTF16(utf_value);
  655. #else
  656. ADD_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  657. #endif
  658. #else
  659. utf_value = scstrtoul(temp, &stemp, 16);
  660. utf_len = AddUTF8(utf_value);
  661. #endif
  662. }
  663. break;
  664. //end of string continuation
  665. case _SC('\n'):
  666. if(ndelim == _SC('"')){
  667. data->currentline++;
  668. NEXT();
  669. break;
  670. }
  671. //falthrough
  672. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  673. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  674. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  675. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  676. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  677. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  678. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  679. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  680. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  681. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  682. case _SC('0'):
  683. case _SC('1'):
  684. case _SC('2'):
  685. case _SC('3'):
  686. case _SC('4'):
  687. case _SC('5'):
  688. case _SC('6'):
  689. case _SC('7'):
  690. {
  691. int ndigits = 0;
  692. int octal_char = 0;
  693. while(scisodigit(CUR_CHAR)) {
  694. octal_char = (octal_char)*8+(CUR_CHAR-_SC('0'));
  695. NEXT();
  696. if(++ndigits == 3) break;
  697. }
  698. if(octal_char > 0xff) return Error(_SC("max value of embedded octal digits is \377"));
  699. APPEND_CHAR(octal_char);
  700. goto try_again;
  701. }
  702. break;
  703. case _SC('/'): APPEND_CHAR(CUR_CHAR); NEXT();
  704. break;
  705. default:
  706. return Error(_SC("unrecognised escaper char"));
  707. }
  708. }
  709. break;
  710. default:
  711. APPEND_CHAR(CUR_CHAR);
  712. NEXT();
  713. }
  714. }
  715. NEXT();
  716. if(start_equals){
  717. bool lastBraceAdded = false;
  718. if(CUR_CHAR == _SC('=')){
  719. SQInteger end_equals = start_equals;
  720. NEXT();
  721. if(CUR_CHAR == _SC('=') || CUR_CHAR == cdelim2){
  722. --end_equals;
  723. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  724. --end_equals;
  725. NEXT();
  726. }
  727. if(end_equals) return Error(_SC("expect same number of '=' on literal delimiter"));
  728. if(CUR_CHAR != cdelim2) return Error(_SC("expect '%c' to close literal delimiter"), cdelim2);
  729. NEXT();
  730. break;
  731. }
  732. APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  733. APPEND_CHAR(_SC('='));
  734. lastBraceAdded = true;
  735. }
  736. if(!lastBraceAdded) APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  737. APPEND_CHAR(CUR_CHAR);
  738. NEXT();
  739. }
  740. else if(saved_ndelim == _SC('R')) {
  741. if(CUR_CHAR == ndelim)
  742. {
  743. APPEND_CHAR(ndelim);
  744. goto try_again;
  745. }
  746. size_t i = 0;
  747. for(;(i < sizeof(cpp_delimin)-1) && (CUR_CHAR != _SC('"')) && cpp_delimin[i]; ++i)
  748. {
  749. if(CUR_CHAR != cpp_delimin[i])
  750. {
  751. //false positive append all chars till here and continue
  752. APPEND_CHAR(ndelim);
  753. for(size_t j=0; j < i; ++j) APPEND_CHAR(cpp_delimin[j]); //recover already eaten chars from buffer
  754. APPEND_CHAR(CUR_CHAR); //append the last one that mismatch
  755. if(CUR_CHAR == _SC('\n')) data->currentline++;
  756. NEXT();
  757. goto try_again;
  758. //return Error(_SC("expect \"%s\" to close literal delimiter"), cpp_delimin);
  759. }
  760. NEXT();
  761. }
  762. if(CUR_CHAR != _SC('"')) return Error(_SC("expect '\"' to close literal delimiter"));
  763. NEXT(); //eat last '"'
  764. break;
  765. }
  766. else if(verbatim && CUR_CHAR == _SC('"')) { //double quotation
  767. APPEND_CHAR(CUR_CHAR);
  768. NEXT();
  769. }
  770. else {
  771. break;
  772. }
  773. }
  774. TERMINATE_BUFFER();
  775. SQInteger len = data->longstr.size()-1;
  776. if(ndelim == _SC('\'') && !_want_stringSingleAndDoubleQuotes) {
  777. if(len == 0) return Error(_SC("empty constant"));
  778. if(len > 1 && len != utf_len) Error(_SC("constant too long"));
  779. data->nvalue = utf_len ? utf_value : data->longstr[0];
  780. data->isCharacter = SQTrue;
  781. return TK_INTEGER;
  782. }
  783. data->svalue = &data->longstr[0];
  784. return TK_STRING_LITERAL;
  785. }
  // Largest value that can still be multiplied by 10 without exceeding
  // SQ_INT_MAX, and the last decimal digit of SQ_INT_MAX; both are used by
  // the ADD_CHECK_DIGIT overflow test.
  786. #define MAXBY10 (SQUnsignedInteger)(SQ_INT_MAX / 10)
  787. #define MAXLASTD (SQUnsignedInteger)(SQ_INT_MAX % 10)
  788. static int isneg (const SQChar **s) {
  789. if (**s == _SC('-')) { (*s)++; return 1; }
  790. else if (**s == _SC('+')) (*s)++;
  791. return 0;
  792. }
  // Accumulates one digit into the local `a` (a = a*base + dig), first making
  // the enclosing function return false when the overflow test trips. Relies
  // on locals named `a`, `d` and `neg` being in scope at the point of use.
  // NOTE(review): the overflow test always compares against MAXBY10/MAXLASTD,
  // whatever `base` is, yet the macro is also used with base 16 and 8 —
  // confirm the intended limit for those bases.
  793. #define ADD_CHECK_DIGIT(dig, base) \
  794. if (a >= MAXBY10 && (a > MAXBY10 || d > ((int)MAXLASTD + neg))) /* overflow? */ \
  795. return false; /* do not accept it (as integer) */ \
  796. a = a*base+dig;
  797. static bool LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  798. {
  799. SQUnsignedInteger a = 0;
  800. int d = 0, neg = isneg(&s);
  801. while(*s != 0)
  802. {
  803. if(scisdigit(*s)) d = (*s++)-_SC('0');
  804. else if(scisxdigit(*s)) d = toupper(*s++)-_SC('A')+10;
  805. else { assert(0); }
  806. ADD_CHECK_DIGIT(d, 16);
  807. }
  808. *res = a;
  809. return true;
  810. }
  811. static bool LexInteger(const SQChar *s,SQUnsignedInteger *res)
  812. {
  813. SQUnsignedInteger a = 0;
  814. int d = 0, neg = isneg(&s);
  815. while(*s != 0)
  816. {
  817. d = (*s++)-_SC('0');
  818. ADD_CHECK_DIGIT(d, 10);
  819. }
  820. *res = neg ? (((SQUnsignedInteger)0)-a) : a;
  821. return true;
  822. }
  823. static bool LexOctal(const SQChar *s,SQUnsignedInteger *res)
  824. {
  825. SQUnsignedInteger a = 0;
  826. int d = 0, neg = isneg(&s);
  827. while(*s != 0)
  828. {
  829. if(scisodigit(*s)) d = (*s++)-_SC('0');
  830. else { assert(0); }
  831. ADD_CHECK_DIGIT(d, 8);
  832. }
  833. *res = neg ? (((SQUnsignedInteger)0)-a) : a;
  834. return true;
  835. }
  836. static SQInteger isexponent(SQInteger c) { return c == _SC('e') || c==_SC('E'); }
  837. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  838. //dontThrowIntegerOverflow for when in 32bits parsing 64bits integer inside excluded ifdef
  839. SQInteger SQLexer::ReadNumber(SQInteger startChar, bool dontThrowIntegerOverflow)
  840. {
  841. #define TINT 1
  842. #define TFLOAT 2
  843. #define THEX 3
  844. #define TSCIENTIFIC 4
  845. #define TOCTAL 5
  846. SQInteger rtype, type = TINT, firstchar = startChar ? startChar : CUR_CHAR;
  847. SQUnsignedInteger itmp=0;
  848. SQChar *sTemp;
  849. INIT_TEMP_STRING();
  850. if(!startChar) NEXT();
  851. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  852. if(scisodigit(CUR_CHAR)) {
  853. type = TOCTAL;
  854. while(scisodigit(CUR_CHAR)) {
  855. APPEND_CHAR(CUR_CHAR);
  856. NEXT();
  857. }
  858. if(scisdigit(CUR_CHAR)) return Error(_SC("invalid octal number"));
  859. }
  860. else {
  861. NEXT();
  862. type = THEX;
  863. while(isxdigit(CUR_CHAR)) {
  864. APPEND_CHAR(CUR_CHAR);
  865. NEXT();
  866. }
  867. if((data->longstr.size() > MAX_HEX_DIGITS) && !dontThrowIntegerOverflow)
  868. return Error(_SC("too many digits for an Hex number"));
  869. }
  870. }
  871. else {
  872. APPEND_CHAR((int)firstchar);
  873. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  874. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  875. if(isexponent(CUR_CHAR)) {
  876. if(type != TFLOAT) return Error(_SC("invalid numeric format"));
  877. type = TSCIENTIFIC;
  878. APPEND_CHAR(CUR_CHAR);
  879. NEXT();
  880. if(CUR_CHAR == _SC('+') || CUR_CHAR == _SC('-')){
  881. APPEND_CHAR(CUR_CHAR);
  882. NEXT();
  883. }
  884. if(!scisdigit(CUR_CHAR)) return Error(_SC("exponent expected"));
  885. }
  886. APPEND_CHAR(CUR_CHAR);
  887. NEXT();
  888. }
  889. }
  890. TERMINATE_BUFFER();
  891. bool okNumber = true;
  892. switch(type) {
  893. case TINT:
  894. okNumber = LexInteger(&data->longstr[0],&itmp);
  895. if(okNumber) break;
  896. //fallthrough
  897. case TSCIENTIFIC:
  898. case TFLOAT:
  899. data->fvalue = (SQFloat)scstrtod(&data->longstr[0],&sTemp);
  900. if(CUR_CHAR == _SC('f')) NEXT(); //0.0f C/C++ notation
  901. return TK_FLOAT;
  902. case THEX:
  903. okNumber = LexHexadecimal(&data->longstr[0],&itmp);
  904. break;
  905. case TOCTAL:
  906. okNumber = LexOctal(&data->longstr[0],&itmp);
  907. break;
  908. }
  909. if(!okNumber && !dontThrowIntegerOverflow) Error(_SC("integer overflow %s"), &data->longstr[0]);
  910. rtype = TK_INTEGER;
  911. data->isCharacter = SQFalse;
  912. switch(type) {
  913. case TINT:
  914. switch(CUR_CHAR)
  915. {
  916. case _SC('u'):
  917. case _SC('U'):
  918. rtype = TK_UNSIGNED_INTEGER;
  919. NEXT();
  920. break;
  921. }
  922. switch(CUR_CHAR)
  923. {
  924. case _SC('s'):
  925. case _SC('S'):
  926. rtype = (rtype == TK_UNSIGNED_INTEGER) ? TK_UNSIGNED_SHORT_INTEGER : TK_SHORT_INTEGER;
  927. NEXT();
  928. break;
  929. case _SC('l'):
  930. case _SC('L'):
  931. rtype = (rtype == TK_UNSIGNED_INTEGER) ? TK_UNSIGNED_LONG_INTEGER : TK_LONG_INTEGER;
  932. NEXT();
  933. if((CUR_CHAR == _SC('l')) || (CUR_CHAR == _SC('L')) )
  934. {
  935. switch(rtype)
  936. {
  937. case TK_UNSIGNED_INTEGER:
  938. case TK_UNSIGNED_LONG_INTEGER:
  939. rtype = TK_UNSIGNED_LONG_LONG_INTEGER;
  940. break;
  941. default:
  942. rtype = TK_LONG_LONG_INTEGER;
  943. }
  944. NEXT();
  945. }
  946. break;
  947. }
  948. case THEX:
  949. case TOCTAL:
  950. //to allow 64 bits integers comment bellow
  951. //if(itmp > INT_MAX) return Error(_SC("integer overflow %ulld %d"));
  952. data->nvalue = (SQInteger) itmp;
  953. return rtype;
  954. }
  955. return 0;
  956. }
  957. SQInteger SQLexer::ReadID()
  958. {
  959. SQInteger res;
  960. INIT_TEMP_STRING();
  961. do {
  962. APPEND_CHAR(CUR_CHAR);
  963. NEXT();
  964. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  965. TERMINATE_BUFFER();
  966. if(data->longstr.size() == 2)
  967. {
  968. if(CUR_CHAR == _SC('"'))
  969. {
  970. if(data->longstr[0] == _SC('R'))
  971. {
  972. //C++ multiline string
  973. return ReadString(_SC('R'),true);
  974. }
  975. if(data->longstr[0] == _SC('L'))
  976. {
  977. //C++ wchar string
  978. return ReadString(CUR_CHAR,false);
  979. }
  980. }
  981. if(CUR_CHAR == _SC('\''))
  982. {
  983. if(data->longstr[0] == _SC('L'))
  984. {
  985. //C++ wchar string
  986. return ReadString(CUR_CHAR,false);
  987. }
  988. }
  989. }
  990. res = GetIDType(&data->longstr[0],data->longstr.size() - 1);
  991. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR || res == TK_DESTRUCTOR) {
  992. data->svalue = &data->longstr[0];
  993. }
  994. return res;
  995. }