sqlexer.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. /*
  2. see copyright notice in squirrel.h
  3. */
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include <limits.h>
  8. #include "sqtable.h"
  9. #include "sqstring.h"
  10. #include "sqcompiler.h"
  11. #include "sqlexer.h"
  /*
   * Helper macros shared by the lexer methods below. Most of them expect a
   * pointer named `data` (the active lexer state) to be in scope.
   */

  /* The character the lexer is currently positioned on. */
  #define CUR_CHAR (data->currdata)

  /* Shifts the current token into prevtoken, records `t` as the current token
     and returns it from the enclosing function. Expands to a bare brace block,
     so call sites may omit the trailing semicolon. */
  #define RETURN_TOKEN(t) { \
      data->prevtoken = data->curtoken; \
      data->curtoken = t; \
      return t; \
  }

  /* True once the current character is the end-of-buffer marker (or below it). */
  #define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)

  /* Older variant kept for reference:
  //#define NEXT() {SQInteger rc = Next(); if(rc < 0) return rc; data->currentcolumn++;}
  */
  /* Advances to the next input character; makes the enclosing function
     return -1 when Next() reports a failure. */
  #define NEXT() { \
      if(Next()) return -1; \
  }

  /* Scratch-buffer helpers operating on data->longstr. */
  #define INIT_TEMP_STRING() { \
      data->longstr.resize(0); \
  }
  #define APPEND_CHAR(c) { \
      data->longstr.push_back(c); \
  }
  #define TERMINATE_BUFFER() { \
      data->longstr.push_back(_SC('\0')); \
  }

  /* Registers the spelling `key` in the local table `tbl`, mapped to token `id`. */
  #define ADD_KEYWORD(key,id) \
      tbl->NewSlot( SQString::Create(_sharedstate, _SC(#key)) ,SQInteger(id))
  21. SQLexer::SQLexer(){_keywords=0;}
  22. SQLexer::~SQLexer()
  23. {
  24. _keywords->Release();
  25. }
  26. SQInteger SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg,
  27. SQUserPointer up,CompilerErrorFunc efunc,void *ed, SQBool want_comments)
  28. {
  29. _want_comments = want_comments;
  30. data = &_data;
  31. _data_lookahead.currentline = -1;
  32. _errfunc = efunc;
  33. _errtarget = ed;
  34. _sharedstate = ss;
  35. if(_keywords) _keywords->Release();
  36. _keywords = GetKeywords();
  37. return ResetReader(rg, up, 1);
  38. }
  39. SQInteger SQLexer::ResetReader(SQLEXREADFUNC rg, SQUserPointer up, SQInteger line)
  40. {
  41. _readf = rg;
  42. _up = up;
  43. data->lasttokenline = data->currentline = line;
  44. data->lasttokencolumn = 0;
  45. data->currentcolumn = 0;
  46. data->prevtoken = -1;
  47. data->readcount = 0;
  48. data->reached_eof = SQFalse;
  49. return Next();
  50. }
  51. SQTable * SQLexer::GetKeywords()
  52. {
  53. SQTable *tbl = SQTable::Create(_sharedstate, (TK_LAST_ENUM_TOKEN - TK_FIRST_ENUM_TOKEN - 1) /*26*/);
  54. ADD_KEYWORD(any_t, TK_LOCAL_ANY_T);
  55. ADD_KEYWORD(array_t, TK_LOCAL_ARRAY_T);
  56. ADD_KEYWORD(as, TK_AS);
  57. ADD_KEYWORD(auto, TK_LOCAL);
  58. ADD_KEYWORD(base, TK_BASE);
  59. ADD_KEYWORD(bool_t, TK_LOCAL_BOOL_T);
  60. ADD_KEYWORD(break, TK_BREAK);
  61. ADD_KEYWORD(case, TK_CASE);
  62. ADD_KEYWORD(catch, TK_CATCH);
  63. ADD_KEYWORD(char_t, TK_LOCAL_CHAR_T);
  64. ADD_KEYWORD(class,TK_CLASS);
  65. ADD_KEYWORD(clone, TK_CLONE);
  66. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  67. ADD_KEYWORD(const,TK_CONST);
  68. ADD_KEYWORD(constexpr,TK_CONSTEXPR);
  69. ADD_KEYWORD(continue, TK_CONTINUE);
  70. ADD_KEYWORD(declare, TK_DECLARE);
  71. ADD_KEYWORD(default, TK_DEFAULT);
  72. ADD_KEYWORD(delete, TK_DELETE);
  73. ADD_KEYWORD(destructor,TK_DESTRUCTOR);
  74. ADD_KEYWORD(do, TK_DO);
  75. ADD_KEYWORD(double_t, TK_LOCAL_DOUBLE_T);
  76. ADD_KEYWORD(else, TK_ELSE);
  77. ADD_KEYWORD(enum,TK_ENUM);
  78. ADD_KEYWORD(extends,TK_EXTENDS);
  79. ADD_KEYWORD(extern,TK_EXTERN);
  80. ADD_KEYWORD(false,TK_FALSE);
  81. ADD_KEYWORD(__FILE__,TK___FILE__);
  82. ADD_KEYWORD(float_t, TK_LOCAL_FLOAT_T);
  83. ADD_KEYWORD(foreach, TK_FOREACH);
  84. ADD_KEYWORD(for, TK_FOR);
  85. ADD_KEYWORD(friend, TK_FRIEND);
  86. ADD_KEYWORD(function, TK_FUNCTION);
  87. ADD_KEYWORD(__FUNCTION__,TK___FUNCTION__);
  88. ADD_KEYWORD(goto, TK_GOTO);
  89. ADD_KEYWORD(if, TK_IF);
  90. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  91. ADD_KEYWORD(int16_t, TK_LOCAL_INT16_T);
  92. ADD_KEYWORD(int32_t, TK_LOCAL_INT32_T);
  93. ADD_KEYWORD(int64_t, TK_LOCAL_INT64_T);
  94. ADD_KEYWORD(int8_t, TK_LOCAL_INT8_T);
  95. ADD_KEYWORD(in, TK_IN);
  96. ADD_KEYWORD(inline, TK_INLINE);
  97. ADD_KEYWORD(int_t, TK_LOCAL_INT_T);
  98. ADD_KEYWORD(let, TK_LOCAL);
  99. ADD_KEYWORD(__LINE__,TK___LINE__);
  100. ADD_KEYWORD(local, TK_LOCAL);
  101. ADD_KEYWORD(long_double_t, TK_LOCAL_LONG_DOUBLE_T);
  102. ADD_KEYWORD(new,TK_IGNORE);
  103. ADD_KEYWORD(noexcept,TK_NOEXCEPT);
  104. ADD_KEYWORD(number_t, TK_LOCAL_NUMBER_T);
  105. ADD_KEYWORD(null, TK_NULL);
  106. ADD_KEYWORD(NULL, TK_NULL);
  107. ADD_KEYWORD(private,TK_PRIVATE);
  108. ADD_KEYWORD(protected,TK_PROTECTED);
  109. ADD_KEYWORD(public,TK_PUBLIC);
  110. ADD_KEYWORD(rawcall, TK_RAWCALL);
  111. ADD_KEYWORD(resume, TK_RESUME);
  112. ADD_KEYWORD(return, TK_RETURN);
  113. ADD_KEYWORD(size_t,TK_LOCAL_SIZE_T);
  114. ADD_KEYWORD(ssize_t,TK_LOCAL_SSIZE_T);
  115. ADD_KEYWORD(static,TK_STATIC);
  116. ADD_KEYWORD(string_t, TK_LOCAL_STRING_T);
  117. ADD_KEYWORD(struct,TK_STRUCT);
  118. ADD_KEYWORD(switch, TK_SWITCH);
  119. ADD_KEYWORD(table_t, TK_LOCAL_TABLE_T);
  120. ADD_KEYWORD(template, TK_TEMPLATE);
  121. ADD_KEYWORD(this, TK_THIS);
  122. ADD_KEYWORD(throw, TK_THROW);
  123. ADD_KEYWORD(typedef, TK_TYPEDEF);
  124. ADD_KEYWORD(true,TK_TRUE);
  125. ADD_KEYWORD(try, TK_TRY);
  126. ADD_KEYWORD(typeof, TK_TYPEOF);
  127. ADD_KEYWORD(uint16_t, TK_LOCAL_UINT16_T);
  128. ADD_KEYWORD(uint32_t, TK_LOCAL_UINT32_T);
  129. ADD_KEYWORD(uint64_t, TK_LOCAL_UINT64_T);
  130. ADD_KEYWORD(uint8_t, TK_LOCAL_UINT8_T);
  131. ADD_KEYWORD(uint_t, TK_LOCAL_UINT_T);
  132. ADD_KEYWORD(unsafe, TK_UNSAFE);
  133. ADD_KEYWORD(using, TK_USING);
  134. ADD_KEYWORD(var, TK_LOCAL);
  135. ADD_KEYWORD(virtual, TK_VIRTUAL);
  136. ADD_KEYWORD(void_ptr_t, TK_LOCAL_VOIDPTR_T);
  137. ADD_KEYWORD(void, TK_VOID);
  138. ADD_KEYWORD(volatile, TK_VOLATILE);
  139. ADD_KEYWORD(wchar_t, TK_LOCAL_WCHAR_T);
  140. ADD_KEYWORD(weakref_t, TK_LOCAL_WEAKREF_T);
  141. ADD_KEYWORD(while, TK_WHILE);
  142. ADD_KEYWORD(yield, TK_YIELD);
  143. return tbl;
  144. }
  145. SQInteger SQLexer::Error(const SQChar *fmt, ...)
  146. {
  147. va_list vl;
  148. va_start(vl, fmt);
  149. scvsprintf(data->lasterror, sizeof(data->lasterror), fmt, vl);
  150. va_end(vl);
  151. if(_errfunc) _errfunc(_errtarget,data->lasterror);
  152. return -1;
  153. }
  154. SQInteger SQLexer::Next()
  155. {
  156. SQInteger t = _readf(_up);
  157. if(t > MAX_CHAR) return Error(_SC("Invalid character"));
  158. if(t != 0) {
  159. data->currdata = (LexChar)t;
  160. ++data->currentcolumn;
  161. ++data->readcount;
  162. return 0;
  163. }
  164. data->currdata = SQUIRREL_EOB;
  165. data->reached_eof = SQTrue;
  166. return 0;
  167. }
  168. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  169. {
  170. SQObjectPtr itr, key, val;
  171. SQInteger nitr;
  172. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  173. itr = (SQInteger)nitr;
  174. if(((SQInteger)_integer(val)) == tok)
  175. return _stringval(key);
  176. }
  177. return NULL;
  178. }
  179. const SQChar *SQLexer::GetTokenName(int tk_code) {
  180. const SQChar *str_tk;
  181. switch(tk_code){
  182. #define ENUM_TK(a) case TK_##a: str_tk = _SC("TK_" #a); break;
  183. SQ_KEYWORDS_LIST()
  184. #undef ENUM_TK
  185. default:
  186. str_tk = _SC("()");
  187. }
  188. return str_tk;
  189. }
  190. SQInteger SQLexer::LexBlockComment()
  191. {
  192. /*
  193. if(CUR_CHAR == _SC('*'))
  194. {
  195. NEXT();
  196. if(CUR_CHAR != _SC('*')){ //document comment
  197. printf("Doument comment found at line %d\n", data->currentline);
  198. }
  199. }
  200. */
  201. bool done = false;
  202. if(_want_comments) INIT_TEMP_STRING();
  203. NEXT(); //remove the comment token '*'
  204. while(!done) {
  205. switch(CUR_CHAR) {
  206. case _SC('*'): {
  207. NEXT();
  208. if(CUR_CHAR == _SC('/')) { done = true; NEXT(); continue;}
  209. if(_want_comments) APPEND_CHAR(_SC('*')); //this is the '*' before NEXT()
  210. continue; //reevaluate, when it's a \n it'll be incremented line bellow
  211. };
  212. break;
  213. case _SC('\n'): data->currentline++; break;
  214. case SQUIRREL_EOB: return Error(_SC("missing \"*/\" in comment"));
  215. }
  216. if(_want_comments) APPEND_CHAR(CUR_CHAR);
  217. NEXT();
  218. }
  219. if(_want_comments)
  220. {
  221. TERMINATE_BUFFER();
  222. if(data->longstr.size() > 0) data->longstr.pop_back(); //remove the last '*'
  223. data->svalue = &data->longstr[0];
  224. }
  225. return 0;
  226. }
  227. SQInteger SQLexer::LexLineComment()
  228. {
  229. if(_want_comments) INIT_TEMP_STRING();
  230. NEXT(); //remove the comment token
  231. while (CUR_CHAR != _SC('\n') && (!IS_EOB())) {if(_want_comments) APPEND_CHAR(CUR_CHAR); NEXT();}
  232. if(_want_comments)
  233. {
  234. TERMINATE_BUFFER();
  235. data->svalue = &data->longstr[0];
  236. }
  237. return 0;
  238. }
  239. SQInteger SQLexer::LookaheadLex()
  240. {
  241. if(CUR_CHAR == SQUIRREL_EOB) return 0;
  242. if(_data_lookahead.currentline >= 0)
  243. {
  244. return Error(_SC("lex lookahead already done"));
  245. }
  246. _data_lookahead.copy(&_data);
  247. data = &_data_lookahead;
  248. Lex();
  249. data = &_data;
  250. return _data_lookahead.curtoken;
  251. }
  252. SQInteger SQLexer::Lex()
  253. {
  254. if(_data_lookahead.currentline >= 0 && data != &_data_lookahead)
  255. {
  256. //we did a lookahead before, reuse it now
  257. _data.copy(&_data_lookahead);
  258. _data_lookahead.currentline = -1;
  259. return _data.curtoken;
  260. }
  261. data->lasttokenline = data->currentline;
  262. data->lasttokencolumn = data->currentcolumn;
  263. data->svalue = NULL;
  264. while(CUR_CHAR != SQUIRREL_EOB) {
  265. switch(CUR_CHAR){
  266. case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
  267. case _SC('\n'):
  268. data->currentline++;
  269. data->prevtoken=data->curtoken;
  270. data->curtoken=_SC('\n');
  271. NEXT();
  272. data->currentcolumn=1;
  273. continue;
  274. case _SC('#'):
  275. NEXT();
  276. if(CUR_CHAR == _SC('!')) //shell shebang
  277. {
  278. if(LexLineComment()) return -1;
  279. if(_want_comments) RETURN_TOKEN(TK_COMMENT_LINE)
  280. continue;
  281. }
  282. RETURN_TOKEN(TK_PRAGMA);
  283. continue;
  284. case _SC('/'):
  285. NEXT();
  286. switch(CUR_CHAR){
  287. case _SC('*'):
  288. if(LexBlockComment()) return -1;
  289. if(_want_comments) RETURN_TOKEN(TK_COMMENT_BLOCK)
  290. continue;
  291. case _SC('/'):
  292. if(LexLineComment()) return -1;
  293. if(_want_comments) RETURN_TOKEN(TK_COMMENT_LINE)
  294. continue;
  295. case _SC('='):
  296. NEXT();
  297. RETURN_TOKEN(TK_DIVEQ);
  298. continue;
  299. case _SC('>'):
  300. NEXT();
  301. RETURN_TOKEN(TK_ATTR_CLOSE);
  302. continue;
  303. default:
  304. RETURN_TOKEN('/');
  305. }
  306. case _SC('='):
  307. NEXT();
  308. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
  309. else {
  310. NEXT();
  311. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_EQ_IDENTITY) }
  312. else { RETURN_TOKEN(TK_EQ); }
  313. }
  314. case _SC('<'):
  315. NEXT();
  316. switch(CUR_CHAR) {
  317. case _SC('='):
  318. NEXT();
  319. if(CUR_CHAR == _SC('>')) {
  320. NEXT();
  321. RETURN_TOKEN(TK_3WAYSCMP);
  322. }
  323. RETURN_TOKEN(TK_LE)
  324. break;
  325. case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
  326. case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
  327. case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
  328. }
  329. RETURN_TOKEN('<');
  330. case _SC('>'):
  331. NEXT();
  332. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
  333. else if(CUR_CHAR == _SC('>')){
  334. NEXT();
  335. if(CUR_CHAR == _SC('>')){
  336. NEXT();
  337. RETURN_TOKEN(TK_USHIFTR);
  338. }
  339. RETURN_TOKEN(TK_SHIFTR);
  340. }
  341. else { RETURN_TOKEN('>') }
  342. case _SC('!'):
  343. NEXT();
  344. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN(_SC('!'))}
  345. else {
  346. NEXT();
  347. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_NE_IDENTITY)}
  348. else { RETURN_TOKEN(TK_NE); }
  349. }
  350. case _SC('@'): {
  351. SQInteger stype;
  352. NEXT();
  353. if(CUR_CHAR != _SC('"')) {
  354. RETURN_TOKEN(_SC('@'));
  355. }
  356. if((stype=ReadString(_SC('"'),true))!=-1) {
  357. RETURN_TOKEN(stype);
  358. }
  359. return Error(_SC("error parsing the string"));
  360. }
  361. case _SC('"'):
  362. case _SC('\''): {
  363. SQInteger stype;
  364. if((stype=ReadString(CUR_CHAR,false))!=-1){
  365. RETURN_TOKEN(stype);
  366. }
  367. return Error(_SC("error parsing the string"));
  368. }
  369. case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
  370. case _SC(';'): case _SC(','): case _SC('?'): case _SC('~'):
  371. {
  372. SQInteger ret = CUR_CHAR;
  373. NEXT();
  374. if((ret == _SC('[') || ret == _SC('{') || ret == _SC('(')) && CUR_CHAR == _SC('=')){
  375. //lets try lua literal delimiters
  376. SQInteger stype;
  377. if((stype=ReadString(ret,true))!=-1){
  378. RETURN_TOKEN(stype);
  379. }
  380. return Error(_SC("error parsing the string"));
  381. }
  382. else RETURN_TOKEN(ret);
  383. }
  384. case _SC('.'):
  385. NEXT();
  386. if (CUR_CHAR != _SC('.')){ RETURN_TOKEN(_SC('.')) }
  387. NEXT();
  388. if (CUR_CHAR != _SC('.')){ return Error(_SC("invalid token '..'")); }
  389. NEXT();
  390. RETURN_TOKEN(TK_VARPARAMS);
  391. case _SC('^'):
  392. NEXT();
  393. //if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_XOR_EQ);}
  394. RETURN_TOKEN(_SC('^'));
  395. case _SC('&'):
  396. NEXT();
  397. //if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_AND_EQ);}
  398. if (CUR_CHAR != _SC('&')){ RETURN_TOKEN(_SC('&')) }
  399. else { NEXT(); RETURN_TOKEN(TK_AND); }
  400. case _SC('|'):
  401. NEXT();
  402. //if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_BIT_OR_EQ);}
  403. if (CUR_CHAR != _SC('|')){ RETURN_TOKEN(_SC('|')) }
  404. else { NEXT(); RETURN_TOKEN(TK_OR); }
  405. case _SC(':'):
  406. NEXT();
  407. if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(_SC(':')) }
  408. else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
  409. case _SC('*'):
  410. NEXT();
  411. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
  412. else RETURN_TOKEN(_SC('*'));
  413. case _SC('%'):
  414. NEXT();
  415. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
  416. else RETURN_TOKEN(_SC('%'));
  417. case _SC('-'):
  418. NEXT();
  419. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
  420. else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
  421. else if (CUR_CHAR == _SC('>')){ NEXT(); RETURN_TOKEN(_SC('.'));} //accept C/C++ like pointers
  422. else RETURN_TOKEN(_SC('-'));
  423. case _SC('+'):
  424. NEXT();
  425. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
  426. else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
  427. else RETURN_TOKEN(_SC('+'));
  428. case SQUIRREL_EOB:
  429. return 0;
  430. default:{
  431. if (scisdigit(CUR_CHAR)) {
  432. SQInteger ret = ReadNumber();
  433. if(ret < 0) return -1;
  434. RETURN_TOKEN(ret);
  435. }
  436. else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
  437. SQInteger t = ReadID();
  438. if(t < 0) return -1;
  439. RETURN_TOKEN(t);
  440. }
  441. else {
  442. SQInteger c = CUR_CHAR;
  443. if (sciscntrl((int)c)) return Error(_SC("unexpected character(control)"));
  444. NEXT();
  445. RETURN_TOKEN(c);
  446. }
  447. RETURN_TOKEN(0);
  448. }
  449. }
  450. }
  451. return 0;
  452. }
  453. SQInteger SQLexer::GetIDType(const SQChar *s,SQInteger len)
  454. {
  455. SQObjectPtr t;
  456. if(_keywords->GetStr(s,len, t)) {
  457. return SQInteger(_integer(t));
  458. }
  459. return TK_IDENTIFIER;
  460. }
  461. #ifdef SQUNICODE
  462. #if WCHAR_SIZE == 2
  463. SQInteger SQLexer::AddUTF16(SQUnsignedInteger ch)
  464. {
  465. if (ch >= 0x10000)
  466. {
  467. SQUnsignedInteger code = (ch - 0x10000);
  468. APPEND_CHAR((SQChar)(0xD800 | (code >> 10)));
  469. APPEND_CHAR((SQChar)(0xDC00 | (code & 0x3FF)));
  470. return 2;
  471. }
  472. else {
  473. APPEND_CHAR((SQChar)ch);
  474. return 1;
  475. }
  476. }
  477. #endif
  478. #else
  479. SQInteger SQLexer::AddUTF8(SQUnsignedInteger ch)
  480. {
  481. if (ch < 0x80) {
  482. APPEND_CHAR((char)ch);
  483. return 1;
  484. }
  485. if (ch < 0x800) {
  486. APPEND_CHAR((SQChar)((ch >> 6) | 0xC0));
  487. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  488. return 2;
  489. }
  490. if (ch < 0x10000) {
  491. APPEND_CHAR((SQChar)((ch >> 12) | 0xE0));
  492. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  493. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  494. return 3;
  495. }
  496. if (ch < 0x110000) {
  497. APPEND_CHAR((SQChar)((ch >> 18) | 0xF0));
  498. APPEND_CHAR((SQChar)(((ch >> 12) & 0x3F) | 0x80));
  499. APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
  500. APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
  501. return 4;
  502. }
  503. return 0;
  504. }
  505. #endif
  506. SQInteger SQLexer::ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits)
  507. {
  508. NEXT();
  509. if (!isxdigit(CUR_CHAR)) return Error(_SC("hexadecimal number expected"));
  510. SQInteger n = 0;
  511. while (isxdigit(CUR_CHAR) && n < maxdigits) {
  512. dest[n] = CUR_CHAR;
  513. n++;
  514. NEXT();
  515. }
  516. dest[n] = 0;
  517. return n;
  518. }
  519. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  520. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  521. {
  522. INIT_TEMP_STRING();
  523. SQInteger start_equals = 0;
  524. SQChar cpp_delimin[32], cdelim1, cdelim2, saved_ndelim = ndelim;
  525. if(ndelim == _SC('{')){
  526. cdelim1 = _SC('{');
  527. cdelim2 = _SC('}');
  528. }
  529. else if(ndelim == _SC('(')){
  530. cdelim1 = _SC('(');
  531. cdelim2 = _SC(')');
  532. }
  533. else if(ndelim == _SC('[')){
  534. cdelim1 = _SC('[');
  535. cdelim2 = _SC(']');
  536. }
  537. else if(ndelim == _SC('R')){
  538. cdelim1 = _SC('(');
  539. cdelim2 = _SC(')');
  540. ndelim = cdelim2;
  541. if(CUR_CHAR != _SC('"'))
  542. {
  543. return Error(_SC("expect '\"' on literal delimiter"));
  544. }
  545. NEXT();
  546. size_t i=0;
  547. for(;(i < sizeof(cpp_delimin)-1) && (CUR_CHAR != _SC('(')); ++i)
  548. {
  549. cpp_delimin[i] = CUR_CHAR;
  550. NEXT();
  551. }
  552. cpp_delimin[i] = _SC('\0');
  553. }
  554. else
  555. {
  556. cdelim1 = cdelim2 = _SC('\0');
  557. }
  558. if((cdelim1 == saved_ndelim) && (CUR_CHAR == _SC('='))){
  559. //lua like literal
  560. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  561. ++start_equals;
  562. NEXT();
  563. }
  564. if(CUR_CHAR != cdelim1){
  565. //it's not a lua literal delimiter
  566. return Error(_SC("expect '%c' on literal delimiter"), cdelim1);
  567. }
  568. ndelim = cdelim2;
  569. }
  570. NEXT();
  571. if(IS_EOB()) return -1;
  572. if(start_equals) {
  573. int cr_nl = CUR_CHAR == _SC('\r');
  574. if(cr_nl) NEXT();
  575. cr_nl = CUR_CHAR == _SC('\n');
  576. if(cr_nl) NEXT();
  577. if(cr_nl) {//if a new line follows the start of delimiter drop it
  578. ++data->currentline;
  579. if(IS_EOB())
  580. {
  581. return Error(_SC("unfinished string"));
  582. }
  583. }
  584. }
  585. for(;;) {
  586. try_again:
  587. while(CUR_CHAR != ndelim) {
  588. SQInteger x = CUR_CHAR;
  589. switch(x) {
  590. case SQUIRREL_EOB:
  591. return Error(_SC("unfinished string"));
  592. case _SC('\n'):
  593. if(!verbatim) return Error(_SC("newline in a constant"));
  594. APPEND_CHAR(CUR_CHAR); NEXT();
  595. data->currentline++;
  596. break;
  597. case _SC('\\'):
  598. if(verbatim) {
  599. APPEND_CHAR(_SC('\\')); NEXT();
  600. }
  601. else {
  602. NEXT();
  603. switch(CUR_CHAR) {
  604. case _SC('x'): {
  605. const SQInteger maxdigits = sizeof(SQChar) * 2;
  606. SQChar temp[maxdigits + 1];
  607. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  608. SQChar *stemp;
  609. APPEND_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  610. }
  611. break;
  612. case _SC('U'):
  613. case _SC('u'): {
  614. const SQInteger maxdigits = CUR_CHAR == _SC('u') ? 4 : 8;
  615. SQChar temp[8 + 1];
  616. if(ProcessStringHexEscape(temp, maxdigits) < 0) return -1;
  617. SQChar *stemp;
  618. #ifdef SQUNICODE
  619. #if WCHAR_SIZE == 2
  620. AddUTF16(scstrtoul(temp, &stemp, 16));
  621. #else
  622. ADD_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
  623. #endif
  624. #else
  625. AddUTF8(scstrtoul(temp, &stemp, 16));
  626. #endif
  627. }
  628. break;
  629. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  630. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  631. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  632. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  633. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  634. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  635. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  636. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  637. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  638. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  639. case _SC('0'):
  640. case _SC('1'):
  641. case _SC('2'):
  642. case _SC('3'):
  643. case _SC('4'):
  644. case _SC('5'):
  645. case _SC('6'):
  646. case _SC('7'):
  647. {
  648. int ndigits = 0;
  649. int octal_char = 0;
  650. while(scisodigit(CUR_CHAR)) {
  651. octal_char = (octal_char)*8+(CUR_CHAR-_SC('0'));
  652. NEXT();
  653. if(++ndigits == 3) break;
  654. }
  655. if(octal_char > 0xff) return Error(_SC("max value of embedded octal digits is \377"));
  656. APPEND_CHAR(octal_char);
  657. goto try_again;
  658. }
  659. break;
  660. default:
  661. return Error(_SC("unrecognised escaper char"));
  662. break;
  663. }
  664. }
  665. break;
  666. default:
  667. APPEND_CHAR(CUR_CHAR);
  668. NEXT();
  669. }
  670. }
  671. NEXT();
  672. if(start_equals){
  673. bool lastBraceAdded = false;
  674. if(CUR_CHAR == _SC('=')){
  675. SQInteger end_equals = start_equals;
  676. NEXT();
  677. if(CUR_CHAR == _SC('=') || CUR_CHAR == cdelim2){
  678. --end_equals;
  679. while(!IS_EOB() && CUR_CHAR == _SC('=')) {
  680. --end_equals;
  681. NEXT();
  682. }
  683. if(end_equals) return Error(_SC("expect same number of '=' on literal delimiter"));
  684. if(CUR_CHAR != cdelim2) return Error(_SC("expect '%c' to close literal delimiter"), cdelim2);
  685. NEXT();
  686. break;
  687. }
  688. APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  689. APPEND_CHAR(_SC('='));
  690. lastBraceAdded = true;
  691. }
  692. if(!lastBraceAdded) APPEND_CHAR(cdelim2); //the first NEXT() after break the while loop
  693. APPEND_CHAR(CUR_CHAR);
  694. NEXT();
  695. }
  696. else if(saved_ndelim == _SC('R')) {
  697. if(CUR_CHAR == ndelim)
  698. {
  699. APPEND_CHAR(ndelim);
  700. goto try_again;
  701. }
  702. size_t i = 0;
  703. for(;(i < sizeof(cpp_delimin)-1) && (CUR_CHAR != _SC('"')) && cpp_delimin[i]; ++i)
  704. {
  705. if(CUR_CHAR != cpp_delimin[i])
  706. {
  707. //false positive append all chars till here and continue
  708. APPEND_CHAR(ndelim);
  709. for(size_t j=0; j < i; ++j) APPEND_CHAR(cpp_delimin[j]); //recover already eaten chars from buffer
  710. APPEND_CHAR(CUR_CHAR); //append the last one that mismatch
  711. if(CUR_CHAR == _SC('\n')) data->currentline++;
  712. NEXT();
  713. goto try_again;
  714. //return Error(_SC("expect \"%s\" to close literal delimiter"), cpp_delimin);
  715. }
  716. NEXT();
  717. }
  718. if(CUR_CHAR != _SC('"')) return Error(_SC("expect '\"' to close literal delimiter"));
  719. NEXT(); //eat last '"'
  720. break;
  721. }
  722. else if(verbatim && CUR_CHAR == _SC('"')) { //double quotation
  723. APPEND_CHAR(CUR_CHAR);
  724. NEXT();
  725. }
  726. else {
  727. break;
  728. }
  729. }
  730. TERMINATE_BUFFER();
  731. SQInteger len = data->longstr.size()-1;
  732. if(ndelim == _SC('\'')) {
  733. if(len == 0) return Error(_SC("empty constant"));
  734. if(len > 1) return Error(_SC("constant too long"));
  735. data->nvalue = data->longstr[0];
  736. return TK_INTEGER;
  737. }
  738. data->svalue = &data->longstr[0];
  739. return TK_STRING_LITERAL;
  740. }
  741. void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  742. {
  743. *res = 0;
  744. while(*s != 0)
  745. {
  746. if(scisdigit(*s)) *res = (*res)*16+((*s++)-_SC('0'));
  747. else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-_SC('A')+10);
  748. else { assert(0); }
  749. }
  750. }
  751. void LexInteger(const SQChar *s,SQUnsignedInteger *res)
  752. {
  753. *res = 0;
  754. while(*s != 0)
  755. {
  756. *res = (*res)*10+((*s++)-_SC('0'));
  757. }
  758. }
  759. void LexOctal(const SQChar *s,SQUnsignedInteger *res)
  760. {
  761. *res = 0;
  762. while(*s != 0)
  763. {
  764. if(scisodigit(*s)) *res = (*res)*8+((*s++)-_SC('0'));
  765. else { assert(0); }
  766. }
  767. }
  768. SQInteger isexponent(SQInteger c) { return c == _SC('e') || c==_SC('E'); }
  769. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  770. SQInteger SQLexer::ReadNumber()
  771. {
  772. #define TINT 1
  773. #define TFLOAT 2
  774. #define THEX 3
  775. #define TSCIENTIFIC 4
  776. #define TOCTAL 5
  777. SQInteger rtype, type = TINT, firstchar = CUR_CHAR;
  778. SQUnsignedInteger itmp=0;
  779. SQChar *sTemp;
  780. INIT_TEMP_STRING();
  781. NEXT();
  782. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  783. if(scisodigit(CUR_CHAR)) {
  784. type = TOCTAL;
  785. while(scisodigit(CUR_CHAR)) {
  786. APPEND_CHAR(CUR_CHAR);
  787. NEXT();
  788. }
  789. if(scisdigit(CUR_CHAR)) return Error(_SC("invalid octal number"));
  790. }
  791. else {
  792. NEXT();
  793. type = THEX;
  794. while(isxdigit(CUR_CHAR)) {
  795. APPEND_CHAR(CUR_CHAR);
  796. NEXT();
  797. }
  798. if(data->longstr.size() > MAX_HEX_DIGITS) return Error(_SC("too many digits for an Hex number"));
  799. }
  800. }
  801. else {
  802. APPEND_CHAR((int)firstchar);
  803. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  804. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  805. if(isexponent(CUR_CHAR)) {
  806. if(type != TFLOAT) return Error(_SC("invalid numeric format"));
  807. type = TSCIENTIFIC;
  808. APPEND_CHAR(CUR_CHAR);
  809. NEXT();
  810. if(CUR_CHAR == _SC('+') || CUR_CHAR == _SC('-')){
  811. APPEND_CHAR(CUR_CHAR);
  812. NEXT();
  813. }
  814. if(!scisdigit(CUR_CHAR)) return Error(_SC("exponent expected"));
  815. }
  816. APPEND_CHAR(CUR_CHAR);
  817. NEXT();
  818. }
  819. }
  820. TERMINATE_BUFFER();
  821. switch(type) {
  822. case TSCIENTIFIC:
  823. case TFLOAT:
  824. data->fvalue = (SQFloat)scstrtod(&data->longstr[0],&sTemp);
  825. return TK_FLOAT;
  826. case TINT:
  827. LexInteger(&data->longstr[0],&itmp);
  828. break;
  829. case THEX:
  830. LexHexadecimal(&data->longstr[0],&itmp);
  831. break;
  832. case TOCTAL:
  833. LexOctal(&data->longstr[0],&itmp);
  834. break;
  835. }
  836. rtype = TK_INTEGER;
  837. switch(type) {
  838. case TINT:
  839. switch(CUR_CHAR)
  840. {
  841. case _SC('u'):
  842. case _SC('U'):
  843. rtype = TK_UNSIGNED_INTEGER;
  844. NEXT();
  845. break;
  846. }
  847. switch(CUR_CHAR)
  848. {
  849. case _SC('s'):
  850. case _SC('S'):
  851. rtype = (rtype == TK_UNSIGNED_INTEGER) ? TK_UNSIGNED_SHORT_INTEGER : TK_SHORT_INTEGER;
  852. NEXT();
  853. break;
  854. case _SC('l'):
  855. case _SC('L'):
  856. rtype = (rtype == TK_UNSIGNED_INTEGER) ? TK_UNSIGNED_LONG_INTEGER : TK_LONG_INTEGER;
  857. NEXT();
  858. if((CUR_CHAR == _SC('l')) || (CUR_CHAR == _SC('L')) )
  859. {
  860. switch(rtype)
  861. {
  862. case TK_UNSIGNED_INTEGER:
  863. case TK_UNSIGNED_LONG_INTEGER:
  864. rtype = TK_UNSIGNED_LONG_LONG_INTEGER;
  865. break;
  866. default:
  867. rtype = TK_LONG_LONG_INTEGER;
  868. }
  869. NEXT();
  870. }
  871. break;
  872. }
  873. case THEX:
  874. case TOCTAL:
  875. //to allow 64 bits integers comment bellow
  876. //if(itmp > INT_MAX) return Error(_SC("integer overflow %ulld %d"));
  877. data->nvalue = (SQInteger) itmp;
  878. return rtype;
  879. }
  880. return 0;
  881. }
  882. SQInteger SQLexer::ReadID()
  883. {
  884. SQInteger res;
  885. INIT_TEMP_STRING();
  886. do {
  887. APPEND_CHAR(CUR_CHAR);
  888. NEXT();
  889. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  890. TERMINATE_BUFFER();
  891. if((CUR_CHAR == _SC('"')) && (data->longstr[0] == _SC('R')) && (data->longstr.size() == 2))
  892. {
  893. //C++ multiline string
  894. return ReadString(_SC('R'),true);
  895. }
  896. res = GetIDType(&data->longstr[0],data->longstr.size() - 1);
  897. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR || res == TK_DESTRUCTOR) {
  898. data->svalue = &data->longstr[0];
  899. }
  900. return res;
  901. }