Scanner.cpp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356
  1. #include "anki/util/Scanner.h"
  2. #include <boost/lexical_cast.hpp>
  3. #include <cstring>
  4. #include <iostream>
  5. #include <iomanip>
  6. #include <cmath>
  7. #include <sstream>
  8. #include <cassert>
  9. namespace anki { namespace scanner {
  10. //==============================================================================
  11. Exception::Exception(const std::string& err, int errNo_,
  12. const std::string& scriptFilename_, int scriptLineNmbr_)
  13. : error(err), errNo(errNo_), scriptFilename(scriptFilename_),
  14. scriptLineNmbr(scriptLineNmbr_)
  15. {}
  16. //==============================================================================
  17. Exception::Exception(const Exception& e)
  18. : std::exception(e), error(e.error), errNo(e.errNo),
  19. scriptFilename(e.scriptFilename), scriptLineNmbr(e.scriptLineNmbr)
  20. {}
  21. //==============================================================================
  22. const char* Exception::what() const throw()
  23. {
  24. errWhat = "Scanner exception (#" +
  25. boost::lexical_cast<std::string>(errNo) +
  26. ":" + scriptFilename + ':' +
  27. boost::lexical_cast<std::string>(scriptLineNmbr) + "): " + error;
  28. return errWhat.c_str();
  29. }
  30. //==============================================================================
  31. Token::Token(const Token& b)
  32. : code(b.code), dataType(b.dataType)
  33. {
  34. switch(b.dataType)
  35. {
  36. case DT_FLOAT:
  37. value.float_ = b.value.float_;
  38. break;
  39. case DT_INT:
  40. value.int_ = b.value.int_;
  41. break;
  42. case DT_CHAR:
  43. value.char_ = b.value.char_;
  44. break;
  45. case DT_STR:
  46. value.string = b.value.string;
  47. break;
  48. }
  49. memcpy(&asString[0], &b.asString[0], sizeof(asString));
  50. }
  51. //==============================================================================
  52. std::ostream& operator<<(std::ostream& s, const Token& x)
  53. {
  54. const TokenDataVal& val = x.getValue();
  55. TokenCode code = x.getCode();
  56. switch(code)
  57. {
  58. case TC_COMMENT:
  59. s << "comment";
  60. break;
  61. case TC_NEWLINE:
  62. s << "newline";
  63. break;
  64. case TC_END:
  65. s << "end of file";
  66. break;
  67. case TC_STRING:
  68. s << "string \"" << val.getString() << "\"";
  69. break;
  70. case TC_CHARACTER:
  71. s << "char '" << val.getChar() << "' (\"" <<
  72. x.getString() << "\")";
  73. break;
  74. case TC_NUMBER:
  75. if(x.getDataType() == DT_FLOAT)
  76. {
  77. s << "float " << val.getFloat() << " (\"" << x.getString() <<
  78. "\")";
  79. }
  80. else
  81. {
  82. s << "int " << val.getInt() << " (\"" <<
  83. x.getString() << "\")";
  84. }
  85. break;
  86. case TC_IDENTIFIER:
  87. s << "identifier \"" << val.getString() << "\"";
  88. break;
  89. case TC_ERROR:
  90. s << "scanner error";
  91. break;
  92. default:
  93. if(code >= TC_KE && code <= TC_KEYWORD)
  94. {
  95. s << "reserved word \"" << val.getString() << "\"";
  96. }
  97. else if(code >= TC_SCOPE_RESOLUTION && code <= TC_ASSIGN_OR)
  98. {
  99. s << "operator no " << (code - TC_SCOPE_RESOLUTION);
  100. }
  101. }
  102. return s;
  103. }
  104. //==============================================================================
  105. std::string Token::getInfoString() const
  106. {
  107. std::stringstream ss;
  108. ss << *this;
  109. return ss.str();
  110. }
  111. //==============================================================================
// Builds an Exception tagged with the C++ line of the throw site (__LINE__)
// plus the current script name and script line. Only usable inside Scanner
// member functions because it reads scriptName and lineNmbr.
// NOTE: x is pasted unparenthesised on purpose: callers pass expressions such
// as "text" + aChar + "text", and the textual expansion makes std::string()
// the leftmost operand so that every + is a std::string concatenation.
#define SCANNER_EXCEPTION(x) \
    Exception(std::string() + x, __LINE__, scriptName, lineNmbr)

// Sentinel character (0x7F, ASCII DEL). getLine() points pchar at it when
// the input is exhausted.
char Scanner::eofChar = 0x7F;

// reserved words grouped by length
// Every list ends with a {NULL, TC_ERROR} terminator entry.
Scanner::ResWord Scanner::rw2 [] =
{
    {"ke", TC_KE}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw3 [] =
{
    {"key", TC_KEY}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw4 [] =
{
    {"keyw", TC_KEYW}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw5 [] =
{
    {"keywo", TC_KEYWO}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw6 [] =
{
    {"keywor", TC_KEYWOR}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw7 [] =
{
    {"keyword", TC_KEYWORD}, {NULL, TC_ERROR}
};

// Indexed by word length. Lengths 0 and 1 have no list; checkWord() only
// consults lengths 2..7.
Scanner::ResWord* Scanner::rwTable [] = // reserved word table
{
    NULL, NULL, rw2, rw3, rw4, rw5, rw6, rw7,
};

// ascii table
// One class flag per 7-bit ASCII code; the real contents are written by
// initAsciiMap().
Scanner::AsciiFlag Scanner::asciiLookupTable [128] = {AC_ERROR};
  146. //==============================================================================
  147. Scanner::Scanner(bool newlinesAsWhitespace)
  148. {
  149. strcpy(scriptName, "unnamed-script");
  150. init(newlinesAsWhitespace);
  151. }
  152. //==============================================================================
  153. Scanner::Scanner(const char* filename, bool newlinesAsWhitespace)
  154. {
  155. strcpy(scriptName, "unnamed-script");
  156. init(newlinesAsWhitespace);
  157. loadFile(filename);
  158. }
  159. //==============================================================================
  160. Scanner::Scanner(std::istream& istream_, const char* scriptName_,
  161. bool newlinesAsWhitespace)
  162. {
  163. strcpy(scriptName, "unnamed-script");
  164. init(newlinesAsWhitespace);
  165. loadIstream(istream_, scriptName_);
  166. }
  167. //==============================================================================
  168. void Scanner::initAsciiMap()
  169. {
  170. memset(&asciiLookupTable[0], AC_ERROR, sizeof(asciiLookupTable));
  171. for(uint x = 'a'; x <= 'z'; x++)
  172. {
  173. lookupAscii(x) = AC_LETTER;
  174. }
  175. for(uint x = 'A'; x <= 'Z'; x++)
  176. {
  177. lookupAscii(x) = AC_LETTER;
  178. }
  179. for(uint x = '0'; x <= '9'; x++)
  180. {
  181. lookupAscii(x) = AC_DIGIT;
  182. }
  183. lookupAscii(':') = lookupAscii('[') = lookupAscii(']') =
  184. lookupAscii('(') = lookupAscii(')') = lookupAscii('.') =
  185. lookupAscii('{') = lookupAscii('}') = lookupAscii(',') =
  186. lookupAscii(';') = lookupAscii('?') = lookupAscii('=') =
  187. lookupAscii('!') = lookupAscii('<') = lookupAscii('>') =
  188. lookupAscii('|') = lookupAscii('&') = lookupAscii('+') =
  189. lookupAscii('-') = lookupAscii('*') = lookupAscii('/') =
  190. lookupAscii('~') = lookupAscii('%') = lookupAscii('#') =
  191. lookupAscii('^') = lookupAscii('\\') = AC_SPECIAL;
  192. lookupAscii('\t') = lookupAscii(' ') = lookupAscii('\0') =
  193. AC_WHITESPACE;
  194. lookupAscii('\n') = AC_ERROR; // newline is unacceptable char
  195. lookupAscii('@') = lookupAscii('`') = lookupAscii('$') =
  196. AC_ACCEPTABLE_IN_COMMENTS;
  197. lookupAscii('\"') = AC_DOUBLEQUOTE;
  198. lookupAscii('\'') = AC_QUOTE;
  199. lookupAscii((int)eofChar) = AC_EOF;
  200. lookupAscii('_') = AC_LETTER;
  201. }
  202. //==============================================================================
  203. void Scanner::init(bool newlinesAsWhitespace_)
  204. {
  205. newlinesAsWhitespace = newlinesAsWhitespace_;
  206. commentedLines = 0;
  207. inStream = NULL;
  208. if(lookupAscii('a') != AC_LETTER)
  209. {
  210. initAsciiMap();
  211. }
  212. lineNmbr = 0;
  213. memset(line, eofChar, sizeof(char) * MAX_SCRIPT_LINE_LEN);
  214. }
  215. //==============================================================================
  216. void Scanner::getLine()
  217. {
  218. if(!inStream->getline(line, MAX_SCRIPT_LINE_LEN - 1, '\n'))
  219. {
  220. pchar = &eofChar;
  221. }
  222. else
  223. {
  224. pchar = &line[0];
  225. ++lineNmbr;
  226. }
  227. assert(inStream->gcount() <= MAX_SCRIPT_LINE_LEN - 10); // too big line
  228. }
  229. //==============================================================================
  230. char Scanner::getNextChar()
  231. {
  232. if(*pchar=='\0')
  233. {
  234. getLine();
  235. }
  236. else
  237. {
  238. ++pchar;
  239. }
  240. if(*pchar == '\r') // windows crap
  241. {
  242. *pchar = '\0';
  243. }
  244. else if(lookupAscii(*pchar) == AC_ERROR)
  245. {
  246. throw SCANNER_EXCEPTION("Unacceptable char '" + *pchar + "' 0x" +
  247. boost::lexical_cast<std::string>(static_cast<uint>(*pchar)));
  248. }
  249. return *pchar;
  250. }
  251. //==============================================================================
  252. char Scanner::putBackChar()
  253. {
  254. if(pchar != line && *pchar != eofChar)
  255. {
  256. --pchar;
  257. }
  258. return *pchar;
  259. }
  260. //==============================================================================
  261. void Scanner::getAllPrintAll()
  262. {
  263. do
  264. {
  265. getNextToken();
  266. std::cout << std::setw(3) << std::setfill('0') << getLineNumber() <<
  267. ": " << crntToken << std::endl;
  268. } while(crntToken.code != TC_END);
  269. }
  270. //==============================================================================
  271. void Scanner::loadFile(const char* filename_)
  272. {
  273. inFstream.open(filename_);
  274. if(!inFstream.is_open())
  275. {
  276. throw SCANNER_EXCEPTION("Cannot open file \"" + filename_ + '\"');
  277. }
  278. loadIstream(inFstream, filename_);
  279. }
  280. //==============================================================================
  281. void Scanner::loadIstream(std::istream& istream_, const char* scriptName_)
  282. {
  283. if(inStream != NULL)
  284. {
  285. throw SCANNER_EXCEPTION("Tokenizer already initialized");
  286. }
  287. inStream = &istream_;
  288. // init globals
  289. // Too big name
  290. assert(strlen(scriptName_) <= sizeof(scriptName) / sizeof(char) - 1);
  291. crntToken.code = TC_ERROR;
  292. lineNmbr = 0;
  293. strcpy(scriptName, scriptName_);
  294. getLine();
  295. }
  296. //==============================================================================
  297. void Scanner::unload()
  298. {
  299. inFstream.close();
  300. }
  301. //==============================================================================
  302. const Token& Scanner::getNextToken()
  303. {
  304. start:
  305. //if(crntToken.code == TC_NEWLINE) getNextChar();
  306. if(commentedLines>0)
  307. {
  308. crntToken.code = TC_NEWLINE;
  309. --commentedLines;
  310. // the ultimate hack. I should remember not to do such crap in the
  311. // future
  312. ++lineNmbr;
  313. }
  314. else if(*pchar == '/')
  315. {
  316. char ch = getNextChar();
  317. if(ch == '/' || ch == '*')
  318. {
  319. putBackChar();
  320. int line = getLineNumber();
  321. checkComment();
  322. commentedLines = getLineNumber() - line; // update commentedLines
  323. lineNmbr -= commentedLines; // part of the ultimate hack
  324. }
  325. else
  326. {
  327. putBackChar();
  328. goto crappyLabel;
  329. }
  330. }
  331. else if(*pchar == '.')
  332. {
  333. uint asc = lookupAscii(getNextChar());
  334. putBackChar();
  335. if(asc == AC_DIGIT)
  336. {
  337. checkNumber();
  338. }
  339. else
  340. {
  341. checkSpecial();
  342. }
  343. }
  344. else if(*pchar=='\0') // if newline
  345. {
  346. if(lookupAscii(getNextChar()) == AC_EOF)
  347. {
  348. crntToken.code = TC_END;
  349. }
  350. else
  351. {
  352. crntToken.code = TC_NEWLINE;
  353. }
  354. }
  355. else
  356. {
  357. crappyLabel:
  358. switch(lookupAscii(*pchar))
  359. {
  360. case AC_WHITESPACE :
  361. getNextChar();
  362. goto start;
  363. case AC_LETTER:
  364. checkWord();
  365. break;
  366. case AC_DIGIT:
  367. checkNumber();
  368. break;
  369. case AC_SPECIAL:
  370. checkSpecial();
  371. break;
  372. case AC_QUOTE:
  373. checkChar();
  374. break;
  375. case AC_DOUBLEQUOTE:
  376. checkString();
  377. break;
  378. case AC_EOF:
  379. crntToken.code = TC_END;
  380. break;
  381. case AC_ERROR:
  382. default:
  383. getNextChar();
  384. throw SCANNER_EXCEPTION("Unexpected character \'" + *pchar +
  385. '\'');
  386. goto start;
  387. }
  388. }
  389. // skip comments
  390. if(crntToken.code == TC_COMMENT)
  391. {
  392. goto start;
  393. }
  394. // skip newlines
  395. if(crntToken.code == TC_NEWLINE && newlinesAsWhitespace)
  396. {
  397. goto start;
  398. }
  399. return crntToken;
  400. }
  401. //==============================================================================
  402. void Scanner::checkWord()
  403. {
  404. char* tmpStr = &crntToken.asString[0];
  405. char ch = *pchar;
  406. //build the string
  407. do
  408. {
  409. *tmpStr++ = ch;
  410. ch = getNextChar();
  411. } while(lookupAscii(ch) == AC_LETTER || lookupAscii(ch) == AC_DIGIT);
  412. *tmpStr = '\0'; // finalize it
  413. //check if reserved
  414. int len = tmpStr - &crntToken.asString[0];
  415. crntToken.code = TC_IDENTIFIER;
  416. crntToken.value.string = &crntToken.asString[0];
  417. crntToken.dataType = DT_STR; // not important
  418. if(len <= 7 && len >= 2)
  419. {
  420. int x = 0;
  421. while(true)
  422. {
  423. if(rwTable[len][x].string == NULL)
  424. {
  425. break;
  426. }
  427. if(strcmp(rwTable[len][x].string, &crntToken.asString[0]) == 0)
  428. {
  429. crntToken.code = rwTable[len][x].code;
  430. break;
  431. }
  432. ++x;
  433. }
  434. }
  435. }
  436. //==============================================================================
  437. void Scanner::checkComment()
  438. {
  439. // Beginning
  440. if(getNextChar()=='*')
  441. {
  442. goto cStyleCmnt;
  443. }
  444. // C++ style comment
  445. else if(*pchar=='/')
  446. {
  447. while(true)
  448. {
  449. char ch = getNextChar();
  450. if(ch == '\0')
  451. {
  452. crntToken.code = TC_COMMENT;
  453. return;
  454. }
  455. else if(ch == '\\')
  456. {
  457. if(getNextChar() == '\0')
  458. {
  459. getNextChar();
  460. }
  461. }
  462. }
  463. }
  464. else
  465. {
  466. goto error;
  467. }
  468. // C style comment
  469. cStyleCmnt:
  470. if(getNextChar()=='*')
  471. {
  472. goto finalizeCCmnt;
  473. }
  474. else if(*pchar==eofChar)
  475. {
  476. goto error;
  477. }
  478. else
  479. {
  480. goto cStyleCmnt;
  481. }
  482. // C++ style comment
  483. finalizeCCmnt:
  484. if(getNextChar()=='/')
  485. {
  486. crntToken.code = TC_COMMENT;
  487. getNextChar();
  488. return;
  489. }
  490. else
  491. {
  492. goto cStyleCmnt;
  493. }
  494. //error
  495. error:
  496. crntToken.code = TC_ERROR;
  497. throw SCANNER_EXCEPTION("Incorrect comment ending");
  498. }
  499. //==============================================================================
  500. void Scanner::checkNumber()
  501. {
  502. // This func is working great, dont try to understand it and dont even
  503. // think to try touching it.
  504. //RASSERT_THROW_EXCEPTION(sizeof(long) != 8); // ulong must be 64bit
  505. long num = 0; // value of the number & part of the float num before '.'
  506. long fnum = 0; // part of the float num after '.'
  507. long dad = 0; // digits after dot (for floats)
  508. bool expSign = 0; // exponent sign in case float is represented in mant/exp
  509. // format. 0 means positive and 1 negative
  510. long exp = 0; // the exponent in case float is represented in mant/exp
  511. // format
  512. char* tmpStr = &crntToken.asString[0];
  513. crntToken.dataType = DT_INT;
  514. uint asc;
  515. // begin
  516. if(*pchar == '0')
  517. {
  518. goto _0;
  519. }
  520. else if(lookupAscii(*pchar) == AC_DIGIT)
  521. {
  522. num = num*10 + *pchar-'0';
  523. goto _0_9;
  524. }
  525. else if (*pchar == '.')
  526. {
  527. goto _float;
  528. }
  529. else
  530. {
  531. goto error;
  532. }
  533. // 0????
  534. _0:
  535. *tmpStr++ = *pchar;
  536. getNextChar();
  537. asc = lookupAscii(*pchar);
  538. if (*pchar == 'x' || *pchar == 'X')
  539. {
  540. goto _0x;
  541. }
  542. else if(*pchar == 'e' || *pchar == 'E')
  543. {
  544. goto _0_9_dot_0_9_e;
  545. }
  546. else if(asc == AC_DIGIT)
  547. {
  548. putBackChar();
  549. goto _0_9;
  550. }
  551. else if(*pchar == '.')
  552. {
  553. goto _float;
  554. }
  555. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  556. {
  557. goto finalize;
  558. }
  559. else
  560. {
  561. goto error;
  562. }
  563. // 0x????
  564. _0x:
  565. *tmpStr++ = *pchar;
  566. getNextChar();
  567. asc = lookupAscii(*pchar);
  568. if((asc == AC_DIGIT) ||
  569. (*pchar >= 'a' && *pchar <= 'f') ||
  570. (*pchar >= 'A' && *pchar <= 'F'))
  571. {
  572. num <<= 4;
  573. if(*pchar>='a' && *pchar<='f')
  574. {
  575. num += *pchar - 'a' + 0xA;
  576. }
  577. else if(*pchar>='A' && *pchar<='F')
  578. {
  579. num += *pchar - 'A' + 0xA;
  580. }
  581. else
  582. {
  583. num += *pchar - '0';
  584. }
  585. goto _0x0_9orA_F;
  586. }
  587. else
  588. goto error;
  589. // 0x{0-9 || a-f}??
  590. _0x0_9orA_F:
  591. *tmpStr++ = *pchar;
  592. getNextChar();
  593. asc = lookupAscii(*pchar);
  594. if((asc == AC_DIGIT) ||
  595. (*pchar >= 'a' && *pchar <= 'f') ||
  596. (*pchar >= 'A' && *pchar <= 'F'))
  597. {
  598. num <<= 4;
  599. if(*pchar>='a' && *pchar<='f')
  600. {
  601. num += *pchar - 'a' + 0xA;
  602. }
  603. else if(*pchar>='A' && *pchar<='F')
  604. {
  605. num += *pchar - 'A' + 0xA;
  606. }
  607. else
  608. {
  609. num += *pchar - '0';
  610. }
  611. goto _0x0_9orA_F;
  612. }
  613. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  614. {
  615. goto finalize;
  616. }
  617. else
  618. {
  619. goto error; // err
  620. }
  621. // {0-9}
  622. _0_9:
  623. *tmpStr++ = *pchar;
  624. getNextChar();
  625. asc = lookupAscii(*pchar);
  626. if(asc == AC_DIGIT)
  627. {
  628. num = num * 10 + *pchar - '0';
  629. goto _0_9;
  630. }
  631. else if(*pchar == 'e' || *pchar == 'E')
  632. {
  633. goto _0_9_dot_0_9_e;
  634. }
  635. else if(*pchar == '.')
  636. {
  637. goto _float;
  638. }
  639. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  640. {
  641. goto finalize;
  642. }
  643. else
  644. {
  645. goto error; // err
  646. }
  647. // {0-9}.??
  648. _float:
  649. *tmpStr++ = *pchar;
  650. getNextChar();
  651. asc = lookupAscii(*pchar);
  652. crntToken.dataType = DT_FLOAT;
  653. if(asc == AC_DIGIT)
  654. {
  655. fnum = fnum * 10 + *pchar - '0';
  656. ++dad;
  657. goto _float;
  658. }
  659. else if(*pchar == '.')
  660. {
  661. *tmpStr++ = *pchar;
  662. getNextChar();
  663. goto error;
  664. }
  665. else if(*pchar == 'f' || *pchar == 'F')
  666. {
  667. goto _0_9_dot_0_9_f;
  668. }
  669. else if(*pchar == 'e' || *pchar == 'E')
  670. {
  671. goto _0_9_dot_0_9_e;
  672. }
  673. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  674. {
  675. goto finalize;
  676. }
  677. else
  678. {
  679. goto error;
  680. }
  681. // [{0-9}].[{0-9}]f??
  682. _0_9_dot_0_9_f:
  683. *tmpStr++ = *pchar;
  684. getNextChar();
  685. asc = lookupAscii(*pchar);
  686. if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  687. {
  688. goto finalize;
  689. }
  690. else
  691. {
  692. goto error;
  693. }
  694. // [{0-9}].[{0-9}]e??
  695. _0_9_dot_0_9_e:
  696. *tmpStr++ = *pchar;
  697. getNextChar();
  698. asc = lookupAscii(*pchar);
  699. crntToken.dataType = DT_FLOAT;
  700. if(*pchar == '+' || *pchar == '-')
  701. {
  702. if(*pchar == '-') expSign = 1;
  703. //*tmpStr++ = *pchar; getNextChar();
  704. goto _0_9_dot_0_9_e_sign;
  705. }
  706. else if(asc == AC_DIGIT)
  707. {
  708. exp = exp * 10 + *pchar - '0';
  709. goto _0_9_dot_0_9_e_sign_0_9;
  710. }
  711. else
  712. {
  713. goto error;
  714. }
  715. // [{0-9}].[{0-9}]e{+,-}??
  716. // After the sign we want number
  717. _0_9_dot_0_9_e_sign:
  718. *tmpStr++ = *pchar;
  719. getNextChar();
  720. asc = lookupAscii(*pchar);
  721. if(asc == AC_DIGIT)
  722. {
  723. exp = exp * 10 + *pchar - '0';
  724. goto _0_9_dot_0_9_e_sign_0_9;
  725. }
  726. else
  727. {
  728. goto error;
  729. }
  730. // [{0-9}].[{0-9}]e{+,-}{0-9}??
  731. // After the number in exponent we want other number or we finalize
  732. _0_9_dot_0_9_e_sign_0_9:
  733. *tmpStr++ = *pchar;
  734. getNextChar();
  735. asc = lookupAscii(*pchar);
  736. if(asc == AC_DIGIT)
  737. {
  738. exp = exp * 10 + *pchar - '0';
  739. goto _0_9_dot_0_9_e_sign_0_9;
  740. }
  741. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  742. {
  743. goto finalize;
  744. }
  745. else
  746. {
  747. goto error;
  748. }
  749. // finalize
  750. finalize:
  751. crntToken.code = TC_NUMBER;
  752. if(crntToken.dataType == DT_INT)
  753. {
  754. crntToken.value.int_ = num;
  755. }
  756. else
  757. {
  758. double dbl = (double)num + (double)(pow(10, -dad)*fnum);
  759. if(exp != 0) // if we have exponent
  760. {
  761. if(expSign == true)
  762. {
  763. exp = -exp; // change the sign if necessary
  764. }
  765. dbl = dbl * pow(10, exp);
  766. }
  767. crntToken.value.float_ = dbl;
  768. }
  769. *tmpStr = '\0';
  770. return;
  771. //error
  772. error:
  773. crntToken.code = TC_ERROR;
  774. // run until white space or special
  775. asc = lookupAscii(*pchar);
  776. while(asc!=AC_WHITESPACE && asc!=AC_SPECIAL && asc!=AC_EOF)
  777. {
  778. *tmpStr++ = *pchar;
  779. asc = lookupAscii(getNextChar());
  780. }
  781. *tmpStr = '\0';
  782. throw SCANNER_EXCEPTION("Bad number suffix \"" +
  783. &crntToken.asString[0] + '\"');
  784. }
  785. //==============================================================================
  786. void Scanner::checkString()
  787. {
  788. char* tmpStr = &crntToken.asString[0];
  789. char ch = getNextChar();
  790. for(;;)
  791. {
  792. // Error
  793. if(ch == '\0' || ch == eofChar) // if end of line or eof
  794. {
  795. crntToken.code = TC_ERROR;
  796. *tmpStr = '\0';
  797. throw SCANNER_EXCEPTION("Incorrect string ending \"" +
  798. &crntToken.asString[0] + '\"');
  799. return;
  800. }
  801. // Escape Codes
  802. else if(ch == '\\')
  803. {
  804. ch = getNextChar();
  805. if(ch == eofChar)
  806. {
  807. crntToken.code = TC_ERROR;
  808. *tmpStr = '\0';
  809. throw SCANNER_EXCEPTION("Incorrect string ending \"" +
  810. &crntToken.asString[0] + '\"');
  811. return;
  812. }
  813. switch(ch)
  814. {
  815. case 'n':
  816. *tmpStr++ = '\n';
  817. break;
  818. case 't':
  819. *tmpStr++ = '\t';
  820. break;
  821. case '0':
  822. *tmpStr++ = '\0';
  823. break;
  824. case 'a':
  825. *tmpStr++ = '\a';
  826. break;
  827. case '\"':
  828. *tmpStr++ = '\"';
  829. break;
  830. case 'f':
  831. *tmpStr++ = '\f';
  832. break;
  833. case 'v':
  834. *tmpStr++ = '\v';
  835. break;
  836. case '\'':
  837. *tmpStr++ = '\'';
  838. break;
  839. case '\\':
  840. *tmpStr++ = '\\';
  841. break;
  842. case '\?':
  843. *tmpStr++ = '\?';
  844. break;
  845. case '\0':
  846. break; // not an escape char but works almost the same
  847. default:
  848. throw SCANNER_EXCEPTION(
  849. "Unrecognized escape character \'\\" + ch + '\'');
  850. *tmpStr++ = ch;
  851. }
  852. }
  853. // End
  854. else if(ch=='\"')
  855. {
  856. *tmpStr = '\0';
  857. crntToken.code = TC_STRING;
  858. crntToken.value.string = &crntToken.asString[0];
  859. getNextChar();
  860. return;
  861. }
  862. // Build str(main loop)
  863. else
  864. {
  865. *tmpStr++ = ch;
  866. }
  867. ch = getNextChar();
  868. }
  869. }
  870. //==============================================================================
  871. void Scanner::checkChar()
  872. {
  873. char ch = getNextChar();
  874. char ch0 = ch;
  875. char* tmpStr = &crntToken.asString[0];
  876. crntToken.code = TC_ERROR;
  877. *tmpStr++ = ch;
  878. if(ch=='\0' || ch==eofChar) // check char after '
  879. {
  880. throw SCANNER_EXCEPTION("Newline in constant");
  881. return;
  882. }
  883. if (ch=='\'') // if '
  884. {
  885. throw SCANNER_EXCEPTION("Empty constant");
  886. getNextChar();
  887. return;
  888. }
  889. if (ch=='\\') // if \ then maybe escape char
  890. {
  891. ch = getNextChar();
  892. *tmpStr++ = ch;
  893. if(ch == '\0' || ch == eofChar) //check again after the \.
  894. {
  895. throw SCANNER_EXCEPTION("Newline in constant");
  896. }
  897. switch (ch)
  898. {
  899. case 'n' :
  900. ch0 = '\n';
  901. break;
  902. case 't' :
  903. ch0 = '\t';
  904. break;
  905. case '0':
  906. ch0 = '\0';
  907. break;
  908. case 'a':
  909. ch0 = '\a';
  910. break;
  911. case '\"':
  912. ch0 = '\"';
  913. break;
  914. case 'f':
  915. ch0 = '\f';
  916. break;
  917. case 'v':
  918. ch0 = '\v';
  919. break;
  920. case '\'':
  921. ch0 = '\'';
  922. break;
  923. case '\\':
  924. ch0 = '\\';
  925. break;
  926. case '\?':
  927. ch0 = '\?';
  928. break;
  929. case 'r':
  930. ch0 = '\r';
  931. break;
  932. default:
  933. ch0 = ch;
  934. throw SCANNER_EXCEPTION("Unrecognized escape character \'\\" +
  935. ch + '\'');
  936. }
  937. crntToken.value.char_ = ch0;
  938. }
  939. else
  940. {
  941. crntToken.value.char_ = ch;
  942. }
  943. ch = getNextChar();
  944. if(ch=='\'') //end
  945. {
  946. *tmpStr = '\0';
  947. crntToken.code = TC_CHARACTER;
  948. getNextChar();
  949. return;
  950. }
  951. throw SCANNER_EXCEPTION("Expected \'");
  952. }
  953. //==============================================================================
  954. void Scanner::checkSpecial()
  955. {
  956. char ch = *pchar;
  957. TokenCode code = TC_ERROR;
  958. switch(ch)
  959. {
  960. case '#':
  961. code = TC_SHARP;
  962. break;
  963. case ',':
  964. code = TC_COMMA;
  965. break;
  966. case ';':
  967. code = TC_PERIOD;
  968. break;
  969. case '(':
  970. code = TC_L_PAREN;
  971. break;
  972. case ')':
  973. code = TC_R_PAREN;
  974. break;
  975. case '[':
  976. code = TC_L_SQ_BRACKET;
  977. break;
  978. case ']':
  979. code = TC_R_SQ_BRACKET;
  980. break;
  981. case '{':
  982. code = TC_L_BRACKET;
  983. break;
  984. case '}':
  985. code = TC_R_BRACKET;
  986. break;
  987. case '?':
  988. code = TC_QUESTIONMARK;
  989. break;
  990. case '~':
  991. code = TC_UNARAY_COMPLEMENT;
  992. break;
  993. case '.':
  994. ch = getNextChar();
  995. switch(ch)
  996. {
  997. case '*':
  998. code = TC_POINTER_TO_MEMBER;
  999. break;
  1000. default:
  1001. putBackChar();
  1002. code = TC_DOT;
  1003. }
  1004. break;
  1005. case ':':
  1006. ch = getNextChar();
  1007. switch(ch)
  1008. {
  1009. case ':':
  1010. code = TC_SCOPE_RESOLUTION;
  1011. break;
  1012. default:
  1013. putBackChar();
  1014. code = TC_UPDOWNDOT;
  1015. }
  1016. break;
  1017. case '-':
  1018. ch = getNextChar();
  1019. switch(ch)
  1020. {
  1021. case '>':
  1022. code = TC_POINTER_TO_MEMBER;
  1023. break;
  1024. case '-':
  1025. code = TC_DEC;
  1026. break;
  1027. case '=':
  1028. code = TC_ASSIGN_SUB;
  1029. break;
  1030. default:
  1031. putBackChar();
  1032. code = TC_MINUS;
  1033. }
  1034. break;
  1035. case '=':
  1036. ch = getNextChar();
  1037. switch(ch)
  1038. {
  1039. case '=':
  1040. code = TC_EQUAL;
  1041. break;
  1042. default:
  1043. putBackChar();
  1044. code = TC_ASSIGN;
  1045. }
  1046. break;
  1047. case '!':
  1048. ch = getNextChar();
  1049. switch(ch)
  1050. {
  1051. case '=':
  1052. code = TC_NOT_EQUAL;
  1053. break;
  1054. default:
  1055. putBackChar();
  1056. code = TC_NOT;
  1057. }
  1058. break;
  1059. case '<':
  1060. ch = getNextChar();
  1061. switch(ch)
  1062. {
  1063. case '=':
  1064. code = TC_LESS_EQUAL;
  1065. break;
  1066. case '<':
  1067. ch = getNextChar();
  1068. switch(ch)
  1069. {
  1070. case '=':
  1071. code = TC_ASSIGN_SHL;
  1072. break;
  1073. default:
  1074. putBackChar();
  1075. code = TC_SHL;
  1076. }
  1077. break;
  1078. default:
  1079. putBackChar();
  1080. code = TC_LESS;
  1081. }
  1082. break;
  1083. case '>':
  1084. ch = getNextChar();
  1085. switch(ch)
  1086. {
  1087. case '=':
  1088. code = TC_GREATER_EQUAL;
  1089. break;
  1090. case '>':
  1091. ch = getNextChar();
  1092. switch(ch)
  1093. {
  1094. case '=':
  1095. code = TC_ASSIGN_SHR;
  1096. break;
  1097. default:
  1098. putBackChar();
  1099. code = TC_SHR;
  1100. }
  1101. break;
  1102. default:
  1103. putBackChar();
  1104. code = TC_GREATER;
  1105. }
  1106. break;
  1107. case '|':
  1108. ch = getNextChar();
  1109. switch(ch)
  1110. {
  1111. case '|':
  1112. code = TC_LOGICAL_OR;
  1113. break;
  1114. case '=':
  1115. code = TC_ASSIGN_OR;
  1116. break;
  1117. default:
  1118. putBackChar();
  1119. code = TC_BITWISE_OR;
  1120. }
  1121. break;
  1122. case '&':
  1123. ch = getNextChar();
  1124. switch(ch)
  1125. {
  1126. case '&':
  1127. code = TC_LOGICAL_AND;
  1128. break;
  1129. case '=':
  1130. code = TC_ASSIGN_AND;
  1131. break;
  1132. default:
  1133. putBackChar();
  1134. code = TC_BITWISE_AND;
  1135. }
  1136. break;
  1137. case '+':
  1138. ch = getNextChar();
  1139. switch(ch)
  1140. {
  1141. case '+':
  1142. code = TC_INC;
  1143. break;
  1144. case '=':
  1145. code = TC_ASSIGN_ADD;
  1146. break;
  1147. default:
  1148. putBackChar();
  1149. code = TC_PLUS;
  1150. }
  1151. break;
  1152. case '*':
  1153. ch = getNextChar();
  1154. switch(ch)
  1155. {
  1156. case '=':
  1157. code = TC_ASSIGN_MUL;
  1158. break;
  1159. default:
  1160. putBackChar();
  1161. code = TC_STAR;
  1162. }
  1163. break;
  1164. case '/':
  1165. ch = getNextChar();
  1166. switch(ch)
  1167. {
  1168. case '=':
  1169. code = TC_ASSIGN_DIV;
  1170. break;
  1171. default:
  1172. putBackChar();
  1173. code = TC_BSLASH;
  1174. }
  1175. break;
  1176. case '%':
  1177. ch = getNextChar();
  1178. switch(ch)
  1179. {
  1180. case '=':
  1181. code = TC_ASSIGN_MOD;
  1182. break;
  1183. default:
  1184. putBackChar();
  1185. code = TC_MOD;
  1186. }
  1187. break;
  1188. case '^':
  1189. ch = getNextChar();
  1190. switch(ch)
  1191. {
  1192. case '=':
  1193. code = TC_ASSIGN_XOR;
  1194. break;
  1195. default:
  1196. putBackChar();
  1197. code = TC_XOR;
  1198. }
  1199. break;
  1200. case '\\':
  1201. code = TC_BACK_SLASH;
  1202. break;
  1203. }
  1204. getNextChar();
  1205. crntToken.code = code;
  1206. }
  1207. }} // end namespaces