// Scanner.cpp
  1. #include "anki/util/Scanner.h"
  2. #include <boost/lexical_cast.hpp>
  3. #include <cstring>
  4. #include <iostream>
  5. #include <iomanip>
  6. #include <cmath>
  7. #include <sstream>
  8. namespace anki { namespace scanner {
  9. //==============================================================================
  10. Exception::Exception(const std::string& err, int errNo_,
  11. const std::string& scriptFilename_, int scriptLineNmbr_)
  12. : error(err), errNo(errNo_), scriptFilename(scriptFilename_),
  13. scriptLineNmbr(scriptLineNmbr_)
  14. {}
  15. //==============================================================================
  16. Exception::Exception(const Exception& e)
  17. : std::exception(e), error(e.error), errNo(e.errNo),
  18. scriptFilename(e.scriptFilename), scriptLineNmbr(e.scriptLineNmbr)
  19. {}
  20. //==============================================================================
  21. const char* Exception::what() const throw()
  22. {
  23. errWhat = "Scanner exception (#" +
  24. boost::lexical_cast<std::string>(errNo) +
  25. ":" + scriptFilename + ':' +
  26. boost::lexical_cast<std::string>(scriptLineNmbr) + "): " + error;
  27. return errWhat.c_str();
  28. }
  29. //==============================================================================
  30. Token::Token(const Token& b)
  31. : code(b.code), dataType(b.dataType)
  32. {
  33. switch(b.dataType)
  34. {
  35. case DT_FLOAT:
  36. value.float_ = b.value.float_;
  37. break;
  38. case DT_INT:
  39. value.int_ = b.value.int_;
  40. break;
  41. case DT_CHAR:
  42. value.char_ = b.value.char_;
  43. break;
  44. case DT_STR:
  45. value.string = b.value.string;
  46. break;
  47. }
  48. memcpy(&asString[0], &b.asString[0], sizeof(asString));
  49. }
  50. //==============================================================================
  51. std::ostream& operator<<(std::ostream& s, const Token& x)
  52. {
  53. const TokenDataVal& val = x.getValue();
  54. TokenCode code = x.getCode();
  55. switch(code)
  56. {
  57. case TC_COMMENT:
  58. s << "comment";
  59. break;
  60. case TC_NEWLINE:
  61. s << "newline";
  62. break;
  63. case TC_END:
  64. s << "end of file";
  65. break;
  66. case TC_STRING:
  67. s << "string \"" << val.getString() << "\"";
  68. break;
  69. case TC_CHARACTER:
  70. s << "char '" << val.getChar() << "' (\"" <<
  71. x.getString() << "\")";
  72. break;
  73. case TC_NUMBER:
  74. if(x.getDataType() == DT_FLOAT)
  75. {
  76. s << "float " << val.getFloat() << " (\"" << x.getString() <<
  77. "\")";
  78. }
  79. else
  80. {
  81. s << "int " << val.getInt() << " (\"" <<
  82. x.getString() << "\")";
  83. }
  84. break;
  85. case TC_IDENTIFIER:
  86. s << "identifier \"" << val.getString() << "\"";
  87. break;
  88. case TC_ERROR:
  89. s << "scanner error";
  90. break;
  91. default:
  92. if(code >= TC_KE && code <= TC_KEYWORD)
  93. {
  94. s << "reserved word \"" << val.getString() << "\"";
  95. }
  96. else if(code >= TC_SCOPE_RESOLUTION && code <= TC_ASSIGN_OR)
  97. {
  98. s << "operator no " << (code - TC_SCOPE_RESOLUTION);
  99. }
  100. }
  101. return s;
  102. }
  103. //==============================================================================
  104. std::string Token::getInfoString() const
  105. {
  106. std::stringstream ss;
  107. ss << *this;
  108. return ss.str();
  109. }
  110. //==============================================================================
// Creates an Exception carrying the message x, the C++ source line of the
// throw site (__LINE__) as error number, and the current script name and
// script line. It must be used inside Scanner member functions because it
// reads scriptName and lineNmbr.
// x is deliberately NOT parenthesised: callers write chains such as
// "text" + ch + "text", and the leading std::string() makes the whole chain a
// left-to-right std::string concatenation instead of pointer arithmetic.
#define SCANNER_EXCEPTION(x) \
	Exception(std::string() + x, __LINE__, scriptName, lineNmbr)
// Sentinel character that marks the end of the input. getLine() points pchar
// at it when no more lines can be read and initAsciiMap() classifies it as
// AC_EOF.
char Scanner::eofChar = 0x7F;

// Reserved words grouped by length: rwN holds the reserved words that are N
// characters long. Every list ends with a {NULL, TC_ERROR} sentinel, which is
// what checkWord() uses to stop its search.
Scanner::ResWord Scanner::rw2 [] =
{
	{"ke", TC_KE}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw3 [] =
{
	{"key", TC_KEY}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw4 [] =
{
	{"keyw", TC_KEYW}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw5 [] =
{
	{"keywo", TC_KEYWO}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw6 [] =
{
	{"keywor", TC_KEYWOR}, {NULL, TC_ERROR}
};
Scanner::ResWord Scanner::rw7 [] =
{
	{"keyword", TC_KEYWORD}, {NULL, TC_ERROR}
};

// Reserved word table, indexed by word length. Lengths 0 and 1 have no
// reserved words; checkWord() only indexes it with lengths 2..7.
Scanner::ResWord* Scanner::rwTable [] = // reserved word table
{
	NULL, NULL, rw2, rw3, rw4, rw5, rw6, rw7,
};

// Character class of every 7-bit ASCII code. The initializer only names the
// first element; the table is really filled in by initAsciiMap().
Scanner::AsciiFlag Scanner::asciiLookupTable [128] = {AC_ERROR};
  145. //==============================================================================
  146. Scanner::Scanner(bool newlinesAsWhitespace)
  147. {
  148. strcpy(scriptName, "unnamed-script");
  149. init(newlinesAsWhitespace);
  150. }
  151. //==============================================================================
  152. Scanner::Scanner(const char* filename, bool newlinesAsWhitespace)
  153. {
  154. strcpy(scriptName, "unnamed-script");
  155. init(newlinesAsWhitespace);
  156. loadFile(filename);
  157. }
  158. //==============================================================================
  159. Scanner::Scanner(std::istream& istream_, const char* scriptName_,
  160. bool newlinesAsWhitespace)
  161. {
  162. strcpy(scriptName, "unnamed-script");
  163. init(newlinesAsWhitespace);
  164. loadIstream(istream_, scriptName_);
  165. }
  166. //==============================================================================
  167. void Scanner::initAsciiMap()
  168. {
  169. memset(&asciiLookupTable[0], AC_ERROR, sizeof(asciiLookupTable));
  170. for(uint x = 'a'; x <= 'z'; x++)
  171. {
  172. lookupAscii(x) = AC_LETTER;
  173. }
  174. for(uint x = 'A'; x <= 'Z'; x++)
  175. {
  176. lookupAscii(x) = AC_LETTER;
  177. }
  178. for(uint x = '0'; x <= '9'; x++)
  179. {
  180. lookupAscii(x) = AC_DIGIT;
  181. }
  182. lookupAscii(':') = lookupAscii('[') = lookupAscii(']') =
  183. lookupAscii('(') = lookupAscii(')') = lookupAscii('.') =
  184. lookupAscii('{') = lookupAscii('}') = lookupAscii(',') =
  185. lookupAscii(';') = lookupAscii('?') = lookupAscii('=') =
  186. lookupAscii('!') = lookupAscii('<') = lookupAscii('>') =
  187. lookupAscii('|') = lookupAscii('&') = lookupAscii('+') =
  188. lookupAscii('-') = lookupAscii('*') = lookupAscii('/') =
  189. lookupAscii('~') = lookupAscii('%') = lookupAscii('#') =
  190. lookupAscii('^') = lookupAscii('\\') = AC_SPECIAL;
  191. lookupAscii('\t') = lookupAscii(' ') = lookupAscii('\0') =
  192. AC_WHITESPACE;
  193. lookupAscii('\n') = AC_ERROR; // newline is unacceptable char
  194. lookupAscii('@') = lookupAscii('`') = lookupAscii('$') =
  195. AC_ACCEPTABLE_IN_COMMENTS;
  196. lookupAscii('\"') = AC_DOUBLEQUOTE;
  197. lookupAscii('\'') = AC_QUOTE;
  198. lookupAscii((int)eofChar) = AC_EOF;
  199. lookupAscii('_') = AC_LETTER;
  200. }
  201. //==============================================================================
  202. void Scanner::init(bool newlinesAsWhitespace_)
  203. {
  204. newlinesAsWhitespace = newlinesAsWhitespace_;
  205. commentedLines = 0;
  206. inStream = NULL;
  207. if(lookupAscii('a') != AC_LETTER)
  208. {
  209. initAsciiMap();
  210. }
  211. lineNmbr = 0;
  212. memset(line, eofChar, sizeof(char) * MAX_SCRIPT_LINE_LEN);
  213. }
  214. //==============================================================================
  215. void Scanner::getLine()
  216. {
  217. if(!inStream->getline(line, MAX_SCRIPT_LINE_LEN - 1, '\n'))
  218. {
  219. pchar = &eofChar;
  220. }
  221. else
  222. {
  223. pchar = &line[0];
  224. ++lineNmbr;
  225. }
  226. assert(inStream->gcount() <= MAX_SCRIPT_LINE_LEN - 10); // too big line
  227. }
  228. //==============================================================================
  229. char Scanner::getNextChar()
  230. {
  231. if(*pchar=='\0')
  232. {
  233. getLine();
  234. }
  235. else
  236. {
  237. ++pchar;
  238. }
  239. if(*pchar == '\r') // windows crap
  240. {
  241. *pchar = '\0';
  242. }
  243. else if(lookupAscii(*pchar) == AC_ERROR)
  244. {
  245. throw SCANNER_EXCEPTION("Unacceptable char '" + *pchar + "' 0x" +
  246. boost::lexical_cast<std::string>(static_cast<uint>(*pchar)));
  247. }
  248. return *pchar;
  249. }
  250. //==============================================================================
  251. char Scanner::putBackChar()
  252. {
  253. if(pchar != line && *pchar != eofChar)
  254. {
  255. --pchar;
  256. }
  257. return *pchar;
  258. }
  259. //==============================================================================
  260. void Scanner::getAllPrintAll()
  261. {
  262. do
  263. {
  264. getNextToken();
  265. std::cout << std::setw(3) << std::setfill('0') << getLineNumber() <<
  266. ": " << crntToken << std::endl;
  267. } while(crntToken.code != TC_END);
  268. }
  269. //==============================================================================
  270. void Scanner::loadFile(const char* filename_)
  271. {
  272. inFstream.open(filename_);
  273. if(!inFstream.is_open())
  274. {
  275. throw SCANNER_EXCEPTION("Cannot open file \"" + filename_ + '\"');
  276. }
  277. loadIstream(inFstream, filename_);
  278. }
  279. //==============================================================================
  280. void Scanner::loadIstream(std::istream& istream_, const char* scriptName_)
  281. {
  282. if(inStream != NULL)
  283. {
  284. throw SCANNER_EXCEPTION("Tokenizer already initialized");
  285. }
  286. inStream = &istream_;
  287. // init globals
  288. // Too big name
  289. assert(strlen(scriptName_) <= sizeof(scriptName) / sizeof(char) - 1);
  290. crntToken.code = TC_ERROR;
  291. lineNmbr = 0;
  292. strcpy(scriptName, scriptName_);
  293. getLine();
  294. }
  295. //==============================================================================
  296. void Scanner::unload()
  297. {
  298. inFstream.close();
  299. }
  300. //==============================================================================
  301. const Token& Scanner::getNextToken()
  302. {
  303. start:
  304. //if(crntToken.code == TC_NEWLINE) getNextChar();
  305. if(commentedLines>0)
  306. {
  307. crntToken.code = TC_NEWLINE;
  308. --commentedLines;
  309. // the ultimate hack. I should remember not to do such crap in the
  310. // future
  311. ++lineNmbr;
  312. }
  313. else if(*pchar == '/')
  314. {
  315. char ch = getNextChar();
  316. if(ch == '/' || ch == '*')
  317. {
  318. putBackChar();
  319. int line = getLineNumber();
  320. checkComment();
  321. commentedLines = getLineNumber() - line; // update commentedLines
  322. lineNmbr -= commentedLines; // part of the ultimate hack
  323. }
  324. else
  325. {
  326. putBackChar();
  327. goto crappyLabel;
  328. }
  329. }
  330. else if(*pchar == '.')
  331. {
  332. uint asc = lookupAscii(getNextChar());
  333. putBackChar();
  334. if(asc == AC_DIGIT)
  335. {
  336. checkNumber();
  337. }
  338. else
  339. {
  340. checkSpecial();
  341. }
  342. }
  343. else if(*pchar=='\0') // if newline
  344. {
  345. if(lookupAscii(getNextChar()) == AC_EOF)
  346. {
  347. crntToken.code = TC_END;
  348. }
  349. else
  350. {
  351. crntToken.code = TC_NEWLINE;
  352. }
  353. }
  354. else
  355. {
  356. crappyLabel:
  357. switch(lookupAscii(*pchar))
  358. {
  359. case AC_WHITESPACE :
  360. getNextChar();
  361. goto start;
  362. case AC_LETTER:
  363. checkWord();
  364. break;
  365. case AC_DIGIT:
  366. checkNumber();
  367. break;
  368. case AC_SPECIAL:
  369. checkSpecial();
  370. break;
  371. case AC_QUOTE:
  372. checkChar();
  373. break;
  374. case AC_DOUBLEQUOTE:
  375. checkString();
  376. break;
  377. case AC_EOF:
  378. crntToken.code = TC_END;
  379. break;
  380. case AC_ERROR:
  381. default:
  382. getNextChar();
  383. throw SCANNER_EXCEPTION("Unexpected character \'" + *pchar +
  384. '\'');
  385. goto start;
  386. }
  387. }
  388. // skip comments
  389. if(crntToken.code == TC_COMMENT)
  390. {
  391. goto start;
  392. }
  393. // skip newlines
  394. if(crntToken.code == TC_NEWLINE && newlinesAsWhitespace)
  395. {
  396. goto start;
  397. }
  398. return crntToken;
  399. }
  400. //==============================================================================
  401. void Scanner::checkWord()
  402. {
  403. char* tmpStr = &crntToken.asString[0];
  404. char ch = *pchar;
  405. //build the string
  406. do
  407. {
  408. *tmpStr++ = ch;
  409. ch = getNextChar();
  410. } while(lookupAscii(ch) == AC_LETTER || lookupAscii(ch) == AC_DIGIT);
  411. *tmpStr = '\0'; // finalize it
  412. //check if reserved
  413. int len = tmpStr - &crntToken.asString[0];
  414. crntToken.code = TC_IDENTIFIER;
  415. crntToken.value.string = &crntToken.asString[0];
  416. crntToken.dataType = DT_STR; // not important
  417. if(len <= 7 && len >= 2)
  418. {
  419. int x = 0;
  420. while(true)
  421. {
  422. if(rwTable[len][x].string == NULL)
  423. {
  424. break;
  425. }
  426. if(strcmp(rwTable[len][x].string, &crntToken.asString[0]) == 0)
  427. {
  428. crntToken.code = rwTable[len][x].code;
  429. break;
  430. }
  431. ++x;
  432. }
  433. }
  434. }
  435. //==============================================================================
  436. void Scanner::checkComment()
  437. {
  438. // Beginning
  439. if(getNextChar()=='*')
  440. {
  441. goto cStyleCmnt;
  442. }
  443. // C++ style comment
  444. else if(*pchar=='/')
  445. {
  446. while(true)
  447. {
  448. char ch = getNextChar();
  449. if(ch == '\0')
  450. {
  451. crntToken.code = TC_COMMENT;
  452. return;
  453. }
  454. else if(ch == '\\')
  455. {
  456. if(getNextChar() == '\0')
  457. {
  458. getNextChar();
  459. }
  460. }
  461. }
  462. }
  463. else
  464. {
  465. goto error;
  466. }
  467. // C style comment
  468. cStyleCmnt:
  469. if(getNextChar()=='*')
  470. {
  471. goto finalizeCCmnt;
  472. }
  473. else if(*pchar==eofChar)
  474. {
  475. goto error;
  476. }
  477. else
  478. {
  479. goto cStyleCmnt;
  480. }
  481. // C++ style comment
  482. finalizeCCmnt:
  483. if(getNextChar()=='/')
  484. {
  485. crntToken.code = TC_COMMENT;
  486. getNextChar();
  487. return;
  488. }
  489. else
  490. {
  491. goto cStyleCmnt;
  492. }
  493. //error
  494. error:
  495. crntToken.code = TC_ERROR;
  496. throw SCANNER_EXCEPTION("Incorrect comment ending");
  497. }
  498. //==============================================================================
  499. void Scanner::checkNumber()
  500. {
  501. // This func is working great, dont try to understand it and dont even
  502. // think to try touching it.
  503. //RASSERT_THROW_EXCEPTION(sizeof(long) != 8); // ulong must be 64bit
  504. long num = 0; // value of the number & part of the float num before '.'
  505. long fnum = 0; // part of the float num after '.'
  506. long dad = 0; // digits after dot (for floats)
  507. bool expSign = 0; // exponent sign in case float is represented in mant/exp
  508. // format. 0 means positive and 1 negative
  509. long exp = 0; // the exponent in case float is represented in mant/exp
  510. // format
  511. char* tmpStr = &crntToken.asString[0];
  512. crntToken.dataType = DT_INT;
  513. uint asc;
  514. // begin
  515. if(*pchar == '0')
  516. {
  517. goto _0;
  518. }
  519. else if(lookupAscii(*pchar) == AC_DIGIT)
  520. {
  521. num = num*10 + *pchar-'0';
  522. goto _0_9;
  523. }
  524. else if (*pchar == '.')
  525. {
  526. goto _float;
  527. }
  528. else
  529. {
  530. goto error;
  531. }
  532. // 0????
  533. _0:
  534. *tmpStr++ = *pchar;
  535. getNextChar();
  536. asc = lookupAscii(*pchar);
  537. if (*pchar == 'x' || *pchar == 'X')
  538. {
  539. goto _0x;
  540. }
  541. else if(*pchar == 'e' || *pchar == 'E')
  542. {
  543. goto _0_9_dot_0_9_e;
  544. }
  545. else if(asc == AC_DIGIT)
  546. {
  547. putBackChar();
  548. goto _0_9;
  549. }
  550. else if(*pchar == '.')
  551. {
  552. goto _float;
  553. }
  554. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  555. {
  556. goto finalize;
  557. }
  558. else
  559. {
  560. goto error;
  561. }
  562. // 0x????
  563. _0x:
  564. *tmpStr++ = *pchar;
  565. getNextChar();
  566. asc = lookupAscii(*pchar);
  567. if((asc == AC_DIGIT) ||
  568. (*pchar >= 'a' && *pchar <= 'f') ||
  569. (*pchar >= 'A' && *pchar <= 'F'))
  570. {
  571. num <<= 4;
  572. if(*pchar>='a' && *pchar<='f')
  573. {
  574. num += *pchar - 'a' + 0xA;
  575. }
  576. else if(*pchar>='A' && *pchar<='F')
  577. {
  578. num += *pchar - 'A' + 0xA;
  579. }
  580. else
  581. {
  582. num += *pchar - '0';
  583. }
  584. goto _0x0_9orA_F;
  585. }
  586. else
  587. goto error;
  588. // 0x{0-9 || a-f}??
  589. _0x0_9orA_F:
  590. *tmpStr++ = *pchar;
  591. getNextChar();
  592. asc = lookupAscii(*pchar);
  593. if((asc == AC_DIGIT) ||
  594. (*pchar >= 'a' && *pchar <= 'f') ||
  595. (*pchar >= 'A' && *pchar <= 'F'))
  596. {
  597. num <<= 4;
  598. if(*pchar>='a' && *pchar<='f')
  599. {
  600. num += *pchar - 'a' + 0xA;
  601. }
  602. else if(*pchar>='A' && *pchar<='F')
  603. {
  604. num += *pchar - 'A' + 0xA;
  605. }
  606. else
  607. {
  608. num += *pchar - '0';
  609. }
  610. goto _0x0_9orA_F;
  611. }
  612. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  613. {
  614. goto finalize;
  615. }
  616. else
  617. {
  618. goto error; // err
  619. }
  620. // {0-9}
  621. _0_9:
  622. *tmpStr++ = *pchar;
  623. getNextChar();
  624. asc = lookupAscii(*pchar);
  625. if(asc == AC_DIGIT)
  626. {
  627. num = num * 10 + *pchar - '0';
  628. goto _0_9;
  629. }
  630. else if(*pchar == 'e' || *pchar == 'E')
  631. {
  632. goto _0_9_dot_0_9_e;
  633. }
  634. else if(*pchar == '.')
  635. {
  636. goto _float;
  637. }
  638. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  639. {
  640. goto finalize;
  641. }
  642. else
  643. {
  644. goto error; // err
  645. }
  646. // {0-9}.??
  647. _float:
  648. *tmpStr++ = *pchar;
  649. getNextChar();
  650. asc = lookupAscii(*pchar);
  651. crntToken.dataType = DT_FLOAT;
  652. if(asc == AC_DIGIT)
  653. {
  654. fnum = fnum * 10 + *pchar - '0';
  655. ++dad;
  656. goto _float;
  657. }
  658. else if(*pchar == '.')
  659. {
  660. *tmpStr++ = *pchar;
  661. getNextChar();
  662. goto error;
  663. }
  664. else if(*pchar == 'f' || *pchar == 'F')
  665. {
  666. goto _0_9_dot_0_9_f;
  667. }
  668. else if(*pchar == 'e' || *pchar == 'E')
  669. {
  670. goto _0_9_dot_0_9_e;
  671. }
  672. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  673. {
  674. goto finalize;
  675. }
  676. else
  677. {
  678. goto error;
  679. }
  680. // [{0-9}].[{0-9}]f??
  681. _0_9_dot_0_9_f:
  682. *tmpStr++ = *pchar;
  683. getNextChar();
  684. asc = lookupAscii(*pchar);
  685. if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  686. {
  687. goto finalize;
  688. }
  689. else
  690. {
  691. goto error;
  692. }
  693. // [{0-9}].[{0-9}]e??
  694. _0_9_dot_0_9_e:
  695. *tmpStr++ = *pchar;
  696. getNextChar();
  697. asc = lookupAscii(*pchar);
  698. crntToken.dataType = DT_FLOAT;
  699. if(*pchar == '+' || *pchar == '-')
  700. {
  701. if(*pchar == '-') expSign = 1;
  702. //*tmpStr++ = *pchar; getNextChar();
  703. goto _0_9_dot_0_9_e_sign;
  704. }
  705. else if(asc == AC_DIGIT)
  706. {
  707. exp = exp * 10 + *pchar - '0';
  708. goto _0_9_dot_0_9_e_sign_0_9;
  709. }
  710. else
  711. {
  712. goto error;
  713. }
  714. // [{0-9}].[{0-9}]e{+,-}??
  715. // After the sign we want number
  716. _0_9_dot_0_9_e_sign:
  717. *tmpStr++ = *pchar;
  718. getNextChar();
  719. asc = lookupAscii(*pchar);
  720. if(asc == AC_DIGIT)
  721. {
  722. exp = exp * 10 + *pchar - '0';
  723. goto _0_9_dot_0_9_e_sign_0_9;
  724. }
  725. else
  726. {
  727. goto error;
  728. }
  729. // [{0-9}].[{0-9}]e{+,-}{0-9}??
  730. // After the number in exponent we want other number or we finalize
  731. _0_9_dot_0_9_e_sign_0_9:
  732. *tmpStr++ = *pchar;
  733. getNextChar();
  734. asc = lookupAscii(*pchar);
  735. if(asc == AC_DIGIT)
  736. {
  737. exp = exp * 10 + *pchar - '0';
  738. goto _0_9_dot_0_9_e_sign_0_9;
  739. }
  740. else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
  741. {
  742. goto finalize;
  743. }
  744. else
  745. {
  746. goto error;
  747. }
  748. // finalize
  749. finalize:
  750. crntToken.code = TC_NUMBER;
  751. if(crntToken.dataType == DT_INT)
  752. {
  753. crntToken.value.int_ = num;
  754. }
  755. else
  756. {
  757. double dbl = (double)num + (double)(pow(10, -dad)*fnum);
  758. if(exp != 0) // if we have exponent
  759. {
  760. if(expSign == true)
  761. {
  762. exp = -exp; // change the sign if necessary
  763. }
  764. dbl = dbl * pow(10, exp);
  765. }
  766. crntToken.value.float_ = dbl;
  767. }
  768. *tmpStr = '\0';
  769. return;
  770. //error
  771. error:
  772. crntToken.code = TC_ERROR;
  773. // run until white space or special
  774. asc = lookupAscii(*pchar);
  775. while(asc!=AC_WHITESPACE && asc!=AC_SPECIAL && asc!=AC_EOF)
  776. {
  777. *tmpStr++ = *pchar;
  778. asc = lookupAscii(getNextChar());
  779. }
  780. *tmpStr = '\0';
  781. throw SCANNER_EXCEPTION("Bad number suffix \"" +
  782. &crntToken.asString[0] + '\"');
  783. }
  784. //==============================================================================
  785. void Scanner::checkString()
  786. {
  787. char* tmpStr = &crntToken.asString[0];
  788. char ch = getNextChar();
  789. for(;;)
  790. {
  791. // Error
  792. if(ch == '\0' || ch == eofChar) // if end of line or eof
  793. {
  794. crntToken.code = TC_ERROR;
  795. *tmpStr = '\0';
  796. throw SCANNER_EXCEPTION("Incorrect string ending \"" +
  797. &crntToken.asString[0] + '\"');
  798. return;
  799. }
  800. // Escape Codes
  801. else if(ch == '\\')
  802. {
  803. ch = getNextChar();
  804. if(ch == eofChar)
  805. {
  806. crntToken.code = TC_ERROR;
  807. *tmpStr = '\0';
  808. throw SCANNER_EXCEPTION("Incorrect string ending \"" +
  809. &crntToken.asString[0] + '\"');
  810. return;
  811. }
  812. switch(ch)
  813. {
  814. case 'n':
  815. *tmpStr++ = '\n';
  816. break;
  817. case 't':
  818. *tmpStr++ = '\t';
  819. break;
  820. case '0':
  821. *tmpStr++ = '\0';
  822. break;
  823. case 'a':
  824. *tmpStr++ = '\a';
  825. break;
  826. case '\"':
  827. *tmpStr++ = '\"';
  828. break;
  829. case 'f':
  830. *tmpStr++ = '\f';
  831. break;
  832. case 'v':
  833. *tmpStr++ = '\v';
  834. break;
  835. case '\'':
  836. *tmpStr++ = '\'';
  837. break;
  838. case '\\':
  839. *tmpStr++ = '\\';
  840. break;
  841. case '\?':
  842. *tmpStr++ = '\?';
  843. break;
  844. case '\0':
  845. break; // not an escape char but works almost the same
  846. default:
  847. throw SCANNER_EXCEPTION(
  848. "Unrecognized escape character \'\\" + ch + '\'');
  849. *tmpStr++ = ch;
  850. }
  851. }
  852. // End
  853. else if(ch=='\"')
  854. {
  855. *tmpStr = '\0';
  856. crntToken.code = TC_STRING;
  857. crntToken.value.string = &crntToken.asString[0];
  858. getNextChar();
  859. return;
  860. }
  861. // Build str(main loop)
  862. else
  863. {
  864. *tmpStr++ = ch;
  865. }
  866. ch = getNextChar();
  867. }
  868. }
  869. //==============================================================================
  870. void Scanner::checkChar()
  871. {
  872. char ch = getNextChar();
  873. char ch0 = ch;
  874. char* tmpStr = &crntToken.asString[0];
  875. crntToken.code = TC_ERROR;
  876. *tmpStr++ = ch;
  877. if(ch=='\0' || ch==eofChar) // check char after '
  878. {
  879. throw SCANNER_EXCEPTION("Newline in constant");
  880. return;
  881. }
  882. if (ch=='\'') // if '
  883. {
  884. throw SCANNER_EXCEPTION("Empty constant");
  885. getNextChar();
  886. return;
  887. }
  888. if (ch=='\\') // if \ then maybe escape char
  889. {
  890. ch = getNextChar();
  891. *tmpStr++ = ch;
  892. if(ch == '\0' || ch == eofChar) //check again after the \.
  893. {
  894. throw SCANNER_EXCEPTION("Newline in constant");
  895. }
  896. switch (ch)
  897. {
  898. case 'n' :
  899. ch0 = '\n';
  900. break;
  901. case 't' :
  902. ch0 = '\t';
  903. break;
  904. case '0':
  905. ch0 = '\0';
  906. break;
  907. case 'a':
  908. ch0 = '\a';
  909. break;
  910. case '\"':
  911. ch0 = '\"';
  912. break;
  913. case 'f':
  914. ch0 = '\f';
  915. break;
  916. case 'v':
  917. ch0 = '\v';
  918. break;
  919. case '\'':
  920. ch0 = '\'';
  921. break;
  922. case '\\':
  923. ch0 = '\\';
  924. break;
  925. case '\?':
  926. ch0 = '\?';
  927. break;
  928. case 'r':
  929. ch0 = '\r';
  930. break;
  931. default:
  932. ch0 = ch;
  933. throw SCANNER_EXCEPTION("Unrecognized escape character \'\\" +
  934. ch + '\'');
  935. }
  936. crntToken.value.char_ = ch0;
  937. }
  938. else
  939. {
  940. crntToken.value.char_ = ch;
  941. }
  942. ch = getNextChar();
  943. if(ch=='\'') //end
  944. {
  945. *tmpStr = '\0';
  946. crntToken.code = TC_CHARACTER;
  947. getNextChar();
  948. return;
  949. }
  950. throw SCANNER_EXCEPTION("Expected \'");
  951. }
  952. //==============================================================================
  953. void Scanner::checkSpecial()
  954. {
  955. char ch = *pchar;
  956. TokenCode code = TC_ERROR;
  957. switch(ch)
  958. {
  959. case '#':
  960. code = TC_SHARP;
  961. break;
  962. case ',':
  963. code = TC_COMMA;
  964. break;
  965. case ';':
  966. code = TC_PERIOD;
  967. break;
  968. case '(':
  969. code = TC_L_PAREN;
  970. break;
  971. case ')':
  972. code = TC_R_PAREN;
  973. break;
  974. case '[':
  975. code = TC_L_SQ_BRACKET;
  976. break;
  977. case ']':
  978. code = TC_R_SQ_BRACKET;
  979. break;
  980. case '{':
  981. code = TC_L_BRACKET;
  982. break;
  983. case '}':
  984. code = TC_R_BRACKET;
  985. break;
  986. case '?':
  987. code = TC_QUESTIONMARK;
  988. break;
  989. case '~':
  990. code = TC_UNARAY_COMPLEMENT;
  991. break;
  992. case '.':
  993. ch = getNextChar();
  994. switch(ch)
  995. {
  996. case '*':
  997. code = TC_POINTER_TO_MEMBER;
  998. break;
  999. default:
  1000. putBackChar();
  1001. code = TC_DOT;
  1002. }
  1003. break;
  1004. case ':':
  1005. ch = getNextChar();
  1006. switch(ch)
  1007. {
  1008. case ':':
  1009. code = TC_SCOPE_RESOLUTION;
  1010. break;
  1011. default:
  1012. putBackChar();
  1013. code = TC_UPDOWNDOT;
  1014. }
  1015. break;
  1016. case '-':
  1017. ch = getNextChar();
  1018. switch(ch)
  1019. {
  1020. case '>':
  1021. code = TC_POINTER_TO_MEMBER;
  1022. break;
  1023. case '-':
  1024. code = TC_DEC;
  1025. break;
  1026. case '=':
  1027. code = TC_ASSIGN_SUB;
  1028. break;
  1029. default:
  1030. putBackChar();
  1031. code = TC_MINUS;
  1032. }
  1033. break;
  1034. case '=':
  1035. ch = getNextChar();
  1036. switch(ch)
  1037. {
  1038. case '=':
  1039. code = TC_EQUAL;
  1040. break;
  1041. default:
  1042. putBackChar();
  1043. code = TC_ASSIGN;
  1044. }
  1045. break;
  1046. case '!':
  1047. ch = getNextChar();
  1048. switch(ch)
  1049. {
  1050. case '=':
  1051. code = TC_NOT_EQUAL;
  1052. break;
  1053. default:
  1054. putBackChar();
  1055. code = TC_NOT;
  1056. }
  1057. break;
  1058. case '<':
  1059. ch = getNextChar();
  1060. switch(ch)
  1061. {
  1062. case '=':
  1063. code = TC_LESS_EQUAL;
  1064. break;
  1065. case '<':
  1066. ch = getNextChar();
  1067. switch(ch)
  1068. {
  1069. case '=':
  1070. code = TC_ASSIGN_SHL;
  1071. break;
  1072. default:
  1073. putBackChar();
  1074. code = TC_SHL;
  1075. }
  1076. break;
  1077. default:
  1078. putBackChar();
  1079. code = TC_LESS;
  1080. }
  1081. break;
  1082. case '>':
  1083. ch = getNextChar();
  1084. switch(ch)
  1085. {
  1086. case '=':
  1087. code = TC_GREATER_EQUAL;
  1088. break;
  1089. case '>':
  1090. ch = getNextChar();
  1091. switch(ch)
  1092. {
  1093. case '=':
  1094. code = TC_ASSIGN_SHR;
  1095. break;
  1096. default:
  1097. putBackChar();
  1098. code = TC_SHR;
  1099. }
  1100. break;
  1101. default:
  1102. putBackChar();
  1103. code = TC_GREATER;
  1104. }
  1105. break;
  1106. case '|':
  1107. ch = getNextChar();
  1108. switch(ch)
  1109. {
  1110. case '|':
  1111. code = TC_LOGICAL_OR;
  1112. break;
  1113. case '=':
  1114. code = TC_ASSIGN_OR;
  1115. break;
  1116. default:
  1117. putBackChar();
  1118. code = TC_BITWISE_OR;
  1119. }
  1120. break;
  1121. case '&':
  1122. ch = getNextChar();
  1123. switch(ch)
  1124. {
  1125. case '&':
  1126. code = TC_LOGICAL_AND;
  1127. break;
  1128. case '=':
  1129. code = TC_ASSIGN_AND;
  1130. break;
  1131. default:
  1132. putBackChar();
  1133. code = TC_BITWISE_AND;
  1134. }
  1135. break;
  1136. case '+':
  1137. ch = getNextChar();
  1138. switch(ch)
  1139. {
  1140. case '+':
  1141. code = TC_INC;
  1142. break;
  1143. case '=':
  1144. code = TC_ASSIGN_ADD;
  1145. break;
  1146. default:
  1147. putBackChar();
  1148. code = TC_PLUS;
  1149. }
  1150. break;
  1151. case '*':
  1152. ch = getNextChar();
  1153. switch(ch)
  1154. {
  1155. case '=':
  1156. code = TC_ASSIGN_MUL;
  1157. break;
  1158. default:
  1159. putBackChar();
  1160. code = TC_STAR;
  1161. }
  1162. break;
  1163. case '/':
  1164. ch = getNextChar();
  1165. switch(ch)
  1166. {
  1167. case '=':
  1168. code = TC_ASSIGN_DIV;
  1169. break;
  1170. default:
  1171. putBackChar();
  1172. code = TC_BSLASH;
  1173. }
  1174. break;
  1175. case '%':
  1176. ch = getNextChar();
  1177. switch(ch)
  1178. {
  1179. case '=':
  1180. code = TC_ASSIGN_MOD;
  1181. break;
  1182. default:
  1183. putBackChar();
  1184. code = TC_MOD;
  1185. }
  1186. break;
  1187. case '^':
  1188. ch = getNextChar();
  1189. switch(ch)
  1190. {
  1191. case '=':
  1192. code = TC_ASSIGN_XOR;
  1193. break;
  1194. default:
  1195. putBackChar();
  1196. code = TC_XOR;
  1197. }
  1198. break;
  1199. case '\\':
  1200. code = TC_BACK_SLASH;
  1201. break;
  1202. }
  1203. getNextChar();
  1204. crntToken.code = code;
  1205. }
  1206. }} // end namespaces