Scanner.cpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110
  1. #include <fstream>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <math.h>
  5. #include <sstream>
  6. #include <iomanip>
  7. #include "Scanner.h"
  8. #define SERROR(x) ERROR( "SCAN_ERR (" << scriptName << ':' << lineNmbr << "): " << x )
  9. //=====================================================================================================================================
  10. // Token =
  11. //=====================================================================================================================================
  12. Scanner::Token::Token( const Token& b ): code(b.code), dataType(b.dataType)
  13. {
  14. switch( b.dataType )
  15. {
  16. case Scanner::DT_FLOAT:
  17. value.float_ = b.value.float_;
  18. break;
  19. case Scanner::DT_INT:
  20. value.int_ = b.value.int_;
  21. break;
  22. case Scanner::DT_CHAR:
  23. value.char_ = b.value.char_;
  24. break;
  25. case Scanner::DT_STR:
  26. value.string = b.value.string;
  27. break;
  28. }
  29. memcpy( asString, b.asString, Scanner::MAX_SCRIPT_LINE_LEN*sizeof(char) );
  30. }
  31. //=====================================================================================================================================
  32. // data vars =
  33. //=====================================================================================================================================
  34. char Scanner::eofChar = 0x7F;
  35. //=====================================================================================================================================
  36. // reserved words grouped by length =
  37. //=====================================================================================================================================
  38. Scanner::ResWord Scanner::rw2 [] =
  39. {
  40. {"ke", TC_KE}, {NULL, TC_ERROR}
  41. };
  42. Scanner::ResWord Scanner::rw3 [] =
  43. {
  44. {"key", TC_KEY}, {NULL, TC_ERROR}
  45. };
  46. Scanner::ResWord Scanner::rw4 [] =
  47. {
  48. {"keyw", TC_KEYW}, {NULL, TC_ERROR}
  49. };
  50. Scanner::ResWord Scanner::rw5 [] =
  51. {
  52. {"keywo", TC_KEYWO}, {NULL, TC_ERROR}
  53. };
  54. Scanner::ResWord Scanner::rw6 [] =
  55. {
  56. {"keywor", TC_KEYWOR}, {NULL, TC_ERROR}
  57. };
  58. Scanner::ResWord Scanner::rw7 [] =
  59. {
  60. {"keyword", TC_KEYWORD}, {NULL, TC_ERROR}
  61. };
  62. Scanner::ResWord* Scanner::rwTable [] = // reserved word table
  63. {
  64. NULL, NULL, rw2, rw3, rw4, rw5, rw6, rw7,
  65. };
  66. //=====================================================================================================================================
  67. // ascii map =
  68. //=====================================================================================================================================
  69. uint Scanner::asciiLookupTable [128] = {AC_ERROR};
  70. void Scanner::initAsciiMap()
  71. {
  72. memset( &asciiLookupTable[0], AC_ERROR, sizeof(asciiLookupTable) );
  73. for( uint x='a'; x<='z'; x++) asciiLookupTable[x] = AC_LETTER;
  74. for( uint x='A'; x<='Z'; x++) asciiLookupTable[x] = AC_LETTER;
  75. for( uint x='0'; x<='9'; x++) asciiLookupTable[x] = AC_DIGIT;
  76. asciiLookupTable[':'] = asciiLookupTable['['] = asciiLookupTable[']'] = asciiLookupTable['('] = AC_SPECIAL;
  77. asciiLookupTable[')'] = asciiLookupTable['.'] = asciiLookupTable['{'] = asciiLookupTable['}'] = AC_SPECIAL;
  78. asciiLookupTable[','] = asciiLookupTable[';'] = asciiLookupTable['?'] = asciiLookupTable['='] = AC_SPECIAL;
  79. asciiLookupTable['!'] = asciiLookupTable['<'] = asciiLookupTable['>'] = asciiLookupTable['|'] = AC_SPECIAL;
  80. asciiLookupTable['&'] = asciiLookupTable['+'] = asciiLookupTable['-'] = asciiLookupTable['*'] = AC_SPECIAL;
  81. asciiLookupTable['/'] = asciiLookupTable['~'] = asciiLookupTable['%'] = asciiLookupTable['#'] = AC_SPECIAL;
  82. asciiLookupTable['^'] = AC_SPECIAL;
  83. asciiLookupTable['\t'] = asciiLookupTable[' '] = asciiLookupTable['\0'] = asciiLookupTable['\r'] = AC_WHITESPACE;
  84. asciiLookupTable['\n'] = AC_ERROR; // newline is unacceptable char
  85. asciiLookupTable['\"'] = AC_DOUBLEQUOTE;
  86. asciiLookupTable['\''] = AC_QUOTE;
  87. asciiLookupTable[(int)eofChar] = AC_EOF;
  88. asciiLookupTable['_'] = AC_LETTER;
  89. }
  90. //=====================================================================================================================================
  91. // Scanner =
  92. //=====================================================================================================================================
  93. Scanner::Scanner( bool newlinesAsWhitespace_ ):
  94. newlinesAsWhitespace(newlinesAsWhitespace_), commentedLines(0), inStream(NULL)
  95. {
  96. if( asciiLookupTable['a'] != AC_LETTER )
  97. initAsciiMap();
  98. lineNmbr = 0;
  99. memset( line, eofChar, sizeof(char)*MAX_SCRIPT_LINE_LEN );
  100. }
  101. //=====================================================================================================================================
  102. // getLine =
  103. // it simply gets a new line from the file and it points to the first char of that line =
  104. //=====================================================================================================================================
  105. void Scanner::getLine()
  106. {
  107. /*if( inStream->eof() )
  108. pchar = &eofChar;
  109. else
  110. {
  111. inStream->getline( line, MAX_SCRIPT_LINE_LEN - 1 );
  112. pchar = &line[0];
  113. ++lineNmbr;
  114. }*/
  115. if( !inStream->getline( line, MAX_SCRIPT_LINE_LEN - 1, '\n' ) )
  116. pchar = &eofChar;
  117. else
  118. {
  119. pchar = &line[0];
  120. ++lineNmbr;
  121. }
  122. DEBUG_ERR( inStream->gcount() > MAX_SCRIPT_LINE_LEN - 10 ); // too big line
  123. /*if( *pchar != eofChar )
  124. {
  125. PRINT( lineNmbr << ": " << line );
  126. }
  127. else
  128. {
  129. PRINT( lineNmbr << ": eof" );
  130. }*/
  131. }
  132. //=====================================================================================================================================
  133. // getNextChar =
  134. //=====================================================================================================================================
  135. char Scanner::getNextChar()
  136. {
  137. if( *pchar=='\0' )
  138. getLine();
  139. else
  140. ++pchar;
  141. return *pchar;
  142. }
  143. //=====================================================================================================================================
  144. // putBackChar =
  145. //=====================================================================================================================================
  146. char Scanner::putBackChar()
  147. {
  148. if( pchar != line && *pchar != eofChar )
  149. --pchar;
  150. return *pchar;
  151. }
  152. //=====================================================================================================================================
  153. // GetTokenInfo =
  154. //=====================================================================================================================================
  155. string Scanner::getTokenInfo( const Token& token )
  156. {
  157. char tokenInfoStr[256];
  158. switch( token.code )
  159. {
  160. case TC_COMMENT:
  161. return "comment";
  162. case TC_NEWLINE:
  163. return "newline";
  164. case TC_EOF:
  165. return "end of file";
  166. case TC_STRING:
  167. sprintf( tokenInfoStr, "string \"%s\"", token.value.string );
  168. break;
  169. case TC_CHAR:
  170. sprintf( tokenInfoStr, "char '%c' (\"%s\")", token.value.char_, token.asString );
  171. break;
  172. case TC_NUMBER:
  173. if( token.dataType == DT_FLOAT )
  174. sprintf( tokenInfoStr, "float %f or %e (\"%s\")", token.value.float_, token.value.float_, token.asString );
  175. else
  176. sprintf( tokenInfoStr, "int %lu (\"%s\")", token.value.int_, token.asString );
  177. break;
  178. case TC_IDENTIFIER:
  179. sprintf( tokenInfoStr, "identifier \"%s\"", token.value.string );
  180. break;
  181. case TC_ERROR:
  182. return "scanner error";
  183. break;
  184. default:
  185. if( token.code>=TC_KE && token.code<=TC_KEYWORD )
  186. sprintf( tokenInfoStr, "reserved word \"%s\"", token.value.string );
  187. else if( token.code>=TC_SCOPERESOLUTION && token.code<=TC_ASSIGNOR )
  188. sprintf( tokenInfoStr, "operator no %d", token.code - TC_SCOPERESOLUTION );
  189. }
  190. return string(tokenInfoStr);
  191. }
  192. //=====================================================================================================================================
  193. // printTokenInfo =
  194. //=====================================================================================================================================
  195. void Scanner::printTokenInfo( const Token& token )
  196. {
  197. cout << "Token: " << getTokenInfo(token) << endl;
  198. }
  199. //=====================================================================================================================================
  200. // getAllprintAll =
  201. //=====================================================================================================================================
  202. void Scanner::getAllprintAll()
  203. {
  204. do
  205. {
  206. getNextToken();
  207. cout << setw(3) << setfill('0') << getLineNumber() << ": " << getTokenInfo( crntToken ) << endl;
  208. } while( crntToken.code != TC_EOF );
  209. }
  210. //=====================================================================================================================================
  211. // loadFile =
  212. //=====================================================================================================================================
  213. bool Scanner::loadFile( const char* filename_ )
  214. {
  215. inFstream.open( filename_, ios::in );
  216. if( !inFstream.good() )
  217. {
  218. ERROR( "Cannot open file \"" << filename_ << '\"' );
  219. return false;
  220. }
  221. return loadIoStream( &inFstream, filename_ );
  222. }
  223. //=====================================================================================================================================
  224. // loadIoStream =
  225. //=====================================================================================================================================
  226. bool Scanner::loadIoStream( iostream* iostream_, const char* scriptName_ )
  227. {
  228. if( inStream != NULL )
  229. {
  230. ERROR( "Tokenizer already initialized" );
  231. return false;
  232. }
  233. inStream = iostream_;
  234. // init globals
  235. DEBUG_ERR( strlen(scriptName_) > sizeof(scriptName)/sizeof(char) - 1 ) // Too big name
  236. crntToken.code = TC_ERROR;
  237. lineNmbr = 0;
  238. strcpy( scriptName, scriptName_ );
  239. getLine();
  240. return true;
  241. }
  242. //=====================================================================================================================================
  243. // unload =
  244. //=====================================================================================================================================
  245. void Scanner::unload()
  246. {
  247. inFstream.close();
  248. }
  249. //=====================================================================================================================================
  250. // getNextToken =
  251. //=====================================================================================================================================
  252. const Scanner::Token& Scanner::getNextToken()
  253. {
  254. start:
  255. //if( crntToken.code == TC_NEWLINE ) getNextChar();
  256. if( commentedLines>0 )
  257. {
  258. crntToken.code = TC_NEWLINE;
  259. --commentedLines;
  260. ++lineNmbr; // the ultimate hack. I should remember not to do such crapp
  261. }
  262. else if( *pchar == '/' )
  263. {
  264. char ch = getNextChar();
  265. if( ch == '/' || ch == '*' )
  266. {
  267. putBackChar();
  268. int line = getLineNumber();
  269. checkComment();
  270. commentedLines = getLineNumber() - line; // update commentedLines
  271. lineNmbr -= commentedLines; // part of the ultimate hack
  272. }
  273. else
  274. {
  275. putBackChar();
  276. goto crappy_label;
  277. }
  278. }
  279. else if( *pchar == '.' )
  280. {
  281. uint asc = asciiLookup(getNextChar());
  282. putBackChar();
  283. if( asc == AC_DIGIT )
  284. checkNumber();
  285. else
  286. checkSpecial();
  287. }
  288. else if( *pchar=='\0' ) // if newline
  289. {
  290. if( asciiLookup( getNextChar() ) == AC_EOF )
  291. crntToken.code = TC_EOF;
  292. else
  293. crntToken.code = TC_NEWLINE;
  294. }
  295. else
  296. {
  297. crappy_label:
  298. switch( asciiLookup(*pchar) )
  299. {
  300. case AC_WHITESPACE : getNextChar(); goto start;
  301. case AC_LETTER : checkWord(); break;
  302. case AC_DIGIT : checkNumber(); break;
  303. case AC_SPECIAL : checkSpecial(); break;
  304. case AC_QUOTE : checkChar(); break;
  305. case AC_DOUBLEQUOTE: checkString(); break;
  306. case AC_EOF:
  307. crntToken.code = TC_EOF;
  308. break;
  309. case AC_ERROR:
  310. default:
  311. SERROR( "Unexpected character \'" << *pchar << '\'');
  312. getNextChar();
  313. goto start;
  314. }
  315. }
  316. if( crntToken.code == TC_COMMENT ) goto start; // skip comments
  317. if( crntToken.code == TC_NEWLINE && newlinesAsWhitespace ) goto start;
  318. return crntToken;
  319. }
/*
=======================================================================================================================================
CHECKERS (below only checkers)                                                                                                        =
=======================================================================================================================================
*/
  325. //=====================================================================================================================================
  326. // CheckWord =
  327. //=====================================================================================================================================
  328. bool Scanner::checkWord()
  329. {
  330. char* tmpStr = crntToken.asString;
  331. char ch = *pchar;
  332. //build the string
  333. do
  334. {
  335. *tmpStr++ = ch;
  336. ch = getNextChar();
  337. }while ( asciiLookup(ch)==AC_LETTER || asciiLookup(ch)==AC_DIGIT );
  338. *tmpStr = '\0'; // finalize it
  339. //check if reserved
  340. int len = tmpStr-crntToken.asString;
  341. crntToken.code = TC_IDENTIFIER;
  342. crntToken.value.string = crntToken.asString;
  343. crntToken.dataType = DT_STR; // not important
  344. if( len<=7 && len>=2 )
  345. {
  346. int x = 0;
  347. for (;;)
  348. {
  349. if( rwTable[len][x].string == NULL ) break;
  350. if( strcmp(rwTable[len][x].string, crntToken.asString ) == 0 )
  351. {
  352. crntToken.code = rwTable[len][x].code;
  353. break;
  354. }
  355. ++x;
  356. }
  357. }
  358. return true;
  359. }
  360. //=====================================================================================================================================
  361. // CheckComment =
  362. //=====================================================================================================================================
  363. bool Scanner::checkComment()
  364. {
  365. // begining
  366. if( getNextChar()=='*' )
  367. goto branchy_cmnt;
  368. else if( *pchar=='/' )
  369. {
  370. // end
  371. getLine();
  372. crntToken.code = TC_COMMENT;
  373. return true;
  374. }
  375. else
  376. goto error;
  377. // multi-line comment
  378. branchy_cmnt:
  379. if( getNextChar()=='*' )
  380. goto finalizeBranchy;
  381. else if( *pchar==eofChar )
  382. goto error;
  383. else
  384. goto branchy_cmnt;
  385. // multi-line "branchy"
  386. finalizeBranchy:
  387. if( getNextChar()=='/' )
  388. {
  389. crntToken.code = TC_COMMENT;
  390. getNextChar();
  391. return true;
  392. }
  393. else
  394. goto branchy_cmnt;
  395. //error
  396. error:
  397. crntToken.code = TC_ERROR;
  398. SERROR( "Incorrect comment ending" );
  399. return false;
  400. }
  401. //=====================================================================================================================================
  402. // CheckNumber =
  403. //=====================================================================================================================================
  404. bool Scanner::checkNumber()
  405. {
  406. //DEBUG_ERR( sizeof(long) != 8 ); // ulong must be 64bit
  407. long num = 0; // value of the number & part of the float num before '.'
  408. long fnum = 0; // part of the float num after '.'
  409. long dad = 0; // digits after dot (for floats)
  410. bool expSign = 0; // exponent sign in case float is represented in mant/exp format. 0 means positive and 1 negative
  411. long exp = 0; // the exponent in case float is represented in mant/exp format
  412. char* tmpStr = crntToken.asString;
  413. crntToken.dataType = DT_INT;
  414. uint asc;
  415. // begin
  416. if( *pchar == '0' )
  417. goto _0;
  418. else if( asciiLookup(*pchar) == AC_DIGIT )
  419. {
  420. num = num*10 + *pchar-'0';
  421. goto _0_9;
  422. }
  423. else if ( *pchar == '.' )
  424. goto _float;
  425. else
  426. goto error;
  427. // 0????
  428. _0:
  429. *tmpStr++ = *pchar;
  430. getNextChar();
  431. asc = asciiLookup(*pchar);
  432. if ( *pchar == 'x' || *pchar == 'X' )
  433. goto _0x;
  434. else if( *pchar == 'e' || *pchar == 'E' )
  435. goto _0_9_dot_0_9_e;
  436. else if( asc == AC_DIGIT )
  437. {
  438. putBackChar();
  439. goto _0_9;
  440. }
  441. else if( *pchar == '.' )
  442. goto _float;
  443. else if( asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF )
  444. goto finalize;
  445. else
  446. goto error;
  447. // 0x????
  448. _0x:
  449. *tmpStr++ = *pchar;
  450. getNextChar();
  451. asc = asciiLookup(*pchar);
  452. if( (asc == AC_DIGIT) ||
  453. (*pchar >= 'a' && *pchar <= 'f' ) ||
  454. (*pchar >= 'A' && *pchar <= 'F' ) )
  455. {
  456. num <<= 4;
  457. if( *pchar>='a' && *pchar<='f' )
  458. num += *pchar - 'a' + 0xA;
  459. else if( *pchar>='A' && *pchar<='F' )
  460. num += *pchar - 'A' + 0xA;
  461. else
  462. num += *pchar - '0';
  463. goto _0x0_9orA_F;
  464. }
  465. else
  466. goto error;
  467. // 0x{0-9 || a-f}??
  468. _0x0_9orA_F:
  469. *tmpStr++ = *pchar;
  470. getNextChar();
  471. asc = asciiLookup(*pchar);
  472. if( (asc == AC_DIGIT) ||
  473. (*pchar >= 'a' && *pchar <= 'f' ) ||
  474. (*pchar >= 'A' && *pchar <= 'F' ) )
  475. {
  476. num <<= 4;
  477. if( *pchar>='a' && *pchar<='f' )
  478. num += *pchar - 'a' + 0xA;
  479. else if( *pchar>='A' && *pchar<='F' )
  480. num += *pchar - 'A' + 0xA;
  481. else
  482. num += *pchar - '0';
  483. goto _0x0_9orA_F;
  484. }
  485. else if( asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF )
  486. goto finalize;
  487. else
  488. goto error; // err
  489. // {0-9}
  490. _0_9:
  491. *tmpStr++ = *pchar;
  492. getNextChar();
  493. asc = asciiLookup(*pchar);
  494. if( asc == AC_DIGIT )
  495. {
  496. num = num * 10 + *pchar - '0';
  497. goto _0_9;
  498. }
  499. else if( *pchar == 'e' || *pchar == 'E' )
  500. goto _0_9_dot_0_9_e;
  501. else if( *pchar == '.' )
  502. goto _float;
  503. else if( asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF )
  504. goto finalize;
  505. else
  506. goto error; // err
  507. // {0-9}.??
  508. _float:
  509. *tmpStr++ = *pchar;
  510. getNextChar();
  511. asc = asciiLookup(*pchar);
  512. crntToken.dataType = DT_FLOAT;
  513. if( asc == AC_DIGIT )
  514. {
  515. fnum = fnum * 10 + *pchar - '0';
  516. ++dad;
  517. goto _float;
  518. }
  519. else if( *pchar == '.' )
  520. {
  521. *tmpStr++ = *pchar;
  522. getNextChar();
  523. goto error;
  524. }
  525. else if( *pchar == 'f' || *pchar == 'F' )
  526. {
  527. goto _0_9_dot_0_9_f;
  528. }
  529. else if( *pchar == 'e' || *pchar == 'E' )
  530. goto _0_9_dot_0_9_e;
  531. else if( asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF )
  532. goto finalize;
  533. else
  534. goto error;
  535. // [{0-9}].[{0-9}]f??
  536. _0_9_dot_0_9_f:
  537. *tmpStr++ = *pchar;
  538. getNextChar();
  539. asc = asciiLookup(*pchar);
  540. if( asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF )
  541. goto finalize;
  542. else
  543. goto error;
  544. // [{0-9}].[{0-9}]e??
  545. _0_9_dot_0_9_e:
  546. *tmpStr++ = *pchar;
  547. getNextChar();
  548. asc = asciiLookup(*pchar);
  549. crntToken.dataType = DT_FLOAT;
  550. if( *pchar == '+' || *pchar == '-' )
  551. {
  552. if( *pchar == '-' ) expSign = 1;
  553. //*tmpStr++ = *pchar; getNextChar();
  554. goto _0_9_dot_0_9_e_sign;
  555. }
  556. else if( asc == AC_DIGIT )
  557. {
  558. exp = exp * 10 + *pchar - '0';
  559. goto _0_9_dot_0_9_e_sign_0_9;
  560. }
  561. else
  562. goto error;
  563. // [{0-9}].[{0-9}]e{+,-}??
  564. // After the sign we want number
  565. _0_9_dot_0_9_e_sign:
  566. *tmpStr++ = *pchar;
  567. getNextChar();
  568. asc = asciiLookup(*pchar);
  569. if( asc == AC_DIGIT )
  570. {
  571. exp = exp * 10 + *pchar - '0';
  572. goto _0_9_dot_0_9_e_sign_0_9;
  573. }
  574. else
  575. goto error;
  576. // [{0-9}].[{0-9}]e{+,-}{0-9}??
  577. // After the number in exponent we want other number or we finalize
  578. _0_9_dot_0_9_e_sign_0_9:
  579. *tmpStr++ = *pchar;
  580. getNextChar();
  581. asc = asciiLookup(*pchar);
  582. if( asc == AC_DIGIT )
  583. {
  584. exp = exp * 10 + *pchar - '0';
  585. goto _0_9_dot_0_9_e_sign_0_9;
  586. }
  587. else if( asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF )
  588. goto finalize;
  589. else
  590. goto error;
  591. // finalize
  592. finalize:
  593. crntToken.code = TC_NUMBER;
  594. if( crntToken.dataType == DT_INT )
  595. {
  596. crntToken.value.int_ = num;
  597. }
  598. else
  599. {
  600. double dbl = (double)num + (double)(pow(10, -dad)*fnum);
  601. if( exp != 0 ) // if we have exponent
  602. {
  603. if( expSign == 1 ) exp = -exp; // change the sign if necessary
  604. dbl = dbl * pow( 10, exp );
  605. }
  606. crntToken.value.float_ = dbl;
  607. }
  608. *tmpStr = '\0';
  609. return true;
  610. //error
  611. error:
  612. crntToken.code = TC_ERROR;
  613. // run until white space or special
  614. asc = asciiLookup(*pchar);
  615. while( asc!=AC_WHITESPACE && asc!=AC_SPECIAL && asc!=AC_EOF )
  616. {
  617. *tmpStr++ = *pchar;
  618. asc = asciiLookup(getNextChar());
  619. }
  620. *tmpStr = '\0';
  621. SERROR( "Bad number suffix \"" << crntToken.asString << '\"' );
  622. return false;
  623. }
  624. //=====================================================================================================================================
  625. // CheckString =
  626. //=====================================================================================================================================
  627. bool Scanner::checkString()
  628. {
  629. char* tmpStr = crntToken.asString;
  630. char ch = getNextChar();
  631. for(;;)
  632. {
  633. //Error
  634. if( ch=='\0' || ch==eofChar ) // if end of line or eof
  635. {
  636. crntToken.code = TC_ERROR;
  637. *tmpStr = '\0';
  638. SERROR( "Incorect string ending \"" << crntToken.asString );
  639. return false;
  640. }
  641. //Escape Codes
  642. else if( ch=='\\' )
  643. {
  644. ch = getNextChar();
  645. if( ch=='\0' || ch==eofChar )
  646. {
  647. crntToken.code = TC_ERROR;
  648. *tmpStr = '\0';
  649. SERROR( "Incorect string ending \"" << crntToken.asString << '\"' );
  650. return false;
  651. }
  652. switch( ch )
  653. {
  654. case 'n' : *tmpStr++ = '\n'; break;
  655. case 't' : *tmpStr++ = '\t'; break;
  656. case '0' : *tmpStr++ = '\0'; break;
  657. case 'a' : *tmpStr++ = '\a'; break;
  658. case '\"': *tmpStr++ = '\"'; break;
  659. case 'f' : *tmpStr++ = '\f'; break;
  660. case 'v' : *tmpStr++ = '\v'; break;
  661. case '\'': *tmpStr++ = '\''; break;
  662. case '\\': *tmpStr++ = '\\'; break;
  663. case '\?': *tmpStr++ = '\?'; break;
  664. default :
  665. SERROR( "Unrecognized escape charachter \'\\" << ch << '\'' );
  666. *tmpStr++ = ch;
  667. }
  668. }
  669. //End
  670. else if( ch=='\"' )
  671. {
  672. *tmpStr = '\0';
  673. crntToken.code = TC_STRING;
  674. crntToken.value.string = crntToken.asString;
  675. getNextChar();
  676. return true;
  677. }
  678. //Build str( main loop )
  679. else
  680. {
  681. *tmpStr++ = ch;
  682. }
  683. ch = getNextChar();
  684. }
  685. return false;
  686. }
  687. //=====================================================================================================================================
  688. // checkChar =
  689. //=====================================================================================================================================
  690. bool Scanner::checkChar()
  691. {
  692. char ch = getNextChar();
  693. char ch0 = ch;
  694. char* tmpStr = crntToken.asString;
  695. crntToken.code = TC_ERROR;
  696. *tmpStr++ = ch;
  697. if( ch=='\0' || ch==eofChar ) // check char after '
  698. {
  699. SERROR( "Newline in constant" );
  700. return false;
  701. }
  702. if (ch=='\'') // if '
  703. {
  704. SERROR( "Empty constant" );
  705. getNextChar();
  706. return false;
  707. }
  708. if (ch=='\\') // if \ then maybe escape char
  709. {
  710. ch = getNextChar();
  711. *tmpStr++ = ch;
  712. if( ch=='\0' || ch==eofChar ) //check again after the \.
  713. {
  714. SERROR( "Newline in constant" );
  715. return false;
  716. }
  717. switch (ch)
  718. {
  719. case 'n' : ch0 = '\n'; break;
  720. case 't' : ch0 = '\t'; break;
  721. case '0' : ch0 = '\0'; break;
  722. case 'a' : ch0 = '\a'; break;
  723. case '\"': ch0 = '\"'; break;
  724. case 'f' : ch0 = '\f'; break;
  725. case 'v' : ch0 = '\v'; break;
  726. case '\'': ch0 = '\''; break;
  727. case '\\': ch0 = '\\'; break;
  728. case '\?': ch0 = '\?'; break;
  729. default : ch0 = ch ; SERROR( "Unrecognized escape charachter \'\\" << ch << '\'' );
  730. }
  731. crntToken.value.char_ = ch0;
  732. }
  733. else
  734. {
  735. crntToken.value.char_ = ch;
  736. }
  737. ch = getNextChar();
  738. if( ch=='\'' ) //end
  739. {
  740. *tmpStr = '\0';
  741. crntToken.code = TC_CHAR;
  742. getNextChar();
  743. return true;
  744. }
  745. SERROR( "Expected \'");
  746. return false;
  747. }
  748. //=====================================================================================================================================
  749. // checkSpecial =
  750. //=====================================================================================================================================
  751. bool Scanner::checkSpecial()
  752. {
  753. char ch = *pchar;
  754. TokenCode code = TC_ERROR;
  755. switch( ch )
  756. {
  757. case '#': code = TC_SHARP; break;
  758. case ',': code = TC_COMMA; break;
  759. case ';': code = TC_PERIOD; break;
  760. case '(': code = TC_LPAREN; break;
  761. case ')': code = TC_RPAREN; break;
  762. case '[': code = TC_LSQBRACKET; break;
  763. case ']': code = TC_RSQBRACKET; break;
  764. case '{': code = TC_LBRACKET; break;
  765. case '}': code = TC_RBRACKET; break;
  766. case '?': code = TC_QUESTIONMARK; break;
  767. case '~': code = TC_ONESCOMPLEMENT; break;
  768. case '.':
  769. ch = getNextChar();
  770. switch( ch )
  771. {
  772. case '*':
  773. code = TC_POINTERTOMEMBER;
  774. break;
  775. default:
  776. putBackChar();
  777. code = TC_DOT;
  778. }
  779. break;
  780. case ':':
  781. ch = getNextChar();
  782. switch( ch )
  783. {
  784. case ':':
  785. code = TC_SCOPERESOLUTION;
  786. break;
  787. default:
  788. putBackChar();
  789. code = TC_UPDOWNDOT;
  790. }
  791. break;
  792. case '-':
  793. ch = getNextChar();
  794. switch( ch )
  795. {
  796. case '>':
  797. code = TC_POINTERTOMEMBER;
  798. break;
  799. case '-':
  800. code = TC_DEC;
  801. break;
  802. case '=':
  803. code = TC_ASSIGNSUB;
  804. break;
  805. default:
  806. putBackChar();
  807. code = TC_MINUS;
  808. }
  809. break;
  810. case '=':
  811. ch = getNextChar();
  812. switch( ch )
  813. {
  814. case '=':
  815. code = TC_EQUAL;
  816. break;
  817. default:
  818. putBackChar();
  819. code = TC_ASSIGN;
  820. }
  821. break;
  822. case '!':
  823. ch = getNextChar();
  824. switch( ch )
  825. {
  826. case '=':
  827. code = TC_NOTEQUAL;
  828. break;
  829. default:
  830. putBackChar();
  831. code = TC_NOT;
  832. }
  833. break;
  834. case '<':
  835. ch = getNextChar();
  836. switch( ch )
  837. {
  838. case '=':
  839. code = TC_LESSEQUAL;
  840. break;
  841. case '<':
  842. ch = getNextChar();
  843. switch( ch )
  844. {
  845. case '=':
  846. code = TC_ASSIGNSHL;
  847. break;
  848. default:
  849. putBackChar();
  850. code = TC_SHL;
  851. }
  852. break;
  853. default:
  854. putBackChar();
  855. code = TC_LESS;
  856. }
  857. break;
  858. case '>':
  859. ch = getNextChar();
  860. switch( ch )
  861. {
  862. case '=':
  863. code = TC_GREATEREQUAL;
  864. break;
  865. case '>':
  866. ch = getNextChar();
  867. switch( ch )
  868. {
  869. case '=':
  870. code = TC_ASSIGNSHR;
  871. break;
  872. default:
  873. putBackChar();
  874. code = TC_SHR;
  875. }
  876. break;
  877. default:
  878. putBackChar();
  879. code = TC_GREATER;
  880. }
  881. break;
  882. case '|':
  883. ch = getNextChar();
  884. switch( ch )
  885. {
  886. case '|':
  887. code = TC_LOGICALOR;
  888. break;
  889. case '=':
  890. code = TC_ASSIGNOR;
  891. break;
  892. default:
  893. putBackChar();
  894. code = TC_BITWISEOR;
  895. }
  896. break;
  897. case '&':
  898. ch = getNextChar();
  899. switch( ch )
  900. {
  901. case '&':
  902. code = TC_LOGICALAND;
  903. break;
  904. case '=':
  905. code = TC_ASSIGNAND;
  906. break;
  907. default:
  908. putBackChar();
  909. code = TC_BITWISEAND;
  910. }
  911. break;
  912. case '+':
  913. ch = getNextChar();
  914. switch( ch )
  915. {
  916. case '+':
  917. code = TC_INC;
  918. break;
  919. case '=':
  920. code = TC_ASSIGNADD;
  921. break;
  922. default:
  923. putBackChar();
  924. code = TC_PLUS;
  925. }
  926. break;
  927. case '*':
  928. ch = getNextChar();
  929. switch( ch )
  930. {
  931. case '=':
  932. code = TC_ASSIGNMUL;
  933. break;
  934. default:
  935. putBackChar();
  936. code = TC_STAR;
  937. }
  938. break;
  939. case '/':
  940. ch = getNextChar();
  941. switch( ch )
  942. {
  943. case '=':
  944. code = TC_ASSIGNDIV;
  945. break;
  946. default:
  947. putBackChar();
  948. code = TC_BSLASH;
  949. }
  950. break;
  951. case '%':
  952. ch = getNextChar();
  953. switch( ch )
  954. {
  955. case '=':
  956. code = TC_ASSIGNMOD;
  957. break;
  958. default:
  959. putBackChar();
  960. code = TC_MOD;
  961. }
  962. break;
  963. case '^':
  964. ch = getNextChar();
  965. switch( ch )
  966. {
  967. case '=':
  968. code = TC_ASSIGNXOR;
  969. break;
  970. default:
  971. putBackChar();
  972. code = TC_XOR;
  973. }
  974. break;
  975. }
  976. getNextChar();
  977. crntToken.code = code;
  978. return true;
  979. }