| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356 |
- #include "anki/util/Scanner.h"
- #include <boost/lexical_cast.hpp>
- #include <cstring>
- #include <iostream>
- #include <iomanip>
- #include <cmath>
- #include <sstream>
- #include <cassert>
- namespace anki { namespace scanner {
- //==============================================================================
- Exception::Exception(const std::string& err, int errNo_,
- const std::string& scriptFilename_, int scriptLineNmbr_)
- : error(err), errNo(errNo_), scriptFilename(scriptFilename_),
- scriptLineNmbr(scriptLineNmbr_)
- {}
- //==============================================================================
- Exception::Exception(const Exception& e)
- : std::exception(e), error(e.error), errNo(e.errNo),
- scriptFilename(e.scriptFilename), scriptLineNmbr(e.scriptLineNmbr)
- {}
- //==============================================================================
- const char* Exception::what() const throw()
- {
- errWhat = "Scanner exception (#" +
- boost::lexical_cast<std::string>(errNo) +
- ":" + scriptFilename + ':' +
- boost::lexical_cast<std::string>(scriptLineNmbr) + "): " + error;
- return errWhat.c_str();
- }
- //==============================================================================
- Token::Token(const Token& b)
- : code(b.code), dataType(b.dataType)
- {
- switch(b.dataType)
- {
- case DT_FLOAT:
- value.float_ = b.value.float_;
- break;
- case DT_INT:
- value.int_ = b.value.int_;
- break;
- case DT_CHAR:
- value.char_ = b.value.char_;
- break;
- case DT_STR:
- value.string = b.value.string;
- break;
- }
- memcpy(&asString[0], &b.asString[0], sizeof(asString));
- }
- //==============================================================================
- std::ostream& operator<<(std::ostream& s, const Token& x)
- {
- const TokenDataVal& val = x.getValue();
- TokenCode code = x.getCode();
- switch(code)
- {
- case TC_COMMENT:
- s << "comment";
- break;
- case TC_NEWLINE:
- s << "newline";
- break;
- case TC_END:
- s << "end of file";
- break;
- case TC_STRING:
- s << "string \"" << val.getString() << "\"";
- break;
- case TC_CHARACTER:
- s << "char '" << val.getChar() << "' (\"" <<
- x.getString() << "\")";
- break;
- case TC_NUMBER:
- if(x.getDataType() == DT_FLOAT)
- {
- s << "float " << val.getFloat() << " (\"" << x.getString() <<
- "\")";
- }
- else
- {
- s << "int " << val.getInt() << " (\"" <<
- x.getString() << "\")";
- }
- break;
- case TC_IDENTIFIER:
- s << "identifier \"" << val.getString() << "\"";
- break;
- case TC_ERROR:
- s << "scanner error";
- break;
- default:
- if(code >= TC_KE && code <= TC_KEYWORD)
- {
- s << "reserved word \"" << val.getString() << "\"";
- }
- else if(code >= TC_SCOPE_RESOLUTION && code <= TC_ASSIGN_OR)
- {
- s << "operator no " << (code - TC_SCOPE_RESOLUTION);
- }
- }
- return s;
- }
- //==============================================================================
- std::string Token::getInfoString() const
- {
- std::stringstream ss;
- ss << *this;
- return ss.str();
- }
- //==============================================================================
- #define SCANNER_EXCEPTION(x) \
- Exception(std::string() + x, __LINE__, scriptName, lineNmbr)
- char Scanner::eofChar = 0x7F;
- // reserved words grouped by length
- Scanner::ResWord Scanner::rw2 [] =
- {
- {"ke", TC_KE}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw3 [] =
- {
- {"key", TC_KEY}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw4 [] =
- {
- {"keyw", TC_KEYW}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw5 [] =
- {
- {"keywo", TC_KEYWO}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw6 [] =
- {
- {"keywor", TC_KEYWOR}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw7 [] =
- {
- {"keyword", TC_KEYWORD}, {NULL, TC_ERROR}
- };
- Scanner::ResWord* Scanner::rwTable [] = // reserved word table
- {
- NULL, NULL, rw2, rw3, rw4, rw5, rw6, rw7,
- };
- // ascii table
- Scanner::AsciiFlag Scanner::asciiLookupTable [128] = {AC_ERROR};
- //==============================================================================
- Scanner::Scanner(bool newlinesAsWhitespace)
- {
- strcpy(scriptName, "unnamed-script");
- init(newlinesAsWhitespace);
- }
- //==============================================================================
- Scanner::Scanner(const char* filename, bool newlinesAsWhitespace)
- {
- strcpy(scriptName, "unnamed-script");
- init(newlinesAsWhitespace);
- loadFile(filename);
- }
- //==============================================================================
- Scanner::Scanner(std::istream& istream_, const char* scriptName_,
- bool newlinesAsWhitespace)
- {
- strcpy(scriptName, "unnamed-script");
- init(newlinesAsWhitespace);
- loadIstream(istream_, scriptName_);
- }
- //==============================================================================
- void Scanner::initAsciiMap()
- {
- memset(&asciiLookupTable[0], AC_ERROR, sizeof(asciiLookupTable));
- for(uint x = 'a'; x <= 'z'; x++)
- {
- lookupAscii(x) = AC_LETTER;
- }
- for(uint x = 'A'; x <= 'Z'; x++)
- {
- lookupAscii(x) = AC_LETTER;
- }
- for(uint x = '0'; x <= '9'; x++)
- {
- lookupAscii(x) = AC_DIGIT;
- }
- lookupAscii(':') = lookupAscii('[') = lookupAscii(']') =
- lookupAscii('(') = lookupAscii(')') = lookupAscii('.') =
- lookupAscii('{') = lookupAscii('}') = lookupAscii(',') =
- lookupAscii(';') = lookupAscii('?') = lookupAscii('=') =
- lookupAscii('!') = lookupAscii('<') = lookupAscii('>') =
- lookupAscii('|') = lookupAscii('&') = lookupAscii('+') =
- lookupAscii('-') = lookupAscii('*') = lookupAscii('/') =
- lookupAscii('~') = lookupAscii('%') = lookupAscii('#') =
- lookupAscii('^') = lookupAscii('\\') = AC_SPECIAL;
- lookupAscii('\t') = lookupAscii(' ') = lookupAscii('\0') =
- AC_WHITESPACE;
- lookupAscii('\n') = AC_ERROR; // newline is unacceptable char
- lookupAscii('@') = lookupAscii('`') = lookupAscii('$') =
- AC_ACCEPTABLE_IN_COMMENTS;
- lookupAscii('\"') = AC_DOUBLEQUOTE;
- lookupAscii('\'') = AC_QUOTE;
- lookupAscii((int)eofChar) = AC_EOF;
- lookupAscii('_') = AC_LETTER;
- }
- //==============================================================================
- void Scanner::init(bool newlinesAsWhitespace_)
- {
- newlinesAsWhitespace = newlinesAsWhitespace_;
- commentedLines = 0;
- inStream = NULL;
- if(lookupAscii('a') != AC_LETTER)
- {
- initAsciiMap();
- }
- lineNmbr = 0;
- memset(line, eofChar, sizeof(char) * MAX_SCRIPT_LINE_LEN);
- }
- //==============================================================================
- void Scanner::getLine()
- {
- if(!inStream->getline(line, MAX_SCRIPT_LINE_LEN - 1, '\n'))
- {
- pchar = &eofChar;
- }
- else
- {
- pchar = &line[0];
- ++lineNmbr;
- }
- assert(inStream->gcount() <= MAX_SCRIPT_LINE_LEN - 10); // too big line
- }
- //==============================================================================
- char Scanner::getNextChar()
- {
- if(*pchar=='\0')
- {
- getLine();
- }
- else
- {
- ++pchar;
- }
- if(*pchar == '\r') // windows crap
- {
- *pchar = '\0';
- }
- else if(lookupAscii(*pchar) == AC_ERROR)
- {
- throw SCANNER_EXCEPTION("Unacceptable char '" + *pchar + "' 0x" +
- boost::lexical_cast<std::string>(static_cast<uint>(*pchar)));
- }
- return *pchar;
- }
- //==============================================================================
- char Scanner::putBackChar()
- {
- if(pchar != line && *pchar != eofChar)
- {
- --pchar;
- }
- return *pchar;
- }
- //==============================================================================
- void Scanner::getAllPrintAll()
- {
- do
- {
- getNextToken();
- std::cout << std::setw(3) << std::setfill('0') << getLineNumber() <<
- ": " << crntToken << std::endl;
- } while(crntToken.code != TC_END);
- }
- //==============================================================================
- void Scanner::loadFile(const char* filename_)
- {
- inFstream.open(filename_);
- if(!inFstream.is_open())
- {
- throw SCANNER_EXCEPTION("Cannot open file \"" + filename_ + '\"');
- }
- loadIstream(inFstream, filename_);
- }
- //==============================================================================
- void Scanner::loadIstream(std::istream& istream_, const char* scriptName_)
- {
- if(inStream != NULL)
- {
- throw SCANNER_EXCEPTION("Tokenizer already initialized");
- }
- inStream = &istream_;
- // init globals
- // Too big name
- assert(strlen(scriptName_) <= sizeof(scriptName) / sizeof(char) - 1);
- crntToken.code = TC_ERROR;
- lineNmbr = 0;
- strcpy(scriptName, scriptName_);
- getLine();
- }
- //==============================================================================
- void Scanner::unload()
- {
- inFstream.close();
- }
- //==============================================================================
- const Token& Scanner::getNextToken()
- {
- start:
- //if(crntToken.code == TC_NEWLINE) getNextChar();
- if(commentedLines>0)
- {
- crntToken.code = TC_NEWLINE;
- --commentedLines;
- // the ultimate hack. I should remember not to do such crap in the
- // future
- ++lineNmbr;
- }
- else if(*pchar == '/')
- {
- char ch = getNextChar();
- if(ch == '/' || ch == '*')
- {
- putBackChar();
- int line = getLineNumber();
- checkComment();
- commentedLines = getLineNumber() - line; // update commentedLines
- lineNmbr -= commentedLines; // part of the ultimate hack
- }
- else
- {
- putBackChar();
- goto crappyLabel;
- }
- }
- else if(*pchar == '.')
- {
- uint asc = lookupAscii(getNextChar());
- putBackChar();
- if(asc == AC_DIGIT)
- {
- checkNumber();
- }
- else
- {
- checkSpecial();
- }
- }
- else if(*pchar=='\0') // if newline
- {
- if(lookupAscii(getNextChar()) == AC_EOF)
- {
- crntToken.code = TC_END;
- }
- else
- {
- crntToken.code = TC_NEWLINE;
- }
- }
- else
- {
- crappyLabel:
- switch(lookupAscii(*pchar))
- {
- case AC_WHITESPACE :
- getNextChar();
- goto start;
- case AC_LETTER:
- checkWord();
- break;
- case AC_DIGIT:
- checkNumber();
- break;
- case AC_SPECIAL:
- checkSpecial();
- break;
- case AC_QUOTE:
- checkChar();
- break;
- case AC_DOUBLEQUOTE:
- checkString();
- break;
- case AC_EOF:
- crntToken.code = TC_END;
- break;
- case AC_ERROR:
- default:
- getNextChar();
- throw SCANNER_EXCEPTION("Unexpected character \'" + *pchar +
- '\'');
- goto start;
- }
- }
- // skip comments
- if(crntToken.code == TC_COMMENT)
- {
- goto start;
- }
- // skip newlines
- if(crntToken.code == TC_NEWLINE && newlinesAsWhitespace)
- {
- goto start;
- }
- return crntToken;
- }
- //==============================================================================
- void Scanner::checkWord()
- {
- char* tmpStr = &crntToken.asString[0];
- char ch = *pchar;
- //build the string
- do
- {
- *tmpStr++ = ch;
- ch = getNextChar();
- } while(lookupAscii(ch) == AC_LETTER || lookupAscii(ch) == AC_DIGIT);
- *tmpStr = '\0'; // finalize it
- //check if reserved
- int len = tmpStr - &crntToken.asString[0];
- crntToken.code = TC_IDENTIFIER;
- crntToken.value.string = &crntToken.asString[0];
- crntToken.dataType = DT_STR; // not important
- if(len <= 7 && len >= 2)
- {
- int x = 0;
- while(true)
- {
- if(rwTable[len][x].string == NULL)
- {
- break;
- }
- if(strcmp(rwTable[len][x].string, &crntToken.asString[0]) == 0)
- {
- crntToken.code = rwTable[len][x].code;
- break;
- }
- ++x;
- }
- }
- }
- //==============================================================================
- void Scanner::checkComment()
- {
- // Beginning
- if(getNextChar()=='*')
- {
- goto cStyleCmnt;
- }
- // C++ style comment
- else if(*pchar=='/')
- {
- while(true)
- {
- char ch = getNextChar();
- if(ch == '\0')
- {
- crntToken.code = TC_COMMENT;
- return;
- }
- else if(ch == '\\')
- {
- if(getNextChar() == '\0')
- {
- getNextChar();
- }
- }
- }
- }
- else
- {
- goto error;
- }
- // C style comment
- cStyleCmnt:
- if(getNextChar()=='*')
- {
- goto finalizeCCmnt;
- }
- else if(*pchar==eofChar)
- {
- goto error;
- }
- else
- {
- goto cStyleCmnt;
- }
- // C++ style comment
- finalizeCCmnt:
- if(getNextChar()=='/')
- {
- crntToken.code = TC_COMMENT;
- getNextChar();
- return;
- }
- else
- {
- goto cStyleCmnt;
- }
- //error
- error:
- crntToken.code = TC_ERROR;
- throw SCANNER_EXCEPTION("Incorrect comment ending");
- }
- //==============================================================================
- void Scanner::checkNumber()
- {
- // This func is working great, dont try to understand it and dont even
- // think to try touching it.
- //RASSERT_THROW_EXCEPTION(sizeof(long) != 8); // ulong must be 64bit
- long num = 0; // value of the number & part of the float num before '.'
- long fnum = 0; // part of the float num after '.'
- long dad = 0; // digits after dot (for floats)
- bool expSign = 0; // exponent sign in case float is represented in mant/exp
- // format. 0 means positive and 1 negative
- long exp = 0; // the exponent in case float is represented in mant/exp
- // format
- char* tmpStr = &crntToken.asString[0];
- crntToken.dataType = DT_INT;
- uint asc;
- // begin
- if(*pchar == '0')
- {
- goto _0;
- }
- else if(lookupAscii(*pchar) == AC_DIGIT)
- {
- num = num*10 + *pchar-'0';
- goto _0_9;
- }
- else if (*pchar == '.')
- {
- goto _float;
- }
- else
- {
- goto error;
- }
- // 0????
- _0:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if (*pchar == 'x' || *pchar == 'X')
- {
- goto _0x;
- }
- else if(*pchar == 'e' || *pchar == 'E')
- {
- goto _0_9_dot_0_9_e;
- }
- else if(asc == AC_DIGIT)
- {
- putBackChar();
- goto _0_9;
- }
- else if(*pchar == '.')
- {
- goto _float;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // 0x????
- _0x:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if((asc == AC_DIGIT) ||
- (*pchar >= 'a' && *pchar <= 'f') ||
- (*pchar >= 'A' && *pchar <= 'F'))
- {
- num <<= 4;
- if(*pchar>='a' && *pchar<='f')
- {
- num += *pchar - 'a' + 0xA;
- }
- else if(*pchar>='A' && *pchar<='F')
- {
- num += *pchar - 'A' + 0xA;
- }
- else
- {
- num += *pchar - '0';
- }
- goto _0x0_9orA_F;
- }
- else
- goto error;
- // 0x{0-9 || a-f}??
- _0x0_9orA_F:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if((asc == AC_DIGIT) ||
- (*pchar >= 'a' && *pchar <= 'f') ||
- (*pchar >= 'A' && *pchar <= 'F'))
- {
- num <<= 4;
- if(*pchar>='a' && *pchar<='f')
- {
- num += *pchar - 'a' + 0xA;
- }
- else if(*pchar>='A' && *pchar<='F')
- {
- num += *pchar - 'A' + 0xA;
- }
- else
- {
- num += *pchar - '0';
- }
- goto _0x0_9orA_F;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error; // err
- }
- // {0-9}
- _0_9:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_DIGIT)
- {
- num = num * 10 + *pchar - '0';
- goto _0_9;
- }
- else if(*pchar == 'e' || *pchar == 'E')
- {
- goto _0_9_dot_0_9_e;
- }
- else if(*pchar == '.')
- {
- goto _float;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error; // err
- }
- // {0-9}.??
- _float:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- crntToken.dataType = DT_FLOAT;
- if(asc == AC_DIGIT)
- {
- fnum = fnum * 10 + *pchar - '0';
- ++dad;
- goto _float;
- }
- else if(*pchar == '.')
- {
- *tmpStr++ = *pchar;
- getNextChar();
- goto error;
- }
- else if(*pchar == 'f' || *pchar == 'F')
- {
- goto _0_9_dot_0_9_f;
- }
- else if(*pchar == 'e' || *pchar == 'E')
- {
- goto _0_9_dot_0_9_e;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]f??
- _0_9_dot_0_9_f:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]e??
- _0_9_dot_0_9_e:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- crntToken.dataType = DT_FLOAT;
- if(*pchar == '+' || *pchar == '-')
- {
- if(*pchar == '-') expSign = 1;
- //*tmpStr++ = *pchar; getNextChar();
- goto _0_9_dot_0_9_e_sign;
- }
- else if(asc == AC_DIGIT)
- {
- exp = exp * 10 + *pchar - '0';
- goto _0_9_dot_0_9_e_sign_0_9;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]e{+,-}??
- // After the sign we want number
- _0_9_dot_0_9_e_sign:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_DIGIT)
- {
- exp = exp * 10 + *pchar - '0';
- goto _0_9_dot_0_9_e_sign_0_9;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]e{+,-}{0-9}??
- // After the number in exponent we want other number or we finalize
- _0_9_dot_0_9_e_sign_0_9:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_DIGIT)
- {
- exp = exp * 10 + *pchar - '0';
- goto _0_9_dot_0_9_e_sign_0_9;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // finalize
- finalize:
- crntToken.code = TC_NUMBER;
- if(crntToken.dataType == DT_INT)
- {
- crntToken.value.int_ = num;
- }
- else
- {
- double dbl = (double)num + (double)(pow(10, -dad)*fnum);
- if(exp != 0) // if we have exponent
- {
- if(expSign == true)
- {
- exp = -exp; // change the sign if necessary
- }
- dbl = dbl * pow(10, exp);
- }
- crntToken.value.float_ = dbl;
- }
- *tmpStr = '\0';
- return;
- //error
- error:
- crntToken.code = TC_ERROR;
- // run until white space or special
- asc = lookupAscii(*pchar);
- while(asc!=AC_WHITESPACE && asc!=AC_SPECIAL && asc!=AC_EOF)
- {
- *tmpStr++ = *pchar;
- asc = lookupAscii(getNextChar());
- }
- *tmpStr = '\0';
- throw SCANNER_EXCEPTION("Bad number suffix \"" +
- &crntToken.asString[0] + '\"');
- }
- //==============================================================================
- void Scanner::checkString()
- {
- char* tmpStr = &crntToken.asString[0];
- char ch = getNextChar();
- for(;;)
- {
- // Error
- if(ch == '\0' || ch == eofChar) // if end of line or eof
- {
- crntToken.code = TC_ERROR;
- *tmpStr = '\0';
- throw SCANNER_EXCEPTION("Incorrect string ending \"" +
- &crntToken.asString[0] + '\"');
- return;
- }
- // Escape Codes
- else if(ch == '\\')
- {
- ch = getNextChar();
- if(ch == eofChar)
- {
- crntToken.code = TC_ERROR;
- *tmpStr = '\0';
- throw SCANNER_EXCEPTION("Incorrect string ending \"" +
- &crntToken.asString[0] + '\"');
- return;
- }
- switch(ch)
- {
- case 'n':
- *tmpStr++ = '\n';
- break;
- case 't':
- *tmpStr++ = '\t';
- break;
- case '0':
- *tmpStr++ = '\0';
- break;
- case 'a':
- *tmpStr++ = '\a';
- break;
- case '\"':
- *tmpStr++ = '\"';
- break;
- case 'f':
- *tmpStr++ = '\f';
- break;
- case 'v':
- *tmpStr++ = '\v';
- break;
- case '\'':
- *tmpStr++ = '\'';
- break;
- case '\\':
- *tmpStr++ = '\\';
- break;
- case '\?':
- *tmpStr++ = '\?';
- break;
- case '\0':
- break; // not an escape char but works almost the same
- default:
- throw SCANNER_EXCEPTION(
- "Unrecognized escape character \'\\" + ch + '\'');
- *tmpStr++ = ch;
- }
- }
- // End
- else if(ch=='\"')
- {
- *tmpStr = '\0';
- crntToken.code = TC_STRING;
- crntToken.value.string = &crntToken.asString[0];
- getNextChar();
- return;
- }
- // Build str(main loop)
- else
- {
- *tmpStr++ = ch;
- }
- ch = getNextChar();
- }
- }
- //==============================================================================
- void Scanner::checkChar()
- {
- char ch = getNextChar();
- char ch0 = ch;
- char* tmpStr = &crntToken.asString[0];
- crntToken.code = TC_ERROR;
- *tmpStr++ = ch;
- if(ch=='\0' || ch==eofChar) // check char after '
- {
- throw SCANNER_EXCEPTION("Newline in constant");
- return;
- }
- if (ch=='\'') // if '
- {
- throw SCANNER_EXCEPTION("Empty constant");
- getNextChar();
- return;
- }
- if (ch=='\\') // if \ then maybe escape char
- {
- ch = getNextChar();
- *tmpStr++ = ch;
- if(ch == '\0' || ch == eofChar) //check again after the \.
- {
- throw SCANNER_EXCEPTION("Newline in constant");
- }
- switch (ch)
- {
- case 'n' :
- ch0 = '\n';
- break;
- case 't' :
- ch0 = '\t';
- break;
- case '0':
- ch0 = '\0';
- break;
- case 'a':
- ch0 = '\a';
- break;
- case '\"':
- ch0 = '\"';
- break;
- case 'f':
- ch0 = '\f';
- break;
- case 'v':
- ch0 = '\v';
- break;
- case '\'':
- ch0 = '\'';
- break;
- case '\\':
- ch0 = '\\';
- break;
- case '\?':
- ch0 = '\?';
- break;
- case 'r':
- ch0 = '\r';
- break;
- default:
- ch0 = ch;
- throw SCANNER_EXCEPTION("Unrecognized escape character \'\\" +
- ch + '\'');
- }
- crntToken.value.char_ = ch0;
- }
- else
- {
- crntToken.value.char_ = ch;
- }
- ch = getNextChar();
- if(ch=='\'') //end
- {
- *tmpStr = '\0';
- crntToken.code = TC_CHARACTER;
- getNextChar();
- return;
- }
- throw SCANNER_EXCEPTION("Expected \'");
- }
- //==============================================================================
- void Scanner::checkSpecial()
- {
- char ch = *pchar;
- TokenCode code = TC_ERROR;
- switch(ch)
- {
- case '#':
- code = TC_SHARP;
- break;
- case ',':
- code = TC_COMMA;
- break;
- case ';':
- code = TC_PERIOD;
- break;
- case '(':
- code = TC_L_PAREN;
- break;
- case ')':
- code = TC_R_PAREN;
- break;
- case '[':
- code = TC_L_SQ_BRACKET;
- break;
- case ']':
- code = TC_R_SQ_BRACKET;
- break;
- case '{':
- code = TC_L_BRACKET;
- break;
- case '}':
- code = TC_R_BRACKET;
- break;
- case '?':
- code = TC_QUESTIONMARK;
- break;
- case '~':
- code = TC_UNARAY_COMPLEMENT;
- break;
- case '.':
- ch = getNextChar();
- switch(ch)
- {
- case '*':
- code = TC_POINTER_TO_MEMBER;
- break;
- default:
- putBackChar();
- code = TC_DOT;
- }
- break;
- case ':':
- ch = getNextChar();
- switch(ch)
- {
- case ':':
- code = TC_SCOPE_RESOLUTION;
- break;
- default:
- putBackChar();
- code = TC_UPDOWNDOT;
- }
- break;
- case '-':
- ch = getNextChar();
- switch(ch)
- {
- case '>':
- code = TC_POINTER_TO_MEMBER;
- break;
- case '-':
- code = TC_DEC;
- break;
- case '=':
- code = TC_ASSIGN_SUB;
- break;
- default:
- putBackChar();
- code = TC_MINUS;
- }
- break;
- case '=':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_EQUAL;
- break;
- default:
- putBackChar();
- code = TC_ASSIGN;
- }
- break;
- case '!':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_NOT_EQUAL;
- break;
- default:
- putBackChar();
- code = TC_NOT;
- }
- break;
- case '<':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_LESS_EQUAL;
- break;
- case '<':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_SHL;
- break;
- default:
- putBackChar();
- code = TC_SHL;
- }
- break;
- default:
- putBackChar();
- code = TC_LESS;
- }
- break;
- case '>':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_GREATER_EQUAL;
- break;
- case '>':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_SHR;
- break;
- default:
- putBackChar();
- code = TC_SHR;
- }
- break;
- default:
- putBackChar();
- code = TC_GREATER;
- }
- break;
- case '|':
- ch = getNextChar();
- switch(ch)
- {
- case '|':
- code = TC_LOGICAL_OR;
- break;
- case '=':
- code = TC_ASSIGN_OR;
- break;
- default:
- putBackChar();
- code = TC_BITWISE_OR;
- }
- break;
- case '&':
- ch = getNextChar();
- switch(ch)
- {
- case '&':
- code = TC_LOGICAL_AND;
- break;
- case '=':
- code = TC_ASSIGN_AND;
- break;
- default:
- putBackChar();
- code = TC_BITWISE_AND;
- }
- break;
- case '+':
- ch = getNextChar();
- switch(ch)
- {
- case '+':
- code = TC_INC;
- break;
- case '=':
- code = TC_ASSIGN_ADD;
- break;
- default:
- putBackChar();
- code = TC_PLUS;
- }
- break;
- case '*':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_MUL;
- break;
- default:
- putBackChar();
- code = TC_STAR;
- }
- break;
- case '/':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_DIV;
- break;
- default:
- putBackChar();
- code = TC_BSLASH;
- }
- break;
- case '%':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_MOD;
- break;
- default:
- putBackChar();
- code = TC_MOD;
- }
- break;
- case '^':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_XOR;
- break;
- default:
- putBackChar();
- code = TC_XOR;
- }
- break;
- case '\\':
- code = TC_BACK_SLASH;
- break;
- }
- getNextChar();
- crntToken.code = code;
- }
- }} // end namespaces
|