| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355 |
- #include "anki/util/Scanner.h"
- #include <boost/lexical_cast.hpp>
- #include <cstring>
- #include <iostream>
- #include <iomanip>
- #include <cmath>
- #include <sstream>
- namespace anki { namespace scanner {
- //==============================================================================
- Exception::Exception(const std::string& err, int errNo_,
- const std::string& scriptFilename_, int scriptLineNmbr_)
- : error(err), errNo(errNo_), scriptFilename(scriptFilename_),
- scriptLineNmbr(scriptLineNmbr_)
- {}
- //==============================================================================
- Exception::Exception(const Exception& e)
- : std::exception(e), error(e.error), errNo(e.errNo),
- scriptFilename(e.scriptFilename), scriptLineNmbr(e.scriptLineNmbr)
- {}
- //==============================================================================
- const char* Exception::what() const throw()
- {
- errWhat = "Scanner exception (#" +
- boost::lexical_cast<std::string>(errNo) +
- ":" + scriptFilename + ':' +
- boost::lexical_cast<std::string>(scriptLineNmbr) + "): " + error;
- return errWhat.c_str();
- }
- //==============================================================================
- Token::Token(const Token& b)
- : code(b.code), dataType(b.dataType)
- {
- switch(b.dataType)
- {
- case DT_FLOAT:
- value.float_ = b.value.float_;
- break;
- case DT_INT:
- value.int_ = b.value.int_;
- break;
- case DT_CHAR:
- value.char_ = b.value.char_;
- break;
- case DT_STR:
- value.string = b.value.string;
- break;
- }
- memcpy(&asString[0], &b.asString[0], sizeof(asString));
- }
- //==============================================================================
- std::ostream& operator<<(std::ostream& s, const Token& x)
- {
- const TokenDataVal& val = x.getValue();
- TokenCode code = x.getCode();
- switch(code)
- {
- case TC_COMMENT:
- s << "comment";
- break;
- case TC_NEWLINE:
- s << "newline";
- break;
- case TC_END:
- s << "end of file";
- break;
- case TC_STRING:
- s << "string \"" << val.getString() << "\"";
- break;
- case TC_CHARACTER:
- s << "char '" << val.getChar() << "' (\"" <<
- x.getString() << "\")";
- break;
- case TC_NUMBER:
- if(x.getDataType() == DT_FLOAT)
- {
- s << "float " << val.getFloat() << " (\"" << x.getString() <<
- "\")";
- }
- else
- {
- s << "int " << val.getInt() << " (\"" <<
- x.getString() << "\")";
- }
- break;
- case TC_IDENTIFIER:
- s << "identifier \"" << val.getString() << "\"";
- break;
- case TC_ERROR:
- s << "scanner error";
- break;
- default:
- if(code >= TC_KE && code <= TC_KEYWORD)
- {
- s << "reserved word \"" << val.getString() << "\"";
- }
- else if(code >= TC_SCOPE_RESOLUTION && code <= TC_ASSIGN_OR)
- {
- s << "operator no " << (code - TC_SCOPE_RESOLUTION);
- }
- }
- return s;
- }
- //==============================================================================
- std::string Token::getInfoString() const
- {
- std::stringstream ss;
- ss << *this;
- return ss.str();
- }
- //==============================================================================
- #define SCANNER_EXCEPTION(x) \
- Exception(std::string() + x, __LINE__, scriptName, lineNmbr)
- char Scanner::eofChar = 0x7F; // end-of-input sentinel: getLine() points pchar here when no line can be read, and initAsciiMap() classifies it as AC_EOF
- // Reserved words grouped by length. Every list ends with a {NULL, TC_ERROR} entry, which checkWord() uses as its stop marker.
- Scanner::ResWord Scanner::rw2 [] =
- {
- {"ke", TC_KE}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw3 [] =
- {
- {"key", TC_KEY}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw4 [] =
- {
- {"keyw", TC_KEYW}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw5 [] =
- {
- {"keywo", TC_KEYWO}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw6 [] =
- {
- {"keywor", TC_KEYWOR}, {NULL, TC_ERROR}
- };
- Scanner::ResWord Scanner::rw7 [] =
- {
- {"keyword", TC_KEYWORD}, {NULL, TC_ERROR}
- };
- Scanner::ResWord* Scanner::rwTable [] = // reserved word table, indexed by word length; checkWord() consults it only for lengths 2..7 (slots 0 and 1 are NULL)
- {
- NULL, NULL, rw2, rw3, rw4, rw5, rw6, rw7,
- };
- // ASCII classification table. Only the first element is explicitly AC_ERROR here (the rest are zero-initialized); initAsciiMap() fills in the real classes, and init() calls it when 'a' is not yet classified as AC_LETTER.
- Scanner::AsciiFlag Scanner::asciiLookupTable [128] = {AC_ERROR};
- //==============================================================================
- Scanner::Scanner(bool newlinesAsWhitespace)
- {
- strcpy(scriptName, "unnamed-script");
- init(newlinesAsWhitespace);
- }
- //==============================================================================
- Scanner::Scanner(const char* filename, bool newlinesAsWhitespace)
- {
- strcpy(scriptName, "unnamed-script");
- init(newlinesAsWhitespace);
- loadFile(filename);
- }
- //==============================================================================
- Scanner::Scanner(std::istream& istream_, const char* scriptName_,
- bool newlinesAsWhitespace)
- {
- strcpy(scriptName, "unnamed-script");
- init(newlinesAsWhitespace);
- loadIstream(istream_, scriptName_);
- }
- //==============================================================================
- void Scanner::initAsciiMap()
- {
- memset(&asciiLookupTable[0], AC_ERROR, sizeof(asciiLookupTable));
- for(uint x = 'a'; x <= 'z'; x++)
- {
- lookupAscii(x) = AC_LETTER;
- }
- for(uint x = 'A'; x <= 'Z'; x++)
- {
- lookupAscii(x) = AC_LETTER;
- }
- for(uint x = '0'; x <= '9'; x++)
- {
- lookupAscii(x) = AC_DIGIT;
- }
- lookupAscii(':') = lookupAscii('[') = lookupAscii(']') =
- lookupAscii('(') = lookupAscii(')') = lookupAscii('.') =
- lookupAscii('{') = lookupAscii('}') = lookupAscii(',') =
- lookupAscii(';') = lookupAscii('?') = lookupAscii('=') =
- lookupAscii('!') = lookupAscii('<') = lookupAscii('>') =
- lookupAscii('|') = lookupAscii('&') = lookupAscii('+') =
- lookupAscii('-') = lookupAscii('*') = lookupAscii('/') =
- lookupAscii('~') = lookupAscii('%') = lookupAscii('#') =
- lookupAscii('^') = lookupAscii('\\') = AC_SPECIAL;
- lookupAscii('\t') = lookupAscii(' ') = lookupAscii('\0') =
- AC_WHITESPACE;
- lookupAscii('\n') = AC_ERROR; // newline is unacceptable char
- lookupAscii('@') = lookupAscii('`') = lookupAscii('$') =
- AC_ACCEPTABLE_IN_COMMENTS;
- lookupAscii('\"') = AC_DOUBLEQUOTE;
- lookupAscii('\'') = AC_QUOTE;
- lookupAscii((int)eofChar) = AC_EOF;
- lookupAscii('_') = AC_LETTER;
- }
- //==============================================================================
- void Scanner::init(bool newlinesAsWhitespace_)
- {
- newlinesAsWhitespace = newlinesAsWhitespace_;
- commentedLines = 0;
- inStream = NULL;
- if(lookupAscii('a') != AC_LETTER)
- {
- initAsciiMap();
- }
- lineNmbr = 0;
- memset(line, eofChar, sizeof(char) * MAX_SCRIPT_LINE_LEN);
- }
- //==============================================================================
- void Scanner::getLine()
- {
- if(!inStream->getline(line, MAX_SCRIPT_LINE_LEN - 1, '\n'))
- {
- pchar = &eofChar;
- }
- else
- {
- pchar = &line[0];
- ++lineNmbr;
- }
- assert(inStream->gcount() <= MAX_SCRIPT_LINE_LEN - 10); // too big line
- }
- //==============================================================================
- char Scanner::getNextChar()
- {
- if(*pchar=='\0')
- {
- getLine();
- }
- else
- {
- ++pchar;
- }
- if(*pchar == '\r') // windows crap
- {
- *pchar = '\0';
- }
- else if(lookupAscii(*pchar) == AC_ERROR)
- {
- throw SCANNER_EXCEPTION("Unacceptable char '" + *pchar + "' 0x" +
- boost::lexical_cast<std::string>(static_cast<uint>(*pchar)));
- }
- return *pchar;
- }
- //==============================================================================
- char Scanner::putBackChar()
- {
- if(pchar != line && *pchar != eofChar)
- {
- --pchar;
- }
- return *pchar;
- }
- //==============================================================================
- void Scanner::getAllPrintAll()
- {
- do
- {
- getNextToken();
- std::cout << std::setw(3) << std::setfill('0') << getLineNumber() <<
- ": " << crntToken << std::endl;
- } while(crntToken.code != TC_END);
- }
- //==============================================================================
- void Scanner::loadFile(const char* filename_)
- {
- inFstream.open(filename_);
- if(!inFstream.is_open())
- {
- throw SCANNER_EXCEPTION("Cannot open file \"" + filename_ + '\"');
- }
- loadIstream(inFstream, filename_);
- }
- //==============================================================================
- void Scanner::loadIstream(std::istream& istream_, const char* scriptName_)
- {
- if(inStream != NULL)
- {
- throw SCANNER_EXCEPTION("Tokenizer already initialized");
- }
- inStream = &istream_;
- // init globals
- // Too big name
- assert(strlen(scriptName_) <= sizeof(scriptName) / sizeof(char) - 1);
- crntToken.code = TC_ERROR;
- lineNmbr = 0;
- strcpy(scriptName, scriptName_);
- getLine();
- }
- //==============================================================================
- void Scanner::unload()
- {
- inFstream.close();
- }
- //==============================================================================
- const Token& Scanner::getNextToken()
- {
- start:
- //if(crntToken.code == TC_NEWLINE) getNextChar();
- if(commentedLines>0)
- {
- crntToken.code = TC_NEWLINE;
- --commentedLines;
- // the ultimate hack. I should remember not to do such crap in the
- // future
- ++lineNmbr;
- }
- else if(*pchar == '/')
- {
- char ch = getNextChar();
- if(ch == '/' || ch == '*')
- {
- putBackChar();
- int line = getLineNumber();
- checkComment();
- commentedLines = getLineNumber() - line; // update commentedLines
- lineNmbr -= commentedLines; // part of the ultimate hack
- }
- else
- {
- putBackChar();
- goto crappyLabel;
- }
- }
- else if(*pchar == '.')
- {
- uint asc = lookupAscii(getNextChar());
- putBackChar();
- if(asc == AC_DIGIT)
- {
- checkNumber();
- }
- else
- {
- checkSpecial();
- }
- }
- else if(*pchar=='\0') // if newline
- {
- if(lookupAscii(getNextChar()) == AC_EOF)
- {
- crntToken.code = TC_END;
- }
- else
- {
- crntToken.code = TC_NEWLINE;
- }
- }
- else
- {
- crappyLabel:
- switch(lookupAscii(*pchar))
- {
- case AC_WHITESPACE :
- getNextChar();
- goto start;
- case AC_LETTER:
- checkWord();
- break;
- case AC_DIGIT:
- checkNumber();
- break;
- case AC_SPECIAL:
- checkSpecial();
- break;
- case AC_QUOTE:
- checkChar();
- break;
- case AC_DOUBLEQUOTE:
- checkString();
- break;
- case AC_EOF:
- crntToken.code = TC_END;
- break;
- case AC_ERROR:
- default:
- getNextChar();
- throw SCANNER_EXCEPTION("Unexpected character \'" + *pchar +
- '\'');
- goto start;
- }
- }
- // skip comments
- if(crntToken.code == TC_COMMENT)
- {
- goto start;
- }
- // skip newlines
- if(crntToken.code == TC_NEWLINE && newlinesAsWhitespace)
- {
- goto start;
- }
- return crntToken;
- }
- //==============================================================================
- void Scanner::checkWord()
- {
- char* tmpStr = &crntToken.asString[0];
- char ch = *pchar;
- //build the string
- do
- {
- *tmpStr++ = ch;
- ch = getNextChar();
- } while(lookupAscii(ch) == AC_LETTER || lookupAscii(ch) == AC_DIGIT);
- *tmpStr = '\0'; // finalize it
- //check if reserved
- int len = tmpStr - &crntToken.asString[0];
- crntToken.code = TC_IDENTIFIER;
- crntToken.value.string = &crntToken.asString[0];
- crntToken.dataType = DT_STR; // not important
- if(len <= 7 && len >= 2)
- {
- int x = 0;
- while(true)
- {
- if(rwTable[len][x].string == NULL)
- {
- break;
- }
- if(strcmp(rwTable[len][x].string, &crntToken.asString[0]) == 0)
- {
- crntToken.code = rwTable[len][x].code;
- break;
- }
- ++x;
- }
- }
- }
- //==============================================================================
- void Scanner::checkComment()
- {
- // Beginning
- if(getNextChar()=='*')
- {
- goto cStyleCmnt;
- }
- // C++ style comment
- else if(*pchar=='/')
- {
- while(true)
- {
- char ch = getNextChar();
- if(ch == '\0')
- {
- crntToken.code = TC_COMMENT;
- return;
- }
- else if(ch == '\\')
- {
- if(getNextChar() == '\0')
- {
- getNextChar();
- }
- }
- }
- }
- else
- {
- goto error;
- }
- // C style comment
- cStyleCmnt:
- if(getNextChar()=='*')
- {
- goto finalizeCCmnt;
- }
- else if(*pchar==eofChar)
- {
- goto error;
- }
- else
- {
- goto cStyleCmnt;
- }
- // C++ style comment
- finalizeCCmnt:
- if(getNextChar()=='/')
- {
- crntToken.code = TC_COMMENT;
- getNextChar();
- return;
- }
- else
- {
- goto cStyleCmnt;
- }
- //error
- error:
- crntToken.code = TC_ERROR;
- throw SCANNER_EXCEPTION("Incorrect comment ending");
- }
- //==============================================================================
- void Scanner::checkNumber()
- {
- // This func is working great, dont try to understand it and dont even
- // think to try touching it.
- //RASSERT_THROW_EXCEPTION(sizeof(long) != 8); // ulong must be 64bit
- long num = 0; // value of the number & part of the float num before '.'
- long fnum = 0; // part of the float num after '.'
- long dad = 0; // digits after dot (for floats)
- bool expSign = 0; // exponent sign in case float is represented in mant/exp
- // format. 0 means positive and 1 negative
- long exp = 0; // the exponent in case float is represented in mant/exp
- // format
- char* tmpStr = &crntToken.asString[0];
- crntToken.dataType = DT_INT;
- uint asc;
- // begin
- if(*pchar == '0')
- {
- goto _0;
- }
- else if(lookupAscii(*pchar) == AC_DIGIT)
- {
- num = num*10 + *pchar-'0';
- goto _0_9;
- }
- else if (*pchar == '.')
- {
- goto _float;
- }
- else
- {
- goto error;
- }
- // 0????
- _0:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if (*pchar == 'x' || *pchar == 'X')
- {
- goto _0x;
- }
- else if(*pchar == 'e' || *pchar == 'E')
- {
- goto _0_9_dot_0_9_e;
- }
- else if(asc == AC_DIGIT)
- {
- putBackChar();
- goto _0_9;
- }
- else if(*pchar == '.')
- {
- goto _float;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // 0x????
- _0x:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if((asc == AC_DIGIT) ||
- (*pchar >= 'a' && *pchar <= 'f') ||
- (*pchar >= 'A' && *pchar <= 'F'))
- {
- num <<= 4;
- if(*pchar>='a' && *pchar<='f')
- {
- num += *pchar - 'a' + 0xA;
- }
- else if(*pchar>='A' && *pchar<='F')
- {
- num += *pchar - 'A' + 0xA;
- }
- else
- {
- num += *pchar - '0';
- }
- goto _0x0_9orA_F;
- }
- else
- goto error;
- // 0x{0-9 || a-f}??
- _0x0_9orA_F:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if((asc == AC_DIGIT) ||
- (*pchar >= 'a' && *pchar <= 'f') ||
- (*pchar >= 'A' && *pchar <= 'F'))
- {
- num <<= 4;
- if(*pchar>='a' && *pchar<='f')
- {
- num += *pchar - 'a' + 0xA;
- }
- else if(*pchar>='A' && *pchar<='F')
- {
- num += *pchar - 'A' + 0xA;
- }
- else
- {
- num += *pchar - '0';
- }
- goto _0x0_9orA_F;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error; // err
- }
- // {0-9}
- _0_9:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_DIGIT)
- {
- num = num * 10 + *pchar - '0';
- goto _0_9;
- }
- else if(*pchar == 'e' || *pchar == 'E')
- {
- goto _0_9_dot_0_9_e;
- }
- else if(*pchar == '.')
- {
- goto _float;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error; // err
- }
- // {0-9}.??
- _float:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- crntToken.dataType = DT_FLOAT;
- if(asc == AC_DIGIT)
- {
- fnum = fnum * 10 + *pchar - '0';
- ++dad;
- goto _float;
- }
- else if(*pchar == '.')
- {
- *tmpStr++ = *pchar;
- getNextChar();
- goto error;
- }
- else if(*pchar == 'f' || *pchar == 'F')
- {
- goto _0_9_dot_0_9_f;
- }
- else if(*pchar == 'e' || *pchar == 'E')
- {
- goto _0_9_dot_0_9_e;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]f??
- _0_9_dot_0_9_f:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]e??
- _0_9_dot_0_9_e:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- crntToken.dataType = DT_FLOAT;
- if(*pchar == '+' || *pchar == '-')
- {
- if(*pchar == '-') expSign = 1;
- //*tmpStr++ = *pchar; getNextChar();
- goto _0_9_dot_0_9_e_sign;
- }
- else if(asc == AC_DIGIT)
- {
- exp = exp * 10 + *pchar - '0';
- goto _0_9_dot_0_9_e_sign_0_9;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]e{+,-}??
- // After the sign we want number
- _0_9_dot_0_9_e_sign:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_DIGIT)
- {
- exp = exp * 10 + *pchar - '0';
- goto _0_9_dot_0_9_e_sign_0_9;
- }
- else
- {
- goto error;
- }
- // [{0-9}].[{0-9}]e{+,-}{0-9}??
- // After the number in exponent we want other number or we finalize
- _0_9_dot_0_9_e_sign_0_9:
- *tmpStr++ = *pchar;
- getNextChar();
- asc = lookupAscii(*pchar);
- if(asc == AC_DIGIT)
- {
- exp = exp * 10 + *pchar - '0';
- goto _0_9_dot_0_9_e_sign_0_9;
- }
- else if(asc == AC_SPECIAL || asc == AC_WHITESPACE || asc == AC_EOF)
- {
- goto finalize;
- }
- else
- {
- goto error;
- }
- // finalize
- finalize:
- crntToken.code = TC_NUMBER;
- if(crntToken.dataType == DT_INT)
- {
- crntToken.value.int_ = num;
- }
- else
- {
- double dbl = (double)num + (double)(pow(10, -dad)*fnum);
- if(exp != 0) // if we have exponent
- {
- if(expSign == true)
- {
- exp = -exp; // change the sign if necessary
- }
- dbl = dbl * pow(10, exp);
- }
- crntToken.value.float_ = dbl;
- }
- *tmpStr = '\0';
- return;
- //error
- error:
- crntToken.code = TC_ERROR;
- // run until white space or special
- asc = lookupAscii(*pchar);
- while(asc!=AC_WHITESPACE && asc!=AC_SPECIAL && asc!=AC_EOF)
- {
- *tmpStr++ = *pchar;
- asc = lookupAscii(getNextChar());
- }
- *tmpStr = '\0';
- throw SCANNER_EXCEPTION("Bad number suffix \"" +
- &crntToken.asString[0] + '\"');
- }
- //==============================================================================
- void Scanner::checkString()
- {
- char* tmpStr = &crntToken.asString[0];
- char ch = getNextChar();
- for(;;)
- {
- // Error
- if(ch == '\0' || ch == eofChar) // if end of line or eof
- {
- crntToken.code = TC_ERROR;
- *tmpStr = '\0';
- throw SCANNER_EXCEPTION("Incorrect string ending \"" +
- &crntToken.asString[0] + '\"');
- return;
- }
- // Escape Codes
- else if(ch == '\\')
- {
- ch = getNextChar();
- if(ch == eofChar)
- {
- crntToken.code = TC_ERROR;
- *tmpStr = '\0';
- throw SCANNER_EXCEPTION("Incorrect string ending \"" +
- &crntToken.asString[0] + '\"');
- return;
- }
- switch(ch)
- {
- case 'n':
- *tmpStr++ = '\n';
- break;
- case 't':
- *tmpStr++ = '\t';
- break;
- case '0':
- *tmpStr++ = '\0';
- break;
- case 'a':
- *tmpStr++ = '\a';
- break;
- case '\"':
- *tmpStr++ = '\"';
- break;
- case 'f':
- *tmpStr++ = '\f';
- break;
- case 'v':
- *tmpStr++ = '\v';
- break;
- case '\'':
- *tmpStr++ = '\'';
- break;
- case '\\':
- *tmpStr++ = '\\';
- break;
- case '\?':
- *tmpStr++ = '\?';
- break;
- case '\0':
- break; // not an escape char but works almost the same
- default:
- throw SCANNER_EXCEPTION(
- "Unrecognized escape character \'\\" + ch + '\'');
- *tmpStr++ = ch;
- }
- }
- // End
- else if(ch=='\"')
- {
- *tmpStr = '\0';
- crntToken.code = TC_STRING;
- crntToken.value.string = &crntToken.asString[0];
- getNextChar();
- return;
- }
- // Build str(main loop)
- else
- {
- *tmpStr++ = ch;
- }
- ch = getNextChar();
- }
- }
- //==============================================================================
- void Scanner::checkChar()
- {
- char ch = getNextChar();
- char ch0 = ch;
- char* tmpStr = &crntToken.asString[0];
- crntToken.code = TC_ERROR;
- *tmpStr++ = ch;
- if(ch=='\0' || ch==eofChar) // check char after '
- {
- throw SCANNER_EXCEPTION("Newline in constant");
- return;
- }
- if (ch=='\'') // if '
- {
- throw SCANNER_EXCEPTION("Empty constant");
- getNextChar();
- return;
- }
- if (ch=='\\') // if \ then maybe escape char
- {
- ch = getNextChar();
- *tmpStr++ = ch;
- if(ch == '\0' || ch == eofChar) //check again after the \.
- {
- throw SCANNER_EXCEPTION("Newline in constant");
- }
- switch (ch)
- {
- case 'n' :
- ch0 = '\n';
- break;
- case 't' :
- ch0 = '\t';
- break;
- case '0':
- ch0 = '\0';
- break;
- case 'a':
- ch0 = '\a';
- break;
- case '\"':
- ch0 = '\"';
- break;
- case 'f':
- ch0 = '\f';
- break;
- case 'v':
- ch0 = '\v';
- break;
- case '\'':
- ch0 = '\'';
- break;
- case '\\':
- ch0 = '\\';
- break;
- case '\?':
- ch0 = '\?';
- break;
- case 'r':
- ch0 = '\r';
- break;
- default:
- ch0 = ch;
- throw SCANNER_EXCEPTION("Unrecognized escape character \'\\" +
- ch + '\'');
- }
- crntToken.value.char_ = ch0;
- }
- else
- {
- crntToken.value.char_ = ch;
- }
- ch = getNextChar();
- if(ch=='\'') //end
- {
- *tmpStr = '\0';
- crntToken.code = TC_CHARACTER;
- getNextChar();
- return;
- }
- throw SCANNER_EXCEPTION("Expected \'");
- }
- //==============================================================================
- void Scanner::checkSpecial()
- {
- char ch = *pchar;
- TokenCode code = TC_ERROR;
- switch(ch)
- {
- case '#':
- code = TC_SHARP;
- break;
- case ',':
- code = TC_COMMA;
- break;
- case ';':
- code = TC_PERIOD;
- break;
- case '(':
- code = TC_L_PAREN;
- break;
- case ')':
- code = TC_R_PAREN;
- break;
- case '[':
- code = TC_L_SQ_BRACKET;
- break;
- case ']':
- code = TC_R_SQ_BRACKET;
- break;
- case '{':
- code = TC_L_BRACKET;
- break;
- case '}':
- code = TC_R_BRACKET;
- break;
- case '?':
- code = TC_QUESTIONMARK;
- break;
- case '~':
- code = TC_UNARAY_COMPLEMENT;
- break;
- case '.':
- ch = getNextChar();
- switch(ch)
- {
- case '*':
- code = TC_POINTER_TO_MEMBER;
- break;
- default:
- putBackChar();
- code = TC_DOT;
- }
- break;
- case ':':
- ch = getNextChar();
- switch(ch)
- {
- case ':':
- code = TC_SCOPE_RESOLUTION;
- break;
- default:
- putBackChar();
- code = TC_UPDOWNDOT;
- }
- break;
- case '-':
- ch = getNextChar();
- switch(ch)
- {
- case '>':
- code = TC_POINTER_TO_MEMBER;
- break;
- case '-':
- code = TC_DEC;
- break;
- case '=':
- code = TC_ASSIGN_SUB;
- break;
- default:
- putBackChar();
- code = TC_MINUS;
- }
- break;
- case '=':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_EQUAL;
- break;
- default:
- putBackChar();
- code = TC_ASSIGN;
- }
- break;
- case '!':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_NOT_EQUAL;
- break;
- default:
- putBackChar();
- code = TC_NOT;
- }
- break;
- case '<':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_LESS_EQUAL;
- break;
- case '<':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_SHL;
- break;
- default:
- putBackChar();
- code = TC_SHL;
- }
- break;
- default:
- putBackChar();
- code = TC_LESS;
- }
- break;
- case '>':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_GREATER_EQUAL;
- break;
- case '>':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_SHR;
- break;
- default:
- putBackChar();
- code = TC_SHR;
- }
- break;
- default:
- putBackChar();
- code = TC_GREATER;
- }
- break;
- case '|':
- ch = getNextChar();
- switch(ch)
- {
- case '|':
- code = TC_LOGICAL_OR;
- break;
- case '=':
- code = TC_ASSIGN_OR;
- break;
- default:
- putBackChar();
- code = TC_BITWISE_OR;
- }
- break;
- case '&':
- ch = getNextChar();
- switch(ch)
- {
- case '&':
- code = TC_LOGICAL_AND;
- break;
- case '=':
- code = TC_ASSIGN_AND;
- break;
- default:
- putBackChar();
- code = TC_BITWISE_AND;
- }
- break;
- case '+':
- ch = getNextChar();
- switch(ch)
- {
- case '+':
- code = TC_INC;
- break;
- case '=':
- code = TC_ASSIGN_ADD;
- break;
- default:
- putBackChar();
- code = TC_PLUS;
- }
- break;
- case '*':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_MUL;
- break;
- default:
- putBackChar();
- code = TC_STAR;
- }
- break;
- case '/':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_DIV;
- break;
- default:
- putBackChar();
- code = TC_BSLASH;
- }
- break;
- case '%':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_MOD;
- break;
- default:
- putBackChar();
- code = TC_MOD;
- }
- break;
- case '^':
- ch = getNextChar();
- switch(ch)
- {
- case '=':
- code = TC_ASSIGN_XOR;
- break;
- default:
- putBackChar();
- code = TC_XOR;
- }
- break;
- case '\\':
- code = TC_BACK_SLASH;
- break;
- }
- getNextChar();
- crntToken.code = code;
- }
- }} // end namespaces
|