123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640 |
- //-----------------------------------------------------------------------------
- // Copyright (c) 2012 GarageGames, LLC
- //
- // Permission is hereby granted, free of charge, to any person obtaining a copy
- // of this software and associated documentation files (the "Software"), to
- // deal in the Software without restriction, including without limitation the
- // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- // sell copies of the Software, and to permit persons to whom the Software is
- // furnished to do so, subject to the following conditions:
- //
- // The above copyright notice and this permission notice shall be included in
- // all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- // IN THE SOFTWARE.
- //-----------------------------------------------------------------------------
- #include "core/tokenizer.h"
- #include "platform/platform.h"
- #include "core/stream/fileStream.h"
- #include "core/strings/stringFunctions.h"
- #include "core/util/safeDelete.h"
- Tokenizer::Tokenizer()
- {
- dMemset(mFileName, 0, sizeof(mFileName));
- mpBuffer = NULL;
- mBufferSize = 0;
- mStartPos = 0;
- mCurrPos = 0;
- mTokenIsQuoted = false;
- dMemset(mCurrTokenBuffer, 0, sizeof(mCurrTokenBuffer));
- mTokenIsCurrent = false;
- mSingleTokens = NULL;
- VECTOR_SET_ASSOCIATION(mLinePositions);
- }
- Tokenizer::~Tokenizer()
- {
- clear();
- }
- bool Tokenizer::openFile(const char* pFileName)
- {
- AssertFatal(mFileName[0] == '\0', "Reuse of Tokenizers not allowed!");
- FileStream* pStream = new FileStream;
- if (pStream->open(pFileName, Torque::FS::File::Read) == false)
- {
- delete pStream;
- return false;
- }
- dStrcpy(mFileName, pFileName, 1024);
- mBufferSize = pStream->getStreamSize();
- mpBuffer = new char[mBufferSize];
- pStream->read(mBufferSize, mpBuffer);
- pStream->close();
- delete pStream;
- reset();
- buildLinePositions();
- return true;
- }
- bool Tokenizer::openFile(Stream* pStream)
- {
- mBufferSize = pStream->getStreamSize();
- mpBuffer = new char[mBufferSize];
- pStream->read(mBufferSize, mpBuffer);
- reset();
- buildLinePositions();
- return true;
- }
- void Tokenizer::setBuffer(const char* buffer, U32 bufferSize)
- {
- if (mpBuffer)
- {
- SAFE_DELETE_ARRAY(mpBuffer);
- mBufferSize = 0;
- }
- mBufferSize = bufferSize;
- mpBuffer = new char[mBufferSize + 1];
- dStrcpy(mpBuffer, buffer, mBufferSize + 1);
- reset();
- buildLinePositions();
- }
- void Tokenizer::setSingleTokens(const char* singleTokens)
- {
- if (mSingleTokens)
- {
- free(mSingleTokens);
- mSingleTokens = NULL;
- }
- if (singleTokens)
- mSingleTokens = dStrdup(singleTokens);
- }
- bool Tokenizer::reset()
- {
- mStartPos = 0;
- mCurrPos = 0;
- mTokenIsQuoted = false;
- dMemset(mCurrTokenBuffer, 0, sizeof(mCurrTokenBuffer));
- mTokenIsCurrent = false;
- return true;
- }
- bool Tokenizer::clear()
- {
- // Delete our buffer
- if (mpBuffer)
- SAFE_DELETE_ARRAY(mpBuffer);
- // Reset the buffer size
- mBufferSize = 0;
- // Reset our active data
- reset();
- // Clear our line positions
- mLinePositions.clear();
- // Reset our file name
- dMemset(mFileName, 0, 1024);
- // Wipe the single tokens
- setSingleTokens(NULL);
- return true;
- }
- bool Tokenizer::setCurrentPos(U32 pos)
- {
- mCurrPos = pos;
- mTokenIsCurrent = false;
- return advanceToken(true);
- }
- void Tokenizer::buildLinePositions()
- {
- if (mBufferSize == 0)
- return;
- // We can safely assume that the first line is at position 0
- mLinePositions.push_back(0);
- U32 currPos = 0;
- while (currPos + 1 < mBufferSize)
- {
- // Windows line ending
- if (mpBuffer[currPos] == '\r' && mpBuffer[currPos + 1] == '\n')
- {
- currPos += 2;
- mLinePositions.push_back(currPos);
- }
- // Not sure if this ever happens but just in case
- else if (mpBuffer[currPos] == '\n' && mpBuffer[currPos + 1] == '\r')
- {
- currPos += 2;
- mLinePositions.push_back(currPos);
- }
- // Unix line endings should only have a single line break character
- else if (mpBuffer[currPos] == '\n' || mpBuffer[currPos] == '\r')
- {
- currPos++;
- mLinePositions.push_back(currPos);
- }
- else
- currPos++;
- }
- }
- U32 Tokenizer::getLinePosition(const U32 pos, U32 lowIndex, S32 highIndex)
- {
- // If we have one or less lines then
- // the result is easy
- if (mLinePositions.size() <= 1)
- return 0;
- // Now that we know we have at least one position
- // we can do a quick test against the last line
- if (pos >= mLinePositions.last())
- return mLinePositions.size() - 1;
- // If this is the beginning of the search
- // set a good starting point (the middle)
- if (highIndex < 0)
- highIndex = mLinePositions.size() - 1;
- // Just in case bad values got handed in
- if (lowIndex > highIndex)
- lowIndex = highIndex;
- // Compute our test index (middle)
- U32 testIndex = (lowIndex + highIndex) / 2;
- // Make sure that our test indices are valid
- if (testIndex >= mLinePositions.size() ||
- testIndex + 1 >= mLinePositions.size())
- return mLinePositions.size() - 1;
- // See if we are already at the right line
- if (pos >= mLinePositions[testIndex] && pos < mLinePositions[testIndex + 1])
- return testIndex;
- if (pos < mLinePositions[testIndex])
- highIndex = testIndex;
- else
- lowIndex = testIndex;
- return getLinePosition(pos, lowIndex, highIndex);
- }
- U32 Tokenizer::getCurrentLine()
- {
- // Binary search for the line number whose
- // position is equal to or lower than the
- // current position
- return getLinePosition(mStartPos);
- }
- U32 Tokenizer::getTokenLineOffset()
- {
- U32 lineNumber = getCurrentLine();
- if (lineNumber >= mLinePositions.size())
- return 0;
- U32 linePosition = mLinePositions[lineNumber];
- if (linePosition >= mStartPos)
- return 0;
- return mStartPos - linePosition;
- }
- bool Tokenizer::advanceToken(const bool crossLine, const bool assertAvail)
- {
- if (mTokenIsCurrent == true)
- {
- AssertFatal(mCurrTokenBuffer[0] != '\0', "No token, but marked as current?");
- mTokenIsCurrent = false;
- return true;
- }
- U32 currPosition = 0;
- mCurrTokenBuffer[0] = '\0';
- mTokenIsQuoted = false;
- // Store the beginning of the previous advance
- // and the beginning of the current advance
- mStartPos = mCurrPos;
- while (mCurrPos < mBufferSize)
- {
- char c = mpBuffer[mCurrPos];
- bool cont = true;
- if (mSingleTokens && dStrchr(mSingleTokens, c))
- {
- if (currPosition == 0)
- {
- mCurrTokenBuffer[currPosition++] = c;
- mCurrPos++;
- cont = false;
- break;
- }
- else
- {
- // End of token
- cont = false;
- }
- }
- else
- {
- switch (c)
- {
- case ' ':
- case '\t':
- if (currPosition == 0)
- {
- // Token hasn't started yet...
- mCurrPos++;
- }
- else
- {
- // End of token
- mCurrPos++;
- cont = false;
- }
- break;
- case '\r':
- case '\n':
- if (crossLine == true)
- {
- // Windows line ending
- if (mpBuffer[mCurrPos] == '\r' && mpBuffer[mCurrPos + 1] == '\n')
- mCurrPos += 2;
- // Not sure if this ever happens but just in case
- else if (mpBuffer[mCurrPos] == '\n' && mpBuffer[mCurrPos + 1] == '\r')
- mCurrPos += 2;
- // Unix line endings should only have a single line break character
- else
- mCurrPos++;
- }
- else
- {
- cont = false;
- break;
- }
- break;
- default:
- if (c == '\"' || c == '\'')
- {
- // Quoted token
- U32 startLine = getCurrentLine();
- mCurrPos++;
- // Store the beginning of the token
- mStartPos = mCurrPos;
- while (mpBuffer[mCurrPos] != c)
- {
- AssertISV(mCurrPos < mBufferSize,
- avar("End of file before quote closed. Quote started: (%s: %d)",
- getFileName(), startLine));
- AssertISV((mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'),
- avar("End of line reached before end of quote. Quote started: (%s: %d)",
- getFileName(), startLine));
- mCurrTokenBuffer[currPosition++] = mpBuffer[mCurrPos++];
- }
- mTokenIsQuoted = true;
- mCurrPos++;
- cont = false;
- }
- else if (c == '/' && mpBuffer[mCurrPos+1] == '/')
- {
- // Line quote...
- if (currPosition == 0)
- {
- // continue to end of line, then let crossLine determine on the next pass
- while (mCurrPos < mBufferSize && (mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'))
- mCurrPos++;
- }
- else
- {
- // This is the end of the token. Continue to EOL
- while (mCurrPos < mBufferSize && (mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'))
- mCurrPos++;
- cont = false;
- }
- }
- else if (c == '/' && mpBuffer[mCurrPos+1] == '*')
- {
- // Block quote...
- if (currPosition == 0)
- {
- // continue to end of block, then let crossLine determine on the next pass
- while (mCurrPos < mBufferSize - 1 && (mpBuffer[mCurrPos] != '*' || mpBuffer[mCurrPos + 1] != '/'))
- mCurrPos++;
- if (mCurrPos < mBufferSize - 1)
- mCurrPos += 2;
- }
- else
- {
- // This is the end of the token. Continue to EOL
- while (mCurrPos < mBufferSize - 1 && (mpBuffer[mCurrPos] != '*' || mpBuffer[mCurrPos + 1] != '/'))
- mCurrPos++;
- if (mCurrPos < mBufferSize - 1)
- mCurrPos += 2;
- cont = false;
- }
- }
- else
- {
- // If this is the first non-token character then store the
- // beginning of the token
- if (currPosition == 0)
- mStartPos = mCurrPos;
- mCurrTokenBuffer[currPosition++] = c;
- mCurrPos++;
- }
- break;
- }
- }
- if (cont == false)
- break;
- }
- mCurrTokenBuffer[currPosition] = '\0';
- if (assertAvail == true)
- AssertISV(currPosition != 0, avar("Error parsing: %s at or around line: %d", getFileName(), getCurrentLine()));
- if (mCurrPos == mBufferSize)
- return false;
- return true;
- }
- bool Tokenizer::regressToken(const bool crossLine)
- {
- if (mTokenIsCurrent == true)
- {
- AssertFatal(mCurrTokenBuffer[0] != '\0', "No token, but marked as current?");
- mTokenIsCurrent = false;
- return true;
- }
- U32 currPosition = 0;
- mCurrTokenBuffer[0] = '\0';
- mTokenIsQuoted = false;
- // Store the beginning of the previous advance
- // and the beginning of the current advance
- mCurrPos = mStartPos;
- // Back up to the first character of the previous token
- mStartPos--;
- while (mStartPos > 0)
- {
- char c = mpBuffer[mStartPos];
- bool cont = true;
- if (mSingleTokens && dStrchr(mSingleTokens, c))
- {
- if (currPosition == 0)
- {
- mCurrTokenBuffer[currPosition++] = c;
- mStartPos--;
- cont = false;
- break;
- }
- else
- {
- // End of token
- cont = false;
- }
- }
- else
- {
- switch (c)
- {
- case ' ':
- case '\t':
- if (currPosition == 0)
- {
- // Token hasn't started yet...
- mStartPos--;
- }
- else
- {
- // End of token
- mStartPos--;
- cont = false;
- }
- break;
- case '\r':
- case '\n':
- if (crossLine == true && currPosition == 0)
- {
- // Windows line ending
- if (mStartPos > 0 && mpBuffer[mStartPos] == '\r' && mpBuffer[mStartPos - 1] == '\n')
- mStartPos -= 2;
- // Not sure if this ever happens but just in case
- else if (mStartPos > 0 && mpBuffer[mStartPos] == '\n' && mpBuffer[mStartPos - 1] == '\r')
- mStartPos -= 2;
- // Unix line endings should only have a single line break character
- else
- mStartPos--;
- }
- else
- {
- cont = false;
- break;
- }
- break;
- default:
- if (c == '\"' || c == '\'')
- {
- // Quoted token
- U32 endLine = getCurrentLine();
- mStartPos--;
- while (mpBuffer[mStartPos] != c)
- {
- AssertISV(mStartPos < 0,
- avar("Beginning of file reached before finding begin quote. Quote ended: (%s: %d)",
- getFileName(), endLine));
- mCurrTokenBuffer[currPosition++] = mpBuffer[mStartPos--];
- }
- mTokenIsQuoted = true;
- mStartPos--;
- cont = false;
- }
- else if (c == '/' && mStartPos > 0 && mpBuffer[mStartPos - 1] == '/')
- {
- // Line quote...
- // Clear out anything saved already
- currPosition = 0;
- mStartPos -= 2;
- }
- else
- {
- mCurrTokenBuffer[currPosition++] = c;
- mStartPos--;
- }
- break;
- }
- }
- if (cont == false)
- break;
- }
- mCurrTokenBuffer[currPosition] = '\0';
- // Reveres the token
- for (U32 i = 0; i < currPosition / 2; i++)
- {
- char c = mCurrTokenBuffer[i];
- mCurrTokenBuffer[i] = mCurrTokenBuffer[currPosition - i - 1];
- mCurrTokenBuffer[currPosition - i - 1] = c;
- }
- mStartPos++;
- if (mStartPos == mCurrPos)
- return false;
- return true;
- }
- bool Tokenizer::tokenAvailable()
- {
- // Note: this implies that when advanceToken(false) fails, it must cap the
- // token buffer.
- //
- return mCurrTokenBuffer[0] != '\0';
- }
- const char* Tokenizer::getToken() const
- {
- return mCurrTokenBuffer;
- }
- const char* Tokenizer::getNextToken()
- {
- advanceToken(true);
- return getToken();
- }
- bool Tokenizer::tokenICmp(const char* pCmp) const
- {
- return dStricmp(mCurrTokenBuffer, pCmp) == 0;
- }
- bool Tokenizer::findToken(U32 start, const char* pCmp)
- {
- // Move to the start
- setCurrentPos(start);
- // In case the first token is what we are looking for
- if (tokenICmp(pCmp))
- return true;
- // Loop through the file and see if the token exists
- while (advanceToken(true))
- {
- if (tokenICmp(pCmp))
- return true;
- }
- return false;
- }
- bool Tokenizer::findToken(const char* pCmp)
- {
- return findToken(0, pCmp);
- }
- bool Tokenizer::endOfFile()
- {
- if (mCurrPos < mBufferSize)
- return false;
- else
- return true;
- }
|