| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585 |
- // Copyright (c) 2008 Roberto Raggi <[email protected]>
- //
- // Permission is hereby granted, free of charge, to any person obtaining a copy
- // of this software and associated documentation files (the "Software"), to deal
- // in the Software without restriction, including without limitation the rights
- // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- // copies of the Software, and to permit persons to whom the Software is
- // furnished to do so, subject to the following conditions:
- //
- // The above copyright notice and this permission notice shall be included in
- // all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- // THE SOFTWARE.
- #include "TranslationUnit.h"
- #include "Control.h"
- #include "Parser.h"
- #include "Lexer.h"
- #include "MemoryPool.h"
- #include "AST.h"
- #include "Literals.h"
- #include "DiagnosticClient.h"
- #include <stack>
- #include <vector>
- #include <cstdarg>
- #include <algorithm>
- #include <utility>
- #if defined(_MSC_VER) && (_MSC_VER < 1800)
- # define va_copy(dst, src) ((dst) = (src))
- #elif defined(__INTEL_COMPILER) && !defined(va_copy)
- # define va_copy __va_copy
- #endif
- using namespace CPlusPlus;
- const Token TranslationUnit::nullToken;
// Creates a translation unit for the file identified by `fileId` and makes it
// the active unit on `control` (the previously active unit is remembered so
// the destructor can restore it). The token/comment streams and the AST
// memory pool are heap-allocated here and freed by release().
TranslationUnit::TranslationUnit(Control *control, const StringLiteral *fileId)
    : _control(control),
      _fileId(fileId),
      _firstSourceChar(0),
      _lastSourceChar(0),
      _pool(0),
      _ast(0),
      _flags(0)
{
    _tokens = new std::vector<Token>();
    _comments = new std::vector<Token>();
    _previousTranslationUnit = control->switchTranslationUnit(this);
    _pool = new MemoryPool();
}
// Restores the previously active translation unit on the Control, then frees
// the pool, AST pointer and token streams via release().
TranslationUnit::~TranslationUnit()
{
    (void) _control->switchTranslationUnit(_previousTranslationUnit);
    release();
}
// --- Trivial accessors -------------------------------------------------------

Control *TranslationUnit::control() const
{ return _control; }

const StringLiteral *TranslationUnit::fileId() const
{ return _fileId; }

const char *TranslationUnit::fileName() const
{ return _fileId->chars(); }

unsigned TranslationUnit::fileNameLength() const
{ return _fileId->size(); }

const char *TranslationUnit::firstSourceChar() const
{ return _firstSourceChar; }

const char *TranslationUnit::lastSourceChar() const
{ return _lastSourceChar; }

// Length of the source buffer set via setSource(), in chars.
unsigned TranslationUnit::sourceLength() const
{ return _lastSourceChar - _firstSourceChar; }

// Points the unit at an externally owned source buffer; the buffer is not
// copied and must outlive tokenize()/parse().
void TranslationUnit::setSource(const char *source, unsigned size)
{
    _firstSourceChar = source;
    _lastSourceChar = source + size;
}

// Text of the token at `index`, or 0 for index 0 (the invalid null token).
const char *TranslationUnit::spell(unsigned index) const
{
    if (! index)
        return 0;

    return tokenAt(index).spell();
}

unsigned TranslationUnit::commentCount() const
{ return unsigned(_comments->size()); }

const Token &TranslationUnit::commentAt(unsigned index) const
{ return _comments->at(index); }

const Identifier *TranslationUnit::identifier(unsigned index) const
{ return tokenAt(index).identifier; }

const Literal *TranslationUnit::literal(unsigned index) const
{ return tokenAt(index).literal; }

const StringLiteral *TranslationUnit::stringLiteral(unsigned index) const
{ return tokenAt(index).string; }

const NumericLiteral *TranslationUnit::numericLiteral(unsigned index) const
{ return tokenAt(index).number; }

// Index of the '}' token matching the '{' at `index` (filled in by tokenize()).
unsigned TranslationUnit::matchingBrace(unsigned index) const
{ return tokenAt(index).close_brace; }

MemoryPool *TranslationUnit::memoryPool() const
{ return _pool; }

AST *TranslationUnit::ast() const
{ return _ast; }

bool TranslationUnit::isTokenized() const
{ return f._tokenized; }

bool TranslationUnit::isParsed() const
{ return f._parsed; }
// Lexes the whole source buffer into _tokens (comments go to _comments).
// Also interprets preprocessor bookkeeping lines of the form
// `# <num> "<file>"`, `# line ...` and `# expansion begin/end ...` to record
// line remapping (_ppLines) and macro-expansion position data
// (_expandedLineColumn), and links matching '{'/'}' pairs via close_brace.
// Idempotent: returns immediately if already tokenized.
void TranslationUnit::tokenize()
{
    if (isTokenized())
        return;

    f._tokenized = true;

    Lexer lex(this);
    lex.setLanguageFeatures(_languageFeatures);
    lex.setScanCommentTokens(true);

    // Stack of indices of currently open '{' tokens, used to fill close_brace.
    std::stack<unsigned> braces;
    _tokens->push_back(nullToken); // the first token needs to be invalid!

    pushLineOffset(0);
    pushPreprocessorLine(0, 1, fileId());

    // Identifiers used to recognize the preprocessor bookkeeping directives.
    const Identifier *lineId = control()->identifier("line");
    const Identifier *expansionId = control()->identifier("expansion");
    const Identifier *beginId = control()->identifier("begin");
    const Identifier *endId = control()->identifier("end");

    // We need to track information about the expanded tokens. A vector with an addition
    // explicit index control is used instead of queue mainly for performance reasons.
    std::vector<std::pair<unsigned, unsigned> > lineColumn;
    unsigned lineColumnIdx = 0;

    Token tk;
    do {
        lex(&tk);

recognize:
        // A '#' at the start of a line introduces a preprocessor marker.
        if (tk.is(T_POUND) && tk.newline()) {
            const unsigned utf16CharOffset = tk.utf16charOffset;
            lex(&tk);

            if (! tk.newline() && tk.is(T_IDENTIFIER) && tk.identifier == expansionId) {
                // It's an expansion mark.
                lex(&tk);

                if (!tk.newline() && tk.is(T_IDENTIFIER)) {
                    if (tk.identifier == beginId) {
                        // Start of a macro expansion section.
                        lex(&tk);

                        // Gather where the expansion happens and its length.
                        //unsigned macroOffset = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
                        lex(&tk);
                        lex(&tk); // Skip the separating comma
                        //unsigned macroLength = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
                        lex(&tk);

                        // NOTE: We are currently not using the macro offset and length. They
                        // are kept here for now because of future use.
                        //Q_UNUSED(macroOffset)
                        //Q_UNUSED(macroLength)

                        // Now we need to gather the real line and columns from the upcoming
                        // tokens. But notice this is only relevant for tokens which are expanded
                        // but not generated.
                        while (tk.isNot(T_EOF_SYMBOL) && !tk.newline()) {
                            // When we get a ~ it means there's a number of generated tokens
                            // following. Otherwise, we have actual data.
                            if (tk.is(T_TILDE)) {
                                lex(&tk);

                                // Get the total number of generated tokens and specify "null"
                                // information for them.
                                unsigned totalGenerated =
                                        static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
                                const std::size_t previousSize = lineColumn.size();
                                lineColumn.resize(previousSize + totalGenerated);
                                std::fill(lineColumn.begin() + previousSize,
                                          lineColumn.end(),
                                          std::make_pair(0, 0));

                                lex(&tk);
                            } else if (tk.is(T_NUMERIC_LITERAL)) {
                                unsigned line = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
                                lex(&tk);
                                lex(&tk); // Skip the separating colon
                                unsigned column = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));

                                // Store line and column for this non-generated token.
                                lineColumn.push_back(std::make_pair(line, column));

                                lex(&tk);
                            }
                        }
                    } else if (tk.identifier == endId) {
                        // End of a macro expansion.
                        lineColumn.clear();
                        lineColumnIdx = 0;

                        lex(&tk);
                    }
                }
            } else {
                // A line marker: `# line <num> "<file>"` or `# <num> "<file>"`.
                if (! tk.newline() && tk.is(T_IDENTIFIER) && tk.identifier == lineId)
                    lex(&tk);
                if (! tk.newline() && tk.is(T_NUMERIC_LITERAL)) {
                    unsigned line = (unsigned) strtoul(tk.spell(), 0, 0);
                    lex(&tk);
                    if (! tk.newline() && tk.is(T_STRING_LITERAL)) {
                        const StringLiteral *fileName =
                                control()->stringLiteral(tk.string->chars(), tk.string->size());
                        pushPreprocessorLine(utf16CharOffset, line, fileName);
                        lex(&tk);
                    }
                }
                // Discard the remainder of the directive line.
                while (tk.isNot(T_EOF_SYMBOL) && ! tk.newline())
                    lex(&tk);
            }

            // `tk` now holds the first token after the directive; classify it
            // without lexing again.
            goto recognize;
        } else if (tk.kind() == T_LBRACE) {
            braces.push(unsigned(_tokens->size()));
        } else if (tk.kind() == T_RBRACE && ! braces.empty()) {
            const unsigned open_brace_index = braces.top();
            braces.pop();
            if (open_brace_index < tokenCount())
                (*_tokens)[open_brace_index].close_brace = unsigned(_tokens->size());
        } else if (tk.isComment()) {
            _comments->push_back(tk);
            continue; // comments are not in the regular token stream
        }

        // If we are inside an expansion section, attach the recorded
        // line/column data (line 0 marks a purely generated token).
        bool currentExpanded = false;
        bool currentGenerated = false;

        if (!lineColumn.empty() && lineColumnIdx < lineColumn.size()) {
            currentExpanded = true;
            const std::pair<unsigned, unsigned> &p = lineColumn[lineColumnIdx];
            if (p.first)
                _expandedLineColumn.insert(std::make_pair(tk.utf16charsBegin(), p));
            else
                currentGenerated = true;

            ++lineColumnIdx;
        }

        tk.f.expanded = currentExpanded;
        tk.f.generated = currentGenerated;

        _tokens->push_back(tk);
    } while (tk.kind());

    // Close any braces left open at EOF against the end of the stream.
    for (; ! braces.empty(); braces.pop()) {
        unsigned open_brace_index = braces.top();
        (*_tokens)[open_brace_index].close_brace = unsigned(_tokens->size());
    }
}
// When set, the parser skips over function bodies (see Parser usage).
bool TranslationUnit::skipFunctionBody() const
{ return f._skipFunctionBody; }

void TranslationUnit::setSkipFunctionBody(bool skipFunctionBody)
{ f._skipFunctionBody = skipFunctionBody; }
- bool TranslationUnit::parse(ParseMode mode)
- {
- if (isParsed())
- return false;
- if (! isTokenized())
- tokenize();
- f._parsed = true;
- Parser parser(this);
- bool parsed = false;
- switch (mode) {
- case ParseTranlationUnit: {
- TranslationUnitAST *node = 0;
- parsed = parser.parseTranslationUnit(node);
- _ast = node;
- } break;
- case ParseDeclaration: {
- DeclarationAST *node = 0;
- parsed = parser.parseDeclaration(node);
- _ast = node;
- } break;
- case ParseExpression: {
- ExpressionAST *node = 0;
- parsed = parser.parseExpression(node);
- _ast = node;
- } break;
- case ParseDeclarator: {
- DeclaratorAST *node = 0;
- parsed = parser.parseDeclarator(node, /*decl_specifier_list =*/ 0);
- _ast = node;
- } break;
- case ParseStatement: {
- StatementAST *node = 0;
- parsed = parser.parseStatement(node);
- _ast = node;
- } break;
- default:
- break;
- } // switch
- return parsed;
- }
// Records the utf16-char offset at which a new source line starts.
void TranslationUnit::pushLineOffset(unsigned offset)
{ _lineOffsets.push_back(offset); }

// Records a `# <line> "<file>"` marker seen at `utf16charOffset`, used by
// getPosition() to map buffer lines back to original file/line pairs.
void TranslationUnit::pushPreprocessorLine(unsigned utf16charOffset,
                                           unsigned line,
                                           const StringLiteral *fileName)
{ _ppLines.push_back(PPLine(utf16charOffset, line, fileName)); }
- unsigned TranslationUnit::findLineNumber(unsigned utf16charOffset) const
- {
- std::vector<unsigned>::const_iterator it =
- std::lower_bound(_lineOffsets.begin(), _lineOffsets.end(), utf16charOffset);
- if (it != _lineOffsets.begin())
- --it;
- return it - _lineOffsets.begin();
- }
- TranslationUnit::PPLine TranslationUnit::findPreprocessorLine(unsigned utf16charOffset) const
- {
- std::vector<PPLine>::const_iterator it =
- std::lower_bound(_ppLines.begin(), _ppLines.end(), PPLine(utf16charOffset));
- if (it != _ppLines.begin())
- --it;
- return *it;
- }
- unsigned TranslationUnit::findColumnNumber(unsigned utf16CharOffset, unsigned lineNumber) const
- {
- if (! utf16CharOffset)
- return 0;
- return utf16CharOffset - _lineOffsets[lineNumber];
- }
// Position of the start of token `index`. Identical to
// getTokenStartPosition(); kept as a separate entry point for callers.
void TranslationUnit::getTokenPosition(unsigned index,
                                       unsigned *line,
                                       unsigned *column,
                                       const StringLiteral **fileName) const
{ return getPosition(tokenAt(index).utf16charsBegin(), line, column, fileName); }

// Position of the start of token `index` (line/column/file via out-params;
// any of them may be null).
void TranslationUnit::getTokenStartPosition(unsigned index, unsigned *line,
                                            unsigned *column,
                                            const StringLiteral **fileName) const
{ return getPosition(tokenAt(index).utf16charsBegin(), line, column, fileName); }

// Position just past the end of token `index`.
void TranslationUnit::getTokenEndPosition(unsigned index, unsigned *line,
                                          unsigned *column,
                                          const StringLiteral **fileName) const
{ return getPosition(tokenAt(index).utf16charsEnd(), line, column, fileName); }
// Translates a utf16-char offset in the source buffer into a (line, column,
// file) triple. For tokens inside macro expansions the data comes straight
// from the expansion headers recorded by tokenize(); otherwise the line is
// found in _lineOffsets and then re-based against the governing `# line`
// marker so the result refers to the original file. Out-params may be null.
void TranslationUnit::getPosition(unsigned utf16charOffset,
                                  unsigned *line,
                                  unsigned *column,
                                  const StringLiteral **fileName) const
{
    unsigned lineNumber = 0;
    unsigned columnNumber = 0;
    const StringLiteral *file = 0;

    // If this token is expanded we already have the information directly from the expansion
    // section header. Otherwise, we need to calculate it.
    TokenLineColumn::const_iterator it = _expandedLineColumn.find(utf16charOffset);
    if (it != _expandedLineColumn.end()) {
        lineNumber = it->second.first;
        columnNumber = it->second.second + 1; // stored columns are 0-based
        file = _fileId;
    } else {
        // Identify line within the entire translation unit.
        lineNumber = findLineNumber(utf16charOffset);

        // Identify column.
        columnNumber = findColumnNumber(utf16charOffset, lineNumber);

        // Adjust the line in regards to the preprocessing markers:
        // distance from the marker's own line, plus the line the marker names.
        const PPLine ppLine = findPreprocessorLine(utf16charOffset);
        lineNumber -= findLineNumber(ppLine.utf16charOffset) + 1;
        lineNumber += ppLine.line;

        file = ppLine.fileName;
    }

    if (line)
        *line = lineNumber;

    if (column)
        *column = columnNumber;

    if (fileName)
        *fileName = file;
}
// Central diagnostic sink: resolves the position of token `index`, then
// either forwards the formatted message to the registered DiagnosticClient
// or prints it (with the offending source line) to stderr. Fatal diagnostics
// terminate the process. Consumes `args`; callers own va_start/va_end.
void TranslationUnit::message(DiagnosticClient::Level level, unsigned index, const char *format, va_list args)
{
    if (f._blockErrors)
        return;

    // NOTE(review): if tokenCount() could ever be 0 here this underflows
    // (unsigned) — presumably tokenize() has always run and pushed the null
    // token before diagnostics are emitted; confirm against callers.
    index = std::min(index, tokenCount() - 1);

    unsigned line = 0, column = 0;
    const StringLiteral *fileName = 0;
    getTokenPosition(index, &line, &column, &fileName);

    if (DiagnosticClient *client = control()->diagnosticClient()) {
        client->report(level, fileName, line, column, format, args);
    } else {
        // No client installed: emit a gcc-style "file:line: level: msg" line.
        fprintf(stderr, "%s:%u: ", fileName->chars(), line);
        const char *l = "error";
        if (level == DiagnosticClient::Warning)
            l = "warning";
        else if (level == DiagnosticClient::Fatal)
            l = "fatal";
        fprintf(stderr, "%s: ", l);

        vfprintf(stderr, format, args);
        fputc('\n', stderr);

        showErrorLine(index, column, stderr);
    }

    if (level == DiagnosticClient::Fatal)
        exit(EXIT_FAILURE);
}
- void TranslationUnit::warning(unsigned index, const char *format, ...)
- {
- if (f._blockErrors)
- return;
- va_list args, ap;
- va_start(args, format);
- va_copy(ap, args);
- message(DiagnosticClient::Warning, index, format, args);
- va_end(ap);
- va_end(args);
- }
- void TranslationUnit::error(unsigned index, const char *format, ...)
- {
- if (f._blockErrors)
- return;
- va_list args, ap;
- va_start(args, format);
- va_copy(ap, args);
- message(DiagnosticClient::Error, index, format, args);
- va_end(ap);
- va_end(args);
- }
- void TranslationUnit::fatal(unsigned index, const char *format, ...)
- {
- if (f._blockErrors)
- return;
- va_list args, ap;
- va_start(args, format);
- va_copy(ap, args);
- message(DiagnosticClient::Fatal, index, format, args);
- va_end(ap);
- va_end(args);
- }
- unsigned TranslationUnit::findPreviousLineOffset(unsigned tokenIndex) const
- {
- unsigned lineOffset = _lineOffsets[findLineNumber(tokenAt(tokenIndex).utf16charsBegin())];
- return lineOffset;
- }
- bool TranslationUnit::maybeSplitGreaterGreaterToken(unsigned tokenIndex)
- {
- if (tokenIndex >= tokenCount())
- return false;
- Token &tok = (*_tokens)[tokenIndex];
- if (tok.kind() != T_GREATER_GREATER)
- return false;
- tok.f.kind = T_GREATER;
- tok.f.bytes = 1;
- tok.f.utf16chars = 1;
- Token newGreater;
- newGreater.f.kind = T_GREATER;
- newGreater.f.expanded = tok.expanded();
- newGreater.f.generated = tok.generated();
- newGreater.f.bytes = 1;
- newGreater.f.utf16chars = 1;
- newGreater.byteOffset = tok.byteOffset + 1;
- newGreater.utf16charOffset = tok.utf16charOffset + 1;
- _tokens->insert(_tokens->begin() + tokenIndex + 1, newGreater);
- TokenLineColumn::const_iterator it = _expandedLineColumn.find(tok.bytesBegin());
- if (it != _expandedLineColumn.end()) {
- const std::pair<unsigned, unsigned> newPosition(it->second.first, it->second.second + 1);
- _expandedLineColumn.insert(std::make_pair(newGreater.bytesBegin(), newPosition));
- }
- return true;
- }
- void TranslationUnit::releaseTokensAndComments()
- {
- delete _tokens;
- _tokens = 0;
- delete _comments;
- _comments = 0;
- }
- void TranslationUnit::showErrorLine(unsigned index, unsigned column, FILE *out)
- {
- unsigned lineOffset = _lineOffsets[findLineNumber(tokenAt(index).utf16charsBegin())];
- for (const char *cp = _firstSourceChar + lineOffset + 1; *cp && *cp != '\n'; ++cp) {
- fputc(*cp, out);
- }
- fputc('\n', out);
- const char *end = _firstSourceChar + lineOffset + 1 + column - 1;
- for (const char *cp = _firstSourceChar + lineOffset + 1; cp != end; ++cp) {
- if (*cp != '\t')
- fputc(' ', out);
- else
- fputc('\t', out);
- }
- fputc('^', out);
- fputc('\n', out);
- }
// Destroys the AST by deleting its memory pool (AST nodes are pool-allocated,
// so only the pointer needs clearing).
void TranslationUnit::resetAST()
{
    delete _pool;
    _pool = 0;
    _ast = 0;
}

// Frees everything owned by this unit: the AST/pool and the token streams.
void TranslationUnit::release()
{
    resetAST();
    releaseTokensAndComments();
}
|