TranslationUnit.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. // Copyright (c) 2008 Roberto Raggi <[email protected]>
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to deal
  5. // in the Software without restriction, including without limitation the rights
  6. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. // copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. // THE SOFTWARE.
  20. #include "TranslationUnit.h"
  21. #include "Control.h"
  22. #include "Parser.h"
  23. #include "Lexer.h"
  24. #include "MemoryPool.h"
  25. #include "AST.h"
  26. #include "Literals.h"
  27. #include "DiagnosticClient.h"
  28. #include <stack>
  29. #include <vector>
  30. #include <cstdarg>
  31. #include <algorithm>
  32. #include <utility>
  33. #if defined(_MSC_VER) && (_MSC_VER < 1800)
  34. # define va_copy(dst, src) ((dst) = (src))
  35. #elif defined(__INTEL_COMPILER) && !defined(va_copy)
  36. # define va_copy __va_copy
  37. #endif
  38. using namespace CPlusPlus;
// Definition of the static default-constructed token. tokenize() pushes a copy
// of it as the first entry of the token vector so that index 0 is never a real
// token.
const Token TranslationUnit::nullToken;
  40. TranslationUnit::TranslationUnit(Control *control, const StringLiteral *fileId)
  41. : _control(control),
  42. _fileId(fileId),
  43. _firstSourceChar(0),
  44. _lastSourceChar(0),
  45. _pool(0),
  46. _ast(0),
  47. _flags(0)
  48. {
  49. _tokens = new std::vector<Token>();
  50. _comments = new std::vector<Token>();
  51. _previousTranslationUnit = control->switchTranslationUnit(this);
  52. _pool = new MemoryPool();
  53. }
  54. TranslationUnit::~TranslationUnit()
  55. {
  56. (void) _control->switchTranslationUnit(_previousTranslationUnit);
  57. release();
  58. }
  59. Control *TranslationUnit::control() const
  60. { return _control; }
  61. const StringLiteral *TranslationUnit::fileId() const
  62. { return _fileId; }
  63. const char *TranslationUnit::fileName() const
  64. { return _fileId->chars(); }
  65. unsigned TranslationUnit::fileNameLength() const
  66. { return _fileId->size(); }
  67. const char *TranslationUnit::firstSourceChar() const
  68. { return _firstSourceChar; }
  69. const char *TranslationUnit::lastSourceChar() const
  70. { return _lastSourceChar; }
  71. unsigned TranslationUnit::sourceLength() const
  72. { return _lastSourceChar - _firstSourceChar; }
  73. void TranslationUnit::setSource(const char *source, unsigned size)
  74. {
  75. _firstSourceChar = source;
  76. _lastSourceChar = source + size;
  77. }
  78. const char *TranslationUnit::spell(unsigned index) const
  79. {
  80. if (! index)
  81. return 0;
  82. return tokenAt(index).spell();
  83. }
  84. unsigned TranslationUnit::commentCount() const
  85. { return unsigned(_comments->size()); }
  86. const Token &TranslationUnit::commentAt(unsigned index) const
  87. { return _comments->at(index); }
  88. const Identifier *TranslationUnit::identifier(unsigned index) const
  89. { return tokenAt(index).identifier; }
  90. const Literal *TranslationUnit::literal(unsigned index) const
  91. { return tokenAt(index).literal; }
  92. const StringLiteral *TranslationUnit::stringLiteral(unsigned index) const
  93. { return tokenAt(index).string; }
  94. const NumericLiteral *TranslationUnit::numericLiteral(unsigned index) const
  95. { return tokenAt(index).number; }
  96. unsigned TranslationUnit::matchingBrace(unsigned index) const
  97. { return tokenAt(index).close_brace; }
  98. MemoryPool *TranslationUnit::memoryPool() const
  99. { return _pool; }
  100. AST *TranslationUnit::ast() const
  101. { return _ast; }
  102. bool TranslationUnit::isTokenized() const
  103. { return f._tokenized; }
  104. bool TranslationUnit::isParsed() const
  105. { return f._parsed; }
  106. void TranslationUnit::tokenize()
  107. {
  108. if (isTokenized())
  109. return;
  110. f._tokenized = true;
  111. Lexer lex(this);
  112. lex.setLanguageFeatures(_languageFeatures);
  113. lex.setScanCommentTokens(true);
  114. std::stack<unsigned> braces;
  115. _tokens->push_back(nullToken); // the first token needs to be invalid!
  116. pushLineOffset(0);
  117. pushPreprocessorLine(0, 1, fileId());
  118. const Identifier *lineId = control()->identifier("line");
  119. const Identifier *expansionId = control()->identifier("expansion");
  120. const Identifier *beginId = control()->identifier("begin");
  121. const Identifier *endId = control()->identifier("end");
  122. // We need to track information about the expanded tokens. A vector with an addition
  123. // explicit index control is used instead of queue mainly for performance reasons.
  124. std::vector<std::pair<unsigned, unsigned> > lineColumn;
  125. unsigned lineColumnIdx = 0;
  126. Token tk;
  127. do {
  128. lex(&tk);
  129. recognize:
  130. if (tk.is(T_POUND) && tk.newline()) {
  131. const unsigned utf16CharOffset = tk.utf16charOffset;
  132. lex(&tk);
  133. if (! tk.newline() && tk.is(T_IDENTIFIER) && tk.identifier == expansionId) {
  134. // It's an expansion mark.
  135. lex(&tk);
  136. if (!tk.newline() && tk.is(T_IDENTIFIER)) {
  137. if (tk.identifier == beginId) {
  138. // Start of a macro expansion section.
  139. lex(&tk);
  140. // Gather where the expansion happens and its length.
  141. //unsigned macroOffset = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
  142. lex(&tk);
  143. lex(&tk); // Skip the separating comma
  144. //unsigned macroLength = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
  145. lex(&tk);
  146. // NOTE: We are currently not using the macro offset and length. They
  147. // are kept here for now because of future use.
  148. //Q_UNUSED(macroOffset)
  149. //Q_UNUSED(macroLength)
  150. // Now we need to gather the real line and columns from the upcoming
  151. // tokens. But notice this is only relevant for tokens which are expanded
  152. // but not generated.
  153. while (tk.isNot(T_EOF_SYMBOL) && !tk.newline()) {
  154. // When we get a ~ it means there's a number of generated tokens
  155. // following. Otherwise, we have actual data.
  156. if (tk.is(T_TILDE)) {
  157. lex(&tk);
  158. // Get the total number of generated tokens and specify "null"
  159. // information for them.
  160. unsigned totalGenerated =
  161. static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
  162. const std::size_t previousSize = lineColumn.size();
  163. lineColumn.resize(previousSize + totalGenerated);
  164. std::fill(lineColumn.begin() + previousSize,
  165. lineColumn.end(),
  166. std::make_pair(0, 0));
  167. lex(&tk);
  168. } else if (tk.is(T_NUMERIC_LITERAL)) {
  169. unsigned line = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
  170. lex(&tk);
  171. lex(&tk); // Skip the separating colon
  172. unsigned column = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
  173. // Store line and column for this non-generated token.
  174. lineColumn.push_back(std::make_pair(line, column));
  175. lex(&tk);
  176. }
  177. }
  178. } else if (tk.identifier == endId) {
  179. // End of a macro expansion.
  180. lineColumn.clear();
  181. lineColumnIdx = 0;
  182. lex(&tk);
  183. }
  184. }
  185. } else {
  186. if (! tk.newline() && tk.is(T_IDENTIFIER) && tk.identifier == lineId)
  187. lex(&tk);
  188. if (! tk.newline() && tk.is(T_NUMERIC_LITERAL)) {
  189. unsigned line = (unsigned) strtoul(tk.spell(), 0, 0);
  190. lex(&tk);
  191. if (! tk.newline() && tk.is(T_STRING_LITERAL)) {
  192. const StringLiteral *fileName =
  193. control()->stringLiteral(tk.string->chars(), tk.string->size());
  194. pushPreprocessorLine(utf16CharOffset, line, fileName);
  195. lex(&tk);
  196. }
  197. }
  198. while (tk.isNot(T_EOF_SYMBOL) && ! tk.newline())
  199. lex(&tk);
  200. }
  201. goto recognize;
  202. } else if (tk.kind() == T_LBRACE) {
  203. braces.push(unsigned(_tokens->size()));
  204. } else if (tk.kind() == T_RBRACE && ! braces.empty()) {
  205. const unsigned open_brace_index = braces.top();
  206. braces.pop();
  207. if (open_brace_index < tokenCount())
  208. (*_tokens)[open_brace_index].close_brace = unsigned(_tokens->size());
  209. } else if (tk.isComment()) {
  210. _comments->push_back(tk);
  211. continue; // comments are not in the regular token stream
  212. }
  213. bool currentExpanded = false;
  214. bool currentGenerated = false;
  215. if (!lineColumn.empty() && lineColumnIdx < lineColumn.size()) {
  216. currentExpanded = true;
  217. const std::pair<unsigned, unsigned> &p = lineColumn[lineColumnIdx];
  218. if (p.first)
  219. _expandedLineColumn.insert(std::make_pair(tk.utf16charsBegin(), p));
  220. else
  221. currentGenerated = true;
  222. ++lineColumnIdx;
  223. }
  224. tk.f.expanded = currentExpanded;
  225. tk.f.generated = currentGenerated;
  226. _tokens->push_back(tk);
  227. } while (tk.kind());
  228. for (; ! braces.empty(); braces.pop()) {
  229. unsigned open_brace_index = braces.top();
  230. (*_tokens)[open_brace_index].close_brace = unsigned(_tokens->size());
  231. }
  232. }
  233. bool TranslationUnit::skipFunctionBody() const
  234. { return f._skipFunctionBody; }
  235. void TranslationUnit::setSkipFunctionBody(bool skipFunctionBody)
  236. { f._skipFunctionBody = skipFunctionBody; }
  237. bool TranslationUnit::parse(ParseMode mode)
  238. {
  239. if (isParsed())
  240. return false;
  241. if (! isTokenized())
  242. tokenize();
  243. f._parsed = true;
  244. Parser parser(this);
  245. bool parsed = false;
  246. switch (mode) {
  247. case ParseTranlationUnit: {
  248. TranslationUnitAST *node = 0;
  249. parsed = parser.parseTranslationUnit(node);
  250. _ast = node;
  251. } break;
  252. case ParseDeclaration: {
  253. DeclarationAST *node = 0;
  254. parsed = parser.parseDeclaration(node);
  255. _ast = node;
  256. } break;
  257. case ParseExpression: {
  258. ExpressionAST *node = 0;
  259. parsed = parser.parseExpression(node);
  260. _ast = node;
  261. } break;
  262. case ParseDeclarator: {
  263. DeclaratorAST *node = 0;
  264. parsed = parser.parseDeclarator(node, /*decl_specifier_list =*/ 0);
  265. _ast = node;
  266. } break;
  267. case ParseStatement: {
  268. StatementAST *node = 0;
  269. parsed = parser.parseStatement(node);
  270. _ast = node;
  271. } break;
  272. default:
  273. break;
  274. } // switch
  275. return parsed;
  276. }
  277. void TranslationUnit::pushLineOffset(unsigned offset)
  278. { _lineOffsets.push_back(offset); }
  279. void TranslationUnit::pushPreprocessorLine(unsigned utf16charOffset,
  280. unsigned line,
  281. const StringLiteral *fileName)
  282. { _ppLines.push_back(PPLine(utf16charOffset, line, fileName)); }
  283. unsigned TranslationUnit::findLineNumber(unsigned utf16charOffset) const
  284. {
  285. std::vector<unsigned>::const_iterator it =
  286. std::lower_bound(_lineOffsets.begin(), _lineOffsets.end(), utf16charOffset);
  287. if (it != _lineOffsets.begin())
  288. --it;
  289. return it - _lineOffsets.begin();
  290. }
  291. TranslationUnit::PPLine TranslationUnit::findPreprocessorLine(unsigned utf16charOffset) const
  292. {
  293. std::vector<PPLine>::const_iterator it =
  294. std::lower_bound(_ppLines.begin(), _ppLines.end(), PPLine(utf16charOffset));
  295. if (it != _ppLines.begin())
  296. --it;
  297. return *it;
  298. }
  299. unsigned TranslationUnit::findColumnNumber(unsigned utf16CharOffset, unsigned lineNumber) const
  300. {
  301. if (! utf16CharOffset)
  302. return 0;
  303. return utf16CharOffset - _lineOffsets[lineNumber];
  304. }
  305. void TranslationUnit::getTokenPosition(unsigned index,
  306. unsigned *line,
  307. unsigned *column,
  308. const StringLiteral **fileName) const
  309. { return getPosition(tokenAt(index).utf16charsBegin(), line, column, fileName); }
  310. void TranslationUnit::getTokenStartPosition(unsigned index, unsigned *line,
  311. unsigned *column,
  312. const StringLiteral **fileName) const
  313. { return getPosition(tokenAt(index).utf16charsBegin(), line, column, fileName); }
  314. void TranslationUnit::getTokenEndPosition(unsigned index, unsigned *line,
  315. unsigned *column,
  316. const StringLiteral **fileName) const
  317. { return getPosition(tokenAt(index).utf16charsEnd(), line, column, fileName); }
  318. void TranslationUnit::getPosition(unsigned utf16charOffset,
  319. unsigned *line,
  320. unsigned *column,
  321. const StringLiteral **fileName) const
  322. {
  323. unsigned lineNumber = 0;
  324. unsigned columnNumber = 0;
  325. const StringLiteral *file = 0;
  326. // If this token is expanded we already have the information directly from the expansion
  327. // section header. Otherwise, we need to calculate it.
  328. TokenLineColumn::const_iterator it = _expandedLineColumn.find(utf16charOffset);
  329. if (it != _expandedLineColumn.end()) {
  330. lineNumber = it->second.first;
  331. columnNumber = it->second.second + 1;
  332. file = _fileId;
  333. } else {
  334. // Identify line within the entire translation unit.
  335. lineNumber = findLineNumber(utf16charOffset);
  336. // Identify column.
  337. columnNumber = findColumnNumber(utf16charOffset, lineNumber);
  338. // Adjust the line in regards to the preprocessing markers.
  339. const PPLine ppLine = findPreprocessorLine(utf16charOffset);
  340. lineNumber -= findLineNumber(ppLine.utf16charOffset) + 1;
  341. lineNumber += ppLine.line;
  342. file = ppLine.fileName;
  343. }
  344. if (line)
  345. *line = lineNumber;
  346. if (column)
  347. *column = columnNumber;
  348. if (fileName)
  349. *fileName = file;
  350. }
  351. void TranslationUnit::message(DiagnosticClient::Level level, unsigned index, const char *format, va_list args)
  352. {
  353. if (f._blockErrors)
  354. return;
  355. index = std::min(index, tokenCount() - 1);
  356. unsigned line = 0, column = 0;
  357. const StringLiteral *fileName = 0;
  358. getTokenPosition(index, &line, &column, &fileName);
  359. if (DiagnosticClient *client = control()->diagnosticClient()) {
  360. client->report(level, fileName, line, column, format, args);
  361. } else {
  362. fprintf(stderr, "%s:%u: ", fileName->chars(), line);
  363. const char *l = "error";
  364. if (level == DiagnosticClient::Warning)
  365. l = "warning";
  366. else if (level == DiagnosticClient::Fatal)
  367. l = "fatal";
  368. fprintf(stderr, "%s: ", l);
  369. vfprintf(stderr, format, args);
  370. fputc('\n', stderr);
  371. showErrorLine(index, column, stderr);
  372. }
  373. if (level == DiagnosticClient::Fatal)
  374. exit(EXIT_FAILURE);
  375. }
  376. void TranslationUnit::warning(unsigned index, const char *format, ...)
  377. {
  378. if (f._blockErrors)
  379. return;
  380. va_list args, ap;
  381. va_start(args, format);
  382. va_copy(ap, args);
  383. message(DiagnosticClient::Warning, index, format, args);
  384. va_end(ap);
  385. va_end(args);
  386. }
  387. void TranslationUnit::error(unsigned index, const char *format, ...)
  388. {
  389. if (f._blockErrors)
  390. return;
  391. va_list args, ap;
  392. va_start(args, format);
  393. va_copy(ap, args);
  394. message(DiagnosticClient::Error, index, format, args);
  395. va_end(ap);
  396. va_end(args);
  397. }
  398. void TranslationUnit::fatal(unsigned index, const char *format, ...)
  399. {
  400. if (f._blockErrors)
  401. return;
  402. va_list args, ap;
  403. va_start(args, format);
  404. va_copy(ap, args);
  405. message(DiagnosticClient::Fatal, index, format, args);
  406. va_end(ap);
  407. va_end(args);
  408. }
  409. unsigned TranslationUnit::findPreviousLineOffset(unsigned tokenIndex) const
  410. {
  411. unsigned lineOffset = _lineOffsets[findLineNumber(tokenAt(tokenIndex).utf16charsBegin())];
  412. return lineOffset;
  413. }
  414. bool TranslationUnit::maybeSplitGreaterGreaterToken(unsigned tokenIndex)
  415. {
  416. if (tokenIndex >= tokenCount())
  417. return false;
  418. Token &tok = (*_tokens)[tokenIndex];
  419. if (tok.kind() != T_GREATER_GREATER)
  420. return false;
  421. tok.f.kind = T_GREATER;
  422. tok.f.bytes = 1;
  423. tok.f.utf16chars = 1;
  424. Token newGreater;
  425. newGreater.f.kind = T_GREATER;
  426. newGreater.f.expanded = tok.expanded();
  427. newGreater.f.generated = tok.generated();
  428. newGreater.f.bytes = 1;
  429. newGreater.f.utf16chars = 1;
  430. newGreater.byteOffset = tok.byteOffset + 1;
  431. newGreater.utf16charOffset = tok.utf16charOffset + 1;
  432. _tokens->insert(_tokens->begin() + tokenIndex + 1, newGreater);
  433. TokenLineColumn::const_iterator it = _expandedLineColumn.find(tok.bytesBegin());
  434. if (it != _expandedLineColumn.end()) {
  435. const std::pair<unsigned, unsigned> newPosition(it->second.first, it->second.second + 1);
  436. _expandedLineColumn.insert(std::make_pair(newGreater.bytesBegin(), newPosition));
  437. }
  438. return true;
  439. }
  440. void TranslationUnit::releaseTokensAndComments()
  441. {
  442. delete _tokens;
  443. _tokens = 0;
  444. delete _comments;
  445. _comments = 0;
  446. }
  447. void TranslationUnit::showErrorLine(unsigned index, unsigned column, FILE *out)
  448. {
  449. unsigned lineOffset = _lineOffsets[findLineNumber(tokenAt(index).utf16charsBegin())];
  450. for (const char *cp = _firstSourceChar + lineOffset + 1; *cp && *cp != '\n'; ++cp) {
  451. fputc(*cp, out);
  452. }
  453. fputc('\n', out);
  454. const char *end = _firstSourceChar + lineOffset + 1 + column - 1;
  455. for (const char *cp = _firstSourceChar + lineOffset + 1; cp != end; ++cp) {
  456. if (*cp != '\t')
  457. fputc(' ', out);
  458. else
  459. fputc('\t', out);
  460. }
  461. fputc('^', out);
  462. fputc('\n', out);
  463. }
  464. void TranslationUnit::resetAST()
  465. {
  466. delete _pool;
  467. _pool = 0;
  468. _ast = 0;
  469. }
  470. void TranslationUnit::release()
  471. {
  472. resetAST();
  473. releaseTokensAndComments();
  474. }