UnwrappedLineParser.h 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. ///
  10. /// \file
  11. /// \brief This file contains the declaration of the UnwrappedLineParser,
  12. /// which turns a stream of tokens into UnwrappedLines.
  13. ///
  14. //===----------------------------------------------------------------------===//
  15. #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
  16. #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
  17. #include "FormatToken.h"
  18. #include "clang/Basic/IdentifierTable.h"
  19. #include "clang/Format/Format.h"
  20. #include <list>
  21. #include <stack>
  22. namespace clang {
  23. namespace format {
  24. struct UnwrappedLineNode;
  25. /// \brief An unwrapped line is a sequence of \c Token, that we would like to
  26. /// put on a single line if there was no column limit.
  27. ///
  28. /// This is used as a main interface between the \c UnwrappedLineParser and the
  29. /// \c UnwrappedLineFormatter. The key property is that changing the formatting
  30. /// within an unwrapped line does not affect any other unwrapped lines.
  31. struct UnwrappedLine {
  32. UnwrappedLine();
  33. // FIXME: Don't use std::list here.
  34. /// \brief The \c Tokens comprising this \c UnwrappedLine.
  35. std::list<UnwrappedLineNode> Tokens;
  36. /// \brief The indent level of the \c UnwrappedLine.
  37. unsigned Level;
  38. /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
  39. bool InPPDirective;
  40. bool MustBeDeclaration;
  41. };
  42. class UnwrappedLineConsumer {
  43. public:
  44. virtual ~UnwrappedLineConsumer() {}
  45. virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
  46. virtual void finishRun() = 0;
  47. };
  48. class FormatTokenSource;
  49. class UnwrappedLineParser {
  50. public:
  51. UnwrappedLineParser(const FormatStyle &Style,
  52. const AdditionalKeywords &Keywords,
  53. ArrayRef<FormatToken *> Tokens,
  54. UnwrappedLineConsumer &Callback);
  55. void parse();
  56. private:
  57. void reset();
  58. void parseFile();
  59. void parseLevel(bool HasOpeningBrace);
  60. void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
  61. bool MunchSemi = true);
  62. void parseChildBlock();
  63. void parsePPDirective();
  64. void parsePPDefine();
  65. void parsePPIf(bool IfDef);
  66. void parsePPElIf();
  67. void parsePPElse();
  68. void parsePPEndIf();
  69. void parsePPUnknown();
  70. void parseStructuralElement();
  71. bool tryToParseBracedList();
  72. bool parseBracedList(bool ContinueOnSemicolons = false);
  73. void parseParens();
  74. void parseSquare();
  75. void parseIfThenElse();
  76. void parseTryCatch();
  77. void parseForOrWhileLoop();
  78. void parseDoWhile();
  79. void parseLabel();
  80. void parseCaseLabel();
  81. void parseSwitch();
  82. void parseNamespace();
  83. void parseNew();
  84. void parseAccessSpecifier();
  85. void parseEnum();
  86. void parseJavaEnumBody();
  87. void parseRecord();
  88. void parseObjCProtocolList();
  89. void parseObjCUntilAtEnd();
  90. void parseObjCInterfaceOrImplementation();
  91. void parseObjCProtocol();
  92. void parseJavaScriptEs6ImportExport();
  93. bool tryToParseLambda();
  94. bool tryToParseLambdaIntroducer();
  95. void tryToParseJSFunction();
  96. void addUnwrappedLine();
  97. bool eof() const;
  98. void nextToken();
  99. void readToken();
  100. void flushComments(bool NewlineBeforeNext);
  101. void pushToken(FormatToken *Tok);
  102. void calculateBraceTypes(bool ExpectClassBody = false);
  103. // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
  104. // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
  105. // this branch either cannot be taken (for example '#if false'), or should
  106. // not be taken in this round.
  107. void conditionalCompilationCondition(bool Unreachable);
  108. void conditionalCompilationStart(bool Unreachable);
  109. void conditionalCompilationAlternative();
  110. void conditionalCompilationEnd();
  111. bool isOnNewLine(const FormatToken &FormatTok);
  112. // FIXME: We are constantly running into bugs where Line.Level is incorrectly
  113. // subtracted from beyond 0. Introduce a method to subtract from Line.Level
  114. // and use that everywhere in the Parser.
  115. std::unique_ptr<UnwrappedLine> Line;
  116. // Comments are sorted into unwrapped lines by whether they are in the same
  117. // line as the previous token, or not. If not, they belong to the next token.
  118. // Since the next token might already be in a new unwrapped line, we need to
  119. // store the comments belonging to that token.
  120. SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
  121. FormatToken *FormatTok;
  122. bool MustBreakBeforeNextToken;
  123. // The parsed lines. Only added to through \c CurrentLines.
  124. SmallVector<UnwrappedLine, 8> Lines;
  125. // Preprocessor directives are parsed out-of-order from other unwrapped lines.
  126. // Thus, we need to keep a list of preprocessor directives to be reported
  127. // after an unwarpped line that has been started was finished.
  128. SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
  129. // New unwrapped lines are added via CurrentLines.
  130. // Usually points to \c &Lines. While parsing a preprocessor directive when
  131. // there is an unfinished previous unwrapped line, will point to
  132. // \c &PreprocessorDirectives.
  133. SmallVectorImpl<UnwrappedLine> *CurrentLines;
  134. // We store for each line whether it must be a declaration depending on
  135. // whether we are in a compound statement or not.
  136. std::vector<bool> DeclarationScopeStack;
  137. const FormatStyle &Style;
  138. const AdditionalKeywords &Keywords;
  139. FormatTokenSource *Tokens;
  140. UnwrappedLineConsumer &Callback;
  141. // FIXME: This is a temporary measure until we have reworked the ownership
  142. // of the format tokens. The goal is to have the actual tokens created and
  143. // owned outside of and handed into the UnwrappedLineParser.
  144. ArrayRef<FormatToken *> AllTokens;
  145. // Represents preprocessor branch type, so we can find matching
  146. // #if/#else/#endif directives.
  147. enum PPBranchKind {
  148. PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
  149. PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
  150. };
  151. // Keeps a stack of currently active preprocessor branching directives.
  152. SmallVector<PPBranchKind, 16> PPStack;
  153. // The \c UnwrappedLineParser re-parses the code for each combination
  154. // of preprocessor branches that can be taken.
  155. // To that end, we take the same branch (#if, #else, or one of the #elif
  156. // branches) for each nesting level of preprocessor branches.
  157. // \c PPBranchLevel stores the current nesting level of preprocessor
  158. // branches during one pass over the code.
  159. int PPBranchLevel;
  160. // Contains the current branch (#if, #else or one of the #elif branches)
  161. // for each nesting level.
  162. SmallVector<int, 8> PPLevelBranchIndex;
  163. // Contains the maximum number of branches at each nesting level.
  164. SmallVector<int, 8> PPLevelBranchCount;
  165. // Contains the number of branches per nesting level we are currently
  166. // in while parsing a preprocessor branch sequence.
  167. // This is used to update PPLevelBranchCount at the end of a branch
  168. // sequence.
  169. std::stack<int> PPChainBranchIndex;
  170. friend class ScopedLineState;
  171. friend class CompoundStatementIndenter;
  172. };
  173. struct UnwrappedLineNode {
  174. UnwrappedLineNode() : Tok(nullptr) {}
  175. UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
  176. FormatToken *Tok;
  177. SmallVector<UnwrappedLine, 0> Children;
  178. };
  179. inline UnwrappedLine::UnwrappedLine()
  180. : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
  181. } // end namespace format
  182. } // end namespace clang
  183. #endif