ContinuationIndenter.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//
  15. #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
  16. #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
#include "Encoding.h"
#include "FormatToken.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
#include <vector>
  21. namespace clang {
  22. class SourceManager;
  23. namespace format {
  24. class AnnotatedLine;
  25. struct FormatToken;
  26. struct LineState;
  27. struct ParenState;
  28. class WhitespaceManager;
  29. class ContinuationIndenter {
  30. public:
  31. /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
  32. /// column \p FirstIndent.
  33. ContinuationIndenter(const FormatStyle &Style,
  34. const AdditionalKeywords &Keywords,
  35. SourceManager &SourceMgr, WhitespaceManager &Whitespaces,
  36. encoding::Encoding Encoding,
  37. bool BinPackInconclusiveFunctions);
  38. /// \brief Get the initial state, i.e. the state after placing \p Line's
  39. /// first token at \p FirstIndent.
  40. LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
  41. bool DryRun);
  42. // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
  43. // better home.
  44. /// \brief Returns \c true, if a line break after \p State is allowed.
  45. bool canBreak(const LineState &State);
  46. /// \brief Returns \c true, if a line break after \p State is mandatory.
  47. bool mustBreak(const LineState &State);
  48. /// \brief Appends the next token to \p State and updates information
  49. /// necessary for indentation.
  50. ///
  51. /// Puts the token on the current line if \p Newline is \c false and adds a
  52. /// line break and necessary indentation otherwise.
  53. ///
  54. /// If \p DryRun is \c false, also creates and stores the required
  55. /// \c Replacement.
  56. unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
  57. unsigned ExtraSpaces = 0);
  58. /// \brief Get the column limit for this line. This is the style's column
  59. /// limit, potentially reduced for preprocessor definitions.
  60. unsigned getColumnLimit(const LineState &State) const;
  61. private:
  62. /// \brief Mark the next token as consumed in \p State and modify its stacks
  63. /// accordingly.
  64. unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
  65. /// \brief Update 'State' according to the next token's fake left parentheses.
  66. void moveStatePastFakeLParens(LineState &State, bool Newline);
  67. /// \brief Update 'State' according to the next token's fake r_parens.
  68. void moveStatePastFakeRParens(LineState &State);
  69. /// \brief Update 'State' according to the next token being one of "(<{[".
  70. void moveStatePastScopeOpener(LineState &State, bool Newline);
  71. /// \brief Update 'State' according to the next token being one of ")>}]".
  72. void moveStatePastScopeCloser(LineState &State);
  73. /// \brief Update 'State' with the next token opening a nested block.
  74. void moveStateToNewBlock(LineState &State);
  75. /// \brief If the current token sticks out over the end of the line, break
  76. /// it if possible.
  77. ///
  78. /// \returns An extra penalty if a token was broken, otherwise 0.
  79. ///
  80. /// The returned penalty will cover the cost of the additional line breaks and
  81. /// column limit violation in all lines except for the last one. The penalty
  82. /// for the column limit violation in the last line (and in single line
  83. /// tokens) is handled in \c addNextStateToQueue.
  84. unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
  85. bool DryRun);
  86. /// \brief Appends the next token to \p State and updates information
  87. /// necessary for indentation.
  88. ///
  89. /// Puts the token on the current line.
  90. ///
  91. /// If \p DryRun is \c false, also creates and stores the required
  92. /// \c Replacement.
  93. void addTokenOnCurrentLine(LineState &State, bool DryRun,
  94. unsigned ExtraSpaces);
  95. /// \brief Appends the next token to \p State and updates information
  96. /// necessary for indentation.
  97. ///
  98. /// Adds a line break and necessary indentation.
  99. ///
  100. /// If \p DryRun is \c false, also creates and stores the required
  101. /// \c Replacement.
  102. unsigned addTokenOnNewLine(LineState &State, bool DryRun);
  103. /// \brief Calculate the new column for a line wrap before the next token.
  104. unsigned getNewLineColumn(const LineState &State);
  105. /// \brief Adds a multiline token to the \p State.
  106. ///
  107. /// \returns Extra penalty for the first line of the literal: last line is
  108. /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
  109. /// matter, as we don't change them.
  110. unsigned addMultilineToken(const FormatToken &Current, LineState &State);
  111. /// \brief Returns \c true if the next token starts a multiline string
  112. /// literal.
  113. ///
  114. /// This includes implicitly concatenated strings, strings that will be broken
  115. /// by clang-format and string literals with escaped newlines.
  116. bool nextIsMultilineString(const LineState &State);
  117. FormatStyle Style;
  118. const AdditionalKeywords &Keywords;
  119. SourceManager &SourceMgr;
  120. WhitespaceManager &Whitespaces;
  121. encoding::Encoding Encoding;
  122. bool BinPackInconclusiveFunctions;
  123. llvm::Regex CommentPragmasRegex;
  124. };
  125. struct ParenState {
  126. ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
  127. bool AvoidBinPacking, bool NoLineBreak)
  128. : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
  129. NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
  130. AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
  131. NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
  132. ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
  133. AlignColons(true), ObjCSelectorNameFound(false),
  134. HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
  135. /// \brief The position to which a specific parenthesis level needs to be
  136. /// indented.
  137. unsigned Indent;
  138. /// \brief The number of indentation levels of the block.
  139. unsigned IndentLevel;
  140. /// \brief The position of the last space on each level.
  141. ///
  142. /// Used e.g. to break like:
  143. /// functionCall(Parameter, otherCall(
  144. /// OtherParameter));
  145. unsigned LastSpace;
  146. /// \brief If a block relative to this parenthesis level gets wrapped, indent
  147. /// it this much.
  148. unsigned NestedBlockIndent;
  149. /// \brief The position the first "<<" operator encountered on each level.
  150. ///
  151. /// Used to align "<<" operators. 0 if no such operator has been encountered
  152. /// on a level.
  153. unsigned FirstLessLess = 0;
  154. /// \brief The column of a \c ? in a conditional expression;
  155. unsigned QuestionColumn = 0;
  156. /// \brief The position of the colon in an ObjC method declaration/call.
  157. unsigned ColonPos = 0;
  158. /// \brief The start of the most recent function in a builder-type call.
  159. unsigned StartOfFunctionCall = 0;
  160. /// \brief Contains the start of array subscript expressions, so that they
  161. /// can be aligned.
  162. unsigned StartOfArraySubscripts = 0;
  163. /// \brief If a nested name specifier was broken over multiple lines, this
  164. /// contains the start column of the second line. Otherwise 0.
  165. unsigned NestedNameSpecifierContinuation = 0;
  166. /// \brief If a call expression was broken over multiple lines, this
  167. /// contains the start column of the second line. Otherwise 0.
  168. unsigned CallContinuation = 0;
  169. /// \brief The column of the first variable name in a variable declaration.
  170. ///
  171. /// Used to align further variables if necessary.
  172. unsigned VariablePos = 0;
  173. /// \brief Whether a newline needs to be inserted before the block's closing
  174. /// brace.
  175. ///
  176. /// We only want to insert a newline before the closing brace if there also
  177. /// was a newline after the beginning left brace.
  178. bool BreakBeforeClosingBrace : 1;
  179. /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  180. /// lines, in this context.
  181. bool AvoidBinPacking : 1;
  182. /// \brief Break after the next comma (or all the commas in this context if
  183. /// \c AvoidBinPacking is \c true).
  184. bool BreakBeforeParameter : 1;
  185. /// \brief Line breaking in this context would break a formatting rule.
  186. bool NoLineBreak : 1;
  187. /// \brief True if the last binary operator on this level was wrapped to the
  188. /// next line.
  189. bool LastOperatorWrapped : 1;
  190. /// \brief \c true if this \c ParenState already contains a line-break.
  191. ///
  192. /// The first line break in a certain \c ParenState causes extra penalty so
  193. /// that clang-format prefers similar breaks, i.e. breaks in the same
  194. /// parenthesis.
  195. bool ContainsLineBreak : 1;
  196. /// \brief \c true if this \c ParenState contains multiple segments of a
  197. /// builder-type call on one line.
  198. bool ContainsUnwrappedBuilder : 1;
  199. /// \brief \c true if the colons of the curren ObjC method expression should
  200. /// be aligned.
  201. ///
  202. /// Not considered for memoization as it will always have the same value at
  203. /// the same token.
  204. bool AlignColons : 1;
  205. /// \brief \c true if at least one selector name was found in the current
  206. /// ObjC method expression.
  207. ///
  208. /// Not considered for memoization as it will always have the same value at
  209. /// the same token.
  210. bool ObjCSelectorNameFound : 1;
  211. /// \brief \c true if there are multiple nested blocks inside these parens.
  212. ///
  213. /// Not considered for memoization as it will always have the same value at
  214. /// the same token.
  215. bool HasMultipleNestedBlocks : 1;
  216. // \brief The start of a nested block (e.g. lambda introducer in C++ or
  217. // "function" in JavaScript) is not wrapped to a new line.
  218. bool NestedBlockInlined : 1;
  219. bool operator<(const ParenState &Other) const {
  220. if (Indent != Other.Indent)
  221. return Indent < Other.Indent;
  222. if (LastSpace != Other.LastSpace)
  223. return LastSpace < Other.LastSpace;
  224. if (NestedBlockIndent != Other.NestedBlockIndent)
  225. return NestedBlockIndent < Other.NestedBlockIndent;
  226. if (FirstLessLess != Other.FirstLessLess)
  227. return FirstLessLess < Other.FirstLessLess;
  228. if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
  229. return BreakBeforeClosingBrace;
  230. if (QuestionColumn != Other.QuestionColumn)
  231. return QuestionColumn < Other.QuestionColumn;
  232. if (AvoidBinPacking != Other.AvoidBinPacking)
  233. return AvoidBinPacking;
  234. if (BreakBeforeParameter != Other.BreakBeforeParameter)
  235. return BreakBeforeParameter;
  236. if (NoLineBreak != Other.NoLineBreak)
  237. return NoLineBreak;
  238. if (LastOperatorWrapped != Other.LastOperatorWrapped)
  239. return LastOperatorWrapped;
  240. if (ColonPos != Other.ColonPos)
  241. return ColonPos < Other.ColonPos;
  242. if (StartOfFunctionCall != Other.StartOfFunctionCall)
  243. return StartOfFunctionCall < Other.StartOfFunctionCall;
  244. if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
  245. return StartOfArraySubscripts < Other.StartOfArraySubscripts;
  246. if (CallContinuation != Other.CallContinuation)
  247. return CallContinuation < Other.CallContinuation;
  248. if (VariablePos != Other.VariablePos)
  249. return VariablePos < Other.VariablePos;
  250. if (ContainsLineBreak != Other.ContainsLineBreak)
  251. return ContainsLineBreak;
  252. if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
  253. return ContainsUnwrappedBuilder;
  254. if (NestedBlockInlined != Other.NestedBlockInlined)
  255. return NestedBlockInlined;
  256. return false;
  257. }
  258. };
  259. /// \brief The current state when indenting a unwrapped line.
  260. ///
  261. /// As the indenting tries different combinations this is copied by value.
  262. struct LineState {
  263. /// \brief The number of used columns in the current line.
  264. unsigned Column;
  265. /// \brief The token that needs to be next formatted.
  266. FormatToken *NextToken;
  267. /// \brief \c true if this line contains a continued for-loop section.
  268. bool LineContainsContinuedForLoopSection;
  269. /// \brief The \c NestingLevel at the start of this line.
  270. unsigned StartOfLineLevel;
  271. /// \brief The lowest \c NestingLevel on the current line.
  272. unsigned LowestLevelOnLine;
  273. /// \brief The start column of the string literal, if we're in a string
  274. /// literal sequence, 0 otherwise.
  275. unsigned StartOfStringLiteral;
  276. /// \brief A stack keeping track of properties applying to parenthesis
  277. /// levels.
  278. std::vector<ParenState> Stack;
  279. /// \brief Ignore the stack of \c ParenStates for state comparison.
  280. ///
  281. /// In long and deeply nested unwrapped lines, the current algorithm can
  282. /// be insufficient for finding the best formatting with a reasonable amount
  283. /// of time and memory. Setting this flag will effectively lead to the
  284. /// algorithm not analyzing some combinations. However, these combinations
  285. /// rarely contain the optimal solution: In short, accepting a higher
  286. /// penalty early would need to lead to different values in the \c
  287. /// ParenState stack (in an otherwise identical state) and these different
  288. /// values would need to lead to a significant amount of avoided penalty
  289. /// later.
  290. ///
  291. /// FIXME: Come up with a better algorithm instead.
  292. bool IgnoreStackForComparison;
  293. /// \brief The indent of the first token.
  294. unsigned FirstIndent;
  295. /// \brief The line that is being formatted.
  296. ///
  297. /// Does not need to be considered for memoization because it doesn't change.
  298. const AnnotatedLine *Line;
  299. /// \brief Comparison operator to be able to used \c LineState in \c map.
  300. bool operator<(const LineState &Other) const {
  301. if (NextToken != Other.NextToken)
  302. return NextToken < Other.NextToken;
  303. if (Column != Other.Column)
  304. return Column < Other.Column;
  305. if (LineContainsContinuedForLoopSection !=
  306. Other.LineContainsContinuedForLoopSection)
  307. return LineContainsContinuedForLoopSection;
  308. if (StartOfLineLevel != Other.StartOfLineLevel)
  309. return StartOfLineLevel < Other.StartOfLineLevel;
  310. if (LowestLevelOnLine != Other.LowestLevelOnLine)
  311. return LowestLevelOnLine < Other.LowestLevelOnLine;
  312. if (StartOfStringLiteral != Other.StartOfStringLiteral)
  313. return StartOfStringLiteral < Other.StartOfStringLiteral;
  314. if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
  315. return false;
  316. return Stack < Other.Stack;
  317. }
  318. };
  319. } // end namespace format
  320. } // end namespace clang
  321. #endif