Twine.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. //===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #ifndef LLVM_ADT_TWINE_H
  10. #define LLVM_ADT_TWINE_H
  11. #include "llvm/ADT/SmallVector.h"
  12. #include "llvm/ADT/StringRef.h"
  13. #include "llvm/Support/DataTypes.h"
  14. #include "llvm/Support/ErrorHandling.h"
  15. #include <cassert>
  16. #include <string>
  17. namespace llvm {
  18. class raw_ostream;
  19. /// Twine - A lightweight data structure for efficiently representing the
  20. /// concatenation of temporary values as strings.
  21. ///
  22. /// A Twine is a kind of rope, it represents a concatenated string using a
  23. /// binary-tree, where the string is the preorder of the nodes. Since the
  24. /// Twine can be efficiently rendered into a buffer when its result is used,
  25. /// it avoids the cost of generating temporary values for intermediate string
  26. /// results -- particularly in cases when the Twine result is never
  27. /// required. By explicitly tracking the type of leaf nodes, we can also avoid
  28. /// the creation of temporary strings for conversions operations (such as
  29. /// appending an integer to a string).
  30. ///
  31. /// A Twine is not intended for use directly and should not be stored, its
  32. /// implementation relies on the ability to store pointers to temporary stack
  33. /// objects which may be deallocated at the end of a statement. Twines should
  34. /// only be used accepted as const references in arguments, when an API wishes
  35. /// to accept possibly-concatenated strings.
  36. ///
  37. /// Twines support a special 'null' value, which always concatenates to form
  38. /// itself, and renders as an empty string. This can be returned from APIs to
  39. /// effectively nullify any concatenations performed on the result.
  40. ///
  41. /// \b Implementation
  42. ///
  43. /// Given the nature of a Twine, it is not possible for the Twine's
  44. /// concatenation method to construct interior nodes; the result must be
  45. /// represented inside the returned value. For this reason a Twine object
  46. /// actually holds two values, the left- and right-hand sides of a
  47. /// concatenation. We also have nullary Twine objects, which are effectively
  48. /// sentinel values that represent empty strings.
  49. ///
  50. /// Thus, a Twine can effectively have zero, one, or two children. The \see
  51. /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
  52. /// testing the number of children.
  53. ///
  54. /// We maintain a number of invariants on Twine objects (FIXME: Why):
  55. /// - Nullary twines are always represented with their Kind on the left-hand
  56. /// side, and the Empty kind on the right-hand side.
  57. /// - Unary twines are always represented with the value on the left-hand
  58. /// side, and the Empty kind on the right-hand side.
  59. /// - If a Twine has another Twine as a child, that child should always be
  60. /// binary (otherwise it could have been folded into the parent).
  61. ///
  62. /// These invariants are check by \see isValid().
  63. ///
  64. /// \b Efficiency Considerations
  65. ///
  66. /// The Twine is designed to yield efficient and small code for common
  67. /// situations. For this reason, the concat() method is inlined so that
  68. /// concatenations of leaf nodes can be optimized into stores directly into a
  69. /// single stack allocated object.
  70. ///
  71. /// In practice, not all compilers can be trusted to optimize concat() fully,
  72. /// so we provide two additional methods (and accompanying operator+
  73. /// overloads) to guarantee that particularly important cases (cstring plus
  74. /// StringRef) codegen as desired.
  75. class Twine {
  76. /// NodeKind - Represent the type of an argument.
  77. enum NodeKind : unsigned char {
  78. /// An empty string; the result of concatenating anything with it is also
  79. /// empty.
  80. NullKind,
  81. /// The empty string.
  82. EmptyKind,
  83. /// A pointer to a Twine instance.
  84. TwineKind,
  85. /// A pointer to a C string instance.
  86. CStringKind,
  87. /// A pointer to an std::string instance.
  88. StdStringKind,
  89. /// A pointer to a StringRef instance.
  90. StringRefKind,
  91. /// A pointer to a SmallString instance.
  92. SmallStringKind,
  93. /// A char value reinterpreted as a pointer, to render as a character.
  94. CharKind,
  95. /// An unsigned int value reinterpreted as a pointer, to render as an
  96. /// unsigned decimal integer.
  97. DecUIKind,
  98. /// An int value reinterpreted as a pointer, to render as a signed
  99. /// decimal integer.
  100. DecIKind,
  101. /// A pointer to an unsigned long value, to render as an unsigned decimal
  102. /// integer.
  103. DecULKind,
  104. /// A pointer to a long value, to render as a signed decimal integer.
  105. DecLKind,
  106. /// A pointer to an unsigned long long value, to render as an unsigned
  107. /// decimal integer.
  108. DecULLKind,
  109. /// A pointer to a long long value, to render as a signed decimal integer.
  110. DecLLKind,
  111. /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
  112. /// integer.
  113. UHexKind
  114. };
  115. union Child
  116. {
  117. const Twine *twine;
  118. const char *cString;
  119. const std::string *stdString;
  120. const StringRef *stringRef;
  121. const SmallVectorImpl<char> *smallString;
  122. char character;
  123. unsigned int decUI;
  124. int decI;
  125. const unsigned long *decUL;
  126. const long *decL;
  127. const unsigned long long *decULL;
  128. const long long *decLL;
  129. const uint64_t *uHex;
  130. };
  131. private:
  132. /// LHS - The prefix in the concatenation, which may be uninitialized for
  133. /// Null or Empty kinds.
  134. Child LHS;
  135. /// RHS - The suffix in the concatenation, which may be uninitialized for
  136. /// Null or Empty kinds.
  137. Child RHS;
  138. /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
  139. NodeKind LHSKind;
  140. /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
  141. NodeKind RHSKind;
  142. private:
  143. /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
  144. explicit Twine(NodeKind Kind)
  145. : LHSKind(Kind), RHSKind(EmptyKind) {
  146. assert(isNullary() && "Invalid kind!");
  147. }
  148. /// Construct a binary twine.
  149. explicit Twine(const Twine &LHS, const Twine &RHS)
  150. : LHSKind(TwineKind), RHSKind(TwineKind) {
  151. this->LHS.twine = &LHS;
  152. this->RHS.twine = &RHS;
  153. assert(isValid() && "Invalid twine!");
  154. }
  155. /// Construct a twine from explicit values.
  156. explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
  157. : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
  158. assert(isValid() && "Invalid twine!");
  159. }
  160. /// Since the intended use of twines is as temporary objects, assignments
  161. /// when concatenating might cause undefined behavior or stack corruptions
  162. Twine &operator=(const Twine &Other) = delete;
  163. /// Check for the null twine.
  164. bool isNull() const {
  165. return getLHSKind() == NullKind;
  166. }
  167. /// Check for the empty twine.
  168. bool isEmpty() const {
  169. return getLHSKind() == EmptyKind;
  170. }
  171. /// Check if this is a nullary twine (null or empty).
  172. bool isNullary() const {
  173. return isNull() || isEmpty();
  174. }
  175. /// Check if this is a unary twine.
  176. bool isUnary() const {
  177. return getRHSKind() == EmptyKind && !isNullary();
  178. }
  179. /// Check if this is a binary twine.
  180. bool isBinary() const {
  181. return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
  182. }
  183. /// Check if this is a valid twine (satisfying the invariants on
  184. /// order and number of arguments).
  185. bool isValid() const {
  186. // Nullary twines always have Empty on the RHS.
  187. if (isNullary() && getRHSKind() != EmptyKind)
  188. return false;
  189. // Null should never appear on the RHS.
  190. if (getRHSKind() == NullKind)
  191. return false;
  192. // The RHS cannot be non-empty if the LHS is empty.
  193. if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
  194. return false;
  195. // A twine child should always be binary.
  196. if (getLHSKind() == TwineKind &&
  197. !LHS.twine->isBinary())
  198. return false;
  199. if (getRHSKind() == TwineKind &&
  200. !RHS.twine->isBinary())
  201. return false;
  202. return true;
  203. }
  204. /// Get the NodeKind of the left-hand side.
  205. NodeKind getLHSKind() const { return LHSKind; }
  206. /// Get the NodeKind of the right-hand side.
  207. NodeKind getRHSKind() const { return RHSKind; }
  208. /// Print one child from a twine.
  209. void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
  210. /// Print the representation of one child from a twine.
  211. void printOneChildRepr(raw_ostream &OS, Child Ptr,
  212. NodeKind Kind) const;
  213. public:
  214. /// @name Constructors
  215. /// @{
  216. /// Construct from an empty string.
  217. /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) {
  218. assert(isValid() && "Invalid twine!");
  219. }
  220. Twine(const Twine &) = default;
  221. /// Construct from a C string.
  222. ///
  223. /// We take care here to optimize "" into the empty twine -- this will be
  224. /// optimized out for string constants. This allows Twine arguments have
  225. /// default "" values, without introducing unnecessary string constants.
  226. /*implicit*/ Twine(const char *Str)
  227. : RHSKind(EmptyKind) {
  228. if (Str[0] != '\0') {
  229. LHS.cString = Str;
  230. LHSKind = CStringKind;
  231. } else
  232. LHSKind = EmptyKind;
  233. assert(isValid() && "Invalid twine!");
  234. }
  235. /// Construct from an std::string.
  236. /*implicit*/ Twine(const std::string &Str)
  237. : LHSKind(StdStringKind), RHSKind(EmptyKind) {
  238. LHS.stdString = &Str;
  239. assert(isValid() && "Invalid twine!");
  240. }
  241. /// Construct from a StringRef.
  242. /*implicit*/ Twine(const StringRef &Str)
  243. : LHSKind(StringRefKind), RHSKind(EmptyKind) {
  244. LHS.stringRef = &Str;
  245. assert(isValid() && "Invalid twine!");
  246. }
  247. /// Construct from a SmallString.
  248. /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
  249. : LHSKind(SmallStringKind), RHSKind(EmptyKind) {
  250. LHS.smallString = &Str;
  251. assert(isValid() && "Invalid twine!");
  252. }
  253. /// Construct from a char.
  254. explicit Twine(char Val)
  255. : LHSKind(CharKind), RHSKind(EmptyKind) {
  256. LHS.character = Val;
  257. }
  258. /// Construct from a signed char.
  259. explicit Twine(signed char Val)
  260. : LHSKind(CharKind), RHSKind(EmptyKind) {
  261. LHS.character = static_cast<char>(Val);
  262. }
  263. /// Construct from an unsigned char.
  264. explicit Twine(unsigned char Val)
  265. : LHSKind(CharKind), RHSKind(EmptyKind) {
  266. LHS.character = static_cast<char>(Val);
  267. }
  268. /// Construct a twine to print \p Val as an unsigned decimal integer.
  269. explicit Twine(unsigned Val)
  270. : LHSKind(DecUIKind), RHSKind(EmptyKind) {
  271. LHS.decUI = Val;
  272. }
  273. /// Construct a twine to print \p Val as a signed decimal integer.
  274. explicit Twine(int Val)
  275. : LHSKind(DecIKind), RHSKind(EmptyKind) {
  276. LHS.decI = Val;
  277. }
  278. /// Construct a twine to print \p Val as an unsigned decimal integer.
  279. explicit Twine(const unsigned long &Val)
  280. : LHSKind(DecULKind), RHSKind(EmptyKind) {
  281. LHS.decUL = &Val;
  282. }
  283. /// Construct a twine to print \p Val as a signed decimal integer.
  284. explicit Twine(const long &Val)
  285. : LHSKind(DecLKind), RHSKind(EmptyKind) {
  286. LHS.decL = &Val;
  287. }
  288. /// Construct a twine to print \p Val as an unsigned decimal integer.
  289. explicit Twine(const unsigned long long &Val)
  290. : LHSKind(DecULLKind), RHSKind(EmptyKind) {
  291. LHS.decULL = &Val;
  292. }
  293. /// Construct a twine to print \p Val as a signed decimal integer.
  294. explicit Twine(const long long &Val)
  295. : LHSKind(DecLLKind), RHSKind(EmptyKind) {
  296. LHS.decLL = &Val;
  297. }
  298. // FIXME: Unfortunately, to make sure this is as efficient as possible we
  299. // need extra binary constructors from particular types. We can't rely on
  300. // the compiler to be smart enough to fold operator+()/concat() down to the
  301. // right thing. Yet.
  302. /// Construct as the concatenation of a C string and a StringRef.
  303. /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
  304. : LHSKind(CStringKind), RHSKind(StringRefKind) {
  305. this->LHS.cString = LHS;
  306. this->RHS.stringRef = &RHS;
  307. assert(isValid() && "Invalid twine!");
  308. }
  309. /// Construct as the concatenation of a StringRef and a C string.
  310. /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
  311. : LHSKind(StringRefKind), RHSKind(CStringKind) {
  312. this->LHS.stringRef = &LHS;
  313. this->RHS.cString = RHS;
  314. assert(isValid() && "Invalid twine!");
  315. }
  316. /// Create a 'null' string, which is an empty string that always
  317. /// concatenates to form another empty string.
  318. static Twine createNull() {
  319. return Twine(NullKind);
  320. }
  321. /// @}
  322. /// @name Numeric Conversions
  323. /// @{
  324. // Construct a twine to print \p Val as an unsigned hexadecimal integer.
  325. static Twine utohexstr(const uint64_t &Val) {
  326. Child LHS, RHS;
  327. LHS.uHex = &Val;
  328. RHS.twine = nullptr;
  329. return Twine(LHS, UHexKind, RHS, EmptyKind);
  330. }
  331. /// @}
  332. /// @name Predicate Operations
  333. /// @{
  334. /// Check if this twine is trivially empty; a false return value does not
  335. /// necessarily mean the twine is empty.
  336. bool isTriviallyEmpty() const {
  337. return isNullary();
  338. }
  339. /// Return true if this twine can be dynamically accessed as a single
  340. /// StringRef value with getSingleStringRef().
  341. bool isSingleStringRef() const {
  342. if (getRHSKind() != EmptyKind) return false;
  343. switch (getLHSKind()) {
  344. case EmptyKind:
  345. case CStringKind:
  346. case StdStringKind:
  347. case StringRefKind:
  348. case SmallStringKind:
  349. return true;
  350. default:
  351. return false;
  352. }
  353. }
  354. /// @}
  355. /// @name String Operations
  356. /// @{
  357. Twine concat(const Twine &Suffix) const;
  358. /// @}
  359. /// @name Output & Conversion.
  360. /// @{
  361. /// Return the twine contents as a std::string.
  362. std::string str() const;
  363. /// Append the concatenated string into the given SmallString or SmallVector.
  364. void toVector(SmallVectorImpl<char> &Out) const;
  365. /// This returns the twine as a single StringRef. This method is only valid
  366. /// if isSingleStringRef() is true.
  367. StringRef getSingleStringRef() const {
  368. assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
  369. switch (getLHSKind()) {
  370. default: llvm_unreachable("Out of sync with isSingleStringRef");
  371. case EmptyKind: return StringRef();
  372. case CStringKind: return StringRef(LHS.cString);
  373. case StdStringKind: return StringRef(*LHS.stdString);
  374. case StringRefKind: return *LHS.stringRef;
  375. case SmallStringKind:
  376. return StringRef(LHS.smallString->data(), LHS.smallString->size());
  377. }
  378. }
  379. /// This returns the twine as a single StringRef if it can be
  380. /// represented as such. Otherwise the twine is written into the given
  381. /// SmallVector and a StringRef to the SmallVector's data is returned.
  382. StringRef toStringRef(SmallVectorImpl<char> &Out) const {
  383. if (isSingleStringRef())
  384. return getSingleStringRef();
  385. toVector(Out);
  386. return StringRef(Out.data(), Out.size());
  387. }
  388. /// This returns the twine as a single null terminated StringRef if it
  389. /// can be represented as such. Otherwise the twine is written into the
  390. /// given SmallVector and a StringRef to the SmallVector's data is returned.
  391. ///
  392. /// The returned StringRef's size does not include the null terminator.
  393. StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
  394. /// Write the concatenated string represented by this twine to the
  395. /// stream \p OS.
  396. void print(raw_ostream &OS) const;
  397. /// Dump the concatenated string represented by this twine to stderr.
  398. void dump() const;
  399. /// Write the representation of this twine to the stream \p OS.
  400. void printRepr(raw_ostream &OS) const;
  401. /// Dump the representation of this twine to stderr.
  402. void dumpRepr() const;
  403. /// @}
  404. };
  405. /// @name Twine Inline Implementations
  406. /// @{
  407. inline Twine Twine::concat(const Twine &Suffix) const {
  408. // Concatenation with null is null.
  409. if (isNull() || Suffix.isNull())
  410. return Twine(NullKind);
  411. // Concatenation with empty yields the other side.
  412. if (isEmpty())
  413. return Suffix;
  414. if (Suffix.isEmpty())
  415. return *this;
  416. // Otherwise we need to create a new node, taking care to fold in unary
  417. // twines.
  418. Child NewLHS, NewRHS;
  419. NewLHS.twine = this;
  420. NewRHS.twine = &Suffix;
  421. NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
  422. if (isUnary()) {
  423. NewLHS = LHS;
  424. NewLHSKind = getLHSKind();
  425. }
  426. if (Suffix.isUnary()) {
  427. NewRHS = Suffix.LHS;
  428. NewRHSKind = Suffix.getLHSKind();
  429. }
  430. return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
  431. }
  432. inline Twine operator+(const Twine &LHS, const Twine &RHS) {
  433. return LHS.concat(RHS);
  434. }
  435. /// Additional overload to guarantee simplified codegen; this is equivalent to
  436. /// concat().
  437. inline Twine operator+(const char *LHS, const StringRef &RHS) {
  438. return Twine(LHS, RHS);
  439. }
  440. /// Additional overload to guarantee simplified codegen; this is equivalent to
  441. /// concat().
  442. inline Twine operator+(const StringRef &LHS, const char *RHS) {
  443. return Twine(LHS, RHS);
  444. }
  445. inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
  446. RHS.print(OS);
  447. return OS;
  448. }
  449. /// @}
  450. }
  451. #endif