StringRef.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. //===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #ifndef LLVM_ADT_STRINGREF_H
  10. #define LLVM_ADT_STRINGREF_H
  11. #include <algorithm>
  12. #include <cassert>
  13. #include <cstring>
  14. #include <limits>
  15. #include <string>
  16. #include <utility>
  17. namespace llvm {
  18. template <typename T>
  19. class SmallVectorImpl;
  20. class APInt;
  21. class hash_code;
  22. class StringRef;
  23. /// Helper functions for StringRef::getAsInteger.
  24. bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
  25. unsigned long long &Result);
  26. bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
  27. /// StringRef - Represent a constant reference to a string, i.e. a character
  28. /// array and a length, which need not be null terminated.
  29. ///
  30. /// This class does not own the string data, it is expected to be used in
  31. /// situations where the character data resides in some other buffer, whose
  32. /// lifetime extends past that of the StringRef. For this reason, it is not in
  33. /// general safe to store a StringRef.
  34. class StringRef {
  35. public:
  36. typedef const char *iterator;
  37. typedef const char *const_iterator;
  38. static const size_t npos = ~size_t(0);
  39. typedef size_t size_type;
  40. private:
  41. /// The start of the string, in an external buffer.
  42. const char *Data;
  43. /// The length of the string.
  44. size_t Length;
  45. // Workaround memcmp issue with null pointers (undefined behavior)
  46. // by providing a specialized version
  47. static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  48. if (Length == 0) { return 0; }
  49. return ::memcmp(Lhs,Rhs,Length);
  50. }
  51. public:
  52. /// @name Constructors
  53. /// @{
  54. /// Construct an empty string ref.
  55. /*implicit*/ StringRef() : Data(nullptr), Length(0) {}
  56. /// Construct a string ref from a cstring.
  57. /*implicit*/ StringRef(const char *Str)
  58. : Data(Str) {
  59. assert(Str && "StringRef cannot be built from a NULL argument");
  60. Length = ::strlen(Str); // invoking strlen(NULL) is undefined behavior
  61. }
  62. /// Construct a string ref from a pointer and length.
  63. /*implicit*/ StringRef(const char *data, size_t length)
  64. : Data(data), Length(length) {
  65. assert((data || length == 0) &&
  66. "StringRef cannot be built from a NULL argument with non-null length");
  67. }
  68. /// Construct a string ref from an std::string.
  69. /*implicit*/ StringRef(const std::string &Str)
  70. : Data(Str.data()), Length(Str.length()) {}
  71. /// @}
  72. /// @name Iterators
  73. /// @{
  74. iterator begin() const { return Data; }
  75. iterator end() const { return Data + Length; }
  76. const unsigned char *bytes_begin() const {
  77. return reinterpret_cast<const unsigned char *>(begin());
  78. }
  79. const unsigned char *bytes_end() const {
  80. return reinterpret_cast<const unsigned char *>(end());
  81. }
  82. /// @}
  83. /// @name String Operations
  84. /// @{
  85. /// data - Get a pointer to the start of the string (which may not be null
  86. /// terminated).
  87. const char *data() const { return Data; }
  88. /// empty - Check if the string is empty.
  89. bool empty() const { return Length == 0; }
  90. /// size - Get the string size.
  91. size_t size() const { return Length; }
  92. /// front - Get the first character in the string.
  93. char front() const {
  94. assert(!empty());
  95. return Data[0];
  96. }
  97. /// back - Get the last character in the string.
  98. char back() const {
  99. assert(!empty());
  100. return Data[Length-1];
  101. }
  102. // copy - Allocate copy in Allocator and return StringRef to it.
  103. template <typename Allocator> StringRef copy(Allocator &A) const {
  104. char *S = A.template Allocate<char>(Length);
  105. std::copy(begin(), end(), S);
  106. return StringRef(S, Length);
  107. }
  108. /// equals - Check for string equality, this is more efficient than
  109. /// compare() when the relative ordering of inequal strings isn't needed.
  110. bool equals(StringRef RHS) const {
  111. return (Length == RHS.Length &&
  112. compareMemory(Data, RHS.Data, RHS.Length) == 0);
  113. }
  114. /// equals_lower - Check for string equality, ignoring case.
  115. bool equals_lower(StringRef RHS) const {
  116. return Length == RHS.Length && compare_lower(RHS) == 0;
  117. }
  118. /// compare - Compare two strings; the result is -1, 0, or 1 if this string
  119. /// is lexicographically less than, equal to, or greater than the \p RHS.
  120. int compare(StringRef RHS) const {
  121. // Check the prefix for a mismatch.
  122. if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
  123. return Res < 0 ? -1 : 1;
  124. // Otherwise the prefixes match, so we only need to check the lengths.
  125. if (Length == RHS.Length)
  126. return 0;
  127. return Length < RHS.Length ? -1 : 1;
  128. }
  129. /// compare_lower - Compare two strings, ignoring case.
  130. int compare_lower(StringRef RHS) const;
  131. /// compare_numeric - Compare two strings, treating sequences of digits as
  132. /// numbers.
  133. int compare_numeric(StringRef RHS) const;
  134. /// \brief Determine the edit distance between this string and another
  135. /// string.
  136. ///
  137. /// \param Other the string to compare this string against.
  138. ///
  139. /// \param AllowReplacements whether to allow character
  140. /// replacements (change one character into another) as a single
  141. /// operation, rather than as two operations (an insertion and a
  142. /// removal).
  143. ///
  144. /// \param MaxEditDistance If non-zero, the maximum edit distance that
  145. /// this routine is allowed to compute. If the edit distance will exceed
  146. /// that maximum, returns \c MaxEditDistance+1.
  147. ///
  148. /// \returns the minimum number of character insertions, removals,
  149. /// or (if \p AllowReplacements is \c true) replacements needed to
  150. /// transform one of the given strings into the other. If zero,
  151. /// the strings are identical.
  152. unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
  153. unsigned MaxEditDistance = 0) const;
  154. /// str - Get the contents as an std::string.
  155. std::string str() const {
  156. if (!Data) return std::string();
  157. return std::string(Data, Length);
  158. }
  159. /// @}
  160. /// @name Operator Overloads
  161. /// @{
  162. char operator[](size_t Index) const {
  163. assert(Index < Length && "Invalid index!");
  164. return Data[Index];
  165. }
  166. /// @}
  167. /// @name Type Conversions
  168. /// @{
  169. operator std::string() const {
  170. return str();
  171. }
  172. /// @}
  173. /// @name String Predicates
  174. /// @{
  175. /// Check if this string starts with the given \p Prefix.
  176. bool startswith(StringRef Prefix) const {
  177. return Length >= Prefix.Length &&
  178. compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
  179. }
  180. /// Check if this string starts with the given \p Prefix, ignoring case.
  181. bool startswith_lower(StringRef Prefix) const;
  182. /// Check if this string ends with the given \p Suffix.
  183. bool endswith(StringRef Suffix) const {
  184. return Length >= Suffix.Length &&
  185. compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
  186. }
  187. /// Check if this string ends with the given \p Suffix, ignoring case.
  188. bool endswith_lower(StringRef Suffix) const;
  189. /// @}
  190. /// @name String Searching
  191. /// @{
  192. /// Search for the first character \p C in the string.
  193. ///
  194. /// \returns The index of the first occurrence of \p C, or npos if not
  195. /// found.
  196. size_t find(char C, size_t From = 0) const {
  197. size_t FindBegin = std::min(From, Length);
  198. if (FindBegin < Length) { // Avoid calling memchr with nullptr.
  199. // Just forward to memchr, which is faster than a hand-rolled loop.
  200. if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
  201. return static_cast<const char *>(P) - Data;
  202. }
  203. return npos;
  204. }
  205. /// Search for the first string \p Str in the string.
  206. ///
  207. /// \returns The index of the first occurrence of \p Str, or npos if not
  208. /// found.
  209. size_t find(StringRef Str, size_t From = 0) const;
  210. /// Search for the last character \p C in the string.
  211. ///
  212. /// \returns The index of the last occurrence of \p C, or npos if not
  213. /// found.
  214. size_t rfind(char C, size_t From = npos) const {
  215. From = std::min(From, Length);
  216. size_t i = From;
  217. while (i != 0) {
  218. --i;
  219. if (Data[i] == C)
  220. return i;
  221. }
  222. return npos;
  223. }
  224. /// Search for the last string \p Str in the string.
  225. ///
  226. /// \returns The index of the last occurrence of \p Str, or npos if not
  227. /// found.
  228. size_t rfind(StringRef Str) const;
  229. /// Find the first character in the string that is \p C, or npos if not
  230. /// found. Same as find.
  231. size_t find_first_of(char C, size_t From = 0) const {
  232. return find(C, From);
  233. }
  234. /// Find the first character in the string that is in \p Chars, or npos if
  235. /// not found.
  236. ///
  237. /// Complexity: O(size() + Chars.size())
  238. size_t find_first_of(StringRef Chars, size_t From = 0) const;
  239. /// Find the first character in the string that is not \p C or npos if not
  240. /// found.
  241. size_t find_first_not_of(char C, size_t From = 0) const;
  242. /// Find the first character in the string that is not in the string
  243. /// \p Chars, or npos if not found.
  244. ///
  245. /// Complexity: O(size() + Chars.size())
  246. size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
  247. /// Find the last character in the string that is \p C, or npos if not
  248. /// found.
  249. size_t find_last_of(char C, size_t From = npos) const {
  250. return rfind(C, From);
  251. }
  252. /// Find the last character in the string that is in \p C, or npos if not
  253. /// found.
  254. ///
  255. /// Complexity: O(size() + Chars.size())
  256. size_t find_last_of(StringRef Chars, size_t From = npos) const;
  257. /// Find the last character in the string that is not \p C, or npos if not
  258. /// found.
  259. size_t find_last_not_of(char C, size_t From = npos) const;
  260. /// Find the last character in the string that is not in \p Chars, or
  261. /// npos if not found.
  262. ///
  263. /// Complexity: O(size() + Chars.size())
  264. size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
  265. /// @}
  266. /// @name Helpful Algorithms
  267. /// @{
  268. /// Return the number of occurrences of \p C in the string.
  269. size_t count(char C) const {
  270. size_t Count = 0;
  271. for (size_t i = 0, e = Length; i != e; ++i)
  272. if (Data[i] == C)
  273. ++Count;
  274. return Count;
  275. }
  276. /// Return the number of non-overlapped occurrences of \p Str in
  277. /// the string.
  278. size_t count(StringRef Str) const;
  279. /// Parse the current string as an integer of the specified radix. If
  280. /// \p Radix is specified as zero, this does radix autosensing using
  281. /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
  282. ///
  283. /// If the string is invalid or if only a subset of the string is valid,
  284. /// this returns true to signify the error. The string is considered
  285. /// erroneous if empty or if it overflows T.
  286. template <typename T>
  287. typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
  288. getAsInteger(unsigned Radix, T &Result) const {
  289. long long LLVal;
  290. if (getAsSignedInteger(*this, Radix, LLVal) ||
  291. static_cast<T>(LLVal) != LLVal)
  292. return true;
  293. Result = LLVal;
  294. return false;
  295. }
  296. template <typename T>
  297. typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
  298. getAsInteger(unsigned Radix, T &Result) const {
  299. unsigned long long ULLVal;
  300. // The additional cast to unsigned long long is required to avoid the
  301. // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
  302. // 'unsigned __int64' when instantiating getAsInteger with T = bool.
  303. if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
  304. static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
  305. return true;
  306. Result = ULLVal;
  307. return false;
  308. }
  309. /// Parse the current string as an integer of the specified \p Radix, or of
  310. /// an autosensed radix if the \p Radix given is 0. The current value in
  311. /// \p Result is discarded, and the storage is changed to be wide enough to
  312. /// store the parsed integer.
  313. ///
  314. /// \returns true if the string does not solely consist of a valid
  315. /// non-empty number in the appropriate base.
  316. ///
  317. /// APInt::fromString is superficially similar but assumes the
  318. /// string is well-formed in the given radix.
  319. bool getAsInteger(unsigned Radix, APInt &Result) const;
  320. /// @}
  321. /// @name String Operations
  322. /// @{
  323. // Convert the given ASCII string to lowercase.
  324. std::string lower() const;
  325. /// Convert the given ASCII string to uppercase.
  326. std::string upper() const;
  327. /// @}
  328. /// @name Substring Operations
  329. /// @{
  330. /// Return a reference to the substring from [Start, Start + N).
  331. ///
  332. /// \param Start The index of the starting character in the substring; if
  333. /// the index is npos or greater than the length of the string then the
  334. /// empty substring will be returned.
  335. ///
  336. /// \param N The number of characters to included in the substring. If N
  337. /// exceeds the number of characters remaining in the string, the string
  338. /// suffix (starting with \p Start) will be returned.
  339. StringRef substr(size_t Start, size_t N = npos) const {
  340. Start = std::min(Start, Length);
  341. return StringRef(Data + Start, std::min(N, Length - Start));
  342. }
  343. /// Return a StringRef equal to 'this' but with the first \p N elements
  344. /// dropped.
  345. StringRef drop_front(size_t N = 1) const {
  346. assert(size() >= N && "Dropping more elements than exist");
  347. return substr(N);
  348. }
  349. /// Return a StringRef equal to 'this' but with the last \p N elements
  350. /// dropped.
  351. StringRef drop_back(size_t N = 1) const {
  352. assert(size() >= N && "Dropping more elements than exist");
  353. return substr(0, size()-N);
  354. }
  355. /// Return a reference to the substring from [Start, End).
  356. ///
  357. /// \param Start The index of the starting character in the substring; if
  358. /// the index is npos or greater than the length of the string then the
  359. /// empty substring will be returned.
  360. ///
  361. /// \param End The index following the last character to include in the
  362. /// substring. If this is npos, or less than \p Start, or exceeds the
  363. /// number of characters remaining in the string, the string suffix
  364. /// (starting with \p Start) will be returned.
  365. StringRef slice(size_t Start, size_t End) const {
  366. Start = std::min(Start, Length);
  367. End = std::min(std::max(Start, End), Length);
  368. return StringRef(Data + Start, End - Start);
  369. }
  370. /// Split into two substrings around the first occurrence of a separator
  371. /// character.
  372. ///
  373. /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
  374. /// such that (*this == LHS + Separator + RHS) is true and RHS is
  375. /// maximal. If \p Separator is not in the string, then the result is a
  376. /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
  377. ///
  378. /// \param Separator The character to split on.
  379. /// \returns The split substrings.
  380. std::pair<StringRef, StringRef> split(char Separator) const {
  381. size_t Idx = find(Separator);
  382. if (Idx == npos)
  383. return std::make_pair(*this, StringRef());
  384. return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
  385. }
  386. /// Split into two substrings around the first occurrence of a separator
  387. /// string.
  388. ///
  389. /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
  390. /// such that (*this == LHS + Separator + RHS) is true and RHS is
  391. /// maximal. If \p Separator is not in the string, then the result is a
  392. /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
  393. ///
  394. /// \param Separator - The string to split on.
  395. /// \return - The split substrings.
  396. std::pair<StringRef, StringRef> split(StringRef Separator) const {
  397. size_t Idx = find(Separator);
  398. if (Idx == npos)
  399. return std::make_pair(*this, StringRef());
  400. return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
  401. }
  402. /// Split into substrings around the occurrences of a separator string.
  403. ///
  404. /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
  405. /// \p MaxSplit splits are done and consequently <= \p MaxSplit
  406. /// elements are added to A.
  407. /// If \p KeepEmpty is false, empty strings are not added to \p A. They
  408. /// still count when considering \p MaxSplit
  409. /// An useful invariant is that
  410. /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
  411. ///
  412. /// \param A - Where to put the substrings.
  413. /// \param Separator - The string to split on.
  414. /// \param MaxSplit - The maximum number of times the string is split.
  415. /// \param KeepEmpty - True if empty substring should be added.
  416. void split(SmallVectorImpl<StringRef> &A,
  417. StringRef Separator, int MaxSplit = -1,
  418. bool KeepEmpty = true) const;
  419. /// Split into two substrings around the last occurrence of a separator
  420. /// character.
  421. ///
  422. /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
  423. /// such that (*this == LHS + Separator + RHS) is true and RHS is
  424. /// minimal. If \p Separator is not in the string, then the result is a
  425. /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
  426. ///
  427. /// \param Separator - The character to split on.
  428. /// \return - The split substrings.
  429. std::pair<StringRef, StringRef> rsplit(char Separator) const {
  430. size_t Idx = rfind(Separator);
  431. if (Idx == npos)
  432. return std::make_pair(*this, StringRef());
  433. return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
  434. }
  435. /// Return string with consecutive characters in \p Chars starting from
  436. /// the left removed.
  437. StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
  438. return drop_front(std::min(Length, find_first_not_of(Chars)));
  439. }
  440. /// Return string with consecutive characters in \p Chars starting from
  441. /// the right removed.
  442. StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
  443. return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
  444. }
  445. /// Return string with consecutive characters in \p Chars starting from
  446. /// the left and right removed.
  447. StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
  448. return ltrim(Chars).rtrim(Chars);
  449. }
  450. /// @}
  451. };
  452. /// @name StringRef Comparison Operators
  453. /// @{
  454. inline bool operator==(StringRef LHS, StringRef RHS) {
  455. return LHS.equals(RHS);
  456. }
  457. inline bool operator!=(StringRef LHS, StringRef RHS) {
  458. return !(LHS == RHS);
  459. }
  460. inline bool operator<(StringRef LHS, StringRef RHS) {
  461. return LHS.compare(RHS) == -1;
  462. }
  463. inline bool operator<=(StringRef LHS, StringRef RHS) {
  464. return LHS.compare(RHS) != 1;
  465. }
  466. inline bool operator>(StringRef LHS, StringRef RHS) {
  467. return LHS.compare(RHS) == 1;
  468. }
  469. inline bool operator>=(StringRef LHS, StringRef RHS) {
  470. return LHS.compare(RHS) != -1;
  471. }
  472. inline std::string &operator+=(std::string &buffer, StringRef string) {
  473. return buffer.append(string.data(), string.size());
  474. }
  475. /// @}
  476. /// \brief Compute a hash_code for a StringRef.
  477. hash_code hash_value(StringRef S);
  478. // StringRefs can be treated like a POD type.
  479. template <typename T> struct isPodLike;
  480. template <> struct isPodLike<StringRef> { static const bool value = true; };
  481. }
  482. // HLSL Change Starts
  483. // StringRef provides an operator string; that trips up the std::pair noexcept specification,
  484. // which (a) enables the moves constructor (because conversion is allowed), but (b)
  485. // misclassifies the the construction as nothrow.
  486. namespace std {
  487. template<>
  488. struct is_nothrow_constructible <std::string, llvm::StringRef>
  489. : std::false_type {
  490. };
  491. template<>
  492. struct is_nothrow_constructible <std::string, llvm::StringRef &>
  493. : std::false_type {
  494. };
  495. template<>
  496. struct is_nothrow_constructible <std::string, const llvm::StringRef &>
  497. : std::false_type {
  498. };
  499. }
  500. // HLSL Change Ends
  501. #endif