str.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. //-----------------------------------------------------------------------------
  2. // Copyright (c) 2012 GarageGames, LLC
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to
  6. // deal in the Software without restriction, including without limitation the
  7. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. // sell copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. // IN THE SOFTWARE.
  21. //-----------------------------------------------------------------------------
  22. #ifndef _TORQUE_STRING_H_
  23. #define _TORQUE_STRING_H_
  24. #include <cstdarg>
  25. #ifndef _TORQUE_TYPES_H_
  26. #include "platform/types.h"
  27. #endif
  28. #include <string.h>
  29. template< class T > class Vector;
  30. typedef UTF8 StringChar;
  31. /// The String class represents a 0-terminated array of characters.
  32. class String
  33. {
  34. public:
  35. class StringData;
  36. /// Default mode is case sensitive starting from the left
  37. enum Mode
  38. {
  39. Case = 0, ///< Case sensitive
  40. NoCase = 1, ///< Case insensitive
  41. Left = 0, ///< Start at left end of string
  42. Right = 2, ///< Start at right end of string
  43. };
  44. typedef U32 SizeType;
  45. typedef StringChar ValueType;
  46. static const SizeType NPos; ///< Indicates 'not found' when using find() functions
  47. /// A predefined empty string.
  48. static const String EmptyString;
  49. String();
  50. String(const String &str);
  51. String(const StringChar *str);
  52. String(const StringChar *str, SizeType size); ///< Copy from raw data
  53. String(const UTF16 *str);
  54. ~String();
  55. const UTF8 *c_str() const; ///< Return the string as a native type
  56. const UTF16 *utf16() const;
  57. const UTF8* utf8() const { return c_str(); }
  58. SizeType length() const; ///< Returns the length of the string in bytes.
  59. SizeType size() const; ///< Returns the length of the string in bytes including the NULL terminator.
  60. SizeType numChars() const; ///< Returns the length of the string in characters.
  61. bool isEmpty() const; ///< Is this an empty string [""]?
  62. static bool isEmpty(const char*); // is the input empty?
  63. bool isNotEmpty() const { return !isEmpty(); } ///< Is this not an empty string [""]?
  64. /// Erases all characters in a string.
  65. void clear() { *this = EmptyString; }
  66. bool isShared() const; ///< Is this string's reference count greater than 1?
  67. bool isSame( const String& str ) const; ///< Return true if both strings refer to the same shared data.
  68. U32 getHashCaseSensitive() const; ///< Get the case-sensitive hash of the string [only calculates the hash as necessary]
  69. U32 getHashCaseInsensitive() const; ///< Get the case-insensitive hash of the string [only calculates the hash as necessary]
  70. String& operator=(StringChar);
  71. String& operator+=(StringChar);
  72. String& operator=(const StringChar*);
  73. String& operator+=(const StringChar*);
  74. String& operator=(const String&);
  75. String& operator+=(const String&);
  76. /**
  77. Compare this string with another.
  78. @param str The string to compare against.
  79. @param len If len is non-zero, then at most len characters are compared.
  80. @param mode Comparison mode.
  81. @return Difference between the first two characters that don't match.
  82. */
  83. S32 compare(const StringChar *str, SizeType len = 0, U32 mode = Case|Left) const;
  84. S32 compare(const String &str, SizeType len = 0, U32 mode = Case|Left) const; ///< @see compare(const StringChar *, SizeType, U32) const
  85. static S32 compare(const char *str1, const char *str2);
  86. static S32 compare(const UTF16 *str1, const UTF16 *str2);
  87. /**
  88. Compare two strings for equality.
  89. It will use the string hashes to determine inequality.
  90. @param str The string to compare against.
  91. @param mode Comparison mode - case sensitive or not.
  92. */
  93. bool equal(const String &str, U32 mode = Case) const;
  94. SizeType find(StringChar c, SizeType pos = 0, U32 mode = Case|Left) const;
  95. SizeType find(const StringChar *str, SizeType pos = 0, U32 mode = Case|Left) const;
  96. SizeType find(const String &str, SizeType pos = 0, U32 mode = Case|Left) const;
  97. String &insert(SizeType pos, const StringChar c) { return insert(pos,&c,1); }
  98. String &insert(SizeType pos, const StringChar *str);
  99. String &insert(SizeType pos, const String &str);
  100. String &insert(SizeType pos, const StringChar *str, SizeType len);
  101. String &erase(SizeType pos, SizeType len);
  102. String &replace(SizeType pos, SizeType len, const StringChar *str);
  103. String &replace(SizeType pos, SizeType len, const String &str);
  104. /// Replace all occurrences of character 'c1' with 'c2'
  105. String &replace( StringChar c1, StringChar c2 );
  106. /// Replace all occurrences of StringData 's1' with StringData 's2'
  107. String &replace(const String &s1, const String &s2);
  108. String substr( SizeType pos, SizeType len = -1 ) const;
  109. /// Remove leading and trailing whitespace.
  110. String trim() const;
  111. /// Replace all characters that need to be escaped for the string to be a valid string literal with their
  112. /// respective escape sequences.
  113. String expandEscapes() const;
  114. /// Replace all escape sequences in with their respective character codes.
  115. String collapseEscapes() const;
  116. /// Split the string into its components separated by the given delimiter.
  117. void split( const char* delimiter, Vector< String >& outElements ) const;
  118. /// Return true if the string starts with the given text.
  119. bool startsWith( const char* text ) const;
  120. /// Return true if the string ends with the given text.
  121. bool endsWith( const char* text ) const;
  122. operator const StringChar*() const { return c_str(); }
  123. StringChar operator []( U32 i ) const { return c_str()[i]; }
  124. StringChar operator []( S32 i ) const { return c_str()[i]; }
  125. bool operator==(const String &str) const;
  126. bool operator!=(const String &str) const { return !(*this == str); }
  127. bool operator==( StringChar c ) const;
  128. bool operator!=( StringChar c ) const { return !(*this == c); }
  129. bool operator<(const String &str) const;
  130. bool operator>(const String &str) const;
  131. bool operator<=(const String &str) const;
  132. bool operator>=(const String &str) const;
  133. friend String operator+(const String &a, StringChar c);
  134. friend String operator+(StringChar c, const String &a);
  135. friend String operator+(const String &a, const StringChar *b);
  136. friend String operator+(const String &a, const String &b);
  137. friend String operator+(const StringChar *a, const String &b);
  138. public:
  139. /// @name String Utility routines
  140. /// @{
  141. static String ToString(const char *format, ...);
  142. static String VToString(const char* format, va_list args);
  143. static String ToString( bool v );
  144. static inline String ToString( U32 v ) { return ToString( "%u", v ); }
  145. static inline String ToString( S32 v ) { return ToString( "%d", v ); }
  146. static inline String ToString( F32 v ) { return ToString( "%g", v ); }
  147. static inline String ToString( F64 v ) { return ToString( "%Lg", v ); }
  148. inline operator const char* () { return c_str(); }
  149. static String SpanToString(const char* start, const char* end);
  150. static String ToLower(const String &string);
  151. static String ToUpper(const String &string);
  152. static String GetTrailingNumber(const char* str, S32& number);
  153. static String GetFirstNumber(const char* str, U32& startPos, U32& endPos);
  154. /// @}
  155. /// @name Interning
  156. ///
  157. /// Interning maps identical strings to unique instances so that equality
  158. /// amounts to simple pointer comparisons.
  159. ///
  160. /// Note that using interned strings within global destructors is not safe
  161. /// as table destruction runs within this phase as well. Uses o interned
  162. /// strings in global destructors is thus dependent on object file ordering.
  163. ///
  164. /// Also, interned strings are not reference-counted. Once interned, a
  165. /// string will persist until shutdown. This is to avoid costly concurrent
  166. /// reference counting that would otherwise be necessary.
  167. ///
  168. /// @{
  169. /// Return the interned version of the string.
  170. /// @note Interning is case-sensitive.
  171. String intern() const;
  172. /// Return true if this string is interned.
  173. bool isInterned() const;
  174. /// @}
  175. /** An internal support class for ToString().
  176. StrFormat manages the formatting of arbitrary length strings.
  177. The class starts with a default internal fixed size buffer and
  178. moves to dynamic allocation from the heap when that is exceeded.
  179. Constructing the class on the stack will result in its most
  180. efficient use. This class is meant to be used as a helper class,
  181. and not for the permanent storage of string data.
  182. @code
  183. char* indexString(U32 index)
  184. {
  185. StrFormat format("Index: %d",index);
  186. char* str = new char[format.size()];
  187. format.copy(str);
  188. return str;
  189. }
  190. @endcode
  191. */
  192. class StrFormat
  193. {
  194. public:
  195. StrFormat()
  196. : _dynamicBuffer( NULL ),
  197. _dynamicSize( 0 ),
  198. _len( 0 )
  199. {
  200. strncpy(_fixedBuffer, "", 2048);
  201. }
  202. StrFormat(const char *formatStr, va_list args)
  203. : _dynamicBuffer( NULL ),
  204. _dynamicSize( 0 ),
  205. _len( 0 )
  206. {
  207. format(formatStr, args);
  208. }
  209. ~StrFormat();
  210. S32 format( const char *format, va_list args );
  211. S32 formatAppend( const char *format, va_list args );
  212. S32 append(const char * str, S32 len);
  213. S32 append(const char * str);
  214. String getString() { return String(c_str(),_len); }
  215. const char * c_str() const { return _dynamicBuffer ? _dynamicBuffer : _fixedBuffer; }
  216. void reset()
  217. {
  218. _len = 0;
  219. strncpy(_fixedBuffer, "", 2048);
  220. }
  221. /// Copy the formatted string into the output buffer which must be at least size() characters.
  222. char *copy(char* buffer) const;
  223. /// Return the length of the formated string (does not include the terminating 0)
  224. U32 length() const { return _len; };
  225. public:
  226. char _fixedBuffer[2048]; //< Fixed size buffer
  227. char *_dynamicBuffer; //< Temporary format buffer
  228. U32 _dynamicSize; //< Dynamic buffer size
  229. U32 _len; //< Len of the formatted string
  230. };
  231. private:
  232. String(StringData *str)
  233. : _string( str ) {}
  234. // Generate compile error if operator bool is used. Without this we use
  235. // operator const char *, which is always true...including operator bool
  236. // causes an ambiguous cast compile error. Making it private is simply
  237. // more insurance that it isn't used on different compilers.
  238. // NOTE: disable on GCC since it causes hyper casting to U32 on gcc.
  239. #if !defined(TORQUE_COMPILER_GCC) && !defined(__clang__)
  240. operator const bool() const { return false; }
  241. #endif
  242. static void copy(StringChar *dst, const StringChar *src, U32 size);
  243. StringData *_string;
  244. };
  245. // Utility class for formatting strings.
  246. class StringBuilder
  247. {
  248. protected:
  249. ///
  250. String::StrFormat mFormat;
  251. public:
  252. StringBuilder() {}
  253. U32 length() const
  254. {
  255. return mFormat.length();
  256. }
  257. void copy( char* buffer ) const
  258. {
  259. mFormat.copy( buffer );
  260. }
  261. const char* data() const
  262. {
  263. return mFormat.c_str();
  264. }
  265. String end()
  266. {
  267. return mFormat.getString();
  268. }
  269. S32 append( char ch )
  270. {
  271. char str[2];
  272. str[0]=ch;
  273. str[1]='\0';
  274. return mFormat.append(str);
  275. }
  276. S32 append( const char* str )
  277. {
  278. return mFormat.append(str);
  279. }
  280. S32 append( const String& str )
  281. {
  282. return mFormat.append( str.c_str(), str.length() );
  283. }
  284. S32 append( const char* str, U32 length )
  285. {
  286. return mFormat.append(str,length);
  287. }
  288. S32 format( const char* fmt, ... )
  289. {
  290. va_list args;
  291. va_start(args, fmt);
  292. const S32 result = mFormat.formatAppend(fmt, args);
  293. va_end(args);
  294. return result;
  295. }
  296. };
  297. // For use in hash tables and the like for explicitly requesting case sensitive hashing.
  298. // Meant to only appear in hash table definition (casting will take care of the rest).
  299. class StringCase : public String
  300. {
  301. public:
  302. StringCase() : String() {}
  303. StringCase(const String & s) : String(s) {}
  304. };
  305. // For use in hash tables and the like for explicitly requesting case insensitive hashing.
  306. // Meant to only appear in hash table definition (casting will take care of the rest).
  307. class StringNoCase : public String
  308. {
  309. public:
  310. StringNoCase() : String() {}
  311. StringNoCase(const String & s) : String(s) {}
  312. };
  313. class FileName : public String
  314. {
  315. public:
  316. FileName() : String() {}
  317. FileName(const String & s) : String(s) {}
  318. FileName & operator=(const String & s) { String::operator=(s); return *this; }
  319. };
  320. //-----------------------------------------------------------------------------
  321. extern String operator+(const String &a, StringChar c);
  322. extern String operator+(StringChar c, const String &a);
  323. extern String operator+(const String &a, const StringChar *b);
  324. extern String operator+(const String &a, const String &b);
  325. extern String operator+(const StringChar *a, const String &b);
  326. #endif