UnicodeString.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /*
  2. ** Command & Conquer Generals(tm)
  3. ** Copyright 2025 Electronic Arts Inc.
  4. **
  5. ** This program is free software: you can redistribute it and/or modify
  6. ** it under the terms of the GNU General Public License as published by
  7. ** the Free Software Foundation, either version 3 of the License, or
  8. ** (at your option) any later version.
  9. **
  10. ** This program is distributed in the hope that it will be useful,
  11. ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ** GNU General Public License for more details.
  14. **
  15. ** You should have received a copy of the GNU General Public License
  16. ** along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. ////////////////////////////////////////////////////////////////////////////////
  19. // //
  20. // (c) 2001-2003 Electronic Arts Inc. //
  21. // //
  22. ////////////////////////////////////////////////////////////////////////////////
  23. // FILE: UnicodeString.h
  24. //-----------------------------------------------------------------------------
  25. //
  26. // Westwood Studios Pacific.
  27. //
  28. // Confidential Information
  29. // Copyright (C) 2001 - All Rights Reserved
  30. //
  31. //-----------------------------------------------------------------------------
  32. //
  33. // Project: RTS3
  34. //
  35. // File name: UnicodeString.h
  36. //
  37. // Created: Steven Johnson, October 2001
  38. //
  39. // Desc: General-purpose string classes
  40. //
  41. //-----------------------------------------------------------------------------
  42. ///////////////////////////////////////////////////////////////////////////////
  43. #pragma once
  44. #ifndef UNICODESTRING_H
  45. #define UNICODESTRING_H
  46. #include <stdarg.h>
  47. #include <stdio.h>
  48. #include <string.h>
  49. #include "Lib/BaseType.h"
  50. #include "Common/Debug.h"
  51. #include "Common/Errors.h"
  52. class AsciiString;
  53. // -----------------------------------------------------
  54. /**
  55. UnicodeString is the fundamental double-byte string type used in the Generals
  56. code base, and should be preferred over all other string constructions
  57. (e.g., array of WideChar, STL string<>, WWVegas StringClass, etc.)
  58. Of course, other string setups may be used when necessary or appropriate!
  59. UnicodeString is modeled after the MFC CString class, with some minor
  60. syntactic differences to keep in line with our coding conventions.
  61. Basically, UnicodeString allows you to treat a string as an intrinsic
  62. type, rather analogous to 'int' -- when passed by value, a new string
  63. is created, and modifying the new string doesn't modify the original.
  64. This is done fairly efficiently, so that no new memory allocation is done
  65. unless the string is actually modified.
  66. Naturally, UnicodeString handles all memory issues, so there's no need
  67. to do anything to free memory... just allow the UnicodeString's
  68. destructor to run.
  69. UnicodeStrings are suitable for use as automatic, member, or static variables.
  70. */
class UnicodeString
{
private:

	// Note, this is a Plain Old Data Structure... don't
	// add a ctor/dtor, 'cuz they won't ever be called.
	struct UnicodeStringData
	{
		// NOTE: m_debugptr only exists when _DEBUG or _INTERNAL is defined,
		// so the layout of this struct differs between build configurations.
#if defined(_DEBUG) || defined(_INTERNAL)
		const WideChar* m_debugptr;	// just makes it easier to read in the debugger
#endif
		unsigned short m_refCount;	// reference count
		unsigned short m_numCharsAllocated;	// length of data allocated
		// WideChar m_stringdata[];

		// Returns the address immediately following this header, viewed as a
		// WideChar* (presumably where the characters are stored -- see the
		// commented-out m_stringdata member above).
		inline WideChar* peek() { return (WideChar*)(this+1); }
	};

	// validate() is declared out-of-line when _DEBUG is defined,
	// and is an empty inline no-op in every other configuration.
#ifdef _DEBUG
	void validate() const;
#else
	inline void validate() const { }
#endif

protected:
	UnicodeStringData* m_data;	// pointer to ref counted string data (null when no buffer is held)

	// Returns the character storage of m_data; asserts that m_data is non-null.
	WideChar* peek() const;
	void releaseBuffer();
	void ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveData, const WideChar* strToCpy, const WideChar* strToCat);

public:

	enum
	{
		MAX_FORMAT_BUF_LEN = 2048,	///< max total len of string created by format/format_va
		MAX_LEN = 32767	///< max total len of any UnicodeString, in chars
	};

	/**
		This is a convenient global used to indicate the empty
		string, so we don't need to construct temporaries
		for such a common thing.
	*/
	static UnicodeString TheEmptyString;

	/**
		Default constructor -- construct a new, empty UnicodeString.
	*/
	UnicodeString();

	/**
		Copy constructor -- make this UnicodeString identical to the
		other UnicodeString. (This is actually quite efficient, because
		they will simply share the same string and increment the
		refcount.)
	*/
	UnicodeString(const UnicodeString& stringSrc);

	/**
		Constructor -- from a literal string. Constructs a UnicodeString
		with the given string. Note that a copy of the string is made;
		the input ptr is not saved. Note also that this is declared
		'explicit' to avoid implicit conversions from const-WideChar-*
		(e.g., as input arguments).
	*/
	explicit UnicodeString(const WideChar* s);

	/**
		Destructor. Not too exciting... clean up the works and such.
	*/
	~UnicodeString();

	/**
		Return the length, in characters (not bytes!), of the string.
	*/
	int getLength() const;

	/**
		Return true iff the length of the string is zero. Equivalent
		to (getLength() == 0) but slightly more efficient.
	*/
	Bool isEmpty() const;

	/**
		Make the string empty. Equivalent to (str = L"") but slightly more efficient.
	*/
	void clear();

	/**
		Return the character at the given (zero-based) index into the string.
		No range checking is done (except in debug mode).
	*/
	WideChar getCharAt(int index) const;

	/**
		Return a pointer to the (null-terminated) string. Note that this is
		a const pointer: do NOT change this! It is imperative that it be
		impossible (or at least, really difficult) for someone to change our
		private data, since it might be shared amongst other UnicodeStrings.
	*/
	const WideChar* str() const;

	/**
		Makes sure there is room for a string of len+1 characters, and
		returns a pointer to the string buffer. This ensures that the
		string buffer is NOT shared. This is intended for the file reader,
		that is reading new strings in from a file. jba.
	*/
	WideChar* getBufferForRead(Int len);

	/**
		Replace the contents of self with the given string.
		(This is actually quite efficient, because
		they will simply share the same string and increment the
		refcount.)
	*/
	void set(const UnicodeString& stringSrc);

	/**
		Replace the contents of self with the given string.
		Note that a copy of the string is made; the input ptr is not saved.
	*/
	void set(const WideChar* s);

	/**
		Replace the contents of self with the given string. Note the
		nomenclature is translate rather than set; this is because
		not all single-byte strings translate one-for-one into
		UnicodeStrings, so some data manipulation may be necessary,
		and the resulting strings may not be equivalent.
	*/
	void translate(const AsciiString& stringSrc);

	/**
		Concatenate the given string onto self.
	*/
	void concat(const UnicodeString& stringSrc);

	/**
		Concatenate the given string onto self.
	*/
	void concat(const WideChar* s);

	/**
		Concatenate the given character onto self.
	*/
	void concat(const WideChar c);

	/**
		Remove leading and trailing whitespace from the string.
	*/
	void trim( void );

	/**
		Remove the final character in the string. If the string is empty,
		do nothing. (This is a rather dorky method, but used a lot in
		text editing, thus its presence here.)
	*/
	void removeLastChar();

	/**
		Analogous to sprintf() -- this formats a string according to the
		given sprintf-style format string (and the variable argument list)
		and stores the result in self.

		NOTE(review): the UnicodeString overload takes 'format' by value,
		presumably on purpose -- va_start on a reference-typed last named
		parameter is undefined behavior. Confirm before changing it to a
		const reference.
	*/
	void format(UnicodeString format, ...);
	void format(const WideChar* format, ...);

	/**
		Identical to format(), but takes a va_list rather than
		a variable argument list. (i.e., analogous to vsprintf.)
	*/
	void format_va(const UnicodeString& format, va_list args);
	void format_va(const WideChar* format, va_list args);

	/**
		Conceptually identical to wcscmp().
	*/
	int compare(const UnicodeString& stringSrc) const;

	/**
		Conceptually identical to wcscmp().
	*/
	int compare(const WideChar* s) const;

	/**
		Conceptually identical to _wcsicmp().
	*/
	int compareNoCase(const UnicodeString& stringSrc) const;

	/**
		Conceptually identical to _wcsicmp().
	*/
	int compareNoCase(const WideChar* s) const;

	/**
		Conceptually similar to strtok():
		extract the next whitespace-delimited token from the front
		of 'this' and copy it into 'token', returning true if a nonempty
		token was found. (Note that this modifies 'this' as well, stripping
		the token off!)

		NOTE(review): 'delimiters' defaults to the empty string; presumably
		that is what selects whitespace as the delimiter set -- confirm
		against the implementation.
	*/
	Bool nextToken(UnicodeString* token, UnicodeString delimiters = UnicodeString::TheEmptyString);

	//
	// You might think it would be a good idea to overload the * operator
	// to allow for an implicit conversion to a WideChar*. This is
	// in theory a good idea, but in practice, there's lots of code
	// that assumes it should check text fields for null, which
	// is meaningless for us, since we never return a null ptr.
	//
	// operator const WideChar*() const { return str(); }
	//

	UnicodeString& operator=(const UnicodeString& stringSrc);	///< the same as set()
	UnicodeString& operator=(const WideChar* s);	///< the same as set()
};
  254. // -----------------------------------------------------
  255. inline WideChar* UnicodeString::peek() const
  256. {
  257. DEBUG_ASSERTCRASH(m_data, ("null string ptr"));
  258. validate();
  259. return m_data->peek();
  260. }
  261. // -----------------------------------------------------
  262. inline UnicodeString::UnicodeString() : m_data(0)
  263. {
  264. validate();
  265. }
  266. // -----------------------------------------------------
  267. inline UnicodeString::~UnicodeString()
  268. {
  269. validate();
  270. releaseBuffer();
  271. }
  272. // -----------------------------------------------------
  273. inline int UnicodeString::getLength() const
  274. {
  275. validate();
  276. return m_data ? wcslen(peek()) : 0;
  277. }
  278. // -----------------------------------------------------
  279. inline Bool UnicodeString::isEmpty() const
  280. {
  281. validate();
  282. return m_data == NULL || peek()[0] == 0;
  283. }
  284. // -----------------------------------------------------
  285. inline void UnicodeString::clear()
  286. {
  287. validate();
  288. releaseBuffer();
  289. validate();
  290. }
  291. // -----------------------------------------------------
  292. inline WideChar UnicodeString::getCharAt(int index) const
  293. {
  294. DEBUG_ASSERTCRASH(index >= 0 && index < getLength(), ("bad index in getCharAt"));
  295. validate();
  296. return m_data ? peek()[index] : 0;
  297. }
  298. // -----------------------------------------------------
  299. inline const WideChar* UnicodeString::str() const
  300. {
  301. validate();
  302. static const WideChar TheNullChr = 0;
  303. return m_data ? peek() : &TheNullChr;
  304. }
  305. // -----------------------------------------------------
  306. inline UnicodeString& UnicodeString::operator=(const UnicodeString& stringSrc)
  307. {
  308. validate();
  309. set(stringSrc);
  310. validate();
  311. return *this;
  312. }
  313. // -----------------------------------------------------
  314. inline UnicodeString& UnicodeString::operator=(const WideChar* s)
  315. {
  316. validate();
  317. set(s);
  318. validate();
  319. return *this;
  320. }
  321. // -----------------------------------------------------
  322. inline void UnicodeString::concat(const UnicodeString& stringSrc)
  323. {
  324. validate();
  325. concat(stringSrc.str());
  326. validate();
  327. }
  328. // -----------------------------------------------------
  329. inline void UnicodeString::concat(const WideChar c)
  330. {
  331. validate();
  332. /// this can probably be made more efficient, if necessary
  333. WideChar tmp[2] = { c, 0 };
  334. concat(tmp);
  335. validate();
  336. }
  337. // -----------------------------------------------------
  338. inline int UnicodeString::compare(const UnicodeString& stringSrc) const
  339. {
  340. validate();
  341. return wcscmp(this->str(), stringSrc.str());
  342. }
  343. // -----------------------------------------------------
  344. inline int UnicodeString::compare(const WideChar* s) const
  345. {
  346. validate();
  347. return wcscmp(this->str(), s);
  348. }
  349. // -----------------------------------------------------
  350. inline int UnicodeString::compareNoCase(const UnicodeString& stringSrc) const
  351. {
  352. validate();
  353. return _wcsicmp(this->str(), stringSrc.str());
  354. }
  355. // -----------------------------------------------------
  356. inline int UnicodeString::compareNoCase(const WideChar* s) const
  357. {
  358. validate();
  359. return _wcsicmp(this->str(), s);
  360. }
  361. // -----------------------------------------------------
  362. inline Bool operator==(const UnicodeString& s1, const UnicodeString& s2)
  363. {
  364. return wcscmp(s1.str(), s2.str()) == 0;
  365. }
  366. // -----------------------------------------------------
  367. inline Bool operator!=(const UnicodeString& s1, const UnicodeString& s2)
  368. {
  369. return wcscmp(s1.str(), s2.str()) != 0;
  370. }
  371. // -----------------------------------------------------
  372. inline Bool operator<(const UnicodeString& s1, const UnicodeString& s2)
  373. {
  374. return wcscmp(s1.str(), s2.str()) < 0;
  375. }
  376. // -----------------------------------------------------
  377. inline Bool operator<=(const UnicodeString& s1, const UnicodeString& s2)
  378. {
  379. return wcscmp(s1.str(), s2.str()) <= 0;
  380. }
  381. // -----------------------------------------------------
  382. inline Bool operator>(const UnicodeString& s1, const UnicodeString& s2)
  383. {
  384. return wcscmp(s1.str(), s2.str()) > 0;
  385. }
  386. // -----------------------------------------------------
  387. inline Bool operator>=(const UnicodeString& s1, const UnicodeString& s2)
  388. {
  389. return wcscmp(s1.str(), s2.str()) >= 0;
  390. }
  391. #endif // UNICODESTRING_H