| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453 |
- /*
- ** Command & Conquer Generals Zero Hour(tm)
- ** Copyright 2025 Electronic Arts Inc.
- **
- ** This program is free software: you can redistribute it and/or modify
- ** it under the terms of the GNU General Public License as published by
- ** the Free Software Foundation, either version 3 of the License, or
- ** (at your option) any later version.
- **
- ** This program is distributed in the hope that it will be useful,
- ** but WITHOUT ANY WARRANTY; without even the implied warranty of
- ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ** GNU General Public License for more details.
- **
- ** You should have received a copy of the GNU General Public License
- ** along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- ////////////////////////////////////////////////////////////////////////////////
- // //
- // (c) 2001-2003 Electronic Arts Inc. //
- // //
- ////////////////////////////////////////////////////////////////////////////////
- // FILE: UnicodeString.h
- //-----------------------------------------------------------------------------
- //
- // Westwood Studios Pacific.
- //
- // Confidential Information
- // Copyright (C) 2001 - All Rights Reserved
- //
- //-----------------------------------------------------------------------------
- //
- // Project: RTS3
- //
- // File name: UnicodeString.h
- //
- // Created: Steven Johnson, October 2001
- //
- // Desc: General-purpose string classes
- //
- //-----------------------------------------------------------------------------
- ///////////////////////////////////////////////////////////////////////////////
- #pragma once
- #ifndef UNICODESTRING_H
- #define UNICODESTRING_H
- #include <stdarg.h>
- #include <stdio.h>
- #include <string.h>
- #include "Lib/BaseType.h"
- #include "Common/Debug.h"
- #include "Common/Errors.h"
- class AsciiString;
- // -----------------------------------------------------
- /**
- UnicodeString is the fundamental double-byte string type used in the Generals
- code base, and should be preferred over all other string constructions
- (e.g., array of WideChar, STL string<>, WWVegas StringClass, etc.)
- Of course, other string setups may be used when necessary or appropriate!
- UnicodeString is modeled after the MFC CString class, with some minor
- syntactic differences to keep in line with our coding conventions.
- Basically, UnicodeString allows you to treat a string as an intrinsic
- type, rather analogous to 'int' -- when passed by value, a new string
- is created, and modifying the new string doesn't modify the original.
- This is done fairly efficiently, so that no new memory allocation is done
- unless the string is actually modified.
- Naturally, UnicodeString handles all memory issues, so there's no need
- to do anything to free memory... just allow the UnicodeString's
- destructor to run.
- UnicodeStrings are suitable for use as automatic, member, or static variables.
- */
- class UnicodeString
- {
- private:
-
- // Note, this is a Plain Old Data Structure... don't
- // add a ctor/dtor, 'cuz they won't ever be called.
- struct UnicodeStringData
- {
- #if defined(_DEBUG) || defined(_INTERNAL)
- const WideChar* m_debugptr; // just makes it easier to read in the debugger
- #endif
- unsigned short m_refCount; // reference count
- unsigned short m_numCharsAllocated; // length of data allocated
- // WideChar m_stringdata[];
- inline WideChar* peek() { return (WideChar*)(this+1); }
- };
- #ifdef _DEBUG
- void validate() const;
- #else
- inline void validate() const { }
- #endif
- protected:
- UnicodeStringData* m_data; // pointer to ref counted string data
- WideChar* peek() const;
- void releaseBuffer();
- void ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveData, const WideChar* strToCpy, const WideChar* strToCat);
- public:
- enum
- {
- MAX_FORMAT_BUF_LEN = 2048, ///< max total len of string created by format/format_va
- MAX_LEN = 32767 ///< max total len of any UnicodeString, in chars
- };
- /**
- This is a convenient global used to indicate the empty
- string, so we don't need to construct temporaries
- for such a common thing.
- */
- static UnicodeString TheEmptyString;
- /**
- Default constructor -- construct a new, empty UnicodeString.
- */
- UnicodeString();
- /**
- Copy constructor -- make this UnicodeString identical to the
- other UnicodeString. (This is actually quite efficient, because
- they will simply share the same string and increment the
- refcount.)
- */
- UnicodeString(const UnicodeString& stringSrc);
- /**
- Constructor -- from a literal string. Constructs an UnicodeString
- with the given string. Note that a copy of the string is made;
- the input ptr is not saved. Note also that this is declared
- 'explicit' to avoid implicit conversions from const-WideChar-*
- (e.g., as input arguments).
- */
- explicit UnicodeString(const WideChar* s);
- /**
- Destructor. Not too exciting... clean up the works and such.
- */
- ~UnicodeString();
- /**
- Return the length, in characters (not bytes!), of the string.
- */
- int getLength() const;
- /**
- Return true iff the length of the string is zero. Equivalent
- to (getLength() == 0) but slightly more efficient.
- */
- Bool isEmpty() const;
- /**
- Make the string empty. Equivalent to (str = "") but slightly more efficient.
- */
- void clear();
- /**
- Return the character and the given (zero-based) index into the string.
- No range checking is done (except in debug mode).
- */
- WideChar getCharAt(int index) const;
- /**
- Return a pointer to the (null-terminated) string. Note that this is
- a const pointer: do NOT change this! It is imperative that it be
- impossible (or at least, really difficuly) for someone to change our
- private data, since it might be shared amongst other UnicodeStrings.
- */
- const WideChar* str() const;
- /**
- Makes sure there is room for a string of len+1 characters, and
- returns a pointer to the string buffer. This ensures that the
- string buffer is NOT shared. This is intended for the file reader,
- that is reading new strings in from a file. jba.
- */
- WideChar* getBufferForRead(Int len);
- /**
- Replace the contents of self with the given string.
- (This is actually quite efficient, because
- they will simply share the same string and increment the
- refcount.)
- */
- void set(const UnicodeString& stringSrc);
- /**
- Replace the contents of self with the given string.
- Note that a copy of the string is made; the input ptr is not saved.
- */
- void set(const WideChar* s);
- /**
- replace contents of self with the given string. Note the
- nomenclature is translate rather than set; this is because
- not all single-byte strings translate one-for-one into
- UnicodeStrings, so some data manipulation may be necessary,
- and the resulting strings may not be equivalent.
- */
- void translate(const AsciiString& stringSrc);
- /**
- Concatenate the given string onto self.
- */
- void concat(const UnicodeString& stringSrc);
- /**
- Concatenate the given string onto self.
- */
- void concat(const WideChar* s);
- /**
- Concatenate the given character onto self.
- */
- void concat(const WideChar c);
- /**
- Remove leading and trailing whitespace from the string.
- */
- void trim( void );
- /**
- Remove the final character in the string. If the string is empty,
- do nothing. (This is a rather dorky method, but used a lot in
- text editing, thus its presence here.)
- */
- void removeLastChar();
- /**
- Analogous to sprintf() -- this formats a string according to the
- given sprintf-style format string (and the variable argument list)
- and stores the result in self.
- */
- void format(UnicodeString format, ...);
- void format(const WideChar* format, ...);
- /**
- Identical to format(), but takes a va_list rather than
- a variable argument list. (i.e., analogous to vsprintf.)
- */
- void format_va(const UnicodeString& format, va_list args);
- void format_va(const WideChar* format, va_list args);
- /**
- Conceptually identical to wsccmp().
- */
- int compare(const UnicodeString& stringSrc) const;
- /**
- Conceptually identical to wsccmp().
- */
- int compare(const WideChar* s) const;
- /**
- Conceptually identical to _wcsicmp().
- */
- int compareNoCase(const UnicodeString& stringSrc) const;
- /**
- Conceptually identical to _wcsicmp().
- */
- int compareNoCase(const WideChar* s) const;
- /**
- conceptually similar to strtok():
- extract the next whitespace-delimited token from the front
- of 'this' and copy it into 'token', returning true if a nonempty
- token was found. (note that this modifies 'this' as well, stripping
- the token off!)
- */
- Bool nextToken(UnicodeString* token, UnicodeString delimiters = UnicodeString::TheEmptyString);
- //
- // You might think it would be a good idea to overload the * operator
- // to allow for an implicit conversion to an WideChar*. This is
- // in theory a good idea, but in practice, there's lots of code
- // that assumes it should check text fields for null, which
- // is meaningless for us, since we never return a null ptr.
- //
- // operator const WideChar*() const { return str(); }
- //
- UnicodeString& operator=(const UnicodeString& stringSrc); ///< the same as set()
- UnicodeString& operator=(const WideChar* s); ///< the same as set()
- };
- // -----------------------------------------------------
- inline WideChar* UnicodeString::peek() const
- {
- DEBUG_ASSERTCRASH(m_data, ("null string ptr"));
- validate();
- return m_data->peek();
- }
- // -----------------------------------------------------
- inline UnicodeString::UnicodeString() : m_data(0)
- {
- validate();
- }
- // -----------------------------------------------------
- inline UnicodeString::~UnicodeString()
- {
- validate();
- releaseBuffer();
- }
- // -----------------------------------------------------
- inline int UnicodeString::getLength() const
- {
- validate();
- return m_data ? wcslen(peek()) : 0;
- }
- // -----------------------------------------------------
- inline Bool UnicodeString::isEmpty() const
- {
- validate();
- return m_data == NULL || peek()[0] == 0;
- }
- // -----------------------------------------------------
- inline void UnicodeString::clear()
- {
- validate();
- releaseBuffer();
- validate();
- }
- // -----------------------------------------------------
- inline WideChar UnicodeString::getCharAt(int index) const
- {
- DEBUG_ASSERTCRASH(index >= 0 && index < getLength(), ("bad index in getCharAt"));
- validate();
- return m_data ? peek()[index] : 0;
- }
- // -----------------------------------------------------
- inline const WideChar* UnicodeString::str() const
- {
- validate();
- static const WideChar TheNullChr = 0;
- return m_data ? peek() : &TheNullChr;
- }
- // -----------------------------------------------------
- inline UnicodeString& UnicodeString::operator=(const UnicodeString& stringSrc)
- {
- validate();
- set(stringSrc);
- validate();
- return *this;
- }
- // -----------------------------------------------------
- inline UnicodeString& UnicodeString::operator=(const WideChar* s)
- {
- validate();
- set(s);
- validate();
- return *this;
- }
- // -----------------------------------------------------
- inline void UnicodeString::concat(const UnicodeString& stringSrc)
- {
- validate();
- concat(stringSrc.str());
- validate();
- }
- // -----------------------------------------------------
- inline void UnicodeString::concat(const WideChar c)
- {
- validate();
- /// this can probably be made more efficient, if necessary
- WideChar tmp[2] = { c, 0 };
- concat(tmp);
- validate();
- }
- // -----------------------------------------------------
- inline int UnicodeString::compare(const UnicodeString& stringSrc) const
- {
- validate();
- return wcscmp(this->str(), stringSrc.str());
- }
- // -----------------------------------------------------
- inline int UnicodeString::compare(const WideChar* s) const
- {
- validate();
- return wcscmp(this->str(), s);
- }
- // -----------------------------------------------------
- inline int UnicodeString::compareNoCase(const UnicodeString& stringSrc) const
- {
- validate();
- return _wcsicmp(this->str(), stringSrc.str());
- }
- // -----------------------------------------------------
- inline int UnicodeString::compareNoCase(const WideChar* s) const
- {
- validate();
- return _wcsicmp(this->str(), s);
- }
- // -----------------------------------------------------
- inline Bool operator==(const UnicodeString& s1, const UnicodeString& s2)
- {
- return wcscmp(s1.str(), s2.str()) == 0;
- }
- // -----------------------------------------------------
- inline Bool operator!=(const UnicodeString& s1, const UnicodeString& s2)
- {
- return wcscmp(s1.str(), s2.str()) != 0;
- }
- // -----------------------------------------------------
- inline Bool operator<(const UnicodeString& s1, const UnicodeString& s2)
- {
- return wcscmp(s1.str(), s2.str()) < 0;
- }
- // -----------------------------------------------------
- inline Bool operator<=(const UnicodeString& s1, const UnicodeString& s2)
- {
- return wcscmp(s1.str(), s2.str()) <= 0;
- }
- // -----------------------------------------------------
- inline Bool operator>(const UnicodeString& s1, const UnicodeString& s2)
- {
- return wcscmp(s1.str(), s2.str()) > 0;
- }
- // -----------------------------------------------------
- inline Bool operator>=(const UnicodeString& s1, const UnicodeString& s2)
- {
- return wcscmp(s1.str(), s2.str()) >= 0;
- }
- #endif // UNICODESTRING_H
|