string.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. //
  2. // Copyright (c) 2017-2023, Manticore Software LTD (https://manticoresearch.com)
  3. // Copyright (c) 2001-2016, Andrew Aksyonoff
  4. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  5. // All rights reserved
  6. //
  7. // This program is free software; you can redistribute it and/or modify
  8. // it under the terms of the GNU General Public License. You should have
  9. // received a copy of the GPL license along with this program; if you
  10. // did not, you can find it at http://www.gnu.org
  11. //
  12. #pragma once
  13. #include "attrstub.h"
  14. #include "vector.h"
  15. #include "blobs.h"
  /// String equality helper. CSphString::operator== forwards to it, passing
  /// the foreign operand first and its own buffer second.
  /// NOTE(review): CSphString's buffer defaults to nullptr, so either argument
  /// may be null here; how null is treated is decided by the definition, which
  /// is not visible in this header - confirm before relying on it.
  bool StrEq ( const char* l, const char* r );
  /// Counterpart of StrEq used by CSphString::EqN, i.e. compare ignoring case.
  bool StrEqN ( const char* l, const char* r );
  18. /// immutable C string proxy
  19. struct CSphString
  20. {
  21. protected:
  22. char* m_sValue = nullptr;
  23. // Empty ("") string optimization.
  24. static char EMPTY[];
  25. private:
  26. /// safety gap after the string end; for instance, UTF-8 Russian stemmer
  27. /// which treats strings as 16-bit word sequences needs this in some cases.
  28. /// note that this zero-filled gap does NOT include trailing C-string zero,
  29. /// and does NOT affect strlen() as well.
  30. static constexpr int SAFETY_GAP = 4;
  31. void SafeFree();
  32. public:
  33. CSphString() noexcept = default;
  34. // take a note this is not an explicit constructor
  35. // so a lot of silent constructing and deleting of strings is possible
  36. // Example:
  37. // SmallStringHash_T<int> hHash;
  38. // ...
  39. // hHash.Exists ( "asdf" ); // implicit CSphString construction and deletion here
  40. CSphString ( const CSphString& rhs );
  41. CSphString ( CSphString&& rhs ) noexcept
  42. {
  43. Swap ( rhs );
  44. }
  45. ~CSphString()
  46. {
  47. SafeFree();
  48. }
  49. const char* cstr() const
  50. {
  51. return m_sValue;
  52. }
  53. const char* scstr() const
  54. {
  55. return m_sValue ? m_sValue : EMPTY;
  56. }
  57. inline bool operator== ( const char* t ) const
  58. {
  59. return StrEq ( t, m_sValue );
  60. }
  61. inline bool operator== ( const CSphString& t ) const
  62. {
  63. return operator== ( t.cstr() );
  64. }
  65. inline bool operator!= ( const CSphString& t ) const
  66. {
  67. return !operator== ( t );
  68. }
  69. bool operator!= ( const char* t ) const
  70. {
  71. return !operator== ( t );
  72. }
  73. // compare ignoring case
  74. inline bool EqN ( const char* t ) const
  75. {
  76. return StrEqN ( t, m_sValue );
  77. }
  78. inline bool EqN ( const CSphString& t ) const
  79. {
  80. return EqN ( t.cstr() );
  81. }
  82. CSphString ( const char* szString );
  83. // create even if source is null/empty (result will be valid string with valid tail of zeros - for parsers)
  84. enum guarded_e { always_create };
  85. CSphString ( const char* szString, guarded_e );
  86. CSphString ( const char* sValue, int iLen )
  87. {
  88. SetBinary ( sValue, iLen );
  89. }
  90. CSphString ( Str_t sValue )
  91. {
  92. SetBinary ( sValue );
  93. }
  94. // pass by value - replaces both copy and move assignments.
  95. CSphString& operator= ( CSphString rhs )
  96. {
  97. Swap ( rhs );
  98. return *this;
  99. }
  100. CSphString SubString ( int iStart, int iCount ) const;
  101. // tries to reuse memory buffer, but calls Length() every time
  102. // hope this won't kill performance on a huge strings
  103. void SetBinary ( const char* sValue, int iLen );
  104. void SetBinary ( Str_t sValue ) { SetBinary ( sValue.first, sValue.second ); }
  105. void Reserve ( int iLen );
  106. const CSphString& SetSprintf ( const char* sTemplate, ... ) __attribute__ ( ( format ( printf, 2, 3 ) ) );
  107. /// format value using provided va_list
  108. const CSphString& SetSprintfVa ( const char* sTemplate, va_list ap );
  109. /// \return true if internal char* ptr is null, of value is empty.
  110. bool IsEmpty() const;
  111. CSphString& ToLower();
  112. CSphString& ToUpper();
  113. void Swap ( CSphString& rhs );
  114. /// \return true if the string begins with sPrefix
  115. bool Begins ( const char* sPrefix ) const;
  116. /// \return true if the string ends with sSuffix
  117. bool Ends ( const char* sSuffix ) const;
  118. /// trim leading and trailing spaces
  119. CSphString& Trim();
  120. int Length() const;
  121. /// \return internal string and releases it from being destroyed in d-tr
  122. char* Leak();
  123. /// internal string and releases it from being destroyed in d-tr
  124. void LeakToVec ( CSphVector<BYTE>& dVec );
  125. /// take string from outside and 'adopt' it as own child.
  126. void Adopt ( char** sValue );
  127. void Adopt ( char*&& sValue );
  128. /// compares using strcmp
  129. bool operator<( const CSphString& b ) const;
  130. void Unquote();
  131. static int GetGap() { return SAFETY_GAP; }
  132. explicit operator ByteBlob_t() const;
  133. };
  134. /// string swapper
  135. void Swap ( CSphString& v1, CSphString& v2 );
  136. /// directly make formatted string
  137. CSphString SphSprintfVa ( const char* sTemplate, va_list ap );
  138. CSphString SphSprintf ( const char* sTemplate, ... );
  139. /// commonly used
  140. void ToLower ( Str_t sVal );
  /// vector of CSphString objects
  using StrVec_t = CSphVector<CSphString>;
  /// vector of Str_t values (Str_t is used in this header through its
  /// first / second members, passed as pointer and length to SetBinary)
  using StrtVec_t = CSphVector<Str_t>;
  143. #include "fixedvector.h"
  /// CSphFixedVector counterpart of StrVec_t (elements are CSphString)
  using FixedStrVec_t = CSphFixedVector<CSphString>;
  /// CSphFixedVector counterpart of StrtVec_t (elements are Str_t)
  using FixedStrtVec_t = CSphFixedVector<Str_t>;
  146. #include "string_impl.h"