regexpr.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. /*
  2. ** Command & Conquer Generals Zero Hour(tm)
  3. ** Copyright 2025 Electronic Arts Inc.
  4. **
  5. ** This program is free software: you can redistribute it and/or modify
  6. ** it under the terms of the GNU General Public License as published by
  7. ** the Free Software Foundation, either version 3 of the License, or
  8. ** (at your option) any later version.
  9. **
  10. ** This program is distributed in the hope that it will be useful,
  11. ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ** GNU General Public License for more details.
  14. **
  15. ** You should have received a copy of the GNU General Public License
  16. ** along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. // regexpr.cpp
  19. #include "always.h"
  20. #include "regexpr.h"
  21. #include "wwstring.h"
  22. #include <assert.h>
  23. // Pull in the gnu_regex library's definitions.
  24. #define __STDC__ 1
  25. extern "C" {
  26. #include "gnu_regex.h"
  27. }
  // The regular expression syntax options that RegularExpressionClass uses
  // whenever it talks to the gnu_regex library.  The dirty details of each
  // option are described in "gnu_regex.h".
  //
  // The whole expansion is wrapped in parentheses so the macro acts as one
  // value wherever it appears.  Without them, an adjacent operator of higher
  // precedence than '|' (for example `OUR_SYNTAX_OPTIONS & mask`) would bind
  // to the last flag only.
  #define OUR_SYNTAX_OPTIONS ( \
      RE_CHAR_CLASSES          | /* Support character classes such as [:alpha:] and [:digit:] */ \
      RE_CONTEXT_INDEP_ANCHORS | /* ^ and $ are always anchors (outside bracket expressions) */ \
      RE_CONTEXT_INDEP_OPS     | /* operators such as + * ? are always considered operators */ \
      RE_CONTEXT_INVALID_OPS   | /* operators are invalid as the first characters in a string */ \
      RE_INTERVALS             | /* { } are used to define intervals */ \
      RE_NO_BK_BRACES          | /* { } are interval markers and \{ \} are literals */ \
      RE_NO_BK_PARENS          | /* ( ) are group markers and \( \) are literals */ \
      RE_NO_BK_VBAR            | /* | is the OR operator and \| is a literal */ \
      RE_NO_EMPTY_RANGES         /* [z-a] is an invalid range but [a-z] is valid */ \
  )
  40. /*
  41. ** Definition of private DataStruct for RegularExpressionClass
  42. */
  43. struct RegularExpressionClass::DataStruct
  44. {
  45. DataStruct (void)
  46. : IsValid(false)
  47. {
  48. // Blank out the expression structure.
  49. memset(&CompiledExpr, 0, sizeof(CompiledExpr));
  50. }
  51. ~DataStruct (void)
  52. {
  53. ClearExpression();
  54. }
  55. void ClearExpression (void)
  56. {
  57. // If the expression was valid, let the gnu_regex library
  58. // deallocate any memory it had allocated for it.
  59. if (IsValid)
  60. regfree(&CompiledExpr);
  61. // Blank out the expression structure.
  62. memset(&CompiledExpr, 0, sizeof(CompiledExpr));
  63. // Erase the expression string.
  64. ExprString = "";
  65. // No longer a valid compiled expression.
  66. IsValid = false;
  67. }
  68. // The regular expression that has been compiled.
  69. StringClass ExprString;
  70. // gnu_regex compiled version of the regular expression used
  71. // during matching or any form of evaluation
  72. regex_t CompiledExpr;
  73. // True if CompiledExpr is valid.
  74. bool IsValid;
  75. };
  76. /*
  77. ** RegularExpressionClass Implementation
  78. */
  79. RegularExpressionClass::RegularExpressionClass (const char *expression)
  80. : Data(0)
  81. {
  82. // Allocate our private members.
  83. Data = new DataStruct;
  84. assert(Data);
  85. // Compile the expression if we were given one.
  86. if (expression)
  87. Compile(expression);
  88. }
  89. RegularExpressionClass::RegularExpressionClass (const RegularExpressionClass &copy)
  90. : Data(0)
  91. {
  92. // Allocate our private members.
  93. Data = new DataStruct;
  94. assert(Data);
  95. // Compile the expression if the given object had one.
  96. if (copy.Is_Valid())
  97. {
  98. Compile(copy.Data->ExprString);
  99. assert(Is_Valid());
  100. }
  101. }
  102. RegularExpressionClass::~RegularExpressionClass (void)
  103. {
  104. delete Data;
  105. Data = 0;
  106. }
  107. bool RegularExpressionClass::Compile (const char *expression)
  108. {
  109. assert(Data);
  110. assert(expression);
  111. // Clear any existing expression data. This makes it safe to
  112. // call Compile() twice on one object.
  113. Data->ClearExpression();
  114. // Set the regular expression module to the syntax that we
  115. // would like to use.
  116. reg_syntax_t old_syntax = re_set_syntax(OUR_SYNTAX_OPTIONS);
  117. // Compile the given expression.
  118. const char *error_str = re_compile_pattern(expression,
  119. strlen(expression), &Data->CompiledExpr);
  120. // Restore the old syntax setting.
  121. re_set_syntax(old_syntax);
  122. // If no error string was returned, the expression was good!
  123. if (error_str == 0)
  124. {
  125. Data->IsValid = true;
  126. Data->ExprString = expression;
  127. return true;
  128. }
  129. return false;
  130. }
  131. bool RegularExpressionClass::Is_Valid (void) const
  132. {
  133. assert(Data);
  134. return Data->IsValid;
  135. }
  136. bool RegularExpressionClass::Match (const char *string) const
  137. {
  138. assert(Data);
  139. // If we have no valid compiled expression, we can't match Jack.
  140. if (!Data->IsValid)
  141. return false;
  142. // Set the regular expression module to the syntax that we
  143. // would like to use.
  144. reg_syntax_t old_syntax = re_set_syntax(OUR_SYNTAX_OPTIONS);
  145. // Try to match the given string with our regular expression.
  146. int retval = re_match(&Data->CompiledExpr, string, strlen(string), 0, 0);
  147. // Restore the old syntax setting.
  148. re_set_syntax(old_syntax);
  149. // -1 means no match, -2 means internal gnu_regex lib error, otherwise
  150. // re_match returned the number of characters matched. A 0 character
  151. // match is valid, and distinctly different than no match at all.
  152. if (retval < 0)
  153. return false;
  154. // The given string matched our regular expression!
  155. return true;
  156. }
  157. /*
  158. ** Operators
  159. */
  160. RegularExpressionClass & RegularExpressionClass::operator = (const RegularExpressionClass &rhs)
  161. {
  162. // Check for assignment to self.
  163. if (*this == rhs)
  164. return *this;
  165. // Assign that object to this one.
  166. assert(rhs.Data);
  167. Compile(rhs.Data->ExprString);
  168. assert(Is_Valid());
  169. // Return this object.
  170. return *this;
  171. }
  172. bool RegularExpressionClass::operator == (const RegularExpressionClass &rhs) const
  173. {
  174. // Two RegularExpressionClass objects are equivalent if they both
  175. // have the same validity state, and if that state is 'true' both
  176. // of their expressions are the same.
  177. // Check validity states for equality.
  178. if (Is_Valid() != rhs.Is_Valid())
  179. return false;
  180. // If they're valid, check their expressions.
  181. if (Is_Valid())
  182. {
  183. // The objects are not equivalent if their expression strings
  184. // don't match.
  185. if (Data->ExprString != rhs.Data->ExprString)
  186. return false;
  187. }
  188. return true;
  189. }
  190. inline bool RegularExpressionClass::operator != (const RegularExpressionClass &rhs) const
  191. {
  192. return !(*this == rhs);
  193. }