Просмотр исходного кода

Merge pull request #2993 from leezh/nrex-v0.1

updated the RegEx library nrex to v0.1
Juan Linietsky 9 лет назад
Родитель
Коммит
afb895f197
6 измененных файлов с 111 добавлено и 48 удалено
  1. 3 2
      doc/base/classes.xml
  2. 3 1
      drivers/nrex/README.md
  3. 67 34
      drivers/nrex/nrex.cpp
  4. 34 7
      drivers/nrex/nrex.hpp
  5. 3 3
      drivers/nrex/regex.cpp
  6. 1 1
      drivers/nrex/regex.h

+ 3 - 2
doc/base/classes.xml

@@ -26960,7 +26960,7 @@ This method controls whether the position between two cached points is interpola
 		Lazy (non-greedy) quantifiers [code]*?[/code]
 		Beginning [code]^[/code] and end [code]$[/code] anchors
 		Alternation [code]|[/code]
-		Backreferences [code]\1[/code] to [code]\9[/code]
+		Backreferences [code]\1[/code] and [code]\g{1}[/code]
 		POSIX character classes [code][[:alnum:]][/code]
 		Lookahead [code](?=)[/code], [code](?!)[/code] and lookbehind [code](?<=)[/code], [code](?<!)[/code]
 		ASCII [code]\xFF[/code] and Unicode [code]\uFFFF[/code] code points (in a style similar to Python)
@@ -26972,9 +26972,10 @@ This method controls whether the position between two cached points is interpola
 			</return>
 			<argument index="0" name="pattern" type="String">
 			</argument>
-			<argument index="1" name="expanded" type="bool" default="true">
+			<argument index="1" name="capture" type="int" default="9">
 			</argument>
 			<description>
+            Compiles and assigns the regular expression pattern to use. The number of capturing groups can be limited, or left unlimited by passing a negative value.
 			</description>
 		</method>
 		<method name="find" qualifiers="const">

+ 3 - 1
drivers/nrex/README.md

@@ -1,5 +1,7 @@
 # NREX: Node RegEx
 
+Version 0.1
+
 Small node-based regular expression library. It only does text pattern
 matching, not replacement. To use add the files `nrex.hpp`, `nrex.cpp`
 and `nrex_config.h` to your project and follow the example:
@@ -32,7 +34,7 @@ Currently supported features:
  * Unicode `\uFFFF` code points
  * Positive `(?=)` and negative `(?!)` lookahead
  * Positive `(?<=)` and negative `(?<!)` lookbehind (fixed length and no alternations)
- * Backreferences `\1` to `\9` (with option to expand to `\99`)
+ * Backreferences `\1` and `\g{1}` (limited by default to 9 - can be unlimited)
 
 ## License
 

+ 67 - 34
drivers/nrex/nrex.cpp

@@ -1,4 +1,5 @@
 //  NREX: Node RegEx
+//  Version 0.1
 //
 //  Copyright (c) 2015, Zher Huei Lee
 //  All rights reserved.
@@ -299,6 +300,10 @@ struct nrex_node_group : public nrex_node
             {
                 length = 1;
             }
+            if (mode == LookAhead || mode == LookBehind)
+            {
+                quantifiable = false;
+            }
         }
 
         virtual ~nrex_node_group()
@@ -322,6 +327,10 @@ struct nrex_node_group : public nrex_node
                 int offset = 0;
                 if (mode == LookBehind)
                 {
+                    if (pos < length)
+                    {
+                        return -1;
+                    }
                     offset = length;
                 }
                 int res = childset[i]->test(s, pos - offset);
@@ -450,7 +459,7 @@ struct nrex_node_char : public nrex_node
 
         int test(nrex_search* s, int pos) const
         {
-            if (s->end == pos || s->at(pos) != ch)
+            if (s->end <= pos || 0 > pos || s->at(pos) != ch)
             {
                 return -1;
             }
@@ -473,7 +482,7 @@ struct nrex_node_range : public nrex_node
 
         int test(nrex_search* s, int pos) const
         {
-            if (s->end == pos)
+            if (s->end <= pos || 0 > pos)
             {
                 return -1;
             }
@@ -555,7 +564,7 @@ struct nrex_node_class : public nrex_node
 
         int test(nrex_search* s, int pos) const
         {
-            if (s->end == pos)
+            if (s->end <= pos || 0 > pos)
             {
                 return -1;
             }
@@ -727,7 +736,7 @@ struct nrex_node_shorthand : public nrex_node
 
         int test(nrex_search* s, int pos) const
         {
-            if (s->end == pos)
+            if (s->end <= pos || 0 > pos)
             {
                 return -1;
             }
@@ -811,16 +820,12 @@ struct nrex_node_quantifier : public nrex_node
 
         int test(nrex_search* s, int pos) const
         {
-            return test_step(s, pos, 1);
+            return test_step(s, pos, 0, pos);
         }
 
-        int test_step(nrex_search* s, int pos, int level) const
+        int test_step(nrex_search* s, int pos, int level, int start) const
         {
-            if (max == 0)
-            {
-                return pos;
-            }
-            if ((max >= 1 && level > max) || pos > s->end)
+            if (pos > s->end)
             {
                 return -1;
             }
@@ -840,14 +845,26 @@ struct nrex_node_quantifier : public nrex_node
                     return res;
                 }
             }
-            int res = child->test(s, pos);
-            if (s->complete)
+            if (max >= 0 && level > max)
             {
-                return res;
+                return -1;
+            }
+            if (level > 1 && level > min + 1 && pos == start)
+            {
+                return -1;
+            }
+            int res = pos;
+            if (level >= 1)
+            {
+                res = child->test(s, pos);
+                if (s->complete)
+                {
+                    return res;
+                }
             }
             if (res >= 0)
             {
-                int res_step = test_step(s, res, level + 1);
+                int res_step = test_step(s, res, level + 1, start);
                 if (res_step >= 0)
                 {
                     return res_step;
@@ -983,6 +1000,13 @@ nrex::nrex()
 {
 }
 
+nrex::nrex(const nrex_char* pattern, int captures)
+    : _capturing(0)
+    , _root(NULL)
+{
+    compile(pattern, captures);
+}
+
 nrex::~nrex()
 {
     if (_root)
@@ -1008,10 +1032,14 @@ void nrex::reset()
 
 int nrex::capture_size() const
 {
-    return _capturing + 1;
+    if (_root)
+    {
+        return _capturing + 1;
+    }
+    return 0;
 }
 
-bool nrex::compile(const nrex_char* pattern, bool extended)
+bool nrex::compile(const nrex_char* pattern, int captures)
 {
     reset();
     nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing));
@@ -1053,7 +1081,7 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
                     NREX_COMPILE_ERROR("unrecognised qualifier for group");
                 }
             }
-            else if ((!extended && _capturing < 9) || (extended && _capturing < 99))
+            else if (captures >= 0 && _capturing < captures)
             {
                 nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing));
                 stack.top()->add_child(group);
@@ -1190,15 +1218,6 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
         }
         else if (nrex_is_quantifier(c[0]))
         {
-            if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
-            {
-                if (c[0] == '{')
-                {
-                    stack.top()->add_child(NREX_NEW(nrex_node_char('{')));
-                    continue;
-                }
-                NREX_COMPILE_ERROR("element not quantifiable");
-            }
             int min = 0;
             int max = -1;
             bool valid_quantifier = true;
@@ -1270,6 +1289,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
             }
             if (valid_quantifier)
             {
+                if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
+                {
+                    NREX_COMPILE_ERROR("element not quantifiable");
+                }
                 nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max));
                 if (min == max)
                 {
@@ -1323,20 +1346,26 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
                 stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
                 ++c;
             }
-            else if ('1' <= c[1] && c[1] <= '9')
+            else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{'))
             {
                 int ref = 0;
-                if (extended && '0' <= c[2] && c[2] <= '9')
+                bool unclosed = false;
+                if (c[1] == 'g')
                 {
-                    ref = int(c[1] - '0') * 10 + int(c[2] - '0');
+                    unclosed = true;
                     c = &c[2];
                 }
-                else
+                while ('0' <= c[1] && c[1] <= '9')
                 {
-                    ref = int(c[1] - '0');
+                    ref = ref * 10 + int(c[1] - '0');
                     ++c;
                 }
-                if (ref > _capturing)
+                if (c[1] == '}')
+                {
+                    unclosed = false;
+                    ++c;
+                }
+                if (ref > _capturing || ref <= 0 || unclosed)
                 {
                     NREX_COMPILE_ERROR("backreference to non-existent capture");
                 }
@@ -1377,6 +1406,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
 
 bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const
 {
+    if (!_root)
+    {
+        return false;
+    }
     nrex_search s(str, captures);
     if (end >= offset)
     {
@@ -1386,7 +1419,7 @@ bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int en
     {
         s.end = NREX_STRLEN(str);
     }
-    for (int i = offset; i < s.end; ++i)
+    for (int i = offset; i <= s.end; ++i)
     {
         for (int c = 0; c <= _capturing; ++c)
         {

+ 34 - 7
drivers/nrex/nrex.hpp

@@ -1,4 +1,5 @@
 //  NREX: Node RegEx
+//  Version 0.1
 //
 //  Copyright (c) 2015, Zher Huei Lee
 //  All rights reserved.
@@ -59,7 +60,32 @@ class nrex
         int _capturing;
         nrex_node* _root;
     public:
+
+        /*!
+         * \brief Initialises an empty regex container
+         */
         nrex();
+
+        /*!
+         * \brief Initialises and compiles the regex pattern
+         *
+         * This calls nrex::compile() with the same arguments. To check whether
+         * the compilation was successful, use nrex::valid().
+         *
+         * If the NREX_THROW_ERROR was defined it would automatically throw a
+         * runtime error nrex_compile_error if it encounters a problem when
+         * parsing the pattern.
+         *
+         * \param pattern   The regex pattern
+         * \param captures  The maximum number of capture groups to allow. Any
+         *                  extra would be converted to non-capturing groups.
+         *                  If negative, no limit would be imposed. Defaults
+         *                  to 9.
+         *
+         * \see nrex::compile()
+         */
+        nrex(const nrex_char* pattern, int captures = 9);
+
         ~nrex();
 
         /*!
@@ -78,9 +104,9 @@ class nrex
          *
          * This is used to provide the array size of the captures needed for
          * nrex::match() to work. The size is actually the number of capture
-         * groups + one for the matching of the entire pattern. The result is
-         * always capped at 10 or 100, depending on the extend option given in
-         * nrex::compile() (default 10).
+         * groups + one for the matching of the entire pattern. This can be
+         * capped using the extra argument given in nrex::compile()
+         * (default 10).
          *
          * \return The number of captures
          */
@@ -97,12 +123,13 @@ class nrex
          * parsing the pattern.
          *
          * \param pattern   The regex pattern
-         * \param extended  If true, raises the limit on number of capture
-         *                  groups and back-references to 99. Otherwise limited
-         *                  to 9. Defaults to false.
+         * \param captures  The maximum number of capture groups to allow. Any
+         *                  extra would be converted to non-capturing groups.
+         *                  If negative, no limit would be imposed. Defaults
+         *                  to 9.
          * \return True if the pattern was successfully compiled
          */
-        bool compile(const nrex_char* pattern, bool extended = false);
+        bool compile(const nrex_char* pattern, int captures = 9);
 
         /*!
          * \brief Uses the pattern to search through the provided string

+ 3 - 3
drivers/nrex/regex.cpp

@@ -15,7 +15,7 @@
 
 void RegEx::_bind_methods() {
 
-	ObjectTypeDB::bind_method(_MD("compile","pattern", "expanded"),&RegEx::compile, DEFVAL(true));
+	ObjectTypeDB::bind_method(_MD("compile","pattern", "capture"),&RegEx::compile, DEFVAL(9));
 	ObjectTypeDB::bind_method(_MD("find","text","start","end"),&RegEx::find, DEFVAL(0), DEFVAL(-1));
 	ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear);
 	ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid);
@@ -68,11 +68,11 @@ String RegEx::get_capture(int capture) const {
 
 }
 
-Error RegEx::compile(const String& p_pattern, bool expanded) {
+Error RegEx::compile(const String& p_pattern, int capture) {
 
 	clear();
 
-	exp.compile(p_pattern.c_str(), expanded);
+	exp.compile(p_pattern.c_str(), capture);
 
 	ERR_FAIL_COND_V( !exp.valid(), FAILED );
 

+ 1 - 1
drivers/nrex/regex.h

@@ -36,7 +36,7 @@ public:
 	bool is_valid() const;
 	int get_capture_count() const;
 	String get_capture(int capture) const;
-	Error compile(const String& p_pattern, bool expanded = false);
+	Error compile(const String& p_pattern, int capture = 9);
 	int find(const String& p_text, int p_start = 0, int p_end = -1) const;
 
 	RegEx();