2
0
Эх сурвалжийг харах

RegEx re-implemented as a module

Re-wrote nrex as a module using godot-specific parts and new
features:

 * Added string substitutions.
 * Named groups are now supported.
 * Removed use of mutable variables in RegEx. RegExMatch is returned
   instead.
Zher Huei Lee 9 жил өмнө
parent
commit
439d439321

+ 2 - 13
bin/tests/test_string.cpp

@@ -31,7 +31,6 @@
 //#include "math_funcs.h"
 #include <stdio.h>
 #include "os/os.h"
-#include "drivers/nrex/regex.h"
 #include "core/io/ip_address.h"
 
 #include "test_string.h"
@@ -462,18 +461,8 @@ bool test_25() {
 
 bool test_26() {
 
-	OS::get_singleton()->print("\n\nTest 26: RegEx\n");
-	RegEx regexp("(.*):(.*)");
-
-	int res = regexp.find("name:password");
-	printf("\tmatch: %s\n", (res>=0)?"true":"false");
-
-	printf("\t%i captures:\n", regexp.get_capture_count());
-	for (int i = 0; i<regexp.get_capture_count(); i++)
-	{
-		printf("%ls\n", regexp.get_capture(i).c_str());
-	}
-	return (res>=0);
+	//TODO: Do replacement RegEx test
+	return true;
 };
 
 struct test_27_data {

+ 99 - 21
doc/base/classes.xml

@@ -32514,6 +32514,7 @@
 		would be read as [code]"(?:\\.|[^"])*"[/code]
 		Currently supported features:
 		* Capturing [code]()[/code] and non-capturing [code](?:)[/code] groups
+		* Named capturing groups [code](?P&lt;name&gt;)[/code]
 		* Any character [code].[/code]
 		* Shorthand character classes [code]\w \W \s \S \d \D[/code]
 		* User-defined character classes such as [code][A-Za-z][/code]
@@ -32522,7 +32523,7 @@
 		* Lazy (non-greedy) quantifiers [code]*?[/code]
 		* Beginning [code]^[/code] and end [code]$[/code] anchors
 		* Alternation [code]|[/code]
-		* Backreferences [code]\1[/code] and [code]\g{1}[/code]
+		* Backreferences [code]\1[/code], [code]\g{1}[/code], and [code]\g&lt;name&gt;[/code]
 		* POSIX character classes [code][[:alnum:]][/code]
 		* Lookahead [code](?=)[/code], [code](?!)[/code] and lookbehind [code](?&lt;=)[/code], [code](?&lt;!)[/code]
 		* ASCII [code]\xFF[/code] and Unicode [code]\uFFFF[/code] code points (in a style similar to Python)
@@ -32531,7 +32532,7 @@
 	<methods>
 		<method name="clear">
 			<description>
-				This method resets the state of the object, as it was freshly created. Namely, it unassigns the regular expression of this object, and forgets all captures made by the last [method find].
+				This method resets the state of the object, as it was freshly created. Namely, it unassigns the regular expression of this object.
 			</description>
 		</method>
 		<method name="compile">
@@ -32539,15 +32540,41 @@
 			</return>
 			<argument index="0" name="pattern" type="String">
 			</argument>
-			<argument index="1" name="capture" type="int" default="9">
-			</argument>
 			<description>
-				Compiles and assign the regular expression pattern to use. The limit on the number of capturing groups can be specified or made unlimited if negative.
+				Compiles and assigns the regular expression pattern to use.
 			</description>
 		</method>
-		<method name="find" qualifiers="const">
+		<method name="get_group_count" qualifiers="const">
 			<return type="int">
 			</return>
+			<description>
+				Returns the number of numeric capturing groups.
+			</description>
+		</method>
+		<method name="get_names" qualifiers="const">
+			<return type="Array">
+			</return>
+			<description>
+				Returns an array of names of named capturing groups.
+			</description>
+		</method>
+		<method name="get_pattern" qualifiers="const">
+			<return type="String">
+			</return>
+			<description>
+				Returns the pattern string that was used to compile this regular expression.
+			</description>
+		</method>
+		<method name="is_valid" qualifiers="const">
+			<return type="bool">
+			</return>
+			<description>
+				Returns whether this object has a valid regular expression assigned.
+			</description>
+		</method>
+		<method name="search" qualifiers="const">
+			<return type="Object">
+			</return>
 			<argument index="0" name="text" type="String">
 			</argument>
 			<argument index="1" name="start" type="int" default="0">
@@ -32555,45 +32582,96 @@
 			<argument index="2" name="end" type="int" default="-1">
 			</argument>
 			<description>
-				This method tries to find the pattern within the string, and returns the position where it was found. It also stores any capturing group (see [method get_capture]) for further retrieval.
+				Searches the text for the compiled pattern. Returns a [RegExMatch] container of the first matching result if found, otherwise null. The starting point of the search can be specified without moving the string start anchor.
 			</description>
 		</method>
-		<method name="get_capture" qualifiers="const">
+		<method name="sub" qualifiers="const">
 			<return type="String">
 			</return>
-			<argument index="0" name="capture" type="int">
+			<argument index="0" name="text" type="String">
+			</argument>
+			<argument index="1" name="template" type="String">
+			</argument>
+			<argument index="2" name="start" type="int" default="0">
+			</argument>
+			<argument index="3" name="end" type="int" default="-1">
 			</argument>
 			<description>
-				Returns a captured group. A captured group is the part of a string that matches a part of the pattern delimited by parentheses (unless they are non-capturing parentheses [i](?:)[/i]).
+				Searches the specified text for the compiled pattern and returns the text with the result replaced. Escapes and backreferences such as [code]\1[/code] and [code]\g&lt;name&gt;[/code] are automatically expanded and resolved. If no match is found, the unmodified text is returned instead.
 			</description>
 		</method>
-		<method name="get_capture_count" qualifiers="const">
-			<return type="int">
+	</methods>
+	<constants>
+	</constants>
+</class>
+<class name="RegExMatch" inherits="Reference" category="Core">
+	<brief_description>
+	</brief_description>
+	<description>
+	</description>
+	<methods>
+		<method name="expand" qualifiers="const">
+			<return type="String">
 			</return>
+			<argument index="0" name="template" type="String">
+			</argument>
 			<description>
-				Returns the number of capturing groups. A captured group is the part of a string that matches a part of the pattern delimited by parentheses (unless they are non-capturing parentheses [i](?:)[/i]).
+				Using results from the search, returns the specified string with escapes and backreferences such as [code]\1[/code] and [code]\g&lt;name&gt;[/code] expanded and resolved.
 			</description>
 		</method>
-		<method name="get_capture_start" qualifiers="const">
+		<method name="get_end" qualifiers="const">
 			<return type="int">
 			</return>
-			<argument index="0" name="capture" type="int">
+			<argument index="0" name="name" type="Variant" default="0">
 			</argument>
 			<description>
+				Returns the end position of the match in the string. An integer can be specified for numeric groups or a string for named groups. Returns -1 if that group wasn't found or doesn't exist. Defaults to 0 (whole pattern).
 			</description>
 		</method>
-		<method name="get_captures" qualifiers="const">
-			<return type="StringArray">
+		<method name="get_group_array" qualifiers="const">
+			<return type="Array">
 			</return>
 			<description>
-				Return a list of all the captures made by the regular expression.
+				Returns an array of the results of the numeric groups.
 			</description>
 		</method>
-		<method name="is_valid" qualifiers="const">
-			<return type="bool">
+		<method name="get_group_count" qualifiers="const">
+			<return type="int">
 			</return>
 			<description>
-				Returns whether this object has a valid regular expression assigned.
+				Returns the number of numeric capturing groups.
+			</description>
+		</method>
+		<method name="get_name_dict" qualifiers="const">
+			<return type="Dictionary">
+			</return>
+			<description>
+				Returns a dictionary containing the named capturing groups and their results.
+			</description>
+		</method>
+		<method name="get_names" qualifiers="const">
+			<return type="Array">
+			</return>
+			<description>
+				Returns an array of names of named capturing groups.
+			</description>
+		</method>
+		<method name="get_start" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="name" type="Variant" default="0">
+			</argument>
+			<description>
+				Returns the starting position of the match in the string. An integer can be specified for numeric groups or a string for named groups. Returns -1 if that group wasn't found or doesn't exist. Defaults to 0 (whole pattern).
+			</description>
+		</method>
+		<method name="get_string" qualifiers="const">
+			<return type="String">
+			</return>
+			<argument index="0" name="name" type="Variant" default="0">
+			</argument>
+			<description>
+				Returns the result of the match in the string. An integer can be specified for numeric groups or a string for named groups. Returns an empty string if that group wasn't found or doesn't exist. Defaults to 0 (whole pattern).
 			</description>
 		</method>
 	</methods>

+ 0 - 1
drivers/SCsub

@@ -25,7 +25,6 @@ SConscript('gl_context/SCsub');
 
 # Core dependencies
 SConscript("png/SCsub");
-SConscript("nrex/SCsub");
 
 # Tools override
 # FIXME: Should likely be integrated in the tools/ codebase

+ 0 - 75
drivers/nrex/README.md

@@ -1,75 +0,0 @@
-# NREX: Node RegEx
-
-[![Build Status](https://travis-ci.org/leezh/nrex.svg?branch=master)](https://travis-ci.org/leezh/nrex)
-
-** Version 0.2 **
-
-Small node-based regular expression library. It only does text pattern
-matchhing, not replacement. To use add the files `nrex.hpp`, `nrex.cpp`
-and `nrex_config.h` to your project and follow the example:
-
-	nrex regex;
-	regex.compile("^(fo+)bar$");
-
-	nrex_result captures[regex.capture_size()];
-	if (regex.match("foobar", captures))
-	{
-		std::cout << captures[0].start << std::endl;
-		std::cout << captures[0].length << std::endl;
-	}
-
-More details about its use is documented in `nrex.hpp`
-
-Currently supported features:
- * Capturing `()` and non-capturing `(?:)` groups
- * Any character `.` (includes newlines)
- * Shorthand caracter classes `\w\W\s\S\d\D`
- * POSIX character classes such as `[[:alnum:]]`
- * Bracket expressions such as `[A-Za-z]`
- * Simple quantifiers `?`, `*` and `+`
- * Range quantifiers `{0,1}`
- * Lazy (non-greedy) quantifiers `*?`
- * Begining `^` and end `$` anchors
- * Word boundaries `\b`
- * Alternation `|`
- * ASCII `\xFF` code points
- * Unicode `\uFFFF` code points
- * Positive `(?=)` and negative `(?!)` lookahead
- * Positive `(?<=)` and negative `(?<!)` lookbehind (fixed length and no alternations)
- * Backreferences `\1` and `\g{1}` (limited by default to 9 - can be unlimited)
-
-## License
-
-Copyright (c) 2015-2016, Zher Huei Lee
-All rights reserved.
-
-This software is provided 'as-is', without any express or implied
-warranty.  In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you must not
-    claim that you wrote the original software. If you use this software
-    in a product, an acknowledgment in the product documentation would
-    be appreciated but is not required.
-    
- 2. Altered source versions must be plainly marked as such, and must not
-    be misrepresented as being the original software.
-    
- 3. This notice may not be removed or altered from any source
-    distribution.
-
-
-# Changes
-
-## Version 0.2 (2016-08-04)
- * Fixed capturing groups matching to invalid results
- * Fixed parents of recursive quantifiers not expanding properly
- * Fixed LookAhead sometimes adding to result
- * More verbose unit testing
-
-## Version 0.1 (2015-12-04)
- * Initial release

+ 0 - 1496
drivers/nrex/nrex.cpp

@@ -1,1496 +0,0 @@
-//  NREX: Node RegEx
-//  Version 0.2
-//
-//  Copyright (c) 2015-2016, Zher Huei Lee
-//  All rights reserved.
-//
-//  This software is provided 'as-is', without any express or implied
-//  warranty.  In no event will the authors be held liable for any damages
-//  arising from the use of this software.
-//
-//  Permission is granted to anyone to use this software for any purpose,
-//  including commercial applications, and to alter it and redistribute it
-//  freely, subject to the following restrictions:
-//
-//   1. The origin of this software must not be misrepresented; you must not
-//      claim that you wrote the original software. If you use this software
-//      in a product, an acknowledgment in the product documentation would
-//      be appreciated but is not required.
-//
-//   2. Altered source versions must be plainly marked as such, and must not
-//      be misrepresented as being the original software.
-//
-//   3. This notice may not be removed or altered from any source
-//      distribution.
-//
-
-#include "nrex.hpp"
-
-#ifdef NREX_UNICODE
-#include <wctype.h>
-#include <wchar.h>
-#define NREX_ISALPHANUM iswalnum
-#define NREX_ISSPACE iswspace
-#define NREX_STRLEN wcslen
-#else
-#include <ctype.h>
-#include <string.h>
-#define NREX_ISALPHANUM isalnum
-#define NREX_ISSPACE isspace
-#define NREX_STRLEN strlen
-#endif
-
-#ifdef NREX_THROW_ERROR
-#define NREX_COMPILE_ERROR(M) throw nrex_compile_error(M)
-#else
-#define NREX_COMPILE_ERROR(M) reset(); return false
-#endif
-
-#ifndef NREX_NEW
-#define NREX_NEW(X) new X
-#define NREX_NEW_ARRAY(X, N) new X[N]
-#define NREX_DELETE(X) delete X
-#define NREX_DELETE_ARRAY(X) delete[] X
-#endif
-
-template<typename T>
-class nrex_array
-{
-    private:
-        T* _data;
-        unsigned int _reserved;
-        unsigned int _size;
-    public:
-        nrex_array()
-            : _data(NREX_NEW_ARRAY(T, 2))
-            , _reserved(2)
-            , _size(0)
-        {
-        }
-
-        nrex_array(unsigned int reserved)
-            : _data(NREX_NEW_ARRAY(T, reserved ? reserved : 1))
-            , _reserved(reserved ? reserved : 1)
-            , _size(0)
-        {
-        }
-
-        ~nrex_array()
-        {
-            NREX_DELETE_ARRAY(_data);
-        }
-
-        unsigned int size() const
-        {
-            return _size;
-        }
-
-        void reserve(unsigned int size)
-        {
-            if (size < _size) {
-                size = _size;
-            }
-            if (size == 0) {
-                size = 1;
-            }
-            T* old = _data;
-            _data = NREX_NEW_ARRAY(T, size);
-            _reserved = size;
-            for (unsigned int i = 0; i < _size; ++i)
-            {
-                _data[i] = old[i];
-            }
-            NREX_DELETE_ARRAY(old);
-        }
-
-        void push(T item)
-        {
-            if (_size == _reserved)
-            {
-                reserve(_reserved * 2);
-            }
-            _data[_size] = item;
-            _size++;
-        }
-
-        const T& top() const
-        {
-            return _data[_size - 1];
-        }
-
-        const T& operator[] (unsigned int i) const
-        {
-            return _data[i];
-        }
-
-        void pop()
-        {
-            if (_size > 0)
-            {
-                --_size;
-            }
-        }
-};
-
-static int nrex_parse_hex(nrex_char c)
-{
-    if ('0' <= c && c <= '9')
-    {
-        return int(c - '0');
-    }
-    else if ('a' <= c && c <= 'f')
-    {
-        return int(c - 'a') + 10;
-    }
-    else if ('A' <= c && c <= 'F')
-    {
-        return int(c - 'A') + 10;
-    }
-    return -1;
-}
-
-static nrex_char nrex_unescape(const nrex_char*& c)
-{
-    switch (c[1])
-    {
-        case '0': ++c; return '\0';
-        case 'a': ++c; return '\a';
-        case 'e': ++c; return '\e';
-        case 'f': ++c; return '\f';
-        case 'n': ++c; return '\n';
-        case 'r': ++c; return '\r';
-        case 't': ++c; return '\t';
-        case 'v': ++c; return '\v';
-        case 'b': ++c; return '\b';
-        case 'x':
-        {
-            int point = 0;
-            for (int i = 2; i <= 3; ++i)
-            {
-                int res = nrex_parse_hex(c[i]);
-                if (res == -1)
-                {
-                    return '\0';
-                }
-                point = (point << 4) + res;
-            }
-            c = &c[3];
-            return nrex_char(point);
-        }
-        case 'u':
-        {
-            int point = 0;
-            for (int i = 2; i <= 5; ++i)
-            {
-                int res = nrex_parse_hex(c[i]);
-                if (res == -1)
-                {
-                    return '\0';
-                }
-                point = (point << 4) + res;
-            }
-            c = &c[5];
-            return nrex_char(point);
-        }
-    }
-    return (++c)[0];
-}
-
-struct nrex_search
-{
-        const nrex_char* str;
-        nrex_result* captures;
-        int end;
-        bool complete;
-        nrex_array<int> lookahead_pos;
-
-        nrex_char at(int pos)
-        {
-            return str[pos];
-        }
-
-        nrex_search(const nrex_char* str, nrex_result* captures, int lookahead)
-            : str(str)
-            , captures(captures)
-            , end(0)
-            , lookahead_pos(lookahead)
-        {
-        }
-};
-
-struct nrex_node
-{
-        nrex_node* next;
-        nrex_node* previous;
-        nrex_node* parent;
-        bool quantifiable;
-        int length;
-
-        nrex_node(bool quantify = false)
-            : next(NULL)
-            , previous(NULL)
-            , parent(NULL)
-            , quantifiable(quantify)
-            , length(-1)
-        {
-        }
-
-        virtual ~nrex_node()
-        {
-            if (next)
-            {
-                NREX_DELETE(next);
-            }
-        }
-
-        virtual int test(nrex_search* s, int pos) const
-        {
-            return next ? next->test(s, pos) : -1;
-        }
-
-        virtual int test_parent(nrex_search* s, int pos) const
-        {
-            if (next)
-            {
-                pos = next->test(s, pos);
-            }
-            if (pos >= 0)
-            {
-                s->complete = true;
-            }
-            if (parent && pos >= 0)
-            {
-                pos = parent->test_parent(s, pos);
-            }
-            if (pos < 0)
-            {
-                s->complete = false;
-            }
-            return pos;
-        }
-
-        void increment_length(int amount, bool subtract = false)
-        {
-            if (amount >= 0 && length >= 0)
-            {
-                if (!subtract)
-                {
-                    length += amount;
-                }
-                else
-                {
-                    length -= amount;
-                }
-            }
-            else
-            {
-                length = -1;
-            }
-            if (parent)
-            {
-                parent->increment_length(amount, subtract);
-            }
-        }
-};
-
-enum nrex_group_type
-{
-    nrex_group_capture,
-    nrex_group_non_capture,
-    nrex_group_bracket,
-    nrex_group_look_ahead,
-    nrex_group_look_behind,
-};
-
-struct nrex_node_group : public nrex_node
-{
-        nrex_group_type type;
-        int id;
-        bool negate;
-        nrex_array<nrex_node*> childset;
-        nrex_node* back;
-
-        nrex_node_group(nrex_group_type type, int id = 0)
-            : nrex_node(true)
-            , type(type)
-            , id(id)
-            , negate(false)
-            , back(NULL)
-        {
-            if (type != nrex_group_bracket)
-            {
-                length = 0;
-            }
-            else
-            {
-                length = 1;
-            }
-            if (type == nrex_group_look_ahead || type == nrex_group_look_behind)
-            {
-                quantifiable = false;
-            }
-        }
-
-        virtual ~nrex_node_group()
-        {
-            for (unsigned int i = 0; i < childset.size(); ++i)
-            {
-                NREX_DELETE(childset[i]);
-            }
-
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            int old_start;
-            if (type == nrex_group_capture)
-            {
-                old_start = s->captures[id].start;
-                s->captures[id].start = pos;
-            }
-            for (unsigned int i = 0; i < childset.size(); ++i)
-            {
-                s->complete = false;
-                int offset = 0;
-                if (type == nrex_group_look_behind)
-                {
-                    if (pos < length)
-                    {
-                        return -1;
-                    }
-                    offset = length;
-                }
-                if (type == nrex_group_look_ahead)
-                {
-                    s->lookahead_pos.push(pos);
-                }
-                int res = childset[i]->test(s, pos - offset);
-                if (type == nrex_group_look_ahead)
-                {
-                    s->lookahead_pos.pop();
-                }
-                if (s->complete)
-                {
-                    return res;
-                }
-                if (negate)
-                {
-                    if (res < 0)
-                    {
-                        res = pos + 1;
-                    }
-                    else
-                    {
-                        return -1;
-                    }
-                    if (i + 1 < childset.size())
-                    {
-                        continue;
-                    }
-                }
-                if (res >= 0)
-                {
-                    if (type == nrex_group_capture)
-                    {
-                        s->captures[id].length = res - pos;
-                    }
-                    else if (type == nrex_group_look_ahead || type == nrex_group_look_behind)
-                    {
-                        res = pos;
-                    }
-                    return next ? next->test(s, res) : res;
-                }
-            }
-            if (type == nrex_group_capture)
-            {
-                s->captures[id].start = old_start;
-            }
-            return -1;
-        }
-
-        virtual int test_parent(nrex_search* s, int pos) const
-        {
-            if (type == nrex_group_capture)
-            {
-                s->captures[id].length = pos - s->captures[id].start;
-            }
-            if (type == nrex_group_look_ahead)
-            {
-                pos = s->lookahead_pos[id];
-            }
-            return nrex_node::test_parent(s, pos);
-        }
-
-        void add_childset()
-        {
-            if (childset.size() > 0 && type != nrex_group_bracket)
-            {
-                length = -1;
-            }
-            back = NULL;
-        }
-
-        void add_child(nrex_node* node)
-        {
-            node->parent = this;
-            node->previous = back;
-            if (back && type != nrex_group_bracket)
-            {
-                back->next = node;
-            }
-            else
-            {
-                childset.push(node);
-            }
-            if (type != nrex_group_bracket)
-            {
-                increment_length(node->length);
-            }
-            back = node;
-        }
-
-        nrex_node* swap_back(nrex_node* node)
-        {
-            if (!back)
-            {
-                add_child(node);
-                return NULL;
-            }
-            nrex_node* old = back;
-            if (!old->previous)
-            {
-                childset.pop();
-            }
-            if (type != nrex_group_bracket)
-            {
-                increment_length(old->length, true);
-            }
-            back = old->previous;
-            add_child(node);
-            return old;
-        }
-
-        void pop_back()
-        {
-            if (back)
-            {
-                nrex_node* old = back;
-                if (!old->previous)
-                {
-                    childset.pop();
-                }
-                if (type != nrex_group_bracket)
-                {
-                    increment_length(old->length, true);
-                }
-                back = old->previous;
-                NREX_DELETE(old);
-            }
-        }
-};
-
-struct nrex_node_char : public nrex_node
-{
-        nrex_char ch;
-
-        nrex_node_char(nrex_char c)
-            : nrex_node(true)
-            , ch(c)
-        {
-            length = 1;
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            if (s->end <= pos || 0 > pos || s->at(pos) != ch)
-            {
-                return -1;
-            }
-            return next ? next->test(s, pos + 1) : pos + 1;
-        }
-};
-
-struct nrex_node_range : public nrex_node
-{
-        nrex_char start;
-        nrex_char end;
-
-        nrex_node_range(nrex_char s, nrex_char e)
-            : nrex_node(true)
-            , start(s)
-            , end(e)
-        {
-            length = 1;
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            if (s->end <= pos || 0 > pos)
-            {
-                return -1;
-            }
-            nrex_char c = s->at(pos);
-            if (c < start || end < c)
-            {
-                return -1;
-            }
-            return next ? next->test(s, pos + 1) : pos + 1;
-        }
-};
-
-enum nrex_class_type
-{
-    nrex_class_none,
-    nrex_class_alnum,
-    nrex_class_alpha,
-    nrex_class_blank,
-    nrex_class_cntrl,
-    nrex_class_digit,
-    nrex_class_graph,
-    nrex_class_lower,
-    nrex_class_print,
-    nrex_class_punct,
-    nrex_class_space,
-    nrex_class_upper,
-    nrex_class_xdigit,
-    nrex_class_word
-};
-
-static bool nrex_compare_class(const nrex_char** pos, const char* text)
-{
-    unsigned int i = 0;
-    for (i = 0; text[i] != '\0'; ++i)
-    {
-        if ((*pos)[i] != text[i])
-        {
-            return false;
-        }
-    }
-    if ((*pos)[i++] != ':' || (*pos)[i] != ']')
-    {
-        return false;
-    }
-    *pos = &(*pos)[i];
-    return true;
-}
-
-#define NREX_COMPARE_CLASS(POS, NAME) if (nrex_compare_class(POS, #NAME)) return nrex_class_ ## NAME
-
-static nrex_class_type nrex_parse_class(const nrex_char** pos)
-{
-    NREX_COMPARE_CLASS(pos, alnum);
-    NREX_COMPARE_CLASS(pos, alpha);
-    NREX_COMPARE_CLASS(pos, blank);
-    NREX_COMPARE_CLASS(pos, cntrl);
-    NREX_COMPARE_CLASS(pos, digit);
-    NREX_COMPARE_CLASS(pos, graph);
-    NREX_COMPARE_CLASS(pos, lower);
-    NREX_COMPARE_CLASS(pos, print);
-    NREX_COMPARE_CLASS(pos, punct);
-    NREX_COMPARE_CLASS(pos, space);
-    NREX_COMPARE_CLASS(pos, upper);
-    NREX_COMPARE_CLASS(pos, xdigit);
-    NREX_COMPARE_CLASS(pos, word);
-    return nrex_class_none;
-}
-
-struct nrex_node_class : public nrex_node
-{
-        nrex_class_type type;
-
-        nrex_node_class(nrex_class_type t)
-            : nrex_node(true)
-            , type(t)
-        {
-            length = 1;
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            if (s->end <= pos || 0 > pos)
-            {
-                return -1;
-            }
-            if (!test_class(s->at(pos)))
-            {
-                return -1;
-            }
-            return next ? next->test(s, pos + 1) : pos + 1;
-        }
-
-        bool test_class(nrex_char c) const
-        {
-            if ((0 <= c && c <= 0x1F) || c == 0x7F)
-            {
-                if (type == nrex_class_cntrl)
-                {
-                    return true;
-                }
-            }
-            else if (c < 0x7F)
-            {
-                if (type == nrex_class_print)
-                {
-                    return true;
-                }
-                else if (type == nrex_class_graph && c != ' ')
-                {
-                    return true;
-                }
-                else if ('0' <= c && c <= '9')
-                {
-                    switch (type)
-                    {
-                        case nrex_class_alnum:
-                        case nrex_class_digit:
-                        case nrex_class_xdigit:
-                        case nrex_class_word:
-                            return true;
-                        default:
-                            break;
-                    }
-                }
-                else if ('A' <= c && c <= 'Z')
-                {
-                    switch (type)
-                    {
-                        case nrex_class_alnum:
-                        case nrex_class_alpha:
-                        case nrex_class_upper:
-                        case nrex_class_word:
-                            return true;
-                        case nrex_class_xdigit:
-                            if (c <= 'F')
-                            {
-                                return true;
-                            }
-                        default:
-                            break;
-                    }
-                }
-                else if ('a' <= c && c <= 'z')
-                {
-                    switch (type)
-                    {
-                        case nrex_class_alnum:
-                        case nrex_class_alpha:
-                        case nrex_class_lower:
-                        case nrex_class_word:
-                            return true;
-                        case nrex_class_xdigit:
-                            if (c <= 'f')
-                            {
-                                return true;
-                            }
-                        default:
-                            break;
-                    }
-                }
-            }
-            switch (c)
-            {
-                case ' ':
-                case '\t':
-                    if (type == nrex_class_blank)
-                    {
-                        return true;
-                    }
-                case '\r':
-                case '\n':
-                case '\f':
-                    if (type == nrex_class_space)
-                    {
-                        return true;
-                    }
-                    break;
-                case '_':
-                    if (type == nrex_class_word)
-                    {
-                        return true;
-                    }
-                case ']':
-                case '[':
-                case '!':
-                case '"':
-                case '#':
-                case '$':
-                case '%':
-                case '&':
-                case '\'':
-                case '(':
-                case ')':
-                case '*':
-                case '+':
-                case ',':
-                case '.':
-                case '/':
-                case ':':
-                case ';':
-                case '<':
-                case '=':
-                case '>':
-                case '?':
-                case '@':
-                case '\\':
-                case '^':
-                case '`':
-                case '{':
-                case '|':
-                case '}':
-                case '~':
-                case '-':
-                    if (type == nrex_class_punct)
-                    {
-                        return true;
-                    }
-                    break;
-                default:
-                    break;
-            }
-            return false;
-        }
-};
-
-static bool nrex_is_shorthand(nrex_char repr)
-{
-    switch (repr)
-    {
-        case 'W':
-        case 'w':
-        case 'D':
-        case 'd':
-        case 'S':
-        case 's':
-            return true;
-    }
-    return false;
-}
-
-struct nrex_node_shorthand : public nrex_node
-{
-        nrex_char repr;
-
-        nrex_node_shorthand(nrex_char c)
-            : nrex_node(true)
-            , repr(c)
-        {
-            length = 1;
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            if (s->end <= pos || 0 > pos)
-            {
-                return -1;
-            }
-            bool found = false;
-            bool invert = false;
-            nrex_char c = s->at(pos);
-            switch (repr)
-            {
-                case '.':
-                    found = true;
-                    break;
-                case 'W':
-                    invert = true;
-                case 'w':
-                    if (c == '_' || NREX_ISALPHANUM(c))
-                    {
-                        found = true;
-                    }
-                    break;
-                case 'D':
-                    invert = true;
-                case 'd':
-                    if ('0' <= c && c <= '9')
-                    {
-                        found = true;
-                    }
-                    break;
-                case 'S':
-                    invert = true;
-                case 's':
-                    if (NREX_ISSPACE(c))
-                    {
-                        found = true;
-                    }
-                    break;
-            }
-            if (found == invert)
-            {
-                return -1;
-            }
-            return next ? next->test(s, pos + 1) : pos + 1;
-        }
-};
-
-static bool nrex_is_quantifier(nrex_char repr)
-{
-    switch (repr)
-    {
-        case '?':
-        case '*':
-        case '+':
-        case '{':
-            return true;
-    }
-    return false;
-}
-
-struct nrex_node_quantifier : public nrex_node
-{
-        int min;
-        int max;
-        bool greedy;
-        nrex_node* child;
-
-        nrex_node_quantifier(int min, int max)
-            : nrex_node()
-            , min(min)
-            , max(max)
-            , greedy(true)
-            , child(NULL)
-        {
-        }
-
-        virtual ~nrex_node_quantifier()
-        {
-            if (child)
-            {
-                NREX_DELETE(child);
-            }
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            return test_step(s, pos, 0, pos);
-        }
-
-        int test_step(nrex_search* s, int pos, int level, int start) const
-        {
-            if (pos > s->end)
-            {
-                return -1;
-            }
-            if (!greedy && level > min)
-            {
-                int res = pos;
-                if (next)
-                {
-                    res = next->test(s, res);
-                }
-                if (s->complete)
-                {
-                    return res;
-                }
-                if (res >= 0 && parent->test_parent(s, res) >= 0)
-                {
-                    return res;
-                }
-            }
-            if (max >= 0 && level > max)
-            {
-                return -1;
-            }
-            if (level > 1 && level > min + 1 && pos == start)
-            {
-                return -1;
-            }
-            int res = pos;
-            if (level >= 1)
-            {
-                res = child->test(s, pos);
-                if (s->complete)
-                {
-                    return res;
-                }
-            }
-            if (res >= 0)
-            {
-                int res_step = test_step(s, res, level + 1, start);
-                if (res_step >= 0)
-                {
-                    return res_step;
-                }
-                else if (greedy && level >= min)
-                {
-                    if (next)
-                    {
-                        res = next->test(s, res);
-                    }
-                    if (s->complete)
-                    {
-                        return res;
-                    }
-                    if (res >= 0 && parent->test_parent(s, res) >= 0)
-                    {
-                        return res;
-                    }
-                }
-            }
-            return -1;
-        }
-
-        virtual int test_parent(nrex_search* s, int pos) const
-        {
-            s->complete = false;
-            return pos;
-        }
-};
-
-struct nrex_node_anchor : public nrex_node
-{
-        bool end;
-
-        nrex_node_anchor(bool end)
-            : nrex_node()
-            , end(end)
-        {
-            length = 0;
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            if (!end && pos != 0)
-            {
-                return -1;
-            }
-            else if (end && pos != s->end)
-            {
-                return -1;
-            }
-            return next ? next->test(s, pos) : pos;
-        }
-};
-
-struct nrex_node_word_boundary : public nrex_node
-{
-        bool inverse;
-
-        nrex_node_word_boundary(bool inverse)
-            : nrex_node()
-            , inverse(inverse)
-        {
-            length = 0;
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            bool left = false;
-            bool right = false;
-            if (pos != 0)
-            {
-                nrex_char c = s->at(pos - 1);
-                if (c == '_' || NREX_ISALPHANUM(c))
-                {
-                    left = true;
-                }
-            }
-            if (pos != s->end)
-            {
-                nrex_char c = s->at(pos);
-                if (c == '_' || NREX_ISALPHANUM(c))
-                {
-                    right = true;
-                }
-            }
-            if ((left != right) == inverse)
-            {
-                return -1;
-            }
-            return next ? next->test(s, pos) : pos;
-        }
-};
-
-struct nrex_node_backreference : public nrex_node
-{
-        int ref;
-
-        nrex_node_backreference(int ref)
-            : nrex_node(true)
-            , ref(ref)
-        {
-            length = -1;
-        }
-
-        int test(nrex_search* s, int pos) const
-        {
-            nrex_result& r = s->captures[ref];
-            for (int i = 0; i < r.length; ++i)
-            {
-                if (pos + i >= s->end)
-                {
-                    return -1;
-                }
-                if (s->at(r.start + i) != s->at(pos + i))
-                {
-                    return -1;
-                }
-            }
-            return next ? next->test(s, pos + r.length) : pos + r.length;
-        }
-};
-
-bool nrex_has_lookbehind(nrex_array<nrex_node_group*>& stack)
-{
-    for (unsigned int i = 0; i < stack.size(); i++)
-    {
-        if (stack[i]->type == nrex_group_look_behind)
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-nrex::nrex()
-    : _capturing(0)
-    , _lookahead_depth(0)
-    , _root(NULL)
-{
-}
-
-nrex::nrex(const nrex_char* pattern, int captures)
-    : _capturing(0)
-    , _lookahead_depth(0)
-    , _root(NULL)
-{
-    compile(pattern, captures);
-}
-
-nrex::~nrex()
-{
-    if (_root)
-    {
-        NREX_DELETE(_root);
-    }
-}
-
-bool nrex::valid() const
-{
-    return (_root != NULL);
-}
-
-void nrex::reset()
-{
-    _capturing = 0;
-    _lookahead_depth = 0;
-    if (_root)
-    {
-        NREX_DELETE(_root);
-    }
-    _root = NULL;
-}
-
-int nrex::capture_size() const
-{
-    if (_root)
-    {
-        return _capturing + 1;
-    }
-    return 0;
-}
-
-bool nrex::compile(const nrex_char* pattern, int captures)
-{
-    reset();
-    nrex_node_group* root = NREX_NEW(nrex_node_group(nrex_group_capture, _capturing));
-    nrex_array<nrex_node_group*> stack;
-    stack.push(root);
-    unsigned int lookahead_level = 0;
-    _root = root;
-
-    for (const nrex_char* c = pattern; c[0] != '\0'; ++c)
-    {
-        if (c[0] == '(')
-        {
-            if (c[1] == '?')
-            {
-                if (c[2] == ':')
-                {
-                    c = &c[2];
-                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_group_non_capture));
-                    stack.top()->add_child(group);
-                    stack.push(group);
-                }
-                else if (c[2] == '!' || c[2] == '=')
-                {
-                    c = &c[2];
-                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_group_look_ahead, lookahead_level++));
-                    group->negate = (c[0] == '!');
-                    stack.top()->add_child(group);
-                    stack.push(group);
-                    if (lookahead_level > _lookahead_depth)
-                    {
-                        _lookahead_depth = lookahead_level;
-                    }
-                }
-                else if (c[2] == '<' && (c[3] == '!' || c[3] == '='))
-                {
-                    c = &c[3];
-                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_group_look_behind));
-                    group->negate = (c[0] == '!');
-                    stack.top()->add_child(group);
-                    stack.push(group);
-                }
-                else
-                {
-                    NREX_COMPILE_ERROR("unrecognised qualifier for group");
-                }
-            }
-            else if (captures >= 0 && _capturing < captures)
-            {
-                nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_group_capture, ++_capturing));
-                stack.top()->add_child(group);
-                stack.push(group);
-            }
-            else
-            {
-                nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_group_non_capture));
-                stack.top()->add_child(group);
-                stack.push(group);
-            }
-        }
-        else if (c[0] == ')')
-        {
-            if (stack.size() > 1)
-            {
-                if (stack.top()->type == nrex_group_look_ahead)
-                {
-                    --lookahead_level;
-                }
-                stack.pop();
-            }
-            else
-            {
-                NREX_COMPILE_ERROR("unexpected ')'");
-            }
-        }
-        else if (c[0] == '[')
-        {
-            nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_group_bracket));
-            stack.top()->add_child(group);
-            if (c[1] == '^')
-            {
-                group->negate = true;
-                ++c;
-            }
-            bool first_child = true;
-            nrex_char previous_child;
-            bool previous_child_single = false;
-            while (true)
-            {
-                group->add_childset();
-                ++c;
-                if (c[0] == '\0')
-                {
-                    NREX_COMPILE_ERROR("unclosed bracket expression '['");
-                }
-                if (c[0] == '[' && c[1] == ':')
-                {
-                    const nrex_char* d = &c[2];
-                    nrex_class_type cls = nrex_parse_class(&d);
-                    if (cls != nrex_class_none)
-                    {
-                        c = d;
-                        group->add_child(NREX_NEW(nrex_node_class(cls)));
-                        previous_child_single = false;
-                    }
-                    else
-                    {
-                        group->add_child(NREX_NEW(nrex_node_char('[')));
-                        previous_child = '[';
-                        previous_child_single = true;
-                    }
-                }
-                else if (c[0] == ']' && !first_child)
-                {
-                    break;
-                }
-                else if (c[0] == '\\')
-                {
-                    if (nrex_is_shorthand(c[1]))
-                    {
-                        group->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
-                        ++c;
-                        previous_child_single = false;
-                    }
-                    else
-                    {
-                        const nrex_char* d = c;
-                        nrex_char unescaped = nrex_unescape(d);
-                        if (c == d)
-                        {
-                            NREX_COMPILE_ERROR("invalid escape token");
-                        }
-                        group->add_child(NREX_NEW(nrex_node_char(unescaped)));
-                        c = d;
-                        previous_child = unescaped;
-                        previous_child_single = true;
-                    }
-                }
-                else if (previous_child_single && c[0] == '-')
-                {
-                    bool is_range = false;
-                    nrex_char next;
-                    if (c[1] != '\0' && c[1] != ']')
-                    {
-                        if (c[1] == '\\')
-                        {
-                            const nrex_char* d = ++c;
-                            next = nrex_unescape(d);
-                            if (c == d)
-                            {
-                                NREX_COMPILE_ERROR("invalid escape token in range");
-                            }
-                        }
-                        else
-                        {
-                            next = c[1];
-                            ++c;
-                        }
-                        is_range = true;
-                    }
-                    if (is_range)
-                    {
-                        if (next < previous_child)
-                        {
-                            NREX_COMPILE_ERROR("text range out of order");
-                        }
-                        group->pop_back();
-                        group->add_child(NREX_NEW(nrex_node_range(previous_child, next)));
-                        previous_child_single = false;
-                    }
-                    else
-                    {
-                        group->add_child(NREX_NEW(nrex_node_char(c[0])));
-                        previous_child = c[0];
-                        previous_child_single = true;
-                    }
-                }
-                else
-                {
-                    group->add_child(NREX_NEW(nrex_node_char(c[0])));
-                    previous_child = c[0];
-                    previous_child_single = true;
-                }
-                first_child = false;
-            }
-        }
-        else if (nrex_is_quantifier(c[0]))
-        {
-            int min = 0;
-            int max = -1;
-            bool valid_quantifier = true;
-            if (c[0] == '?')
-            {
-                min = 0;
-                max = 1;
-            }
-            else if (c[0] == '+')
-            {
-                min = 1;
-                max = -1;
-            }
-            else if (c[0] == '*')
-            {
-                min = 0;
-                max = -1;
-            }
-            else if (c[0] == '{')
-            {
-                bool max_set = false;
-                const nrex_char* d = c;
-                while (true)
-                {
-                    ++d;
-                    if (d[0] == '\0')
-                    {
-                        valid_quantifier = false;
-                        break;
-                    }
-                    else if (d[0] == '}')
-                    {
-                        break;
-                    }
-                    else if (d[0] == ',')
-                    {
-                        max_set = true;
-                        continue;
-                    }
-                    else if (d[0] < '0' || '9' < d[0])
-                    {
-                        valid_quantifier = false;
-                        break;
-                    }
-                    if (max_set)
-                    {
-                        if (max < 0)
-                        {
-                            max = int(d[0] - '0');
-                        }
-                        else
-                        {
-                            max = max * 10 + int(d[0] - '0');
-                        }
-                    }
-                    else
-                    {
-                        min = min * 10 + int(d[0] - '0');
-                    }
-                }
-                if (!max_set)
-                {
-                    max = min;
-                }
-                if (valid_quantifier)
-                {
-                    c = d;
-                }
-            }
-            if (valid_quantifier)
-            {
-                if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
-                {
-                    NREX_COMPILE_ERROR("element not quantifiable");
-                }
-                nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max));
-                if (min == max)
-                {
-                    if (stack.top()->back->length >= 0)
-                    {
-                        quant->length = max * stack.top()->back->length;
-                    }
-                }
-                else
-                {
-                    if (nrex_has_lookbehind(stack))
-                    {
-                        NREX_COMPILE_ERROR("variable length quantifiers inside lookbehind not supported");
-                    }
-                }
-                quant->child = stack.top()->swap_back(quant);
-                quant->child->previous = NULL;
-                quant->child->next = NULL;
-                quant->child->parent = quant;
-                if (c[1] == '?')
-                {
-                    quant->greedy = false;
-                    ++c;
-                }
-            }
-            else
-            {
-                stack.top()->add_child(NREX_NEW(nrex_node_char(c[0])));
-            }
-        }
-        else if (c[0] == '|')
-        {
-            if (nrex_has_lookbehind(stack))
-            {
-                NREX_COMPILE_ERROR("alternations inside lookbehind not supported");
-            }
-            stack.top()->add_childset();
-        }
-        else if (c[0] == '^' || c[0] == '$')
-        {
-            stack.top()->add_child(NREX_NEW(nrex_node_anchor((c[0] == '$'))));
-        }
-        else if (c[0] == '.')
-        {
-            stack.top()->add_child(NREX_NEW(nrex_node_shorthand('.')));
-        }
-        else if (c[0] == '\\')
-        {
-            if (nrex_is_shorthand(c[1]))
-            {
-                stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
-                ++c;
-            }
-            else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{'))
-            {
-                int ref = 0;
-                bool unclosed = false;
-                if (c[1] == 'g')
-                {
-                    unclosed = true;
-                    c = &c[2];
-                }
-                while ('0' <= c[1] && c[1] <= '9')
-                {
-                    ref = ref * 10 + int(c[1] - '0');
-                    ++c;
-                }
-                if (c[1] == '}')
-                {
-                    unclosed = false;
-                    ++c;
-                }
-                if (ref > _capturing || ref <= 0 || unclosed)
-                {
-                    NREX_COMPILE_ERROR("backreference to non-existent capture");
-                }
-                if (nrex_has_lookbehind(stack))
-                {
-                    NREX_COMPILE_ERROR("backreferences inside lookbehind not supported");
-                }
-                stack.top()->add_child(NREX_NEW(nrex_node_backreference(ref)));
-            }
-            else if (c[1] == 'b' || c[1] == 'B')
-            {
-                stack.top()->add_child(NREX_NEW(nrex_node_word_boundary(c[1] == 'B')));
-                ++c;
-            }
-            else
-            {
-                const nrex_char* d = c;
-                nrex_char unescaped = nrex_unescape(d);
-                if (c == d)
-                {
-                    NREX_COMPILE_ERROR("invalid escape token");
-                }
-                stack.top()->add_child(NREX_NEW(nrex_node_char(unescaped)));
-                c = d;
-            }
-        }
-        else
-        {
-            stack.top()->add_child(NREX_NEW(nrex_node_char(c[0])));
-        }
-    }
-    if (stack.size() > 1)
-    {
-        NREX_COMPILE_ERROR("unclosed group '('");
-    }
-    return true;
-}
-
-bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const
-{
-    if (!_root)
-    {
-        return false;
-    }
-    nrex_search s(str, captures, _lookahead_depth);
-    if (end >= offset)
-    {
-        s.end = end;
-    }
-    else
-    {
-        s.end = NREX_STRLEN(str);
-    }
-    for (int i = offset; i <= s.end; ++i)
-    {
-        for (int c = 0; c <= _capturing; ++c)
-        {
-            captures[c].start = 0;
-            captures[c].length = 0;
-        }
-        if (_root->test(&s, i) >= 0)
-        {
-            return true;
-        }
-    }
-    return false;
-}

+ 0 - 176
drivers/nrex/nrex.hpp

@@ -1,176 +0,0 @@
-//  NREX: Node RegEx
-//  Version 0.2
-//
-//  Copyright (c) 2015-2016, Zher Huei Lee
-//  All rights reserved.
-//
-//  This software is provided 'as-is', without any express or implied
-//  warranty.  In no event will the authors be held liable for any damages
-//  arising from the use of this software.
-//
-//  Permission is granted to anyone to use this software for any purpose,
-//  including commercial applications, and to alter it and redistribute it
-//  freely, subject to the following restrictions:
-//
-//   1. The origin of this software must not be misrepresented; you must not
-//      claim that you wrote the original software. If you use this software
-//      in a product, an acknowledgment in the product documentation would
-//      be appreciated but is not required.
-//
-//   2. Altered source versions must be plainly marked as such, and must not
-//      be misrepresented as being the original software.
-//
-//   3. This notice may not be removed or altered from any source
-//      distribution.
-//
-
-#ifndef NREX_HPP
-#define NREX_HPP
-
-#include "nrex_config.h"
-
-#ifdef NREX_UNICODE
-typedef wchar_t nrex_char;
-#else
-typedef char nrex_char;
-#endif
-
-/*!
- * \brief Struct to contain the range of a capture result
- *
- * The range provided is relative to the begining of the searched string.
- *
- * \see nrex_node::match()
- */
-struct nrex_result
-{
-    public:
-        int start; /*!< Start of text range */
-        int length; /*!< Length of text range */
-};
-
-class nrex_node;
-
-/*!
- * \brief Holds the compiled regex pattern
- */
-class nrex
-{
-    private:
-        unsigned int _capturing;
-        unsigned int _lookahead_depth;
-        nrex_node* _root;
-    public:
-
-        /*!
-         * \brief Initialises an empty regex container
-         */
-        nrex();
-
-        /*!
-         * \brief Initialises and compiles the regex pattern
-         *
-         * This calls nrex::compile() with the same arguments. To check whether
-         * the compilation was successfull, use nrex::valid().
-         *
-         * If the NREX_THROW_ERROR was defined it would automatically throw a
-         * runtime error nrex_compile_error if it encounters a problem when
-         * parsing the pattern.
-         *
-         * \param pattern   The regex pattern
-         * \param captures  The maximum number of capture groups to allow. Any
-         *                  extra would be converted to non-capturing groups.
-         *                  If negative, no limit would be imposed. Defaults
-         *                  to 9.
-         *
-         * \see nrex::compile()
-         */
-        nrex(const nrex_char* pattern, int captures = 9);
-
-        ~nrex();
-
-        /*!
-         * \brief Removes the compiled regex and frees up the memory
-         */
-        void reset();
-
-        /*!
-         * \brief Checks if there is a compiled regex being stored
-         * \return True if present, False if not present
-         */
-        bool valid() const;
-
-        /*!
-         * \brief Provides number of captures the compiled regex uses
-         *
-         * This is used to provide the array size of the captures needed for
-         * nrex::match() to work. The size is actually the number of capture
-         * groups + one for the matching of the entire pattern. This can be
-         * capped using the extra argument given in nrex::compile()
-         * (default 10).
-         *
-         * \return The number of captures
-         */
-        int capture_size() const;
-
-        /*!
-         * \brief Compiles the provided regex pattern
-         *
-         * This automatically removes the existing compiled regex if already
-         * present.
-         *
-         * If the NREX_THROW_ERROR was defined it would automatically throw a
-         * runtime error nrex_compile_error if it encounters a problem when
-         * parsing the pattern.
-         *
-         * \param pattern   The regex pattern
-         * \param captures  The maximum number of capture groups to allow. Any
-         *                  extra would be converted to non-capturing groups.
-         *                  If negative, no limit would be imposed. Defaults
-         *                  to 9.
-         * \return True if the pattern was succesfully compiled
-         */
-        bool compile(const nrex_char* pattern, int captures = 9);
-
-        /*!
-         * \brief Uses the pattern to search through the provided string
-         * \param str       The text to search through. It only needs to be
-         *                  null terminated if the end point is not provided.
-         *                  This also determines the starting anchor.
-         * \param captures  The array of results to store the capture results.
-         *                  The size of that array needs to be the same as the
-         *                  size given in nrex::capture_size(). As it matches
-         *                  the function fills the array with the results. 0 is
-         *                  the result for the entire pattern, 1 and above
-         *                  corresponds to the regex capture group if present.
-         * \param offset    The starting point of the search. This does not move
-         *                  the starting anchor. Defaults to 0.
-         * \param end       The end point of the search. This also determines
-         *                  the ending anchor. If a number less than the offset
-         *                  is provided, the search would be done until null
-         *                  termination. Defaults to -1.
-         * \return          True if a match was found. False otherwise.
-         */
-        bool match(const nrex_char* str, nrex_result* captures, int offset = 0, int end = -1) const;
-};
-
-#ifdef NREX_THROW_ERROR
-
-#include <stdexcept>
-
-class nrex_compile_error : std::runtime_error
-{
-    public:
-        nrex_compile_error(const char* message)
-            : std::runtime_error(message)
-        {
-        }
-
-        ~nrex_compile_error() throw()
-        {
-        }
-};
-
-#endif
-
-#endif // NREX_HPP

+ 0 - 12
drivers/nrex/nrex_config.h

@@ -1,12 +0,0 @@
-// Godot-specific configuration
-// To use this, replace nrex_config.h
-
-#include "core/os/memory.h"
-
-#define NREX_UNICODE
-//#define NREX_THROW_ERROR
-
-#define NREX_NEW(X) memnew(X)
-#define NREX_NEW_ARRAY(X, N) memnew_arr(X, N)
-#define NREX_DELETE(X) memdelete(X)
-#define NREX_DELETE_ARRAY(X) memdelete_arr(X)

+ 0 - 142
drivers/nrex/regex.cpp

@@ -1,142 +0,0 @@
-/*************************************************************************/
-/*  regex.cpp                                                            */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                    http://www.godotengine.org                         */
-/*************************************************************************/
-/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-#include "regex.h"
-#include "nrex.hpp"
-#include "core/os/memory.h"
-
-void RegEx::_bind_methods() {
-
-	ObjectTypeDB::bind_method(_MD("compile","pattern", "capture"),&RegEx::compile, DEFVAL(9));
-	ObjectTypeDB::bind_method(_MD("find","text","start","end"),&RegEx::find, DEFVAL(0), DEFVAL(-1));
-	ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear);
-	ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid);
-	ObjectTypeDB::bind_method(_MD("get_capture_count"),&RegEx::get_capture_count);
-	ObjectTypeDB::bind_method(_MD("get_capture","capture"),&RegEx::get_capture);
-	ObjectTypeDB::bind_method(_MD("get_capture_start","capture"),&RegEx::get_capture_start);
-	ObjectTypeDB::bind_method(_MD("get_captures"),&RegEx::_bind_get_captures);
-
-};
-
-StringArray RegEx::_bind_get_captures() const {
-
-	StringArray ret;
-	int count = get_capture_count();
-	for (int i=0; i<count; i++) {
-
-		String c = get_capture(i);
-		ret.push_back(c);
-	};
-
-	return ret;
-
-};
-
-void RegEx::clear() {
-
-	text.clear();
-	captures.clear();
-	exp.reset();
-
-};
-
-bool RegEx::is_valid() const {
-
-	return exp.valid();
-
-};
-
-int RegEx::get_capture_count() const {
-
-	ERR_FAIL_COND_V( !exp.valid(), 0 );
-
-	return exp.capture_size();
-}
-
-String RegEx::get_capture(int capture) const {
-
-	ERR_FAIL_COND_V( get_capture_count() <= capture, String() );
-
-	return text.substr(captures[capture].start, captures[capture].length);
-
-}
-
-int RegEx::get_capture_start(int capture) const {
-
-	ERR_FAIL_COND_V( get_capture_count() <= capture, -1 );
-
-	return captures[capture].start;
-
-}
-
-Error RegEx::compile(const String& p_pattern, int capture) {
-
-	clear();
-
-	exp.compile(p_pattern.c_str(), capture);
-
-	ERR_FAIL_COND_V( !exp.valid(), FAILED );
-
-	captures.resize(exp.capture_size());
-
-	return OK;
-
-};
-
-int RegEx::find(const String& p_text, int p_start, int p_end) const {
-
-	ERR_FAIL_COND_V( !exp.valid(), -1 );
-	ERR_FAIL_COND_V( p_text.length() < p_start, -1 );
-	ERR_FAIL_COND_V( p_text.length() < p_end, -1 );
-
-	bool res = exp.match(p_text.c_str(), &captures[0], p_start, p_end);
-
-	if (res) {
-		text = p_text;
-		return captures[0].start;
-	}
-	text.clear();
-	return -1;
-
-};
-
-RegEx::RegEx(const String& p_pattern) {
-
-	compile(p_pattern);
-
-};
-
-RegEx::RegEx() {
-
-};
-
-RegEx::~RegEx() {
-
-	clear();
-
-};

+ 0 - 4
drivers/register_driver_types.cpp

@@ -40,8 +40,6 @@
 #include "platform/windows/export/export.h"
 #endif
 
-#include "drivers/nrex/regex.h"
-
 static ImageLoaderPNG *image_loader_png=NULL;
 static ResourceSaverPNG *resource_saver_png=NULL;
 
@@ -53,8 +51,6 @@ void register_core_driver_types() {
 
 	resource_saver_png = memnew( ResourceSaverPNG );
 	ResourceSaver::add_resource_format_saver(resource_saver_png);
-
-	ObjectTypeDB::register_type<RegEx>();
 }
 
 void unregister_core_driver_types() {

+ 1 - 1
drivers/nrex/SCsub → modules/regex/SCsub

@@ -2,6 +2,6 @@
 
 Import('env')
 
-env.add_source_files(env.drivers_sources, "*.cpp")
+env.add_source_files(env.modules_sources, "*.cpp")
 
 Export('env')

+ 8 - 0
modules/regex/config.py

@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+def can_build(platform):  # reports the module as buildable; the platform argument is not inspected
+    return True
+
+def configure(env):  # intentionally a no-op: env is left untouched
+    pass
+

+ 1465 - 0
modules/regex/regex.cpp

@@ -0,0 +1,1465 @@
+/*************************************************************************/
+/*  regex.cpp                                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "regex.h"
+#include <wctype.h>
+#include <wchar.h>
+
+// Converts one hexadecimal digit character into its numeric value (0-15).
+// Returns -1 when the character is not a hexadecimal digit.
+static int RegEx_hex2int(const CharType c)
+{
+	if (c >= '0' && c <= '9')
+		return int(c - '0');
+
+	if (c >= 'a' && c <= 'f')
+		return 10 + int(c - 'a');
+
+	if (c >= 'A' && c <= 'F')
+		return 10 + int(c - 'A');
+
+	return -1;
+}
+
+// Mutable state shared by every node while one search is running.
+struct RegExSearch {
+
+	Ref<RegExMatch> match;
+	const CharType* str;
+	int end;
+	int eof;
+
+	// Quantifiers check the remainder of the pattern through test_parent.
+	// Once that check succeeds this flag is raised so that the callers
+	// further up return immediately instead of testing the same nodes again.
+	bool complete;
+
+	// A lookahead has to rewind to the position it was entered at when
+	// test_parent is used. The node test methods are const, so the saved
+	// position of each lookahead is stored here, indexed by lookahead id.
+	Vector<int> lookahead_pos;
+
+	// Returns the character at p_pos; no bounds checking is performed.
+	CharType at(int p_pos) {
+		return str[p_pos];
+	}
+
+	RegExSearch(Ref<RegExMatch>& p_match, int p_end, int p_lookahead) :
+		match(p_match),
+		str(p_match->string.c_str()),
+		end(p_end),
+		eof(p_match->string.length()),
+		complete(false) {
+
+		lookahead_pos.resize(p_lookahead);
+	}
+
+};
+
+// Base class of every node in the compiled pattern. Nodes form a chain
+// through next/previous and point at their enclosing node through parent.
+struct RegExNode {
+
+	RegExNode* next;
+	RegExNode* previous;
+	RegExNode* parent;
+	bool quantifiable;
+	int length; // number of characters matched, or -1 when it is not fixed
+
+	RegExNode() :
+		next(NULL),
+		previous(NULL),
+		parent(NULL),
+		quantifiable(false),
+		length(-1) {
+	}
+
+	// Deleting a node deletes the rest of its chain.
+	virtual ~RegExNode() {
+
+		if (next != NULL)
+			memdelete(next);
+	}
+
+	// Default test: forwards to the next node, failing (-1) at the chain end.
+	virtual int test(RegExSearch& s, int pos) const {
+
+		if (!next)
+			return -1;
+
+		return next->test(s, pos);
+	}
+
+	// Tests the remainder of this chain and then the remainder of the
+	// enclosing nodes. s.complete is left true only if everything matched.
+	virtual int test_parent(RegExSearch& s, int pos) const {
+
+		int result = pos;
+
+		if (next)
+			result = next->test(s, result);
+
+		if (result >= 0) {
+			s.complete = true;
+			if (parent)
+				result = parent->test_parent(s, result);
+		}
+
+		if (result < 0)
+			s.complete = false;
+
+		return result;
+	}
+
+	// Adds (or subtracts) amount to the fixed length of this node and of all
+	// its ancestors. A negative amount or an already unknown length makes
+	// the length unknown (-1).
+	void increment_length(int amount, bool subtract = false) {
+
+		const bool known = (amount >= 0 && length >= 0);
+
+		if (!known)
+			length = -1;
+		else if (subtract)
+			length -= amount;
+		else
+			length += amount;
+
+		if (parent)
+			parent->increment_length(amount, subtract);
+
+	}
+
+};
+
+// Matches exactly one literal character.
+struct RegExNodeChar : public RegExNode {
+
+	CharType ch;
+
+	RegExNodeChar(CharType p_char) {
+
+		length = 1;
+		quantifiable = true;
+		ch = p_char;
+	}
+
+	// Succeeds when pos lies inside [0, s.end) and holds the stored character.
+	virtual int test(RegExSearch& s, int pos) const {
+
+		if (s.end <= pos || 0 > pos || s.at(pos) != ch)
+			return -1;
+
+		return next ? next->test(s, pos + 1) : pos + 1;
+	}
+
+	// Decodes the escape sequence that starts at the backslash c[0].
+	//
+	// On success c is advanced to the LAST character of the sequence and the
+	// decoded character is returned. On failure c is left untouched and '\0'
+	// is returned; callers detect failure by comparing c before and after.
+	static CharType parse_escape(const CharType*& c) {
+
+		int point = 0;
+		switch (c[1]) {
+			case '\0':
+				// A backslash at the very end of the string is not a valid
+				// escape. Do not advance: stepping onto the terminator would
+				// make the caller's loop increment walk past the end.
+				return '\0';
+			case 'x':
+				for (int i = 2; i <= 3; ++i) {
+					int res = RegEx_hex2int(c[i]);
+					if (res == -1)
+						return '\0';
+					point = (point << 4) + res;
+				}
+				c = &c[3];
+				return CharType(point);
+			case 'u':
+				for (int i = 2; i <= 5; ++i) {
+					int res = RegEx_hex2int(c[i]);
+					if (res == -1)
+						return '\0';
+					point = (point << 4) + res;
+				}
+				c = &c[5];
+				return CharType(point);
+			case '0': ++c; return '\0';
+			case 'a': ++c; return '\a';
+			// '\e' is a compiler extension, so spell out the ESC code point.
+			case 'e': ++c; return CharType(0x1B);
+			case 'f': ++c; return '\f';
+			case 'n': ++c; return '\n';
+			case 'r': ++c; return '\r';
+			case 't': ++c; return '\t';
+			case 'v': ++c; return '\v';
+			case 'b': ++c; return '\b';
+			default: break;
+		}
+		// Any other escaped character stands for itself.
+		return (++c)[0];
+	}
+};
+
+// Matches one character whose value lies in the inclusive range [start, end].
+struct RegExNodeRange : public RegExNode {
+
+	CharType start;
+	CharType end;
+
+	RegExNodeRange(CharType p_start, CharType p_end) {
+
+		start = p_start;
+		end = p_end;
+		quantifiable = true;
+		length = 1;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		const bool in_bounds = (pos >= 0 && pos < s.end);
+		if (!in_bounds)
+			return -1;
+
+		const CharType current = s.at(pos);
+		if (!(start <= current && current <= end))
+			return -1;
+
+		if (!next)
+			return pos + 1;
+
+		return next->test(s, pos + 1);
+	}
+};
+
+// Matches one character against a shorthand class: '.', \w, \W, \d, \D,
+// \s or \S. repr holds the shorthand letter (or '.').
+struct RegExNodeShorthand : public RegExNode {
+
+	CharType repr;
+
+	RegExNodeShorthand(CharType p_repr) {
+
+		repr = p_repr;
+		quantifiable = true;
+		length = 1;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		if (pos < 0 || pos >= s.end)
+			return -1;
+
+		const CharType current = s.at(pos);
+
+		// belongs: the character is in the lower-case class.
+		// negated: the upper-case letter was used, so the result is flipped.
+		bool belongs = false;
+		bool negated = false;
+
+		switch (repr) {
+			case '.':
+				belongs = true;
+				break;
+			case 'w':
+			case 'W':
+				negated = (repr == 'W');
+				belongs = (current == '_' || iswalnum(current) != 0);
+				break;
+			case 'd':
+			case 'D':
+				negated = (repr == 'D');
+				belongs = ('0' <= current && current <= '9');
+				break;
+			case 's':
+			case 'S':
+				negated = (repr == 'S');
+				belongs = (iswspace(current) != 0);
+				break;
+			default:
+				break;
+		}
+
+		if (belongs == negated)
+			return -1;
+
+		if (next)
+			return next->test(s, pos + 1);
+
+		return pos + 1;
+	}
+};
+
+// Matches one character against a POSIX-style named class such as
+// [:alnum:]. All classes are defined over the ASCII range only.
+struct RegExNodeClass : public RegExNode {
+
+	enum Type {
+		Type_none,
+		Type_alnum,
+		Type_alpha,
+		Type_ascii,
+		Type_blank,
+		Type_cntrl,
+		Type_digit,
+		Type_graph,
+		Type_lower,
+		Type_print,
+		Type_punct,
+		Type_space,
+		Type_upper,
+		Type_xdigit,
+		Type_word
+	};
+
+	Type type;
+
+	// Returns true when c is a member of the class selected by type.
+	bool test_class(CharType c) const {
+
+		static Vector<CharType> REGEX_NODE_SPACE = String(" \t\r\n\f");
+		static Vector<CharType> REGEX_NODE_PUNCT = String("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
+
+		switch (type) {
+			case Type_alnum:
+				if ('0' <= c && c <= '9') return true;
+				if ('a' <= c && c <= 'z') return true;
+				if ('A' <= c && c <= 'Z') return true;
+				return false;
+			case Type_alpha:
+				if ('a' <= c && c <= 'z') return true;
+				if ('A' <= c && c <= 'Z') return true;
+				return false;
+			case Type_ascii:
+				return (0x00 <= c && c <= 0x7F);
+			case Type_blank:
+				return (c == ' ' || c == '\t');
+			case Type_cntrl:
+				return ((0x00 <= c && c <= 0x1F) || c == 0x7F);
+			case Type_digit:
+				return ('0' <= c && c <= '9');
+			case Type_graph:
+				return (0x20 < c && c < 0x7F);
+			case Type_lower:
+				return ('a' <= c && c <= 'z');
+			case Type_print:
+				// Printable characters are 0x20 (space) through 0x7E. The
+				// upper bound used to be 0x1F, which made the test always
+				// false.
+				return (0x1F < c && c < 0x7F);
+			case Type_punct:
+				return (REGEX_NODE_PUNCT.find(c) >= 0);
+			case Type_space:
+				return (REGEX_NODE_SPACE.find(c) >= 0);
+			case Type_upper:
+				return ('A' <= c && c <= 'Z');
+			case Type_xdigit:
+				if ('0' <= c && c <= '9') return true;
+				if ('a' <= c && c <= 'f') return true;
+				if ('A' <= c && c <= 'F') return true;
+				return false;
+			case Type_word:
+				if ('0' <= c && c <= '9') return true;
+				if ('a' <= c && c <= 'z') return true;
+				if ('A' <= c && c <= 'Z') return true;
+				return (c == '_');
+			default:
+				return false;
+		}
+		return false;
+	}
+
+	RegExNodeClass(Type p_type) {
+
+		length = 1;
+		quantifiable = true;
+		type = p_type;
+	}
+
+	// Succeeds when pos lies inside [0, s.end) and the character there
+	// belongs to the class.
+	virtual int test(RegExSearch& s, int pos) const {
+
+		if (s.end <= pos || 0 > pos)
+			return -1;
+
+		if (!test_class(s.at(pos)))
+			return -1;
+
+		return next ? next->test(s, pos + 1) : pos + 1;
+	}
+
+#define REGEX_CMP_CLASS(POS, NAME) if (cmp_class(POS, #NAME)) return Type_ ## NAME
+
+	// Recognises "<name>:]" at p_pos. On success p_pos is advanced to the
+	// closing ']' and the matching type is returned; otherwise p_pos is
+	// left untouched and Type_none is returned.
+	static Type parse_type(const CharType*& p_pos) {
+
+		REGEX_CMP_CLASS(p_pos, alnum);
+		REGEX_CMP_CLASS(p_pos, alpha);
+		REGEX_CMP_CLASS(p_pos, ascii);
+		REGEX_CMP_CLASS(p_pos, blank);
+		REGEX_CMP_CLASS(p_pos, cntrl);
+		REGEX_CMP_CLASS(p_pos, digit);
+		REGEX_CMP_CLASS(p_pos, graph);
+		REGEX_CMP_CLASS(p_pos, lower);
+		REGEX_CMP_CLASS(p_pos, print);
+		REGEX_CMP_CLASS(p_pos, punct);
+		REGEX_CMP_CLASS(p_pos, space);
+		REGEX_CMP_CLASS(p_pos, upper);
+		REGEX_CMP_CLASS(p_pos, xdigit);
+		REGEX_CMP_CLASS(p_pos, word);
+		return Type_none;
+	}
+
+	// Returns true when p_pos starts with p_text immediately followed by
+	// ":]". Only then is p_pos moved onto that closing ']'.
+	static bool cmp_class(const CharType*& p_pos, const char* p_text) {
+
+		unsigned int i = 0;
+		for (i = 0; p_text[i] != '\0'; ++i)
+			if (p_pos[i] != p_text[i])
+				return false;
+
+		if (p_pos[i++] != ':' || p_pos[i] != ']')
+			return false;
+
+		p_pos = &p_pos[i];
+		return true;
+	}
+};
+
+// Zero-width '^' anchor: only matches at position 0.
+struct RegExNodeAnchorStart : public RegExNode {
+
+	RegExNodeAnchorStart() {
+
+		length = 0;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		if (pos == 0) {
+			if (next)
+				return next->test(s, pos);
+			return pos;
+		}
+		return -1;
+	}
+};
+
+// Zero-width '$' anchor: only matches at s.eof, the length of the subject.
+struct RegExNodeAnchorEnd : public RegExNode {
+
+	RegExNodeAnchorEnd() {
+
+		length = 0;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		if (pos == s.eof) {
+			if (next)
+				return next->test(s, pos);
+			return pos;
+		}
+		return -1;
+	}
+};
+
+// Zero-width \b (inverse == false) and \B (inverse == true) assertions.
+// A word character is '_' or anything accepted by iswalnum.
+struct RegExNodeWordBoundary : public RegExNode {
+
+	bool inverse;
+
+	RegExNodeWordBoundary(bool p_inverse) {
+
+		inverse = p_inverse;
+		length = 0;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		// Is there a word character just before pos?
+		bool word_before = false;
+		if (pos != 0) {
+			const CharType prev = s.at(pos - 1);
+			word_before = (prev == '_' || iswalnum(prev));
+		}
+
+		// Is there a word character at pos?
+		bool word_after = false;
+		if (pos != s.eof) {
+			const CharType cur = s.at(pos);
+			word_after = (cur == '_' || iswalnum(cur));
+		}
+
+		// A boundary exists when exactly one side is a word character.
+		const bool on_boundary = (word_before != word_after);
+		if (on_boundary == inverse)
+			return -1;
+
+		if (next)
+			return next->test(s, pos);
+
+		return pos;
+	}
+};
+
+struct RegExNodeQuantifier : public RegExNode { // repeats `child` between min and max times
+
+	int min; // minimum number of repetitions
+	int max; // maximum number of repetitions; a negative value disables the upper-bound check in test_step
+	bool greedy; // true by default; the lazy path in test_step is taken when this is false
+	RegExNode* child; // the repeated node; deleted by the destructor
+
+	RegExNodeQuantifier(int p_min, int p_max) {
+
+		min = p_min;
+		max = p_max;
+		greedy = true;
+		child = NULL;
+	}
+
+	~RegExNodeQuantifier() {
+
+		if (child)
+			memdelete(child);
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+		// start the repetition at level 0, remembering pos as the starting position
+		return test_step(s, pos, 0, pos);
+	}
+
+	virtual int test_parent(RegExSearch& s, int pos) const {
+		// ends the test_parent chain here: clears the shortcut flag and hands pos back unchanged
+		s.complete = false;
+		return pos;
+	}
+
+	int test_step(RegExSearch& s, int pos, int level, int start) const {
+		// level 0 matches nothing itself; every level >= 1 runs child once (see below) before recursing
+		if (pos > s.end)
+			return -1;
+
+		if (!greedy && level > min) { // lazy: try the rest of the pattern before repeating again
+			int res = next ? next->test(s, pos) : pos;
+			if (s.complete)
+				return res;
+
+			if (res >= 0 && parent->test_parent(s, res) >= 0)
+				return res;
+		}
+
+		if (max >= 0 && level > max)
+			return -1;
+
+		int res = pos;
+		if (level >= 1) {
+			if (level > min + 1 && pos == start) // NOTE(review): looks like a guard against repeating without progress from the start position - confirm
+				return -1;
+
+			res = child->test(s, pos);
+			if (s.complete)
+				return res;
+		}
+
+		if (res >= 0) {
+			// first try one more repetition; only if that fails continue with what follows the quantifier
+			int res_step = test_step(s, res, level + 1, start);
+			if (res_step >= 0)
+				return res_step;
+
+			if (greedy && level >= min) {
+				if (next)
+					res = next->test(s, res);
+				if (s.complete)
+					return res;
+
+				if (res >= 0 && parent->test_parent(s, res) >= 0)
+					return res;
+			}
+		}
+		return -1;
+	}
+};
+
+// Matches the same text that the capture stored at index id matched.
+struct RegExNodeBackReference : public RegExNode {
+
+	int id;
+
+	RegExNodeBackReference(int p_id) {
+
+		id = p_id;
+		quantifiable = true;
+		length = -1;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		RegExMatch::Group& group = s.match->captures[id];
+
+		// Compare the captured text with the subject character by character.
+		int offset = 0;
+		while (offset < group.length) {
+
+			const int cursor = pos + offset;
+			if (cursor >= s.end)
+				return -1;
+
+			if (s.at(cursor) != s.at(group.start + offset))
+				return -1;
+
+			++offset;
+		}
+
+		const int after = pos + group.length;
+		if (next)
+			return next->test(s, after);
+
+		return after;
+	}
+};
+
+
+struct RegExNodeGroup : public RegExNode { // group of alternatives; each entry of childset starts one alternative's node chain
+
+	bool inverse; // when set, the group succeeds only if no alternative matches
+	bool reset_pos; // when set, following nodes continue from the position the group was entered at
+	Vector<RegExNode*> childset; // first node of every alternative; deleted by the destructor
+	RegExNode* back; // last node added to the current alternative, NULL right after add_childset()
+
+	RegExNodeGroup() {
+
+		length = 0;
+		quantifiable = true;
+		inverse = false;
+		reset_pos = false;
+		back = NULL;
+	}
+
+	virtual ~RegExNodeGroup() {
+
+		for (int i = 0; i < childset.size(); ++i)
+			memdelete(childset[i]);
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+		// tries the alternatives in order; returns the position after the match, or -1
+		for (int i = 0; i < childset.size(); ++i) {
+
+			s.complete = false;
+
+			int res = childset[i]->test(s, pos);
+
+			if (s.complete) // a nested test_parent already verified the rest of the pattern
+				return res;
+
+			if (inverse) {
+				if (res < 0)
+					res = pos + 1;
+				else
+					return -1;
+
+				if (i + 1 < childset.size()) // every alternative has to fail before an inverse group succeeds
+					continue;
+			}
+
+			if (res >= 0) {
+				if (reset_pos)
+					res = pos;
+				return next ? next->test(s, res) : res;
+			}
+		}
+		return -1;
+	}
+
+	void add_child(RegExNode* node) {
+		// appends node to the current alternative, or starts the alternative when back is NULL
+		node->parent = this;
+		node->previous = back;
+
+		if (back)
+			back->next = node;
+		else
+			childset.push_back(node);
+
+		increment_length(node->length);
+
+		back = node;
+	}
+
+	void add_childset() {
+		// begins a new alternative; with more than one alternative the length is no longer treated as fixed
+		if (childset.size() > 0)
+			length = -1;
+		back = NULL;
+	}
+
+	RegExNode* swap_back(RegExNode* node) {
+		// puts node in place of the last node of the current alternative and returns the replaced node (NULL if none)
+		RegExNode* old = back;
+
+		if (old) {
+			if (!old->previous)
+				childset.remove(childset.size() - 1);
+			back = old->previous;
+			increment_length(old->length, true);
+		}
+
+		add_child(node);
+
+		return old;
+	}
+};
+
+struct RegExNodeCapturing : public RegExNodeGroup { // group that records what it matched in s.match->captures[id]
+
+	int id; // index into the captures vector of the match
+
+	RegExNodeCapturing(int p_id = 0) {
+
+		id = p_id;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+		// records pos as the capture start, runs the group, and restores the old start on failure
+		RegExMatch::Group& ref = s.match->captures[id];
+		int old_start = ref.start;
+		ref.start = pos;
+
+		int res = RegExNodeGroup::test(s, pos);
+
+		if (res >= 0) {
+			if (!s.complete) // when complete is set the length was already stored by test_parent
+				ref.length = res - pos;
+		} else {
+			ref.start = old_start;
+		}
+
+		return res;
+	}
+
+	virtual int test_parent(RegExSearch& s, int pos) const {
+		// the group ends at pos: store the capture length, then continue with the generic parent test
+		RegExMatch::Group& ref = s.match->captures[id];
+		ref.length = pos - ref.start;
+		return RegExNode::test_parent(s, pos);
+	}
+
+	static Variant parse_name(const CharType*& c, bool p_allow_numeric) {
+		// parses a group name starting at c[1] and ending at '>'; returns an int (numeric name), a String, or -1 on failure
+		if (c[1] == '0') {
+			return -1;
+		} else if ('1' <= c[1] && c[1] <= '9') {
+			if (!p_allow_numeric)
+				return -1;
+			int res = (++c)[0] - '0';
+			while ('0' <= c[1] && c[1] <= '9')
+				res = res * 10 + int((++c)[0] - '0');
+			if ((++c)[0] != '>') // on success c is left on the closing '>'
+				return -1;
+			return res;
+		} else if (iswalnum(c[1])) {
+			String res(++c, 1);
+			while (iswalnum(c[1]))
+				res += String(++c, 1);
+			if ((++c)[0] != '>')
+				return -1;
+			return res;
+		}
+		return -1;
+	}
+};
+
+// Zero-width lookahead: (?=...) when p_inverse is false, (?!...) when true.
+// id selects the slot of s.lookahead_pos that remembers the entry position.
+struct RegExNodeLookAhead : public RegExNodeGroup {
+
+	int id;
+
+	RegExNodeLookAhead(bool p_inverse, int p_id = 0) {
+
+		id = p_id;
+		inverse = p_inverse;
+		reset_pos = true;
+		quantifiable = false;
+	}
+
+	// Remembers where the lookahead was entered before testing the group.
+	virtual int test(RegExSearch& s, int pos) const {
+
+		s.lookahead_pos[id] = pos;
+
+		const int result = RegExNodeGroup::test(s, pos);
+		return result;
+	}
+
+	// The rest of the pattern is tested from the remembered entry position,
+	// not from the position reached inside the lookahead.
+	virtual int test_parent(RegExSearch& s, int pos) const {
+
+		const int entry = s.lookahead_pos[id];
+		return RegExNode::test_parent(s, entry);
+	}
+};
+
+// Lookbehind: (?<=...) when p_inverse is false, (?<!...) when true.
+// The group is tested `length` characters before the current position.
+//
+// NOTE(review): because reset_pos is set, RegExNodeGroup::test appears to
+// continue the following nodes from pos - length rather than from pos, and
+// an inverse lookbehind fails outright when pos < length. Confirm whether
+// both are intended.
+struct RegExNodeLookBehind : public RegExNodeGroup {
+
+	// p_id is accepted for symmetry with RegExNodeLookAhead but is not used.
+	RegExNodeLookBehind(bool p_inverse, int p_id = 0) {
+
+		inverse = p_inverse;
+		reset_pos = true;
+		quantifiable = false;
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		// Not enough characters before pos to hold the group.
+		if (pos < length)
+			return -1;
+
+		const int behind = pos - length;
+		return RegExNodeGroup::test(s, behind);
+	}
+};
+
+// Bracket expression [...]: matches one character accepted by any child
+// node, or by none of them when inverse is set ([^...]).
+struct RegExNodeBracket : public RegExNode {
+
+	bool inverse;
+	Vector<RegExNode*> children;
+
+	RegExNodeBracket() {
+
+		inverse = false;
+		quantifiable = true;
+		length = 1;
+	}
+
+	virtual ~RegExNodeBracket() {
+
+		const int count = children.size();
+		for (int idx = 0; idx < count; ++idx)
+			memdelete(children[idx]);
+	}
+
+	virtual int test(RegExSearch& s, int pos) const {
+
+		const int count = children.size();
+
+		for (int idx = 0; idx < count; ++idx) {
+
+			int result = children[idx]->test(s, pos);
+
+			if (inverse) {
+				// Any accepting child rejects an inverted bracket.
+				if (result >= 0)
+					return -1;
+
+				result = pos + 1;
+
+				// Keep checking until the last child has rejected as well.
+				if (idx + 1 < count)
+					continue;
+			}
+
+			if (result < 0)
+				continue;
+
+			if (next)
+				return next->test(s, result);
+			return result;
+		}
+		return -1;
+	}
+
+	// Takes ownership of node and adds it to the accepted set.
+	void add_child(RegExNode* node) {
+
+		children.push_back(node);
+		node->parent = this;
+	}
+
+	// Deletes and removes the most recently added child.
+	void pop_back() {
+
+		const int last = children.size() - 1;
+		memdelete(children[last]);
+		children.remove(last);
+	}
+};
+
+// Prints MSG as an error and makes the enclosing function return an empty
+// String.
+#define REGEX_EXPAND_FAIL(MSG)\
+{\
+	ERR_PRINT(MSG);\
+	return String();\
+}
+
+// Expands p_template using the groups of this match.
+//
+// Supported tokens: \1 .. \9 and \g{N} (numbered groups), \g<name>
+// (numbered or named groups) and the character escapes understood by
+// RegExNodeChar::parse_escape. Every other character is copied verbatim.
+// Returns an empty String (after printing an error) when the template is
+// malformed.
+String RegExMatch::expand(const String& p_template) const {
+
+	String res;
+	for (const CharType* c = p_template.c_str(); *c != '\0'; ++c) {
+		if (c[0] == '\\') {
+			// A backslash at the very end has nothing to escape. Without
+			// this check the escape parser would step onto the terminator
+			// and the loop increment would then read past the end.
+			if (c[1] == '\0')
+				REGEX_EXPAND_FAIL("invalid escape token");
+
+			if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{')) {
+
+				int ref = 0;
+				bool unclosed = false;
+
+				if (c[1] == 'g') {
+					unclosed = true;
+					c = &c[2];
+				}
+
+				while ('0' <= c[1] && c[1] <= '9') {
+					ref = ref * 10 + int(c[1] - '0');
+					++c;
+				}
+
+				if (unclosed) {
+					if (c[1] != '}')
+						REGEX_EXPAND_FAIL("unclosed backreference '{'");
+					++c;
+				}
+
+				res += get_string(ref);
+
+			} else if (c[1] =='g' && c[2] == '<') {
+
+				const CharType* d = &c[2];
+
+				Variant name = RegExNodeCapturing::parse_name(d, true);
+				if (name == Variant(-1))
+					REGEX_EXPAND_FAIL("unrecognised character for group name");
+
+				c = d;
+
+				res += get_string(name);
+
+			} else {
+
+				// parse_escape leaves d untouched when the escape is invalid.
+				const CharType* d = c;
+				CharType ch = RegExNodeChar::parse_escape(d);
+				if (c == d)
+					REGEX_EXPAND_FAIL("invalid escape token");
+				res += String(&ch, 1);
+				c = d;
+			}
+		} else {
+			res += String(c, 1);
+		}
+	}
+	return res;
+}
+
+// Returns how many captures, excluding index 0, carry a numeric name.
+int RegExMatch::get_group_count() const {
+
+	int numbered = 0;
+	const int total = captures.size();
+
+	for (int idx = 1; idx < total; ++idx) {
+		if (captures[idx].name.get_type() != Variant::INT)
+			continue;
+		numbered += 1;
+	}
+	return numbered;
+}
+
+// Returns the text of every numbered capture (index 0 excluded) in order.
+// Captures whose start is negative contribute an empty string.
+Array RegExMatch::get_group_array() const {
+
+	Array groups;
+	const int total = captures.size();
+
+	for (int idx = 1; idx < total; ++idx) {
+
+		const RegExMatch::Group& group = captures[idx];
+
+		if (group.name.get_type() == Variant::INT) {
+			String text;
+			if (group.start >= 0)
+				text = string.substr(group.start, group.length);
+			groups.push_back(text);
+		}
+	}
+	return groups;
+}
+
+// Returns the names of all string-named captures (index 0 excluded).
+Array RegExMatch::get_names() const {
+
+	Array names;
+	const int total = captures.size();
+
+	for (int idx = 1; idx < total; ++idx) {
+		const Variant& name = captures[idx].name;
+		if (name.get_type() != Variant::STRING)
+			continue;
+		names.push_back(name);
+	}
+	return names;
+}
+
+// Maps the name of every string-named capture (index 0 excluded) to the
+// text it matched. Captures whose start is negative map to an empty string.
+Dictionary RegExMatch::get_name_dict() const {
+
+	Dictionary by_name;
+	const int total = captures.size();
+
+	for (int idx = 1; idx < total; ++idx) {
+
+		const RegExMatch::Group& group = captures[idx];
+
+		if (group.name.get_type() == Variant::STRING) {
+			if (group.start < 0)
+				by_name[group.name] = String();
+			else
+				by_name[group.name] = string.substr(group.start, group.length);
+		}
+	}
+	return by_name;
+}
+
+// Returns the text matched by the first capture whose name equals p_name.
+// An empty string is returned when there is no such capture or when its
+// start is -1.
+String RegExMatch::get_string(const Variant& p_name) const {
+
+	const int total = captures.size();
+
+	for (int idx = 0; idx < total; ++idx) {
+
+		const RegExMatch::Group& group = captures[idx];
+
+		if (group.name == p_name) {
+			if (group.start == -1)
+				return String();
+			return string.substr(group.start, group.length);
+		}
+	}
+	return String();
+}
+
+// Returns the start offset of the first capture whose name equals p_name,
+// or -1 when there is no such capture.
+int RegExMatch::get_start(const Variant& p_name) const {
+
+	const int total = captures.size();
+
+	for (int idx = 0; idx < total; ++idx) {
+		const RegExMatch::Group& group = captures[idx];
+		if (group.name == p_name)
+			return group.start;
+	}
+	return -1;
+}
+
+// Returns start + length of the first capture whose name equals p_name,
+// or -1 when there is no such capture.
+int RegExMatch::get_end(const Variant& p_name) const {
+
+	const int total = captures.size();
+
+	for (int idx = 0; idx < total; ++idx) {
+		const RegExMatch::Group& group = captures[idx];
+		if (group.name == p_name)
+			return group.start + group.length;
+	}
+	return -1;
+}
+
+RegExMatch::RegExMatch() { // nothing to set up here; RegEx::search fills in string and captures
+
+}
+
+// Returns true when ch is the letter of a shorthand character class:
+// one of w, W, d, D, s or S.
+static bool RegEx_is_shorthand(CharType ch) {
+
+	if (ch == 'w' || ch == 'W')
+		return true;
+
+	if (ch == 'd' || ch == 'D')
+		return true;
+
+	if (ch == 's' || ch == 'S')
+		return true;
+
+	return false;
+}
+
+// Prints MSG as an error, discards the partially built pattern and makes
+// the enclosing function return FAILED.
+#define REGEX_COMPILE_FAIL(MSG)\
+{\
+	ERR_PRINT(MSG);\
+	clear();\
+	return FAILED;\
+}
+
+// Compiles p_pattern into a tree of RegExNode objects rooted at `root`.
+//
+// Returns OK on success (or when p_pattern equals the pattern already
+// stored) and FAILED, after printing an error and clearing the object, when
+// the pattern is malformed.
+//
+// group_names[i] is the name of the capture stored at index i: index 0 is
+// the whole match, the others are either a group number (int) or a group
+// name (String). Back-reference nodes are given that capture INDEX.
+Error RegEx::compile(const String& p_pattern) {
+
+	if (pattern == p_pattern)
+		return OK;
+
+	clear();
+	pattern = p_pattern;
+	group_names.push_back(0);
+	RegExNodeGroup* root_group = memnew(RegExNodeCapturing(0));
+	root = root_group;
+	// stack[0] is always the innermost group that is currently open.
+	Vector<RegExNodeGroup*> stack;
+	stack.push_back(root_group);
+	int lookahead_level = 0;
+	int numeric_groups = 0;
+	const int numeric_max = 9;
+
+	for (const CharType* c = p_pattern.c_str(); *c != '\0'; ++c) {
+
+		switch (c[0]) {
+			case '(':
+				if (c[1] == '?') {
+
+					RegExNodeGroup* group = NULL;
+					switch (c[2]) {
+						case ':':
+							c = &c[2];
+							group = memnew(RegExNodeGroup());
+							break;
+						case '!':
+						case '=':
+							group = memnew(RegExNodeLookAhead((c[2] == '!'), lookahead_level++));
+							if (lookahead_depth < lookahead_level)
+								lookahead_depth = lookahead_level;
+							c = &c[2];
+							break;
+						case '<':
+							if (c[3] == '!' || c[3] == '=') {
+								group = memnew(RegExNodeLookBehind((c[3] == '!'), lookahead_level++));
+								c = &c[3];
+							}
+							break;
+						case 'P':
+							if (c[3] == '<') {
+								const CharType* d = &c[3];
+								Variant name = RegExNodeCapturing::parse_name(d, false);
+								if (name == Variant(-1))
+									REGEX_COMPILE_FAIL("unrecognised character for group name");
+								group = memnew(RegExNodeCapturing(group_names.size()));
+								group_names.push_back(name);
+								c = d;
+							}
+							break;
+						default:
+							break;
+					}
+					if (!group)
+						REGEX_COMPILE_FAIL("unrecognised qualifier for group");
+					stack[0]->add_child(group);
+					stack.insert(0, group);
+
+				} else if (numeric_groups < numeric_max) {
+
+					RegExNodeCapturing* group = memnew(RegExNodeCapturing(group_names.size()));
+					group_names.push_back(++numeric_groups);
+					stack[0]->add_child(group);
+					stack.insert(0, group);
+
+				} else {
+
+					// Beyond numeric_max groups, plain parentheses no longer capture.
+					RegExNodeGroup* group = memnew(RegExNodeGroup());
+					stack[0]->add_child(group);
+					stack.insert(0, group);
+				}
+				break;
+			case ')':
+				if (stack.size() == 1)
+					REGEX_COMPILE_FAIL("unexpected ')'");
+				stack.remove(0);
+				break;
+			case '\\':
+				// A backslash at the very end has nothing to escape. Without
+				// this check the escape parser would step onto the
+				// terminator and the loop increment would read past it.
+				if (c[1] == '\0')
+					REGEX_COMPILE_FAIL("invalid escape token");
+
+				if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{')) {
+
+					int ref = 0;
+					bool unclosed = false;
+
+					if (c[1] == 'g') {
+						unclosed = true;
+						c = &c[2];
+					}
+
+					while ('0' <= c[1] && c[1] <= '9') {
+						ref = ref * 10 + int(c[1] - '0');
+						++c;
+					}
+
+					if (unclosed) {
+						if (c[1] != '}')
+							REGEX_COMPILE_FAIL("unclosed backreference '{'");
+						++c;
+					}
+
+					if (ref > numeric_groups || ref <= 0)
+						REGEX_COMPILE_FAIL("backreference not found");
+
+					for (int i = 0; i < stack.size(); ++i)
+						if (dynamic_cast<RegExNodeLookBehind*>(stack[i]))
+							REGEX_COMPILE_FAIL("backreferences inside lookbehind not supported");
+
+					// Translate the group NUMBER into the capture INDEX.
+					// The two differ as soon as a named group precedes the
+					// referenced one.
+					int capture_index = -1;
+					for (int i = 1; i < group_names.size(); ++i) {
+						if (group_names[i].get_type() == Variant::INT && int(group_names[i]) == ref) {
+							capture_index = i;
+							break;
+						}
+					}
+
+					if (capture_index == -1)
+						REGEX_COMPILE_FAIL("backreference not found");
+
+					stack[0]->add_child(memnew(RegExNodeBackReference(capture_index)));
+
+				} else if (c[1] =='g' && c[2] == '<') {
+
+					const CharType* d = &c[2];
+
+					Variant name = RegExNodeCapturing::parse_name(d, true);
+					if (name == Variant(-1))
+						REGEX_COMPILE_FAIL("unrecognised character for group name");
+
+					c = d;
+
+					for (int i = 0; i < stack.size(); ++i)
+						if (dynamic_cast<RegExNodeLookBehind*>(stack[i]))
+							REGEX_COMPILE_FAIL("backreferences inside lookbehind not supported");
+
+					// Find the capture whose name (number or string) equals
+					// the parsed name.
+					int ref = -1;
+
+					for (int i = 1; i < group_names.size(); ++i) {
+						if (group_names[i] == name) {
+							ref = i;
+							break;
+						}
+					}
+
+					if (ref == -1)
+						REGEX_COMPILE_FAIL("backreference not found");
+
+					stack[0]->add_child(memnew(RegExNodeBackReference(ref)));
+
+				} else if (c[1] == 'b' || c[1] == 'B') {
+
+					stack[0]->add_child(memnew(RegExNodeWordBoundary(*(++c) == 'B')));
+
+				} else if (RegEx_is_shorthand(c[1])) {
+
+					stack[0]->add_child(memnew(RegExNodeShorthand(*(++c))));
+
+				} else {
+
+					// parse_escape leaves d untouched when the escape is invalid.
+					const CharType* d = c;
+					CharType ch = RegExNodeChar::parse_escape(d);
+					if (c == d)
+						REGEX_COMPILE_FAIL("invalid escape token");
+					stack[0]->add_child(memnew(RegExNodeChar(ch)));
+					c = d;
+
+				}
+				break;
+			case '[':
+				{
+					RegExNodeBracket* bracket = memnew(RegExNodeBracket());
+					stack[0]->add_child(bracket);
+					if (c[1] == '^') {
+						bracket->inverse = true;
+						++c;
+					}
+					bool first_child = true;
+					// previous_child is only meaningful while
+					// previous_child_single is true: it is the single
+					// character that a following '-' may turn into a range.
+					CharType previous_child = 0;
+					bool previous_child_single = false;
+					while (true) {
+						++c;
+						if (!first_child && c[0] == ']') {
+
+							break;
+
+						} else if (c[0] == '\0') {
+
+							REGEX_COMPILE_FAIL("unclosed bracket expression '['");
+
+						} else if (c[0] == '\\') {
+
+							if (c[1] == '\0')
+								REGEX_COMPILE_FAIL("invalid escape token");
+
+							if (RegEx_is_shorthand(c[1])) {
+								bracket->add_child(memnew(RegExNodeShorthand(*(++c))));
+								// A shorthand class cannot start a range.
+								previous_child_single = false;
+							} else {
+								const CharType* d = c;
+								CharType ch = RegExNodeChar::parse_escape(d);
+								if (c == d)
+									REGEX_COMPILE_FAIL("invalid escape token");
+								bracket->add_child(memnew(RegExNodeChar(ch)));
+								c = d;
+								previous_child = ch;
+								previous_child_single = true;
+							}
+
+						} else if (c[0] == '[' && c[1] == ':') {
+
+							// POSIX class such as [:alpha:]. parse_type moves
+							// d onto the closing ']' of the class on success.
+							const CharType* d = &c[2];
+							RegExNodeClass::Type type = RegExNodeClass::parse_type(d);
+							if (type != RegExNodeClass::Type_none) {
+
+								bracket->add_child(memnew(RegExNodeClass(type)));
+								c = d;
+								previous_child_single = false;
+
+							} else {
+
+								bracket->add_child(memnew(RegExNodeChar('[')));
+								previous_child = '[';
+								previous_child_single = true;
+							}
+						} else if (previous_child_single && c[0] == '-') {
+
+							if (c[1] != '\0' && c[1] != ']') {
+
+								CharType next;
+
+								if (c[1] == '\\') {
+									const CharType* d = ++c;
+									if (d[1] == '\0')
+										REGEX_COMPILE_FAIL("invalid escape token");
+									next = RegExNodeChar::parse_escape(d);
+									if (c == d)
+										REGEX_COMPILE_FAIL("invalid escape token");
+									// Skip the consumed escape so that it is
+									// not parsed again as literal characters.
+									c = d;
+								} else {
+									next = *(++c);
+								}
+
+								if (next < previous_child)
+									REGEX_COMPILE_FAIL("text range out of order");
+
+								// Replace the single character with the range.
+								bracket->pop_back();
+								bracket->add_child(memnew(RegExNodeRange(previous_child, next)));
+								previous_child_single = false;
+							} else {
+
+								bracket->add_child(memnew(RegExNodeChar('-')));
+								previous_child = '-';
+								previous_child_single = true;
+							}
+						} else {
+
+							bracket->add_child(memnew(RegExNodeChar(c[0])));
+							previous_child = c[0];
+							previous_child_single = true;
+						}
+						first_child = false;
+					}
+				}
+				break;
+			case '|':
+				for (int i = 0; i < stack.size(); ++i)
+					if (dynamic_cast<RegExNodeLookBehind*>(stack[i]))
+						REGEX_COMPILE_FAIL("alternations inside lookbehind not supported");
+				stack[0]->add_childset();
+				break;
+			case '^':
+				stack[0]->add_child(memnew(RegExNodeAnchorStart()));
+				break;
+			case '$':
+				stack[0]->add_child(memnew(RegExNodeAnchorEnd()));
+				break;
+			case '.':
+				stack[0]->add_child(memnew(RegExNodeShorthand('.')));
+				break;
+			case '?':
+			case '*':
+			case '+':
+			case '{':
+				{
+					int min_val = 0;
+					int max_val = -1;
+					bool valid = true;
+					const CharType* d = c;
+					bool max_set = true;
+					switch (c[0]) {
+						case '?':
+							min_val = 0;
+							max_val = 1;
+							break;
+						case '*':
+							min_val = 0;
+							max_val = -1;
+							break;
+						case '+':
+							min_val = 1;
+							max_val = -1;
+							break;
+						case '{':
+							// {n}, {n,} or {n,m}; anything else is not a quantifier.
+							max_set = false;
+							while (valid) {
+								++d;
+								if (d[0] == '}') {
+									break;
+								} else if (d[0] == ',') {
+									max_set = true;
+								} else if ('0' <= d[0] && d[0] <= '9') {
+									if (max_set) {
+										if (max_val < 0)
+											max_val = int(d[0] - '0');
+										else
+											max_val = max_val * 10 + int(d[0] - '0');
+									} else {
+										min_val = min_val * 10 + int(d[0] - '0');
+									}
+								} else {
+									valid = false;
+								}
+							}
+							break;
+						default:
+							break;
+					}
+
+					if (!max_set)
+						max_val = min_val;
+
+					if (valid) {
+
+						c = d;
+
+						if (stack[0]->back == NULL || !stack[0]->back->quantifiable)
+							REGEX_COMPILE_FAIL("element not quantifiable");
+
+						if (min_val != max_val)
+							for (int i = 0; i < stack.size(); ++i)
+								if (dynamic_cast<RegExNodeLookBehind*>(stack[i]))
+									REGEX_COMPILE_FAIL("variable length quantifiers inside lookbehind not supported");
+
+						// The quantifier takes the place of the node it
+						// repeats, which becomes its child.
+						RegExNodeQuantifier* quant = memnew(RegExNodeQuantifier(min_val, max_val));
+						quant->child = stack[0]->swap_back(quant);
+						quant->child->previous = NULL;
+						quant->child->parent = quant;
+
+						if (min_val == max_val && quant->child->length >= 0)
+							quant->length = max_val * quant->child->length;
+
+						if (c[1] == '?') {
+							quant->greedy = false;
+							++c;
+						}
+						break;
+					}
+				}
+				// Deliberate fall-through: a '{' that does not form a valid
+				// quantifier is treated as a literal character.
+			default:
+				stack[0]->add_child(memnew(RegExNodeChar(c[0])));
+				break;
+		}
+	}
+	if (stack.size() > 1)
+		REGEX_COMPILE_FAIL("unclosed group '('");
+	return OK;
+}
+
+// Searches p_text for the first match of the compiled pattern.
+//
+// The search tries every start position from p_start up to p_end. A p_end
+// that is smaller than p_start or larger than the text length means "up to
+// the end of the text"; a negative p_start is treated as 0.
+// Returns the match, or a null reference when nothing matched or when no
+// pattern has been compiled.
+Ref<RegExMatch> RegEx::search(const String& p_text, int p_start, int p_end) const {
+
+	// Without a compiled pattern there is no node tree to test against.
+	ERR_FAIL_COND_V(!is_valid(), Ref<RegExMatch>());
+
+	Ref<RegExMatch> res = memnew(RegExMatch());
+
+	for (int i = 0; i < group_names.size(); ++i) {
+		RegExMatch::Group group;
+		group.name = group_names[i];
+		res->captures.push_back(group);
+	}
+
+	res->string = p_text;
+
+	// Nodes such as the word boundary read the character before the current
+	// position, so the search must never start before the text.
+	if (p_start < 0)
+		p_start = 0;
+
+	if (p_end < p_start || p_end > p_text.length())
+		p_end = p_text.length();
+
+	RegExSearch s(res, p_end, lookahead_depth);
+
+	bool found = false;
+
+	for (int i = p_start; i <= s.end; ++i) {
+		// Forget everything captured by the previous attempt.
+		for (int c = 0; c < group_names.size(); ++c) {
+			res->captures[c].start = -1;
+			res->captures[c].length = 0;
+		}
+		if (root->test(s, i) >= 0) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found)
+		return res;
+	return NULL;
+}
+
+// Replaces the first match of the pattern in p_text (searched between
+// p_start and p_end, see search()) with the expansion of p_template.
+// Returns p_text unchanged when there is no match.
+String RegEx::sub(const String& p_text, const String& p_template, int p_start, int p_end) const {
+
+	Ref<RegExMatch> m = search(p_text, p_start, p_end);
+
+	// search() returns a null reference when nothing matched, so it must be
+	// checked before the captures are accessed.
+	if (m.is_null())
+		return p_text;
+
+	RegExMatch::Group& s = m->captures[0];
+	if (s.start >= 0) {
+		String res = p_text.substr(0, s.start) + m->expand(p_template);
+		int end = s.start + s.length;
+		if (end < p_text.length())
+			res += p_text.substr(end, p_text.length() - end);
+		return res;
+	}
+	return p_text;
+}
+
+// Discards the compiled pattern and returns the object to its initial,
+// invalid state.
+void RegEx::clear() {
+
+	if (root) {
+		memdelete(root);
+		// Reset the pointer: the destructor and any later clear() would
+		// otherwise delete the same tree again, and is_valid() would keep
+		// reporting a pattern that no longer exists.
+		root = NULL;
+	}
+
+	pattern.clear();
+	group_names.clear();
+	lookahead_depth = 0;
+}
+
+// Returns true while a node tree is present (root is not NULL).
+bool RegEx::is_valid() const {
+
+	if (root == NULL)
+		return false;
+
+	return true;
+}
+
+// Returns a copy of the stored pattern text.
+String RegEx::get_pattern() const {
+
+	String current = pattern;
+	return current;
+}
+
+// Returns how many groups of the pattern, excluding index 0, are numbered.
+int RegEx::get_group_count() const {
+
+	int numbered = 0;
+	const int total = group_names.size();
+
+	for (int idx = 1; idx < total; ++idx) {
+		if (group_names[idx].get_type() != Variant::INT)
+			continue;
+		numbered += 1;
+	}
+	return numbered;
+}
+
+// Returns the names of all string-named groups of the pattern.
+Array RegEx::get_names() const {
+
+	Array names;
+	const int total = group_names.size();
+
+	for (int idx = 1; idx < total; ++idx) {
+		const Variant& name = group_names[idx];
+		if (name.get_type() != Variant::STRING)
+			continue;
+		names.push_back(name);
+	}
+	return names;
+}
+
+// Creates an empty, not yet compiled regular expression.
+RegEx::RegEx() {
+
+	lookahead_depth = 0;
+	root = NULL;
+}
+
+// Creates a regular expression and compiles p_pattern straight away.
+// Use is_valid() afterwards to find out whether compilation succeeded.
+RegEx::RegEx(const String& p_pattern) {
+
+	root = NULL;
+	// compile() returns early, without touching any member, when p_pattern
+	// equals the stored (here: empty) pattern, so this has to be set first.
+	lookahead_depth = 0;
+	compile(p_pattern);
+}
+
+// Releases the node tree, if one was built.
+RegEx::~RegEx() {
+
+	if (root != NULL) {
+		memdelete(root);
+	}
+}
+
+void RegExMatch::_bind_methods() {
+	// registers the RegExMatch methods with ObjectTypeDB; "name" defaults to 0 for get_string/get_start/get_end
+	ObjectTypeDB::bind_method(_MD("expand","template"),&RegExMatch::expand);
+	ObjectTypeDB::bind_method(_MD("get_group_count"),&RegExMatch::get_group_count);
+	ObjectTypeDB::bind_method(_MD("get_group_array"),&RegExMatch::get_group_array);
+	ObjectTypeDB::bind_method(_MD("get_names"),&RegExMatch::get_names);
+	ObjectTypeDB::bind_method(_MD("get_name_dict"),&RegExMatch::get_name_dict);
+	ObjectTypeDB::bind_method(_MD("get_string","name"),&RegExMatch::get_string, DEFVAL(0));
+	ObjectTypeDB::bind_method(_MD("get_start","name"),&RegExMatch::get_start, DEFVAL(0));
+	ObjectTypeDB::bind_method(_MD("get_end","name"),&RegExMatch::get_end, DEFVAL(0));
+}
+
+void RegEx::_bind_methods() {
+	// registers the RegEx methods with ObjectTypeDB; for search and sub, "start" defaults to 0 and "end" to -1
+	ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear);
+	ObjectTypeDB::bind_method(_MD("compile","pattern"),&RegEx::compile);
+	ObjectTypeDB::bind_method(_MD("search","text","start","end"),&RegEx::search, DEFVAL(0), DEFVAL(-1));
+	ObjectTypeDB::bind_method(_MD("sub","text","template","start","end"),&RegEx::sub, DEFVAL(0), DEFVAL(-1));
+	ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid);
+	ObjectTypeDB::bind_method(_MD("get_pattern"),&RegEx::get_pattern);
+	ObjectTypeDB::bind_method(_MD("get_group_count"),&RegEx::get_group_count);
+	ObjectTypeDB::bind_method(_MD("get_names"),&RegEx::get_names);
+}
+

+ 114 - 0
modules/regex/regex.h

@@ -0,0 +1,114 @@
+/*************************************************************************/
+/*  regex.h                                                              */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef REGEX_H
+#define REGEX_H
+
+#include "core/vector.h"
+#include "core/ustring.h"
+#include "core/dictionary.h"
+#include "core/reference.h"
+#include "core/resource.h"
+
+class RegExNode;
+
+class RegExMatch : public Reference {
+	// Result object of a search: RegEx::search() returns a Ref<RegExMatch>.
+	OBJ_TYPE(RegExMatch, Reference);
+	// One record per capture group.
+	struct Group {
+		Variant name; // NOTE(review): presumably an int index or a String name, mirroring RegEx::group_names -- confirm
+		int start; // NOTE(review): presumably the offset of the capture within `string` -- confirm
+		int length; // NOTE(review): presumably the number of characters captured -- confirm
+	};
+
+	Vector<Group> captures; // all Group records held by this match
+	String string; // NOTE(review): presumably the text that was searched -- confirm
+	// The classes below are granted access to the private members above.
+	friend class RegEx;
+	friend class RegExSearch;
+	friend class RegExNodeCapturing;
+	friend class RegExNodeBackReference;
+
+protected:
+
+	static void _bind_methods(); // registers the public methods below with ObjectTypeDB
+
+public:
+
+	String expand(const String& p_template) const; // bound to scripts as expand(template)
+
+	int get_group_count() const;
+	Array get_group_array() const;
+
+	Array get_names() const;
+	Dictionary get_name_dict() const;
+	// The three lookups below take a Variant; their script bindings default the argument to 0.
+	String get_string(const Variant& p_name) const;
+	int get_start(const Variant& p_name) const;
+	int get_end(const Variant& p_name) const;
+
+	RegExMatch();
+
+};
+
+class RegEx : public Reference {
+	// Regular-expression object: compile() takes a pattern, search() and sub() take the text to process.
+	OBJ_TYPE(RegEx, Reference);
+
+	RegExNode* root; // NULL until set; released with memdelete in the destructor
+	Vector<Variant> group_names; // INT entries are counted by get_group_count(), STRING entries are returned by get_names(); index 0 is skipped by both
+	String pattern; // value returned by get_pattern()
+	int lookahead_depth; // set to 0 by the default constructor
+
+protected:
+
+	static void _bind_methods(); // registers the public methods below with ObjectTypeDB
+
+public:
+
+	void clear();
+	Error compile(const String& p_pattern);
+	// p_start defaults to 0 and p_end to -1 for both calls below.
+	Ref<RegExMatch> search(const String& p_text, int p_start = 0, int p_end = -1) const;
+	String sub(const String& p_text, const String& p_template, int p_start = 0, int p_end = -1) const;
+
+	bool is_valid() const;
+	String get_pattern() const;
+	int get_group_count() const;
+	Array get_names() const;
+
+	RegEx();
+	RegEx(const String& p_pattern); // calls compile(p_pattern) during construction
+	~RegEx();
+
+};
+
+#endif // REGEX_H
+

+ 10 - 32
drivers/nrex/regex.h → modules/regex/register_types.cpp

@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  regex.h                                                              */
+/*  register_types.cpp                                                   */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -26,40 +26,18 @@
 /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
-#ifndef REGEX_H
-#define REGEX_H
 
-#include "ustring.h"
-#include "vector.h"
-#include "core/reference.h"
-#include "nrex.hpp"
+#include "register_types.h"
+#include "object_type_db.h"
+#include "regex.h"
 
-class RegEx : public Reference {
+void register_regex_types() {
 
-	OBJ_TYPE(RegEx, Reference);
+	ObjectTypeDB::register_type<RegExMatch>(); // registers the RegExMatch class with ObjectTypeDB
+	ObjectTypeDB::register_type<RegEx>(); // registers the RegEx class with ObjectTypeDB
+}
 
-	mutable String text;
-	mutable Vector<nrex_result> captures;
-	nrex exp;
+void unregister_regex_types() { // empty body: this function performs no work
 
-protected:
+}
 
-	static void _bind_methods();
-	StringArray _bind_get_captures() const;
-
-public:
-
-	void clear();
-	bool is_valid() const;
-	int get_capture_count() const;
-	int get_capture_start(int capture) const;
-	String get_capture(int capture) const;
-	Error compile(const String& p_pattern, int capture = 9);
-	int find(const String& p_text, int p_start = 0, int p_end = -1) const;
-
-	RegEx();
-	RegEx(const String& p_pattern);
-	~RegEx();
-};
-
-#endif // REGEX_H

+ 31 - 0
modules/regex/register_types.h

@@ -0,0 +1,31 @@
+/*************************************************************************/
+/*  register_types.h                                                     */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef REGEX_REGISTER_TYPES_H
+#define REGEX_REGISTER_TYPES_H
+
+// Entry points of the regex module; both are defined in register_types.cpp.
+
+// Registers the RegExMatch and RegEx classes with ObjectTypeDB.
+void register_regex_types();
+
+// Counterpart of register_regex_types(); its current definition is empty.
+void unregister_regex_types();
+
+#endif // REGEX_REGISTER_TYPES_H