Przeglądaj źródła

Add `core:text/regex`

Feoramund 1 rok temu
rodzic
commit
cb0704d51c

+ 27 - 0
core/text/regex/common/common.odin

@@ -0,0 +1,27 @@
+// This package helps break dependency cycles.
+package regex_common
+
+// VM limitations
+// Upper bound on `()` capture groups per pattern (enforced elsewhere in the
+// parser/VM; not visible in this file).
+MAX_CAPTURE_GROUPS :: 10
+// Jump/Split operands are 16-bit program counters, so a compiled program
+// cannot exceed this many opcode bytes.
+MAX_PROGRAM_SIZE   :: int(max(i16))
+// Rune class data is referenced from bytecode by a single-byte index.
+MAX_CLASSES        :: int(max(u8))
+
+// Per-pattern options shared by the tokenizer, parser, compiler, and VM.
+Flag :: enum u8 {
+	// Global: try to match the pattern anywhere in the string.
+	Global,
+	// Multiline: treat `^` and `$` as if they also match newlines.
+	Multiline,
+	// Case Insensitive: treat `a-z` as if it was also `A-Z`.
+	Case_Insensitive,
+	// Ignore Whitespace: bypass unescaped whitespace outside of classes.
+	Ignore_Whitespace,
+	// Unicode: let the compiler and virtual machine know to expect Unicode strings.
+	Unicode,
+
+	// No Capture: avoid saving capture group data entirely.
+	No_Capture,
+	// No Optimization: do not pass the pattern through the optimizer; for debugging.
+	No_Optimization,
+}
+
+// A set of `Flag`s, packed into a single byte.
+Flags :: bit_set[Flag; u8]

+ 25 - 0
core/text/regex/common/debugging.odin

@@ -0,0 +1,25 @@
+package regex_common
+
+@require import "core:os"
+import "core:io"
+import "core:strings"
+
+// Compile-time switch for regex debug tracing; defaults to off and can be
+// enabled with a `-define:ODIN_DEBUG_REGEX=true` build flag.
+ODIN_DEBUG_REGEX :: #config(ODIN_DEBUG_REGEX, false)
+
+when ODIN_DEBUG_REGEX {
+	// Stream that debug output is written to (stderr).
+	debug_stream := os.stream_from_handle(os.stderr)
+}
+
+// Write `n` to `w` as hexadecimal, prefixed with "0x" and left-padded with
+// zeroes up to `zeroes` digits. Values wider than `zeroes` are written in
+// full with no padding.
+//
+// The digits are rendered once into a scratch builder; its length determines
+// the padding, and its contents are then written out directly instead of
+// converting `n` a second time as before.
+write_padded_hex :: proc(w: io.Writer, #any_int n, zeroes: int) {
+	sb := strings.builder_make()
+	defer strings.builder_destroy(&sb)
+
+	sbw := strings.to_writer(&sb)
+	io.write_int(sbw, n, 0x10)
+
+	io.write_string(w, "0x")
+	// Pad only when the rendered value is shorter than the requested width.
+	for _ in 0..<max(0, zeroes - strings.builder_len(sb)) {
+		io.write_byte(w, '0')
+	}
+	// Reuse the already-rendered digits rather than re-converting `n`.
+	io.write_string(w, strings.to_string(sb))
+}

+ 538 - 0
core/text/regex/compiler/compiler.odin

@@ -0,0 +1,538 @@
+package regex_compiler
+
+import "core:text/regex/common"
+import "core:text/regex/parser"
+import "core:text/regex/tokenizer"
+import "core:text/regex/virtual_machine"
+import "core:unicode"
+
+Token      :: tokenizer.Token
+Token_Kind :: tokenizer.Token_Kind
+Tokenizer  :: tokenizer.Tokenizer
+
+Rune_Class_Range            :: parser.Rune_Class_Range
+Rune_Class_Data             :: parser.Rune_Class_Data
+
+Node                        :: parser.Node
+Node_Rune                   :: parser.Node_Rune
+Node_Rune_Class             :: parser.Node_Rune_Class
+Node_Wildcard               :: parser.Node_Wildcard
+Node_Concatenation          :: parser.Node_Concatenation
+Node_Alternation            :: parser.Node_Alternation
+Node_Repeat_Zero            :: parser.Node_Repeat_Zero
+Node_Repeat_Zero_Non_Greedy :: parser.Node_Repeat_Zero_Non_Greedy
+Node_Repeat_One             :: parser.Node_Repeat_One
+Node_Repeat_One_Non_Greedy  :: parser.Node_Repeat_One_Non_Greedy
+Node_Repeat_N               :: parser.Node_Repeat_N
+Node_Optional               :: parser.Node_Optional
+Node_Optional_Non_Greedy    :: parser.Node_Optional_Non_Greedy
+Node_Group                  :: parser.Node_Group
+Node_Anchor                 :: parser.Node_Anchor
+Node_Word_Boundary          :: parser.Node_Word_Boundary
+Node_Match_All_And_Escape   :: parser.Node_Match_All_And_Escape
+
+Opcode :: virtual_machine.Opcode
+Program  :: [dynamic]Opcode
+
+// Encoded instruction sizes: one opcode byte plus one (Jump) or two (Split)
+// 16-bit operands.
+JUMP_SIZE  :: size_of(Opcode) + 1 * size_of(u16)
+SPLIT_SIZE :: size_of(Opcode) + 2 * size_of(u16)
+
+
+// Transient state used while compiling a single pattern.
+Compiler :: struct {
+	// Flags the pattern is being compiled with.
+	flags: common.Flags,
+	// Set when a non-multiline `^` anchor has been emitted.
+	anchor_start_seen: bool,
+	// Every distinct rune class in the pattern; bytecode refers to these by index.
+	class_data: [dynamic]Rune_Class_Data,
+}
+
+
+// Compilation failure reasons; the limits involved live in
+// `core:text/regex/common` (MAX_PROGRAM_SIZE, MAX_CLASSES).
+Error :: enum {
+	None,
+	Program_Too_Big,
+	Too_Many_Classes,
+}
+
+// Report whether two rune class descriptors hold identical contents: the
+// same runes and the same ranges, in the same order. Identical pointers
+// trivially compare equal.
+classes_are_exact :: proc(q, w: ^Rune_Class_Data) -> bool #no_bounds_check {
+	assert(q != nil)
+	assert(w != nil)
+
+	if q == w {
+		return true
+	}
+	if len(q.runes) != len(w.runes) {
+		return false
+	}
+	if len(q.ranges) != len(w.ranges) {
+		return false
+	}
+
+	for i in 0..<len(q.runes) {
+		if q.runes[i] != w.runes[i] {
+			return false
+		}
+	}
+	for i in 0..<len(q.ranges) {
+		a, b := q.ranges[i], w.ranges[i]
+		if a.lower != b.lower || a.upper != b.upper {
+			return false
+		}
+	}
+
+	return true
+}
+
+// Walk the AST and collect one copy of every distinct rune class used by the
+// pattern, so the compiler can later refer to classes by index.
+map_all_classes :: proc(tree: Node, collection: ^[dynamic]Rune_Class_Data) {
+	if tree == nil {
+		return
+	}
+
+	switch n in tree {
+	case ^Node_Rune_Class:
+		// Deduplicate: record the class only if an identical one has not
+		// already been collected.
+		for &known in collection {
+			if classes_are_exact(&n.data, &known) {
+				return
+			}
+		}
+		append(collection, n.data)
+
+	case ^Node_Alternation:
+		map_all_classes(n.left, collection)
+		map_all_classes(n.right, collection)
+
+	case ^Node_Concatenation:
+		for subnode in n.nodes {
+			map_all_classes(subnode, collection)
+		}
+
+	// Single-child wrappers: just recurse.
+	case ^Node_Repeat_Zero:
+		map_all_classes(n.inner, collection)
+	case ^Node_Repeat_Zero_Non_Greedy:
+		map_all_classes(n.inner, collection)
+	case ^Node_Repeat_One:
+		map_all_classes(n.inner, collection)
+	case ^Node_Repeat_One_Non_Greedy:
+		map_all_classes(n.inner, collection)
+	case ^Node_Repeat_N:
+		map_all_classes(n.inner, collection)
+	case ^Node_Optional:
+		map_all_classes(n.inner, collection)
+	case ^Node_Optional_Non_Greedy:
+		map_all_classes(n.inner, collection)
+	case ^Node_Group:
+		map_all_classes(n.inner, collection)
+
+	// Leaves that carry no class data:
+	case ^Node_Rune:
+	case ^Node_Wildcard:
+	case ^Node_Anchor:
+	case ^Node_Word_Boundary:
+	case ^Node_Match_All_And_Escape:
+	}
+}
+
+// Append the raw bytes of `data` to the program, one Opcode slot per byte.
+append_raw :: #force_inline proc(code: ^Program, data: $T) {
+	// NOTE: This is system-dependent endian.
+	bytes := transmute([size_of(T)]byte)data
+	for i in 0..<len(bytes) {
+		append(code, Opcode(bytes[i]))
+	}
+}
+// Insert the raw bytes of `data` into the program at `start`, one Opcode
+// slot per byte, shifting later opcodes down.
+inject_raw :: #force_inline proc(code: ^Program, start: int, data: $T) {
+	// NOTE: This is system-dependent endian.
+	bytes := transmute([size_of(T)]byte)data
+	for i in 0..<len(bytes) {
+		inject_at(code, start + i, Opcode(bytes[i]))
+	}
+}
+
+// Recursively emit bytecode for one AST node into a fresh program fragment.
+//
+// Jump and Split operands in the returned fragment are *relative* i16
+// offsets; `compile` rewrites them into absolute positions as a final pass.
+@require_results
+generate_code :: proc(c: ^Compiler, node: Node) -> (code: Program) {
+	if node == nil {
+		return
+	}
+
+	// NOTE: For Jump/Split arguments, we write as i16 and will reinterpret
+	// this later when relative jumps are turned into absolute jumps.
+
+	switch specific in node {
+	// Atomic Nodes:
+	case ^Node_Rune:
+		// Runes below MAX_LATIN1 (or any rune in non-Unicode mode) fit in a
+		// single operand byte; otherwise the full rune value is emitted.
+		if .Unicode not_in c.flags || specific.data < unicode.MAX_LATIN1 {
+			append(&code, Opcode.Byte)
+			append(&code, cast(Opcode)specific.data)
+		} else {
+			append(&code, Opcode.Rune)
+			append_raw(&code, specific.data)
+		}
+
+	case ^Node_Rune_Class:
+		if specific.negating {
+			append(&code, Opcode.Rune_Class_Negated)
+		} else {
+			append(&code, Opcode.Rune_Class)
+		}
+
+		// The single-byte operand is an index into the class data collected
+		// earlier by `map_all_classes`.
+		index := -1
+		for &data, i in c.class_data {
+			if classes_are_exact(&data, &specific.data) {
+				index = i
+				break
+			}
+		}
+		assert(index != -1, "Unable to find collected Rune_Class_Data index.")
+
+		append(&code, Opcode(index))
+
+	case ^Node_Wildcard:
+		append(&code, Opcode.Wildcard)
+
+	case ^Node_Anchor:
+		if .Multiline in c.flags {
+			// NOTE(review): both opcodes are emitted here regardless of
+			// `specific.start` — confirm this is the intended encoding.
+			append(&code, Opcode.Multiline_Open)
+			append(&code, Opcode.Multiline_Close)
+		} else {
+			if specific.start {
+				c.anchor_start_seen = true
+				append(&code, Opcode.Assert_Start)
+			} else {
+				append(&code, Opcode.Assert_End)
+			}
+		}
+	case ^Node_Word_Boundary:
+		if specific.non_word {
+			append(&code, Opcode.Assert_Non_Word_Boundary)
+		} else {
+			append(&code, Opcode.Assert_Word_Boundary)
+		}
+
+	// Compound Nodes:
+	case ^Node_Group:
+		code = generate_code(c, specific.inner)
+
+		// Capturing groups are wrapped in a pair of Save opcodes; slot 2n
+		// opens capture group n and slot 2n+1 closes it.
+		if specific.capture && .No_Capture not_in c.flags {
+			inject_at(&code, 0, Opcode.Save)
+			inject_at(&code, 1, Opcode(2 * specific.capture_id))
+
+			append(&code, Opcode.Save)
+			append(&code, Opcode(2 * specific.capture_id + 1))
+		}
+
+	case ^Node_Alternation:
+		// Layout: Split(left, right) | <left> | Jump(end) | <right>
+		left := generate_code(c, specific.left)
+		right := generate_code(c, specific.right)
+
+		left_len := len(left)
+
+		// Avoiding duplicate allocation by reusing `left`.
+		code = left
+
+		inject_at(&code, 0, Opcode.Split)
+		inject_raw(&code, size_of(byte)               , i16(SPLIT_SIZE))
+		inject_raw(&code, size_of(byte) + size_of(i16), i16(SPLIT_SIZE + left_len + JUMP_SIZE))
+
+		append(&code, Opcode.Jump)
+		append_raw(&code, i16(len(right) + JUMP_SIZE))
+
+		for opcode in right {
+			append(&code, opcode)
+		}
+
+	case ^Node_Concatenation:
+		for subnode in specific.nodes {
+			subnode_code := generate_code(c, subnode)
+			for opcode in subnode_code {
+				append(&code, opcode)
+			}
+		}
+
+	case ^Node_Repeat_Zero:
+		// `e*`: Split(body, end) | <body> | Jump(back to the Split)
+		code = generate_code(c, specific.inner)
+		original_len := len(code)
+
+		inject_at(&code, 0, Opcode.Split)
+		inject_raw(&code, size_of(byte)               , i16(SPLIT_SIZE))
+		inject_raw(&code, size_of(byte) + size_of(i16), i16(SPLIT_SIZE + original_len + JUMP_SIZE))
+
+		append(&code, Opcode.Jump)
+		append_raw(&code, i16(-original_len - SPLIT_SIZE))
+
+	case ^Node_Repeat_Zero_Non_Greedy:
+		// As `e*`, but the Split's preferred branch skips the body.
+		code = generate_code(c, specific.inner)
+		original_len := len(code)
+
+		inject_at(&code, 0, Opcode.Split)
+		inject_raw(&code, size_of(byte)               , i16(SPLIT_SIZE + original_len + JUMP_SIZE))
+		inject_raw(&code, size_of(byte) + size_of(i16), i16(SPLIT_SIZE))
+
+		append(&code, Opcode.Jump)
+		append_raw(&code, i16(-original_len - SPLIT_SIZE))
+
+	case ^Node_Repeat_One:
+		// `e+`: <body> | Split(back to body, end)
+		code = generate_code(c, specific.inner)
+		original_len := len(code)
+
+		append(&code, Opcode.Split)
+		append_raw(&code, i16(-original_len))
+		append_raw(&code, i16(SPLIT_SIZE))
+
+	case ^Node_Repeat_One_Non_Greedy:
+		// As `e+`, with the Split's branch preference swapped.
+		code = generate_code(c, specific.inner)
+		original_len := len(code)
+
+		append(&code, Opcode.Split)
+		append_raw(&code, i16(SPLIT_SIZE))
+		append_raw(&code, i16(-original_len))
+
+	case ^Node_Repeat_N:
+		inside := generate_code(c, specific.inner)
+		original_len := len(inside)
+
+		if specific.lower == specific.upper { // {N}
+			// e{N} ... evaluates to ... e^N
+			for i := 0; i < specific.upper; i += 1 {
+				for opcode in inside {
+					append(&code, opcode)
+				}
+			}
+
+		} else if specific.lower == -1 && specific.upper > 0 { // {,M}
+			// e{,M} ... evaluates to ... e?^M
+			for i := 0; i < specific.upper; i += 1 {
+				append(&code, Opcode.Split)
+				append_raw(&code, i16(SPLIT_SIZE))
+				append_raw(&code, i16(SPLIT_SIZE + original_len))
+				for opcode in inside {
+					append(&code, opcode)
+				}
+			}
+
+		} else if specific.lower >= 0 && specific.upper == -1 { // {N,}
+			// e{N,} ... evaluates to ... e^N e*
+			for i := 0; i < specific.lower; i += 1 {
+				for opcode in inside {
+					append(&code, opcode)
+				}
+			}
+
+			append(&code, Opcode.Split)
+			append_raw(&code, i16(SPLIT_SIZE))
+			append_raw(&code, i16(SPLIT_SIZE + original_len + JUMP_SIZE))
+
+			for opcode in inside {
+				append(&code, opcode)
+			}
+
+			append(&code, Opcode.Jump)
+			append_raw(&code, i16(-original_len - SPLIT_SIZE))
+
+		} else if specific.lower >= 0 && specific.upper > 0 {
+			// e{N,M}  evaluates to ... e^N e?^(M-N)
+			for i := 0; i < specific.lower; i += 1 {
+				for opcode in inside {
+					append(&code, opcode)
+				}
+			}
+			for i := 0; i < specific.upper - specific.lower; i += 1 {
+				append(&code, Opcode.Split)
+				append_raw(&code, i16(SPLIT_SIZE + original_len))
+				append_raw(&code, i16(SPLIT_SIZE))
+				for opcode in inside {
+					append(&code, opcode)
+				}
+			}
+
+		} else {
+			panic("RegEx compiler received invalid repetition group.")
+		}
+
+	case ^Node_Optional:
+		// `e?`: Split(body, end) | <body>
+		code = generate_code(c, specific.inner)
+		original_len := len(code)
+
+		inject_at(&code, 0, Opcode.Split)
+		inject_raw(&code, size_of(byte)               , i16(SPLIT_SIZE))
+		inject_raw(&code, size_of(byte) + size_of(i16), i16(SPLIT_SIZE + original_len))
+
+	case ^Node_Optional_Non_Greedy:
+		// As `e?`, with the Split's branch preference swapped.
+		code = generate_code(c, specific.inner)
+		original_len := len(code)
+
+		inject_at(&code, 0, Opcode.Split)
+		inject_raw(&code, size_of(byte)               , i16(SPLIT_SIZE + original_len))
+		inject_raw(&code, size_of(byte) + size_of(i16), i16(SPLIT_SIZE))
+
+	case ^Node_Match_All_And_Escape:
+		append(&code, Opcode.Match_All_And_Escape)
+	}
+
+	return
+}
+
+// Compile a parsed pattern AST into bytecode for the regex virtual machine.
+//
+// Returns the program, the table of rune classes the bytecode references by
+// index, and an error when a hard limit (`common.MAX_CLASSES`,
+// `common.MAX_PROGRAM_SIZE`) is exceeded.
+@require_results
+compile :: proc(tree: Node, flags: common.Flags) -> (code: Program, class_data: [dynamic]Rune_Class_Data, err: Error) {
+	if tree == nil {
+		// Empty pattern: still emit a runnable program that matches the
+		// empty string (with an empty 0th capture unless disabled).
+		if .No_Capture not_in flags {
+			append(&code, Opcode.Save); append(&code, Opcode(0x00))
+			append(&code, Opcode.Save); append(&code, Opcode(0x01))
+			append(&code, Opcode.Match)
+		} else {
+			append(&code, Opcode.Match_And_Exit)
+		}
+		return
+	}
+
+	c: Compiler
+	c.flags = flags
+
+	map_all_classes(tree, &class_data)
+	if len(class_data) >= common.MAX_CLASSES {
+		err = .Too_Many_Classes
+		return
+	}
+	c.class_data = class_data
+
+	code = generate_code(&c, tree)
+
+	// Insertion point just past any prefix opcodes injected below.
+	pc_open := 0
+
+	add_global: if .Global in flags {
+		// Check if the opening to the pattern is predictable.
+		// If so, use one of the optimized Wait opcodes.
+		iter := virtual_machine.Opcode_Iterator{ code[:], 0 }
+		seek_loop: for opcode, pc in virtual_machine.iterate_opcodes(&iter) {
+			#partial switch opcode {
+			case .Byte:
+				inject_at(&code, pc_open, Opcode.Wait_For_Byte)
+				pc_open += size_of(Opcode)
+				inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
+				pc_open += size_of(u8)
+				break add_global
+
+			case .Rune:
+				operand := (cast(^rune)&code[pc+1])^
+				inject_at(&code, pc_open, Opcode.Wait_For_Rune)
+				pc_open += size_of(Opcode)
+				inject_raw(&code, pc_open, operand)
+				pc_open += size_of(rune)
+				break add_global
+
+			case .Rune_Class:
+				inject_at(&code, pc_open, Opcode.Wait_For_Rune_Class)
+				pc_open += size_of(Opcode)
+				inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
+				pc_open += size_of(u8)
+				break add_global
+
+			case .Rune_Class_Negated:
+				inject_at(&code, pc_open, Opcode.Wait_For_Rune_Class_Negated)
+				pc_open += size_of(Opcode)
+				inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
+				pc_open += size_of(u8)
+				break add_global
+
+			case .Save:
+				// Skip over capture bookkeeping to find the first real match opcode.
+				continue
+			case:
+				break seek_loop
+			}
+		}
+
+		// Unpredictable opening: fall back to prefixing a non-greedy `.*?`.
+		// `.*?`
+		inject_at(&code, pc_open, Opcode.Split)
+		pc_open += size_of(byte)
+		inject_raw(&code, pc_open, i16(SPLIT_SIZE + size_of(byte) + JUMP_SIZE))
+		pc_open += size_of(i16)
+		inject_raw(&code, pc_open, i16(SPLIT_SIZE))
+		pc_open += size_of(i16)
+
+		inject_at(&code, pc_open, Opcode.Wildcard)
+		pc_open += size_of(byte)
+
+		inject_at(&code, pc_open, Opcode.Jump)
+		pc_open += size_of(byte)
+		inject_raw(&code, pc_open, i16(-size_of(byte) - SPLIT_SIZE))
+		pc_open += size_of(i16)
+
+	}
+
+	// Wrap the whole program in the implicit 0th capture group, or finish
+	// with the cheaper Match_And_Exit when captures are disabled.
+	if .No_Capture not_in flags {
+		// `(` <generated code>
+		inject_at(&code, pc_open, Opcode.Save)
+		inject_at(&code, pc_open + size_of(byte), Opcode(0x00))
+
+		// `)`
+		append(&code, Opcode.Save); append(&code, Opcode(0x01))
+
+		append(&code, Opcode.Match)
+	} else {
+		append(&code, Opcode.Match_And_Exit)
+	}
+
+	if len(code) >= common.MAX_PROGRAM_SIZE {
+		err = .Program_Too_Big
+		return
+	}
+
+	// NOTE: No further opcode addition beyond this point, as we've already
+	// checked the program size. Removal or transformation is fine.
+
+	// Post-Compile Optimizations:
+
+	// * Jump Extension
+	//
+	// A:RelJmp(1) -> B:RelJmp(2) => A:RelJmp(2)
+	if .No_Optimization not_in flags {
+		// Iterate to a fixed point: collapsing one jump chain can expose another.
+		for passes_left := 1; passes_left > 0; passes_left -= 1 {
+			do_another_pass := false
+
+			iter := virtual_machine.Opcode_Iterator{ code[:], 0 }
+			for opcode, pc in virtual_machine.iterate_opcodes(&iter) {
+				#partial switch opcode {
+				case .Jump:
+					jmp   := cast(^i16)&code[pc+size_of(Opcode)]
+					if code[cast(i16)pc+jmp^] == .Jump {
+						next_jmp := (cast(^i16)&code[cast(i16)pc+jmp^+size_of(Opcode)])^
+						jmp^ = jmp^ + next_jmp
+						do_another_pass = true
+					}
+				case .Split:
+					jmp_x := cast(^i16)&code[pc+size_of(Opcode)]
+					if code[cast(i16)pc+jmp_x^] == .Jump {
+						next_jmp := (cast(^i16)&code[cast(i16)pc+jmp_x^+size_of(Opcode)])^
+						jmp_x^ = jmp_x^ + next_jmp
+						do_another_pass = true
+					}
+					jmp_y := cast(^i16)&code[pc+size_of(Opcode)+size_of(i16)]
+					if code[cast(i16)pc+jmp_y^] == .Jump {
+						next_jmp := (cast(^i16)&code[cast(i16)pc+jmp_y^+size_of(Opcode)])^
+						jmp_y^ = jmp_y^ + next_jmp
+						do_another_pass = true
+					}
+				}
+			}
+
+			if do_another_pass {
+				passes_left += 1
+			}
+		}
+	}
+
+	// * Relative Jump to Absolute Jump
+	//
+	// RelJmp{PC +/- N} => AbsJmp{M}
+	iter := virtual_machine.Opcode_Iterator{ code[:], 0 }
+	for opcode, pc in virtual_machine.iterate_opcodes(&iter) {
+		// NOTE: The virtual machine implementation depends on this.
+		#partial switch opcode {
+		case .Jump:
+			jmp   := cast(^u16)&code[pc+size_of(Opcode)]
+			jmp^   = jmp^   + cast(u16)pc
+		case .Split:
+			jmp_x := cast(^u16)&code[pc+size_of(Opcode)]
+			jmp_x^ = jmp_x^ + cast(u16)pc
+			jmp_y := cast(^u16)&code[pc+size_of(Opcode)+size_of(i16)]
+			jmp_y^ = jmp_y^ + cast(u16)pc
+		}
+	}
+
+	return
+}

+ 84 - 0
core/text/regex/compiler/debugging.odin

@@ -0,0 +1,84 @@
+package regex_compiler
+
+import "core:io"
+import "core:text/regex/common"
+import "core:text/regex/virtual_machine"
+
+// Map every Jump/Split target PC to the PC of an instruction that jumps
+// there, so `trace` can annotate target lines.
+//
+// NOTE(review): when several instructions target the same PC, later entries
+// overwrite earlier ones — only one source is remembered. Fine for debug
+// tracing, but worth confirming if this is ever used for anything else.
+get_jump_targets :: proc(code: []Opcode) -> (jump_targets: map[int]int) {
+	iter := virtual_machine.Opcode_Iterator{ code, 0 }
+	for opcode, pc in virtual_machine.iterate_opcodes(&iter) {
+		#partial switch opcode {
+		case .Jump:
+			jmp   := cast(int)(cast(^u16)&code[pc+1])^
+			jump_targets[jmp] = pc
+		case .Split:
+			jmp_x := cast(int)(cast(^u16)&code[pc+1])^
+			jmp_y := cast(int)(cast(^u16)&code[pc+3])^
+			jump_targets[jmp_x] = pc
+			jump_targets[jmp_y] = pc
+		}
+	}
+	return
+}
+
+// Write a human-readable disassembly of `code` to `w`, one instruction per
+// line. Instructions that are the target of a Jump/Split are prefixed with
+// the PC of (one) instruction that jumps to them.
+trace :: proc(w: io.Writer, code: []Opcode) {
+	jump_targets := get_jump_targets(code)
+	defer delete(jump_targets)
+
+	iter := virtual_machine.Opcode_Iterator{ code, 0 }
+	for opcode, pc in virtual_machine.iterate_opcodes(&iter) {
+		if src, ok := jump_targets[pc]; ok {
+			io.write_string(w, "--")
+			common.write_padded_hex(w, src, 4)
+			io.write_string(w, "--> ")
+		} else {
+			io.write_string(w, "            ")
+		}
+
+		io.write_string(w, "[PC: ")
+		common.write_padded_hex(w, pc, 4)
+		io.write_string(w, "] ")
+		io.write_string(w, virtual_machine.opcode_to_name(opcode))
+		io.write_byte(w, ' ')
+
+		// Decode and print the operand, if the opcode takes one.
+		#partial switch opcode {
+		case .Byte:
+			operand := cast(rune)code[pc+1]
+			io.write_encoded_rune(w, operand)
+		case .Rune:
+			operand := (cast(^rune)&code[pc+1])^
+			io.write_encoded_rune(w, operand)
+		case .Rune_Class, .Rune_Class_Negated:
+			// Operand is an index into the compiled class table.
+			operand := cast(u8)code[pc+1]
+			common.write_padded_hex(w, operand, 2)
+		case .Jump:
+			jmp   := (cast(^u16)&code[pc+1])^
+			io.write_string(w, "-> $")
+			common.write_padded_hex(w, jmp, 4)
+		case .Split:
+			jmp_x := (cast(^u16)&code[pc+1])^
+			jmp_y := (cast(^u16)&code[pc+3])^
+			io.write_string(w, "=> $")
+			common.write_padded_hex(w, jmp_x, 4)
+			io.write_string(w, ", $")
+			common.write_padded_hex(w, jmp_y, 4)
+		case .Save:
+			operand := cast(u8)code[pc+1]
+			common.write_padded_hex(w, operand, 2)
+		case .Wait_For_Byte:
+			operand := cast(rune)code[pc+1]
+			io.write_encoded_rune(w, operand)
+		case .Wait_For_Rune:
+			operand := (cast(^rune)&code[pc+1])^
+			io.write_encoded_rune(w, operand)
+		case .Wait_For_Rune_Class:
+			operand := cast(u8)code[pc+1]
+			common.write_padded_hex(w, operand, 2)
+		case .Wait_For_Rune_Class_Negated:
+			operand := cast(u8)code[pc+1]
+			common.write_padded_hex(w, operand, 2)
+		}
+
+		io.write_byte(w, '\n')
+	}
+}

+ 9 - 0
core/text/regex/compiler/doc.odin

@@ -0,0 +1,9 @@
+/*
+package regex_compiler implements a bytecode compiler for the virtual machine
+included alongside it.
+
+Operands larger than u8 are written in system endian order.
+
+More details can be found in the documentation for the virtual machine.
+*/
+package regex_compiler

+ 75 - 0
core/text/regex/doc.odin

@@ -0,0 +1,75 @@
+/*
+package regex implements a complete suite for using Regular Expressions to
+match and capture text.
+
+Regular expressions are used to describe how a piece of text can match to
+another, using a pattern language.
+
+Odin's regex library implements the following features:
+
+	Alternation:           `apple|cherry`
+	Classes:               `[0-9_]`
+	Wildcards:             `.`
+	Repeat, optional:      `a*`
+	Repeat, at least once: `a+`
+	Optional:              `a?`
+	Group Capture:         `([0-9])`
+	Group Non-Capture:     `(?:[0-9])`
+	Start & End Anchors:   `^hello$`
+	Word Boundaries:       `\bhello\b`
+	Non-Word Boundaries:   `hello\B`
+
+These specifiers can be composed together, such as an optional group:
+`(?:hello)?`
+
+This package also supports the non-greedy variants of the repeating and
+optional specifiers by appending a `?` to them.
+
+
+
+	``Some people, when confronted with a problem, think
+	  "I know, I'll use regular expressions." Now they have two problems.''
+
+	     - Jamie Zawinski
+
+
+Regular expressions have gathered a reputation over the decades for often being
+chosen as the wrong tool for the job. Here, we will clarify a few cases in
+which RegEx might be good or bad.
+
+
+**When is it a good time to use RegEx?**
+
+- You don't know at compile-time what patterns of text the program will need to
+  match when it's running.
+- As an example, you are making a client which can be configured by the user to
+  trigger on certain text patterns received from a server.
+- For another example, you need a way for users of a text editor to compose
+  matching strings that are more intricate than a simple substring lookup.
+- The text you're matching against is small (< 64 KiB) and your patterns aren't
+  overly complicated with branches (alternations, repeats, and optionals).
+- If none of the above general impressions apply but your project doesn't
+  warrant long-term maintenance.
+
+**When is it a bad time to use RegEx?**
+
+- You know at compile-time the grammar you're parsing; a hand-made parser has
+  the potential to be more maintainable and readable.
+- The grammar you're parsing has certain validation steps that lend itself to
+  forming complicated expressions, such as e-mail addresses, URIs, dates,
+  postal codes, credit cards, et cetera. Using RegEx to validate these
+  structures is almost always a bad sign.
+- The text you're matching against is big (> 1 MiB); you would be better served
+  by first dividing the text into manageable chunks and using some heuristic to
+  locate the most likely location of a match before applying RegEx against it.
+- You value high performance and low memory usage; RegEx will always have a
+  certain overhead which increases with the complexity of the pattern.
+
+
+The implementation of this package has been optimized, but it will never be as
+thoroughly performant as a hand-made parser. In comparison, there are just too
+many intermediate steps, assumptions, and generalizations in what it takes to
+handle a regular expression.
+
+*/
+package regex

+ 58 - 0
core/text/regex/optimizer/doc.odin

@@ -0,0 +1,58 @@
+/*
+package regex_optimizer implements an optimizer which acts upon the AST of a
+parsed regular expression pattern, transforming it in-place without moving to a
+compilation step.
+
+Where possible, it aims to reduce branching as much as possible in the
+expression by reducing usage of `|`.
+
+
+Here is a summary of the optimizations that it will do:
+
+* Class Simplification               : `[aab]` => `[ab]`
+                                       `[aa]`  => `[a]`
+
+* Class Reduction                    : `[a]`    => `a`
+* Range Construction                 : `[abc]`  => `[a-c]`
+* Rune Merging into Range            : `[aa-c]` => `[a-c]`
+
+* Range Merging                      : `[a-cc-e]` => `[a-e]`
+                                       `[a-cd-e]` => `[a-e]`
+                                       `[a-cb-e]` => `[a-e]`
+
+* Alternation to Optional            : `a|`  => `a?`
+* Alternation to Optional Non-Greedy : `|a`  => `a??`
+* Alternation Reduction              : `a|a` => `a`
+* Alternation to Class               : `a|b` => `[ab]`
+* Class Union                        : `[a0]|[b1]` => `[a0b1]`
+                                       `[a-b]|c`   => `[a-bc]`
+                                       `a|[b-c]`   => `[b-ca]`
+
+* Wildcard Reduction                 : `a|.`    => `.`
+                                       `.|a`    => `.`
+                                       `[ab]|.` => `.`
+                                       `.|[ab]` => `.`
+
+* Common Suffix Elimination : `blueberry|strawberry` => `(?:blue|straw)berry`
+* Common Prefix Elimination : `abi|abe` => `ab(?:i|e)`
+
+* Composition: Consume All to Anchored End
+	`.*$` =>     <special opcode>
+	`.+$` => `.` <special opcode>
+
+
+Possible future improvements:
+
+- Change the AST of alternations to be a list instead of a tree, so that
+  constructions such as `(ab|bb|cb)` can be considered in whole by the affix
+  elimination optimizations.
+
+- Introduce specialized opcodes for certain classes of repetition.
+
+- Add Common Infix Elimination.
+
+- Measure the precise finite minimum and maximum of a pattern, if available,
+  and check against that on any strings before running the virtual machine.
+
+*/
+package regex_optimizer

+ 522 - 0
core/text/regex/optimizer/optimizer.odin

@@ -0,0 +1,522 @@
+package regex_optimizer
+
+import "base:intrinsics"
+@require import "core:io"
+import "core:slice"
+import "core:text/regex/common"
+import "core:text/regex/parser"
+
+Rune_Class_Range :: parser.Rune_Class_Range
+
+Node                        :: parser.Node
+Node_Rune                   :: parser.Node_Rune
+Node_Rune_Class             :: parser.Node_Rune_Class
+Node_Wildcard               :: parser.Node_Wildcard
+Node_Concatenation          :: parser.Node_Concatenation
+Node_Alternation            :: parser.Node_Alternation
+Node_Repeat_Zero            :: parser.Node_Repeat_Zero
+Node_Repeat_Zero_Non_Greedy :: parser.Node_Repeat_Zero_Non_Greedy
+Node_Repeat_One             :: parser.Node_Repeat_One
+Node_Repeat_One_Non_Greedy  :: parser.Node_Repeat_One_Non_Greedy
+Node_Repeat_N               :: parser.Node_Repeat_N
+Node_Optional               :: parser.Node_Optional
+Node_Optional_Non_Greedy    :: parser.Node_Optional_Non_Greedy
+Node_Group                  :: parser.Node_Group
+Node_Anchor                 :: parser.Node_Anchor
+Node_Word_Boundary          :: parser.Node_Word_Boundary
+Node_Match_All_And_Escape   :: parser.Node_Match_All_And_Escape
+
+
+// Ordering predicate for `slice.sort_by`: sort class ranges ascending by
+// their lower bound.
+class_range_sorter :: proc(i, j: Rune_Class_Range) -> bool {
+	return i.lower < j.lower
+}
+
+optimize_subtree :: proc(tree: Node, flags: common.Flags) -> (result: Node, changes: int) {
+	if tree == nil {
+		return nil, 0
+	}
+
+	result = tree
+
+	switch specific in tree {
+	// No direct optimization possible on these nodes:
+	case ^Node_Rune: break
+	case ^Node_Wildcard: break
+	case ^Node_Anchor: break
+	case ^Node_Word_Boundary: break
+	case ^Node_Match_All_And_Escape: break
+
+	case ^Node_Concatenation:
+		// * Composition: Consume All to Anchored End
+		//
+		// DO: `.*$` =>     <special opcode>
+		// DO: `.+$` => `.` <special opcode>
+		if .Multiline not_in flags && len(specific.nodes) >= 2 {
+			i := len(specific.nodes) - 2
+			wrza: {
+				subnode := specific.nodes[i].(^Node_Repeat_Zero) or_break wrza
+				_ = subnode.inner.(^Node_Wildcard) or_break wrza
+				next_node := specific.nodes[i+1].(^Node_Anchor) or_break wrza
+				if next_node.start == false {
+					specific.nodes[i] = new(Node_Match_All_And_Escape)
+					ordered_remove(&specific.nodes, i + 1)
+					changes += 1
+					break
+				}
+			}
+			wroa: {
+				subnode := specific.nodes[i].(^Node_Repeat_One) or_break wroa
+				subsubnode := subnode.inner.(^Node_Wildcard) or_break wroa
+				next_node := specific.nodes[i+1].(^Node_Anchor) or_break wroa
+				if next_node.start == false {
+					specific.nodes[i] = subsubnode
+					specific.nodes[i+1] = new(Node_Match_All_And_Escape)
+					changes += 1
+					break
+				}
+			}
+		}
+
+		// Only recursive optimizations:
+		for i := 0; i < len(specific.nodes); i += 1 {
+			subnode, subnode_changes := optimize_subtree(specific.nodes[i], flags)
+			changes += subnode_changes
+			if subnode == nil {
+				ordered_remove(&specific.nodes, i)
+				i -= 1
+				changes += 1
+			} else {
+				specific.nodes[i] = subnode
+			}
+		}
+
+		if len(specific.nodes) == 1 {
+			result = specific.nodes[0]
+			changes += 1
+		} else if len(specific.nodes) == 0 {
+			return nil, changes + 1
+		}
+
+	case ^Node_Repeat_Zero:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+	case ^Node_Repeat_Zero_Non_Greedy:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+	case ^Node_Repeat_One:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+	case ^Node_Repeat_One_Non_Greedy:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+	case ^Node_Repeat_N:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+	case ^Node_Optional:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+	case ^Node_Optional_Non_Greedy:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+
+	case ^Node_Group:
+		specific.inner, changes = optimize_subtree(specific.inner, flags)
+
+		if specific.inner == nil {
+			return nil, changes + 1
+		}
+
+		if !specific.capture {
+			result = specific.inner
+			changes += 1
+		}
+
+	// Full optimization:
+	case ^Node_Rune_Class:
+		// * Class Simplification
+		//
+		// DO: `[aab]` => `[ab]`
+		// DO: `[aa]`  => `[a]`
+		runes_seen: map[rune]bool
+
+		for r in specific.runes {
+			runes_seen[r] = true
+		}
+
+		if len(runes_seen) != len(specific.runes) {
+			clear(&specific.runes)
+			for key in runes_seen {
+				append(&specific.runes, key)
+			}
+			changes += 1
+		}
+
+		// * Class Reduction
+		//
+		// DO: `[a]` => `a`
+		if !specific.negating && len(specific.runes) == 1 && len(specific.ranges) == 0 {
+			only_rune := specific.runes[0]
+
+			node := new(Node_Rune)
+			node.data = only_rune
+
+			return node, changes + 1
+		}
+
+		// * Range Construction
+		//
+		// DO: `[abc]` => `[a-c]`
+		slice.sort(specific.runes[:])
+		if len(specific.runes) > 1 {
+			new_range: Rune_Class_Range
+			new_range.lower = specific.runes[0]
+			new_range.upper = specific.runes[0]
+
+			for i := 1; i < len(specific.runes); i += 1 {
+				r := specific.runes[i]
+				if new_range.lower == -1 {
+					new_range = { r, r }
+					continue
+				}
+
+				if r == new_range.lower - 1 {
+					new_range.lower -= 1
+					ordered_remove(&specific.runes, i)
+					i -= 1
+					changes += 1
+				} else if r == new_range.upper + 1 {
+					new_range.upper += 1
+					ordered_remove(&specific.runes, i)
+					i -= 1
+					changes += 1
+				} else if new_range.lower != new_range.upper {
+					append(&specific.ranges, new_range)
+					new_range = { -1, -1 }
+					changes += 1
+				}
+			}
+
+			if new_range.lower != new_range.upper {
+				append(&specific.ranges, new_range)
+				changes += 1
+			}
+		}
+
+		// * Rune Merging into Range
+		//
+		// DO: `[aa-c]` => `[a-c]`
+		for range in specific.ranges {
+			for i := 0; i < len(specific.runes); i += 1 {
+				r := specific.runes[i]
+				if range.lower <= r && r <= range.upper {
+					ordered_remove(&specific.runes, i)
+					i -= 1
+					changes += 1
+				}
+			}
+		}
+
+		// * Range Merging
+		//
+		// DO: `[a-cc-e]` => `[a-e]`
+		// DO: `[a-cd-e]` => `[a-e]`
+		// DO: `[a-cb-e]` => `[a-e]`
+		slice.sort_by(specific.ranges[:], class_range_sorter)
+		for i := 0; i < len(specific.ranges) - 1; i += 1 {
+			for j := i + 1; j < len(specific.ranges); j += 1 {
+				left_range  := &specific.ranges[i]
+				right_range :=  specific.ranges[j]
+
+				if left_range.upper == right_range.lower     ||
+				   left_range.upper == right_range.lower - 1 ||
+				   left_range.lower <= right_range.lower && right_range.lower <= left_range.upper {
+					left_range.upper = max(left_range.upper, right_range.upper)
+					ordered_remove(&specific.ranges, j)
+					j -= 1
+					changes += 1
+				} else {
+					break
+				}
+			}
+		}
+
+		if len(specific.ranges) == 0 {
+			specific.ranges = {}
+		}
+		if len(specific.runes) == 0 {
+			specific.runes = {}
+		}
+
+		// * NOP
+		//
+		// DO: `[]` => <nil>
+		if len(specific.ranges) + len(specific.runes) == 0 {
+			return nil, 1
+		}
+
+		slice.sort(specific.runes[:])
+		slice.sort_by(specific.ranges[:], class_range_sorter)
+
+	case ^Node_Alternation:
+		// Perform recursive optimization first.
+		left_changes, right_changes: int
+		specific.left, left_changes = optimize_subtree(specific.left, flags)
+		specific.right, right_changes = optimize_subtree(specific.right, flags)
+		changes += left_changes + right_changes
+
+		// * Alternation to Optional
+		//
+		// DO: `a|` => `a?`
+		if specific.left != nil && specific.right == nil {
+			node := new(Node_Optional)
+			node.inner = specific.left
+			return node, 1
+		}
+
+		// * Alternation to Optional Non-Greedy
+		//
+		// DO: `|a` => `a??`
+		if specific.right != nil && specific.left == nil {
+			node := new(Node_Optional_Non_Greedy)
+			node.inner = specific.right
+			return node, 1
+		}
+
+		// * NOP
+		//
+		// DO: `|` => <nil>
+		if specific.left == nil && specific.right == nil {
+			return nil, 1
+		}
+
+		left_rune, left_is_rune := specific.left.(^Node_Rune)
+		right_rune, right_is_rune := specific.right.(^Node_Rune)
+
+		if left_is_rune && right_is_rune {
+			if left_rune.data == right_rune.data {
+				// * Alternation Reduction
+				//
+				// DO: `a|a` => `a`
+				return left_rune, 1
+			} else {
+				// * Alternation to Class
+				//
+				// DO: `a|b` => `[ab]`
+				node := new(Node_Rune_Class)
+				append(&node.runes, left_rune.data)
+				append(&node.runes, right_rune.data)
+				return node, 1
+			}
+		}
+
+		left_wildcard, left_is_wildcard := specific.left.(^Node_Wildcard)
+		right_wildcard, right_is_wildcard := specific.right.(^Node_Wildcard)
+
+		// * Class Union
+		//
+		// DO: `[a0]|[b1]` => `[a0b1]`
+		left_class, left_is_class := specific.left.(^Node_Rune_Class)
+		right_class, right_is_class := specific.right.(^Node_Rune_Class)
+		if left_is_class && right_is_class {
+			for r in right_class.runes {
+				append(&left_class.runes, r)
+			}
+			for range in right_class.ranges {
+				append(&left_class.ranges, range)
+			}
+			return left_class, 1
+		}
+
+		// * Class Union
+		//
+		// DO: `[a-b]|c` => `[a-bc]`
+		if left_is_class && right_is_rune {
+			append(&left_class.runes, right_rune.data)
+			return left_class, 1
+		}
+
+		// * Class Union
+		//
+		// DO: `a|[b-c]` => `[b-ca]`
+		if left_is_rune && right_is_class {
+			append(&right_class.runes, left_rune.data)
+			return right_class, 1
+		}
+
+		// * Wildcard Reduction
+		//
+		// DO: `a|.` => `.`
+		if left_is_rune && right_is_wildcard {
+			return right_wildcard, 1
+		}
+
+		// * Wildcard Reduction
+		//
+		// DO: `.|a` => `.`
+		if left_is_wildcard && right_is_rune {
+			return left_wildcard, 1
+		}
+
+		// * Wildcard Reduction
+		//
+		// DO: `[ab]|.` => `.`
+		if left_is_class && right_is_wildcard {
+			return right_wildcard, 1
+		}
+
+		// * Wildcard Reduction
+		//
+		// DO: `.|[ab]` => `.`
+		if left_is_wildcard && right_is_class {
+			return left_wildcard, 1
+		}
+
+		left_concatenation, left_is_concatenation := specific.left.(^Node_Concatenation)
+		right_concatenation, right_is_concatenation := specific.right.(^Node_Concatenation)
+
+		// * Common Suffix Elimination
+		//
+		// DO: `blueberry|strawberry` => `(?:blue|straw)berry`
+		if left_is_concatenation && right_is_concatenation {
+			// Remember that a concatenation could contain any node, not just runes.
+			left_len  := len(left_concatenation.nodes)
+			right_len := len(right_concatenation.nodes)
+			least_len := min(left_len, right_len)
+			same_len  := 0
+			for i := 1; i <= least_len; i += 1 {
+				left_subrune, left_is_subrune := left_concatenation.nodes[left_len - i].(^Node_Rune)
+				right_subrune, right_is_subrune := right_concatenation.nodes[right_len - i].(^Node_Rune)
+
+				if !left_is_subrune || !right_is_subrune {
+					// One of the nodes isn't a rune; there's nothing more we can do.
+					break
+				}
+
+				if left_subrune.data == right_subrune.data {
+					same_len += 1
+				} else {
+					// No more similarities.
+					break
+				}
+			}
+
+			if same_len > 0 {
+				// Dissolve this alternation into a concatenation.
+				cat_node := new(Node_Concatenation)
+				group_node := new(Node_Group)
+				append(&cat_node.nodes, group_node)
+
+				// Turn the concatenation into the common suffix.
+				for i := left_len - same_len; i < left_len; i += 1 {
+					append(&cat_node.nodes, left_concatenation.nodes[i])
+				}
+
+				// Construct the group of alternating prefixes.
+				for i := same_len; i > 0; i -= 1 {
+					pop(&left_concatenation.nodes)
+					pop(&right_concatenation.nodes)
+				}
+
+				// (Re-using this alternation node.)
+				alter_node := specific
+				alter_node.left = left_concatenation
+				alter_node.right = right_concatenation
+				group_node.inner = alter_node
+
+				return cat_node, 1
+			}
+		}
+
+		// * Common Prefix Elimination
+		//
+		// DO: `abi|abe` => `ab(?:i|e)`
+		if left_is_concatenation && right_is_concatenation {
+			// Try to identify a common prefix.
+			// Remember that a concatenation could contain any node, not just runes.
+			least_len := min(len(left_concatenation.nodes), len(right_concatenation.nodes))
+			same_len := 0
+			for i := 0; i < least_len; i += 1 {
+				left_subrune, left_is_subrune := left_concatenation.nodes[i].(^Node_Rune)
+				right_subrune, right_is_subrune := right_concatenation.nodes[i].(^Node_Rune)
+
+				if !left_is_subrune || !right_is_subrune {
+					// One of the nodes isn't a rune; there's nothing more we can do.
+					break
+				}
+
+				if left_subrune.data == right_subrune.data {
+					same_len = i + 1
+				} else {
+					// No more similarities.
+					break
+				}
+			}
+
+			if same_len > 0 {
+				cat_node := new(Node_Concatenation)
+				for i := 0; i < same_len; i += 1 {
+					append(&cat_node.nodes, left_concatenation.nodes[i])
+				}
+				for i := same_len; i > 0; i -= 1 {
+					ordered_remove(&left_concatenation.nodes, 0)
+					ordered_remove(&right_concatenation.nodes, 0)
+				}
+
+				group_node := new(Node_Group)
+				// (Re-using this alternation node.)
+				alter_node := specific
+				alter_node.left = left_concatenation
+				alter_node.right = right_concatenation
+				group_node.inner = alter_node
+
+				append(&cat_node.nodes, group_node)
+				return cat_node, 1
+			}
+		}
+	}
+
+	return
+}
+
+// Run the optimizer over a parsed AST until no pass produces any further
+// rewrite, returning the rewritten tree and the total number of rewrites.
+optimize :: proc(tree: Node, flags: common.Flags) -> (result: Node, changes: int) {
+	result = tree
+
+	when common.ODIN_DEBUG_REGEX {
+		io.write_string(common.debug_stream, "AST before Optimizer: ")
+		parser.write_node(common.debug_stream, tree)
+		io.write_byte(common.debug_stream, '\n')
+	}
+
+	// Iterate to a fixed point: one rewrite may enable further reductions.
+	for {
+		delta: int
+		result, delta = optimize_subtree(result, flags)
+		if delta == 0 {
+			break
+		}
+		changes += delta
+	}
+
+	when common.ODIN_DEBUG_REGEX {
+		io.write_string(common.debug_stream, "AST after Optimizer: ")
+		parser.write_node(common.debug_stream, result)
+		io.write_byte(common.debug_stream, '\n')
+	}
+
+	return
+}

+ 103 - 0
core/text/regex/parser/debugging.odin

@@ -0,0 +1,103 @@
+package regex_parser
+
+import "core:io"
+
+// Write a human-readable representation of an AST node to `w`, for
+// debugging. Fullwidth brackets (「」《》〈〉) delimit composite nodes so
+// they cannot be confused with literal pattern syntax.
+write_node :: proc(w: io.Writer, node: Node) {
+	switch specific in node {
+	case ^Node_Rune:
+		io.write_rune(w, specific.data)
+
+	case ^Node_Rune_Class:
+		io.write_byte(w, '[')
+		if specific.negating {
+			io.write_byte(w, '^')
+		}
+		for r in specific.data.runes {
+			io.write_rune(w, r)
+		}
+		for range in specific.data.ranges {
+			io.write_rune(w, range.lower)
+			io.write_byte(w, '-')
+			io.write_rune(w, range.upper)
+		}
+		io.write_byte(w, ']')
+
+	case ^Node_Wildcard:
+		io.write_byte(w, '.')
+
+	case ^Node_Concatenation:
+		io.write_rune(w, '「')
+		// `⋅` separates the concatenated sub-nodes.
+		for subnode, i in specific.nodes {
+			if i != 0 {
+				io.write_rune(w, '⋅')
+			}
+			write_node(w, subnode)
+		}
+		io.write_rune(w, '」')
+
+	case ^Node_Repeat_Zero:
+		write_node(w, specific.inner)
+		io.write_byte(w, '*')
+	case ^Node_Repeat_Zero_Non_Greedy:
+		write_node(w, specific.inner)
+		io.write_string(w, "*?")
+	case ^Node_Repeat_One:
+		write_node(w, specific.inner)
+		io.write_byte(w, '+')
+	case ^Node_Repeat_One_Non_Greedy:
+		write_node(w, specific.inner)
+		io.write_string(w, "+?")
+
+	case ^Node_Repeat_N:
+		write_node(w, specific.inner)
+		// An open bound is stored as -1; print the short forms when the
+		// repetition is equivalent to `*` or `+`.
+		if specific.lower == 0 && specific.upper == -1 {
+			io.write_byte(w, '*')
+		} else if specific.lower == 1 && specific.upper == -1 {
+			io.write_byte(w, '+')
+		} else {
+			io.write_byte(w, '{')
+			io.write_int(w, specific.lower)
+			io.write_byte(w, ',')
+			io.write_int(w, specific.upper)
+			io.write_byte(w, '}')
+		}
+
+	case ^Node_Alternation:
+		io.write_rune(w, '《')
+		write_node(w, specific.left)
+		io.write_byte(w, '|')
+		write_node(w, specific.right)
+		io.write_rune(w, '》')
+
+	case ^Node_Optional:
+		io.write_rune(w, '〈')
+		write_node(w, specific.inner)
+		io.write_byte(w, '?')
+		io.write_rune(w, '〉')
+	case ^Node_Optional_Non_Greedy:
+		io.write_rune(w, '〈')
+		write_node(w, specific.inner)
+		io.write_string(w, "??")
+		io.write_rune(w, '〉')
+
+	case ^Node_Group:
+		io.write_byte(w, '(')
+		if !specific.capture {
+			io.write_string(w, "?:")
+		}
+		write_node(w, specific.inner)
+		io.write_byte(w, ')')
+
+	case ^Node_Anchor:
+		io.write_byte(w, '^' if specific.start else '$')
+
+	case ^Node_Word_Boundary:
+		io.write_string(w, `\B` if specific.non_word else `\b`)
+
+	case ^Node_Match_All_And_Escape:
+		io.write_string(w, "《.*$》")
+
+	case nil:
+		io.write_string(w, "<nil>")
+	}
+}

+ 10 - 0
core/text/regex/parser/doc.odin

@@ -0,0 +1,10 @@
+/*
+package regex_parser implements a Pratt parser, also known as a Top-Down
+Operator Precedence parser, for parsing tokenized regular expression patterns.
+
+References:
+- https://dl.acm.org/doi/10.1145/512927.512931
+- https://tdop.github.io/
+- http://crockford.com/javascript/tdop/tdop.html
+*/
+package regex_parser

+ 580 - 0
core/text/regex/parser/parser.odin

@@ -0,0 +1,580 @@
+package regex_parser
+
+import "base:intrinsics"
+import "core:strconv"
+import "core:strings"
+import "core:text/regex/common"
+import "core:text/regex/tokenizer"
+import "core:unicode"
+import "core:unicode/utf8"
+
+Token      :: tokenizer.Token
+Token_Kind :: tokenizer.Token_Kind
+Tokenizer  :: tokenizer.Tokenizer
+
+// An inclusive range of runes, e.g. the `a-z` part of a class.
+Rune_Class_Range :: struct {
+	lower, upper: rune,
+}
+// Backing data for a rune class: individual runes plus inclusive ranges.
+Rune_Class_Data :: struct {
+	runes: [dynamic]rune,
+	ranges: [dynamic]Rune_Class_Range,
+}
+
+
+// A single literal rune, e.g. `a`.
+Node_Rune :: struct {
+	data: rune,
+}
+
+// A rune class, e.g. `[a-z0-9]`; `negating` corresponds to `[^...]`.
+Node_Rune_Class :: struct {
+	negating: bool,
+	using data: Rune_Class_Data,
+}
+
+// The wildcard `.`.
+Node_Wildcard :: struct {}
+
+// A binary alternation `a|b`. Either side may be nil for the empty path.
+Node_Alternation :: struct {
+	left, right: Node,
+}
+
+// A sequence of nodes matched one after another, e.g. `abc`.
+Node_Concatenation :: struct {
+	nodes: [dynamic]Node,
+}
+
+// `x*`
+Node_Repeat_Zero :: struct {
+	inner: Node,
+}
+// `x*?`
+Node_Repeat_Zero_Non_Greedy :: struct {
+	inner: Node,
+}
+// `x+`
+Node_Repeat_One :: struct {
+	inner: Node,
+}
+// `x+?`
+Node_Repeat_One_Non_Greedy :: struct {
+	inner: Node,
+}
+
+// Bounded repetition `x{n,m}`.
+// NOTE(review): -1 appears to act as a sentinel for an open bound here —
+// confirm against the compiler's handling.
+Node_Repeat_N :: struct {
+	inner: Node,
+	lower, upper: int,
+}
+
+// `x?`
+Node_Optional :: struct {
+	inner: Node,
+}
+// `x??`
+Node_Optional_Non_Greedy :: struct {
+	inner: Node,
+}
+
+// A group `(...)` or `(?:...)`; `capture_id` is meaningful when `capture` is set.
+Node_Group :: struct {
+	inner: Node,
+	capture_id: int,
+	capture: bool,
+}
+
+// `^` when `start` is true, otherwise `$`.
+Node_Anchor :: struct {
+	start: bool,
+}
+// `\b`, or `\B` when `non_word` is true.
+Node_Word_Boundary :: struct {
+	non_word: bool,
+}
+
+// Optimizer-generated node; printed as `《.*$》` by the debug writer.
+Node_Match_All_And_Escape :: struct {}
+
+// Any node of the parsed AST. All variants are pointers; nil is a valid
+// (empty) node.
+Node :: union {
+	^Node_Rune,
+	^Node_Rune_Class,
+	^Node_Wildcard,
+	^Node_Concatenation,
+	^Node_Alternation,
+	^Node_Repeat_Zero,
+	^Node_Repeat_Zero_Non_Greedy,
+	^Node_Repeat_One,
+	^Node_Repeat_One_Non_Greedy,
+	^Node_Repeat_N,
+	^Node_Optional,
+	^Node_Optional_Non_Greedy,
+	^Node_Group,
+	^Node_Anchor,
+	^Node_Word_Boundary,
+
+	// Optimized nodes (not created by the Parser):
+	^Node_Match_All_And_Escape,
+}
+
+
+// Return the Pratt-parser left binding power (precedence) for a token kind.
+// Higher values bind tighter; kinds not listed (e.g. plain runes) bind at 0.
+left_binding_power :: proc(kind: Token_Kind) -> int {
+	#partial switch kind {
+	// Alternation is the loosest operator: `a|b`.
+	case .Alternate:                return 1
+	// Implicit concatenation: `ab`.
+	case .Concatenate:              return 2
+	// Postfix repetitions: `*`, `+`, `{n,m}` and their non-greedy forms.
+	case .Repeat_Zero, .Repeat_One,
+	     .Repeat_Zero_Non_Greedy, .Repeat_One_Non_Greedy,
+	     .Repeat_N:                 return 3
+	// Postfix optionals: `?` and `??`.
+	case .Optional,
+	     .Optional_Non_Greedy:      return 4
+	// Groups bind tightest.
+	case .Open_Paren,
+	     .Open_Paren_Non_Capture:   return 9
+	}
+	return 0
+}
+
+
+// The parser required a specific token kind but found something else.
+Expected_Token :: struct {
+	pos: int,
+	kind: Token_Kind,
+}
+
+// A `{n,m}` repetition was malformed (unparsable or invalid bounds).
+Invalid_Repetition :: struct {
+	pos: int,
+}
+
+// A token appeared in a position where it is not valid.
+Invalid_Token :: struct {
+	pos: int,
+	kind: Token_Kind,
+}
+
+// The tokenizer reported invalid input while scanning.
+Invalid_Unicode :: struct {
+	pos: int,
+}
+
+// More capturing groups were opened than `common.MAX_CAPTURE_GROUPS` allows.
+Too_Many_Capture_Groups :: struct {
+	pos: int,
+}
+
+// The pattern ended where more input was required.
+Unexpected_EOF :: struct {
+	pos: int,
+}
+
+// Any error the parser can produce.
+Error :: union {
+	Expected_Token,
+	Invalid_Repetition,
+	Invalid_Token,
+	Invalid_Unicode,
+	Too_Many_Capture_Groups,
+	Unexpected_EOF,
+}
+
+
+// Pratt parser state.
+Parser :: struct {
+	flags: common.Flags,
+	t: Tokenizer,
+
+	// The token currently under consideration (one-token lookahead).
+	cur_token: Token,
+
+	// Number of capturing groups opened so far.
+	groups: int,
+}
+
+
+// Scan the next token from the tokenizer into `p.cur_token`.
+//
+// Returns an `Invalid_Unicode` error if the tokenizer produced an invalid
+// token, reporting the tokenizer's current offset so callers can see where
+// the bad input was encountered (previously this always reported 0).
+@require_results
+advance :: proc(p: ^Parser) -> Error {
+	p.cur_token = tokenizer.scan(&p.t)
+	if p.cur_token.kind == .Invalid {
+		return Invalid_Unicode { pos = p.t.offset }
+	}
+	return nil
+}
+
+// Consume the current token if it matches `kind`; otherwise report an
+// `Expected_Token` error at the tokenizer's current offset.
+expect :: proc(p: ^Parser, kind: Token_Kind) -> (err: Error) {
+	if p.cur_token.kind != kind {
+		return Expected_Token{
+			pos = p.t.offset,
+			kind = kind,
+		}
+	}
+	return advance(p)
+}
+
+// Parse a token appearing in prefix position (the Pratt parser's "null
+// denotation"): literals, classes, wildcards, groups, anchors, boundaries,
+// and an alternation with an empty left side.
+null_denotation :: proc(p: ^Parser, token: Token) -> (result: Node, err: Error) {
+	#partial switch token.kind {
+	case .Rune:
+		// Take the first (and only) rune of the token's text.
+		r: rune
+		for ru in token.text {
+			r = ru
+			break
+		}
+		assert(r != 0, "Parsed an empty Rune token.")
+
+		// Case-insensitive literals expand to a two-rune class holding
+		// both cases, when the cases differ.
+		if .Case_Insensitive in p.flags {
+			lower := unicode.to_lower(r)
+			upper := unicode.to_upper(r)
+			if lower != upper {
+				node := new(Node_Rune_Class)
+				append(&node.runes, lower)
+				append(&node.runes, upper)
+				return node, nil
+			}
+		}
+
+		node := new(Node_Rune)
+		node ^= { r }
+		return node, nil
+
+	case .Rune_Class:
+		// An empty class body parses to an empty (nil) node.
+		if len(token.text) == 0 {
+			return nil, nil
+		}
+
+		node := new(Node_Rune_Class)
+
+		for i := 0; i < len(token.text); /**/ {
+			r, size := utf8.decode_rune(token.text[i:])
+			// A leading `^` negates the class.
+			if i == 0 && r == '^' {
+				node.negating = true
+				i += size
+				continue
+			}
+			i += size
+
+			assert(size > 0, "RegEx tokenizer passed an incomplete Rune_Class to the parser.")
+
+			// Escape sequences inside the class, e.g. `[\d]`.
+			if r == '\\' {
+				next_r, next_size := utf8.decode_rune(token.text[i:])
+				i += next_size
+				assert(next_size > 0, "RegEx tokenizer passed an incomplete Rune_Class to the parser.")
+
+				// @MetaCharacter
+				// NOTE: These must be kept in sync with the tokenizer.
+				switch next_r {
+				case 'f': append(&node.runes, '\f')
+				case 'n': append(&node.runes, '\n')
+				case 'r': append(&node.runes, '\r')
+				case 't': append(&node.runes, '\t')
+
+				case 'd':
+					append(&node.ranges, Rune_Class_Range{ '0', '9' })
+				case 's':
+					append(&node.runes, '\t')
+					append(&node.runes, '\n')
+					append(&node.runes, '\f')
+					append(&node.runes, '\r')
+					append(&node.runes, ' ')
+				case 'w':
+					append(&node.ranges, Rune_Class_Range{ '0', '9' })
+					append(&node.ranges, Rune_Class_Range{ 'A', 'Z' })
+					append(&node.runes, '_')
+					append(&node.ranges, Rune_Class_Range{ 'a', 'z' })
+				// The negated metaclasses are expressed as the ranges
+				// between and around their positive counterparts.
+				case 'D':
+					append(&node.ranges, Rune_Class_Range{        0,  '0' - 1  })
+					append(&node.ranges, Rune_Class_Range{  '9' + 1, max(rune) })
+				case 'S':
+					append(&node.ranges, Rune_Class_Range{        0, '\t' - 1  })
+					// \t and \n are adjacent.
+					append(&node.runes, '\x0b') // Vertical Tab
+					append(&node.ranges, Rune_Class_Range{ '\r' + 1,  ' ' - 1  })
+					append(&node.ranges, Rune_Class_Range{  ' ' + 1, max(rune) })
+				case 'W':
+					append(&node.ranges, Rune_Class_Range{        0,  '0' - 1  })
+					append(&node.ranges, Rune_Class_Range{  '9' + 1,  'A' - 1  })
+					append(&node.ranges, Rune_Class_Range{  'Z' + 1,  '_' - 1  })
+					append(&node.ranges, Rune_Class_Range{  '_' + 1,  'a' - 1  })
+					append(&node.ranges, Rune_Class_Range{  'z' + 1, max(rune) })
+				case:
+					// Any other escaped rune is taken literally.
+					append(&node.runes, next_r)
+				}
+				continue
+			}
+
+			// A `-` after at least one rune forms a range with the
+			// previously-collected rune, e.g. `a-z`.
+			if r == '-' && len(node.runes) > 0 {
+				next_r, next_size := utf8.decode_rune(token.text[i:])
+				if next_size > 0 {
+					last := pop(&node.runes)
+					i += next_size
+
+					append(&node.ranges, Rune_Class_Range{ last, next_r })
+					continue
+				}
+			}
+
+			append(&node.runes, r)
+		}
+
+		// Case-insensitivity: add the opposite case of each collected rune
+		// and range. The original lengths are cached so newly-appended
+		// entries are not themselves re-scanned.
+		if .Case_Insensitive in p.flags {
+			length := len(node.runes)
+			#no_bounds_check for i := 0; i < length; i += 1 {
+				r := node.runes[i]
+				lower := unicode.to_lower(r)
+				upper := unicode.to_upper(r)
+
+				if lower != upper {
+					if lower != r {
+						append(&node.runes, lower)
+					} else {
+						append(&node.runes, upper)
+					}
+				}
+			}
+
+			length = len(node.ranges)
+			#no_bounds_check for i := 0; i < length; i += 1 {
+				range := &node.ranges[i]
+
+				min_lower := unicode.to_lower(range.lower)
+				max_lower := unicode.to_lower(range.upper)
+
+				min_upper := unicode.to_upper(range.lower)
+				max_upper := unicode.to_upper(range.upper)
+
+				if min_lower != min_upper && max_lower != max_upper {
+					range.lower = min_lower
+					range.upper = max_lower
+					append(&node.ranges, Rune_Class_Range{ min_upper, max_upper })
+				}
+			}
+		}
+
+		result = node
+
+	case .Wildcard:
+		node := new(Node_Wildcard)
+		result = node
+
+	case .Open_Paren:
+		// Because of the recursive nature of the token parser, we take the
+		// group number first instead of afterwards, in order to construct
+		// group matches from the outside in.
+		p.groups += 1
+		if p.groups == common.MAX_CAPTURE_GROUPS {
+			return nil, Too_Many_Capture_Groups{ pos = token.pos }
+		}
+		this_group := p.groups
+
+		node := new(Node_Group)
+		node.capture = true
+		node.capture_id = this_group
+
+		node.inner = parse_expression(p, 0) or_return
+		expect(p, .Close_Paren) or_return
+		result = node
+	case .Open_Paren_Non_Capture:
+		node := new(Node_Group)
+		node.inner = parse_expression(p, 0) or_return
+		expect(p, .Close_Paren) or_return
+		result = node
+	case .Close_Paren:
+		// A stray `)` with no matching `(` is treated as a literal rune.
+		node := new(Node_Rune)
+		node ^= { ')' }
+		return node, nil
+
+	case .Anchor_Start:
+		node := new(Node_Anchor)
+		node.start = true
+		result = node
+	case .Anchor_End:
+		node := new(Node_Anchor)
+		result = node
+	case .Word_Boundary:
+		node := new(Node_Word_Boundary)
+		result = node
+	case .Non_Word_Boundary:
+		node := new(Node_Word_Boundary)
+		node.non_word = true
+		result = node
+
+	case .Alternate:
+		// A unary alternation with a left-side empty path, i.e. `|a`.
+		right, right_err := parse_expression(p, left_binding_power(.Alternate))
+		#partial switch specific in right_err {
+		case Unexpected_EOF:
+			// This token is a NOP, i.e. `|`.
+			break
+		case nil:
+			break
+		case:
+			return nil, right_err
+		}
+
+		node := new(Node_Alternation)
+		node.right = right
+		result = node
+
+	case .EOF:
+		return nil, Unexpected_EOF{ pos = token.pos }
+
+	case:
+		return nil, Invalid_Token{ pos = token.pos, kind = token.kind }
+	}
+
+	return
+}
+
+// Parse a token appearing in infix/postfix position (the Pratt parser's
+// "left denotation"), combining it with the already-parsed `left` node:
+// alternation, concatenation, repetitions, and optionals.
+left_denotation :: proc(p: ^Parser, token: Token, left: Node) -> (result: Node, err: Error) {
+	#partial switch token.kind {
+	case .Alternate:
+		if p.cur_token.kind == .Close_Paren {
+			// `(a|)`
+			// parse_expression will fail, so intervene here.
+			node := new(Node_Alternation)
+			node.left = left
+			return node, nil
+		}
+
+		right, right_err := parse_expression(p, left_binding_power(.Alternate))
+
+		#partial switch specific in right_err {
+		case nil:
+			break
+		case Unexpected_EOF:
+			// EOF is okay in an alternation; it's an edge case in the way of
+			// expressing an optional such as `a|`.
+			break
+		case:
+			return nil, right_err
+		}
+
+		node := new(Node_Alternation)
+		node.left = left
+		node.right = right
+		result = node
+
+	case .Concatenate:
+		right := parse_expression(p, left_binding_power(.Concatenate)) or_return
+
+		// There should be no need to check if right is Node_Concatenation, due
+		// to how the parsing direction works.
+		#partial switch specific in left {
+		case ^Node_Concatenation:
+			// Fold into the existing concatenation.
+			append(&specific.nodes, right)
+			result = specific
+		case:
+			node := new(Node_Concatenation)
+			append(&node.nodes, left)
+			append(&node.nodes, right)
+			result = node
+		}
+
+	case .Repeat_Zero:
+		node := new(Node_Repeat_Zero)
+		node.inner = left
+		result = node
+	case .Repeat_Zero_Non_Greedy:
+		node := new(Node_Repeat_Zero_Non_Greedy)
+		node.inner = left
+		result = node
+	case .Repeat_One:
+		node := new(Node_Repeat_One)
+		node.inner = left
+		result = node
+	case .Repeat_One_Non_Greedy:
+		node := new(Node_Repeat_One_Non_Greedy)
+		node.inner = left
+		result = node
+
+	case .Repeat_N:
+		node := new(Node_Repeat_N)
+		node.inner = left
+
+		// The position of the comma (if any) decides which `{...}` form
+		// this token takes.
+		comma := strings.index_byte(token.text, ',')
+
+		switch comma {
+		case -1: // {N}
+			exact, ok := strconv.parse_u64_of_base(token.text, base = 10)
+			if !ok {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+			if exact == 0 {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+
+			node.lower = cast(int)exact
+			node.upper = cast(int)exact
+
+		case 0: // {,M}
+			upper, ok := strconv.parse_u64_of_base(token.text[1:], base = 10)
+			if !ok {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+			if upper == 0 {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+
+			// NOTE(review): lower is -1 here rather than 0; presumably the
+			// compiler treats -1 as "no lower bound" — confirm downstream.
+			node.lower = -1
+			node.upper = cast(int)upper
+
+		case len(token.text) - 1: // {N,}
+			lower, ok := strconv.parse_u64_of_base(token.text[:comma], base = 10)
+			if !ok {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+
+			node.lower = cast(int)lower
+			node.upper = -1
+
+		case: // {N,M}
+			lower, lower_ok := strconv.parse_u64_of_base(token.text[:comma], base = 10)
+			if !lower_ok {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+			upper, upper_ok := strconv.parse_u64_of_base(token.text[comma+1:], base = 10)
+			if !upper_ok {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+			if lower > upper {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+			if upper == 0 {
+				return nil, Invalid_Repetition{ pos = token.pos }
+			}
+
+			node.lower = cast(int)lower
+			node.upper = cast(int)upper
+		}
+
+		result = node
+
+	case .Optional:
+		node := new(Node_Optional)
+		node.inner = left
+		result = node
+	case .Optional_Non_Greedy:
+		node := new(Node_Optional_Non_Greedy)
+		node.inner = left
+		result = node
+
+	case .EOF:
+		return nil, Unexpected_EOF{ pos = token.pos }
+
+	case:
+		return nil, Invalid_Token{ pos = token.pos, kind = token.kind }
+	}
+
+	return
+}
+
+// Pratt-parse an expression whose operators bind more tightly than `rbp`.
+parse_expression :: proc(p: ^Parser, rbp: int) -> (result: Node, err: Error) {
+	first := p.cur_token
+	advance(p) or_return
+
+	left := null_denotation(p, first) or_return
+
+	// Keep consuming infix/postfix tokens while they bind tighter than `rbp`.
+	for left_binding_power(p.cur_token.kind) > rbp {
+		op := p.cur_token
+		advance(p) or_return
+		left = left_denotation(p, op, left) or_return
+	}
+
+	return left, nil
+}
+
+// Tokenize and parse a whole pattern into an AST.
+//
+// An empty pattern parses to a single empty group.
+parse :: proc(str: string, flags: common.Flags) -> (result: Node, err: Error) {
+	if len(str) == 0 {
+		return new(Node_Group), nil
+	}
+
+	p := Parser{ flags = flags }
+	tokenizer.init(&p.t, str, flags)
+
+	// Prime the one-token lookahead by hand.
+	p.cur_token = tokenizer.scan(&p.t)
+	if p.cur_token.kind == .Invalid {
+		return nil, Invalid_Unicode { pos = 0 }
+	}
+
+	return parse_expression(&p, 0)
+}

+ 434 - 0
core/text/regex/regex.odin

@@ -0,0 +1,434 @@
+package regex
+
+import "core:text/regex/common"
+import "core:text/regex/compiler"
+import "core:text/regex/optimizer"
+import "core:text/regex/parser"
+import "core:text/regex/virtual_machine"
+
+Flag           :: common.Flag
+Flags          :: common.Flags
+Parser_Error   :: parser.Error
+Compiler_Error :: compiler.Error
+
+// Errors from `create_by_user` delimiter/flag parsing.
+Creation_Error :: enum {
+	None,
+	Bad_Delimiter,
+	Expected_Delimiter,
+	Unknown_Flag,
+}
+
+// Any error the `create*` procedures can return; `#shared_nil` so a nil
+// value of any variant compares equal to nil.
+Error :: union #shared_nil {
+	Parser_Error,
+	Compiler_Error,
+	Creation_Error,
+}
+
+// Capture results: `pos[i]` is the [start, end) byte span of group i in the
+// matched string, and `groups[i]` is the corresponding slice of it.
+Capture :: struct {
+	pos: [][2]int,
+	groups: []string,
+}
+
+// A compiled regular expression: the VM program plus its class tables.
+Regular_Expression :: struct {
+	original_pattern: string,
+	flags: Flags,
+	class_data: []virtual_machine.Rune_Class_Data,
+	program: []virtual_machine.Opcode `fmt:"-"`,
+}
+
+
+// Maps each flag to the letter accepted by `create_by_user`.
+@(rodata)
+Flag_To_Letter := #sparse[Flag]u8 {
+	.Global            = 'g',
+	.Multiline         = 'm',
+	.Case_Insensitive  = 'i',
+	.Ignore_Whitespace = 'x',
+	.Unicode           = 'u',
+	.No_Capture        = 'n',
+	.No_Optimization   = '-',
+}
+
+/*
+Create a regular expression from a string pattern and a set of flags.
+
+*Allocates Using Provided Allocators*
+
+Inputs:
+- pattern: The pattern to compile.
+- flags: A `bit_set` of RegEx flags.
+- permanent_allocator: The allocator to use for the final regular expression. (default: context.allocator)
+- temporary_allocator: The allocator to use for the intermediate compilation stages. (default: context.temp_allocator)
+
+Returns:
+- result: The regular expression.
+- err: An error, if one occurred.
+*/
+@require_results
+create :: proc(
+	pattern: string,
+	flags: Flags = {},
+	permanent_allocator := context.allocator,
+	temporary_allocator := context.temp_allocator,
+) -> (result: Regular_Expression, err: Error) {
+
+	// For the sake of speed and simplicity, we first run all the intermediate
+	// processes such as parsing and compilation through the temporary
+	// allocator.
+	//
+	// (`---` leaves these deliberately uninitialized; both are assigned
+	// inside the block below before use.)
+	program: [dynamic]virtual_machine.Opcode = ---
+	class_data: [dynamic]parser.Rune_Class_Data = ---
+	{
+		context.allocator = temporary_allocator
+
+		ast := parser.parse(pattern, flags) or_return
+
+		if .No_Optimization not_in flags {
+			ast, _ = optimizer.optimize(ast, flags)
+		}
+
+		program, class_data = compiler.compile(ast, flags) or_return
+	}
+
+	// When that's successful, re-allocate all at once with the permanent
+	// allocator so everything can be tightly packed.
+	context.allocator = permanent_allocator
+
+	result.original_pattern = pattern
+	result.flags = flags
+
+	if len(class_data) > 0 {
+		result.class_data = make([]virtual_machine.Rune_Class_Data, len(class_data))
+	}
+	// Deep-copy each class's rune and range lists into exact-size slices.
+	for data, i in class_data {
+		if len(data.runes) > 0 {
+			result.class_data[i].runes = make([]rune, len(data.runes))
+			copy(result.class_data[i].runes, data.runes[:])
+		}
+		if len(data.ranges) > 0 {
+			result.class_data[i].ranges = make([]virtual_machine.Rune_Class_Range, len(data.ranges))
+			copy(result.class_data[i].ranges, data.ranges[:])
+		}
+	}
+
+	result.program = make([]virtual_machine.Opcode, len(program))
+	copy(result.program, program[:])
+
+	return
+}
+
+/*
+Create a regular expression from a delimited string pattern, such as one
+provided by users of a program or those found in a configuration file.
+
+They are in the form of:
+
+	[DELIMITER] [regular expression] [DELIMITER] [flags]
+
+For example, the following strings are valid:
+
+	/hellope/i
+	#hellope#i
+	•hellope•i
+	つhellopeつi
+
+The delimiter is determined by the very first rune in the string.
+The only restriction is that the delimiter cannot be `\`, as that rune is used
+to escape the delimiter if found in the middle of the string.
+
+All runes after the closing delimiter will be parsed as flags:
+
+- 'g': Global
+- 'm': Multiline
+- 'i': Case_Insensitive
+- 'x': Ignore_Whitespace
+- 'u': Unicode
+- 'n': No_Capture
+- '-': No_Optimization
+
+
+*Allocates Using Provided Allocators*
+
+Inputs:
+- pattern: The delimited pattern with optional flags to compile.
+- str: The string to match against.
+- permanent_allocator: The allocator to use for the final regular expression. (default: context.allocator)
+- temporary_allocator: The allocator to use for the intermediate compilation stages. (default: context.temp_allocator)
+
+Returns:
+- result: The regular expression.
+- err: An error, if one occurred.
+*/
+@require_results
+create_by_user :: proc(
+	pattern: string,
+	permanent_allocator := context.allocator,
+	temporary_allocator := context.temp_allocator,
+) -> (result: Regular_Expression, err: Error) {
+
+	if len(pattern) == 0 {
+		err = .Expected_Delimiter
+		return
+	}
+
+	// `start`/`end` bound the pattern between the delimiters; -1 means
+	// "not found yet".
+	delimiter: rune
+	start := -1
+	end := -1
+
+	flags: Flags
+
+	escaping: bool
+	parse_loop: for r, i in pattern {
+		// The very first rune establishes the delimiter; `\` is forbidden
+		// because it is the escape character.
+		if delimiter == 0 {
+			if r == '\\' {
+				err = .Bad_Delimiter
+				return
+			}
+			delimiter = r
+			continue parse_loop
+		}
+
+		if start == -1 {
+			start = i
+		}
+
+		// A rune following `\` never closes the pattern.
+		if escaping {
+			escaping = false
+			continue parse_loop
+		}
+
+		switch r {
+		case '\\':
+			escaping = true
+		case delimiter:
+			end = i
+			break parse_loop
+		}
+	}
+
+	if end == -1 {
+		err = .Expected_Delimiter
+		return
+	}
+
+	// `start` is also the size of the delimiter, which is why it's being added
+	// to `end` here.
+	// (The closing delimiter is the same rune as the opening one, so the
+	// flag letters begin `start` bytes past `end`.)
+	for r in pattern[start + end:] {
+		switch r {
+		case 'g': flags += { .Global }
+		case 'm': flags += { .Multiline }
+		case 'i': flags += { .Case_Insensitive }
+		case 'x': flags += { .Ignore_Whitespace }
+		case 'u': flags += { .Unicode }
+		case 'n': flags += { .No_Capture }
+		case '-': flags += { .No_Optimization }
+		case:
+			err = .Unknown_Flag
+			return
+		}
+	}
+
+	return create(pattern[start:end], flags, permanent_allocator, temporary_allocator)
+}
+
+/*
+Match a regular expression against a string and allocate the results into the
+returned `capture` structure.
+
+The resulting capture strings will be slices to the string `str`, not wholly
+copied strings, so they won't need to be individually deleted.
+
+*Allocates Using Provided Allocators*
+
+Inputs:
+- regex: The regular expression.
+- str: The string to match against.
+- permanent_allocator: The allocator to use for the capture results. (default: context.allocator)
+- temporary_allocator: The allocator to use for the virtual machine. (default: context.temp_allocator)
+
+Returns:
+- capture: The capture groups found in the string.
+- success: True if the regex matched the string.
+*/
+@require_results
+match_and_allocate_capture :: proc(
+	regex: Regular_Expression,
+	str: string,
+	permanent_allocator := context.allocator,
+	temporary_allocator := context.temp_allocator,
+) -> (capture: Capture, success: bool) {
+
+	// `saved` holds (start, end) byte-offset pairs, one pair per group;
+	// -1 in either slot marks an unset group.
+	saved: ^[2 * common.MAX_CAPTURE_GROUPS]int
+
+	{
+		context.allocator = temporary_allocator
+
+		vm := virtual_machine.create(regex.program, str)
+		vm.class_data = regex.class_data
+
+		// The VM run is specialized on whether the pattern is Unicode.
+		if .Unicode in regex.flags {
+			saved, success = virtual_machine.run(&vm, true)
+		} else {
+			saved, success = virtual_machine.run(&vm, false)
+		}
+	}
+
+	if saved != nil {
+		context.allocator = permanent_allocator
+
+		// First pass: count the groups that were actually set, so the
+		// result slices can be allocated to the exact size.
+		num_groups := 0
+		for i := 0; i < len(saved); i += 2 {
+			a, b := saved[i], saved[i + 1]
+			if a == -1 || b == -1 {
+				continue
+			}
+			num_groups += 1
+		}
+
+		if num_groups > 0 {
+			capture.groups = make([]string, num_groups)
+			capture.pos = make([][2]int, num_groups)
+			n := 0
+
+			// Second pass: fill in the spans and the substring views.
+			#no_bounds_check for i := 0; i < len(saved); i += 2 {
+				a, b := saved[i], saved[i + 1]
+				if a == -1 || b == -1 {
+					continue
+				}
+
+				capture.groups[n] = str[a:b]
+				capture.pos[n] = {a, b}
+				n += 1
+			}
+		}
+	}
+
+	return
+}
+
+/*
+Match a regular expression against a string and save the capture results into
+the provided `capture` structure.
+
+The resulting capture strings will be slices to the string `str`, not wholly
+copied strings, so they won't need to be individually deleted.
+
+*Allocates Using Provided Allocator*
+
+Inputs:
+- regex: The regular expression.
+- str: The string to match against.
+- capture: A pointer to a Capture structure with `groups` and `pos` already allocated.
+- temporary_allocator: The allocator to use for the virtual machine. (default: context.temp_allocator)
+
+Returns:
+- num_groups: The number of capture groups set into `capture`.
+- success: True if the regex matched the string.
+*/
+@require_results
+match_with_preallocated_capture :: proc(
+	regex: Regular_Expression,
+	str: string,
+	capture: ^Capture,
+	temporary_allocator := context.temp_allocator,
+) -> (num_groups: int, success: bool) {
+
+	assert(capture != nil, "Pre-allocated RegEx capture must not be nil.")
+	assert(len(capture.groups) >= common.MAX_CAPTURE_GROUPS,
+		"Pre-allocated RegEx capture `groups` must be at least 10 elements long.")
+	assert(len(capture.pos) >= common.MAX_CAPTURE_GROUPS,
+		"Pre-allocated RegEx capture `pos` must be at least 10 elements long.")
+
+	// `saved` holds (start, end) byte-offset pairs, one pair per group;
+	// -1 in either slot marks an unset group.
+	saved: ^[2 * common.MAX_CAPTURE_GROUPS]int
+
+	{
+		context.allocator = temporary_allocator
+
+		vm := virtual_machine.create(regex.program, str)
+		vm.class_data = regex.class_data
+
+		// The VM run is specialized on whether the pattern is Unicode.
+		if .Unicode in regex.flags {
+			saved, success = virtual_machine.run(&vm, true)
+		} else {
+			saved, success = virtual_machine.run(&vm, false)
+		}
+	}
+
+	if saved != nil {
+		#no_bounds_check for i := 0; i < len(saved); i += 2 {
+			a, b := saved[i], saved[i + 1]
+			if a == -1 || b == -1 {
+				continue
+			}
+
+			// BUGFIX: this previously incremented a local counter that was
+			// then discarded, so `num_groups` was always returned as 0.
+			// Index and count with the named return value directly.
+			capture.groups[num_groups] = str[a:b]
+			capture.pos[num_groups] = {a, b}
+			num_groups += 1
+		}
+	}
+
+	return
+}
+
+// Overload covering both matching procedures; dispatch is by the third
+// argument (an allocator vs. a `^Capture`).
+match :: proc {
+	match_and_allocate_capture,
+	match_with_preallocated_capture,
+}
+
+/*
+Allocate a `Capture` in advance for use with `match`. This can save some time
+if you plan on performing several matches at once and only need the results
+between matches.
+
+Inputs:
+- allocator: (default: context.allocator)
+
+Returns:
+- result: The `Capture` with the maximum number of groups allocated.
+*/
+@require_results
+preallocate_capture :: proc(allocator := context.allocator) -> (result: Capture) {
+	// Allocate room for the maximum possible number of groups up front.
+	return Capture{
+		pos    = make([][2]int, common.MAX_CAPTURE_GROUPS, allocator),
+		groups = make([]string, common.MAX_CAPTURE_GROUPS, allocator),
+	}
+}
+
+/*
+Free all data allocated by the `create*` procedures.
+
+*Frees Using Provided Allocator*
+
+Inputs:
+- regex: A regular expression.
+- allocator: (default: context.allocator)
+*/
+destroy_regex :: proc(regex: Regular_Expression, allocator := context.allocator) {
+	context.allocator = allocator
+
+	// Each class owns its rune and range slices; free those before the
+	// slice of classes itself.
+	for data in regex.class_data {
+		delete(data.runes)
+		delete(data.ranges)
+	}
+	delete(regex.class_data)
+	delete(regex.program)
+}
+
+/*
+Free all data allocated by the `match_and_allocate_capture` procedure.
+
+*Frees Using Provided Allocator*
+
+Inputs:
+- capture: A Capture.
+- allocator: (default: context.allocator)
+*/
+destroy_capture :: proc(capture: Capture, allocator := context.allocator) {
+	// Pass the allocator explicitly rather than via the context.
+	delete(capture.pos, allocator)
+	delete(capture.groups, allocator)
+}
+
+// Overload covering both `destroy_regex` and `destroy_capture`.
+destroy :: proc {
+	destroy_regex,
+	destroy_capture,
+}

+ 349 - 0
core/text/regex/tokenizer/tokenizer.odin

@@ -0,0 +1,349 @@
+package regex_tokenizer
+
+import "core:text/regex/common"
+import "core:unicode/utf8"
+
+// The kinds of tokens produced while scanning a regular expression pattern.
+Token_Kind :: enum {
+	Invalid,
+	EOF,
+
+	// A literal character.
+	Rune,
+	// `.`
+	Wildcard,
+
+	// `|`
+	Alternate,
+
+	// Implicit; inserted between adjacent terms by the tokenizer (see `scan`).
+	Concatenate,
+
+	// `*` and `*?`
+	Repeat_Zero,
+	Repeat_Zero_Non_Greedy,
+	// `+` and `+?`
+	Repeat_One,
+	Repeat_One_Non_Greedy,
+
+	// `{...}` counted repetition.
+	Repeat_N,
+
+	// `?` and `??`
+	Optional,
+	Optional_Non_Greedy,
+
+	// `[...]` classes; also produced for `\d`, `\s`, `\w` and their negations.
+	Rune_Class,
+
+	// `(`, `(?:`, and `)`
+	Open_Paren,
+	Open_Paren_Non_Capture,
+	Close_Paren,
+
+	// `^` and `$`
+	Anchor_Start,
+	Anchor_End,
+
+	// `\b` and `\B`
+	Word_Boundary,
+	Non_Word_Boundary,
+}
+
+// A single token scanned from the pattern.
+Token :: struct {
+	kind: Token_Kind,
+	text: string, // Literal text; set only for Rune, Rune_Class, and Repeat_N tokens.
+	pos: int,     // Byte offset of the token within the source pattern.
+}
+
+// Streaming tokenizer state for a regular expression pattern.
+Tokenizer :: struct {
+	flags: common.Flags, // Flags that alter scanning, e.g. Ignore_Whitespace.
+	src: string,         // The pattern being scanned.
+
+	ch: rune,            // Current rune, or -1 at end of input.
+	offset: int,         // Byte offset of `ch` in `src`.
+	read_offset: int,    // Byte offset just past `ch`.
+
+	last_token_kind: Token_Kind, // Used by `scan` to decide Concatenate insertion.
+	held_token: Token,           // Token buffered behind an inserted Concatenate.
+	error_state: Error,          // Sticky error from `advance_rune`.
+	paren_depth: int,            // Current `(`/`)` nesting depth.
+}
+
+// Errors that `advance_rune` can raise while decoding the pattern text.
+Error :: enum {
+	None,
+	Illegal_Null_Character,
+	Illegal_Codepoint,
+	Illegal_Byte_Order_Mark,
+}
+
+// Initialize the tokenizer over `str` and load the first rune.
+// Any error from decoding the first rune is kept in `t.error_state` and
+// reported by the first call to `scan`.
+init :: proc(t: ^Tokenizer, str: string, flags: common.Flags) {
+	t.src = str
+	t.flags = flags
+	t.error_state = advance_rune(t)
+}
+
+// Look at the byte `offset` bytes ahead of the read position without
+// advancing. Returns 0 when peeking past the end of the pattern.
+peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
+	idx := t.read_offset + offset
+	if idx >= len(t.src) {
+		return 0
+	}
+	return t.src[idx]
+}
+
+// Advance the tokenizer by one rune, decoding UTF-8 as needed.
+//
+// Errors are sticky: once `error_state` is set, subsequent calls return it
+// immediately without moving, leaving `ch`, `offset`, and `read_offset`
+// untouched. At end of input, `ch` is set to -1.
+advance_rune :: proc(t: ^Tokenizer) -> (err: Error) {
+	if t.error_state != nil {
+		return t.error_state
+	}
+
+	if t.read_offset < len(t.src) {
+		t.offset = t.read_offset
+		r, w := rune(t.src[t.read_offset]), 1
+		switch {
+		case r == 0:
+			err = .Illegal_Null_Character
+		case r >= utf8.RUNE_SELF:
+			// Multi-byte sequence; decode the full codepoint.
+			r, w = utf8.decode_rune(t.src[t.read_offset:])
+			if r == utf8.RUNE_ERROR && w == 1 {
+				err = .Illegal_Codepoint
+			} else if r == utf8.RUNE_BOM && t.offset > 0 {
+				// A byte order mark is only tolerated at the very start.
+				err = .Illegal_Byte_Order_Mark
+			}
+		}
+		t.read_offset += w
+		t.ch = r
+	} else {
+		t.offset = len(t.src)
+		t.ch = -1
+	}
+
+	t.error_state = err
+
+	return
+}
+
+// Scan the interior of a `[...]` class, starting just after the `[`.
+// Returns the class text without the surrounding brackets, or `ok = false`
+// on EOF or a tokenizer error before the closing `]` is found.
+@require_results
+scan_class :: proc(t: ^Tokenizer) -> (str: string, ok: bool) {
+	start := t.read_offset
+
+	for {
+		advance_rune(t)
+		if t.ch == -1 || t.error_state != nil {
+			return "", false
+		}
+
+		if t.ch == '\\' {
+			// Skip the escaped character, so that an escaped `]` does not
+			// terminate the class.
+			advance_rune(t)
+			continue
+		}
+
+		if t.ch == ']' {
+			return t.src[start:t.offset], true
+		}
+	}
+
+	unreachable()
+}
+
+// Scan the interior of a `{...}` repetition, starting just after the `{`.
+// Returns the text without the surrounding braces, or `ok = false` on EOF
+// or a tokenizer error before the closing `}` is found.
+@require_results
+scan_repeat :: proc(t: ^Tokenizer) -> (str: string, ok: bool) {
+	start := t.read_offset
+
+	for {
+		advance_rune(t)
+		// Also bail on a sticky tokenizer error (mirrors `scan_class`):
+		// `advance_rune` is a no-op once `error_state` is set, so looping
+		// further would never terminate.
+		if t.ch == -1 || t.error_state != nil {
+			return "", false
+		}
+		if t.ch == '}' {
+			return t.src[start:t.offset], true
+		}
+	}
+
+	unreachable()
+}
+
+// Consume a trailing `?` that marks the preceding repetition operator as
+// non-greedy. Returns true if one was consumed.
+@require_results
+scan_non_greedy :: proc(t: ^Tokenizer) -> bool {
+	if peek_byte(t) != '?' {
+		return false
+	}
+	advance_rune(t)
+	return true
+}
+
+// Consume characters up to and including the next line terminator,
+// handling UNIX (`\n`), Mac (`\r`), and Windows (`\r\n`) newlines.
+scan_comment :: proc(t: ^Tokenizer) {
+	for {
+		advance_rune(t)
+		// Stop on EOF *or* a sticky tokenizer error; `advance_rune` is a
+		// no-op once `error_state` is set, so only checking for -1 (as the
+		// original did) would loop forever on e.g. a NUL byte in a comment.
+		if t.ch == -1 || t.error_state != nil {
+			return
+		}
+		switch t.ch {
+		case '\n':
+			// UNIX newline.
+			advance_rune(t)
+			return
+		case '\r':
+			// Mac newline.
+			advance_rune(t)
+			if t.ch == '\n' {
+				// Windows newline.
+				advance_rune(t)
+			}
+			return
+		}
+	}
+}
+
+// Consume the `?:` of a non-capturing group opener `(?:`, if present.
+// Returns true if it was consumed.
+@require_results
+scan_non_capture_group :: proc(t: ^Tokenizer) -> bool {
+	if peek_byte(t) != '?' || peek_byte(t, 1) != ':' {
+		return false
+	}
+	advance_rune(t)
+	advance_rune(t)
+	return true
+}
+
+// Produce the next token from the pattern.
+//
+// Returns `.Invalid` once per pending tokenizer error, `.EOF` at the end of
+// input (and on unterminated classes/repeats/escapes), and otherwise the
+// next pattern token. Implicit `.Concatenate` tokens are synthesized between
+// adjacent terms; when one is emitted, the real token is parked in
+// `t.held_token` and returned by the following call.
+@require_results
+scan :: proc(t: ^Tokenizer) -> (token: Token) {
+	kind: Token_Kind
+	lit: string
+	pos := t.offset
+
+	// Record the emitted kind for the Concatenate-insertion rules below.
+	defer {
+		t.last_token_kind = token.kind
+	}
+
+	// A pending decode error produces a single Invalid token, then clears.
+	if t.error_state != nil {
+		t.error_state = nil
+		return { .Invalid, "", pos }
+	}
+
+	// Deliver the token that was parked behind an inserted Concatenate.
+	if t.held_token != {} {
+		popped := t.held_token
+		t.held_token = {}
+		
+		return popped
+	}
+
+	ch_loop: for {
+		switch t.ch {
+		case -1:
+			return { .EOF, "", pos }
+
+		case '\\':
+			advance_rune(t)
+
+			// A trailing lone backslash is treated as end of input.
+			if t.ch == -1 {
+				return { .EOF, "", pos }
+			}
+
+			pos = t.offset
+
+			// @MetaCharacter
+			// NOTE: These must be kept in sync with the compiler.
+			DIGIT_CLASS :: "0-9"
+			SPACE_CLASS :: "\t\n\f\r "
+			WORD_CLASS  :: "0-9A-Z_a-z"
+
+			switch t.ch {
+			case 'b': kind = .Word_Boundary
+			case 'B': kind = .Non_Word_Boundary
+
+			case 'f': kind = .Rune; lit = "\f"
+			case 'n': kind = .Rune; lit = "\n"
+			case 'r': kind = .Rune; lit = "\r"
+			case 't': kind = .Rune; lit = "\t"
+
+			case 'd': kind = .Rune_Class; lit = DIGIT_CLASS
+			case 's': kind = .Rune_Class; lit = SPACE_CLASS
+			case 'w': kind = .Rune_Class; lit = WORD_CLASS
+			case 'D': kind = .Rune_Class; lit = "^" + DIGIT_CLASS
+			case 'S': kind = .Rune_Class; lit = "^" + SPACE_CLASS
+			case 'W': kind = .Rune_Class; lit = "^" + WORD_CLASS
+			case:
+				// Any other escaped character is taken literally.
+				kind = .Rune
+				lit = t.src[t.offset:t.read_offset]
+			}
+
+		case '.':
+			kind = .Wildcard
+
+		case '|': kind = .Alternate
+
+		case '*': kind = .Repeat_Zero_Non_Greedy if scan_non_greedy(t) else .Repeat_Zero
+		case '+': kind = .Repeat_One_Non_Greedy  if scan_non_greedy(t) else .Repeat_One
+		case '?': kind = .Optional_Non_Greedy    if scan_non_greedy(t) else .Optional
+
+		case '[':
+			// Unterminated classes fall through as EOF.
+			if text, ok := scan_class(t); ok {
+				kind = .Rune_Class
+				lit = text
+			} else {
+				return { .EOF, "", pos }
+			}
+
+		case '{':
+			// Unterminated repeats fall through as EOF.
+			if text, ok := scan_repeat(t); ok {
+				kind = .Repeat_N
+				lit = text
+			} else {
+				return { .EOF, "", pos }
+			}
+
+		case '(':
+			kind = .Open_Paren_Non_Capture if scan_non_capture_group(t) else .Open_Paren
+			t.paren_depth += 1
+		case ')':
+			kind = .Close_Paren
+			t.paren_depth -= 1
+
+		case '^': kind = .Anchor_Start
+		case '$':
+			kind = .Anchor_End
+
+		case:
+			if .Ignore_Whitespace in t.flags {
+				switch t.ch {
+				case ' ', '\r', '\n', '\t', '\f':
+					advance_rune(t)
+					continue ch_loop
+				case:
+					break
+				}
+			}
+			// NOTE(review): a top-level `#` starts a comment even when
+			// Ignore_Whitespace is not set — confirm this is intended.
+			if t.ch == '#' && t.paren_depth == 0 {
+				scan_comment(t)
+				continue ch_loop
+			}
+
+			kind = .Rune
+			lit = t.src[t.offset:t.read_offset]
+		}
+
+		break ch_loop
+	}
+
+	// An error raised while scanning this token also yields Invalid.
+	if t.error_state != nil {
+		t.error_state = nil
+		return { .Invalid, "", pos }
+	}
+
+	advance_rune(t)
+
+	// The following set of rules dictate where Concatenate tokens are
+	// automatically inserted.
+	#partial switch kind {
+	case
+	.Close_Paren,
+	.Alternate,
+	.Optional,    .Optional_Non_Greedy,
+	.Repeat_Zero, .Repeat_Zero_Non_Greedy,
+	.Repeat_One,  .Repeat_One_Non_Greedy,
+	.Repeat_N:
+		// Never prepend a Concatenate before these tokens.
+		break
+	case:
+		#partial switch t.last_token_kind {
+		case
+		.Invalid,
+		.Open_Paren, .Open_Paren_Non_Capture,
+		.Alternate:
+			// Never prepend a Concatenate token when the _last token_ was one
+			// of these.
+			break
+		case:
+			t.held_token = { kind, lit, pos }
+			return { .Concatenate, "", pos }
+		}
+	}
+
+	return { kind, lit, pos }
+}

+ 175 - 0
core/text/regex/virtual_machine/doc.odin

@@ -0,0 +1,175 @@
+/*
+package regex_vm implements a threaded virtual machine for interpreting
+regular expressions, based on the designs described by Russ Cox and attributed
+to both Ken Thompson and Rob Pike.
+
+The virtual machine executes all threads in lock step, i.e. the string pointer
+does not advance until all threads have finished processing the current rune.
+The algorithm does not look backwards.
+
+Threads merge when splitting or jumping to positions already visited by another
+thread, based on the observation that each thread having visited one PC
+(Program Counter) state will execute identically to the previous thread.
+
+Each thread keeps a save state of its capture groups, and thread priority is
+used to allow higher precedence operations to complete first with correct save
+states, such as greedy versus non-greedy repetition.
+
+For more information, see: https://swtch.com/~rsc/regexp/regexp2.html
+
+
+**Implementation Details:**
+
+- Each opcode is 8 bits in size, and most instructions have no operands.
+
+- All operands larger than `u8` are read in system endian order.
+
+- Jump and Split instructions operate on absolute positions in `u16` operands.
+
+- Classes such as `[0-9]` are stored in a RegEx-specific slice of structs which
+  are then dereferenced by a `u8` index from the `Rune_Class` instructions.
+
+- Each Byte and Rune opcode have their operands stored inline after the opcode,
+  sized `u8` and `i32` respectively.
+
+- A bitmap is used to determine which PC positions are occupied by a thread to
+  perform merging. The bitmap is cleared with every new frame.
+
+- The VM supports two modes: ASCII and Unicode, decided by a compile-time
+  boolean constant argument provided to `run`. The procedure differs only in
+  string decoding. This was done for the sake of performance.
+
+- No allocations are ever freed; the VM expects an arena or temporary allocator
+  to be used in the context preceding it.
+
+
+**Opcode Reference:**
+
+	(0x00) Match
+
+	The terminal opcode which ends a thread. This always comes at the end of
+	the program.
+
+	(0x01) Match_And_Exit
+
+	A modified version of Match which stops the virtual machine entirely. It is
+	only compiled for `No_Capture` expressions, as those expressions do not
+	need to determine which thread may have saved the most appropriate capture
+	groups.
+
+	(0x02) Byte
+
+	Consumes one byte from the text using its operand, which is also a byte.
+
+	(0x03) Rune
+
+	Consumes one Unicode codepoint from the text using its operand, which is
+	four bytes long in a system-dependent endian order.
+
+	(0x04) Rune_Class
+
+	Consumes one character (which may be an ASCII byte or Unicode codepoint,
+	wholly dependent on which mode the virtual machine is running in) from the
+	text.
+
+	The actual data storing what runes and ranges of runes apply to the class
+	are stored alongside the program in the Regular_Expression structure and
+	the operand for this opcode is a single byte which indexes into a
+	collection of these data structures.
+
+	(0x05) Rune_Class_Negated
+
+	A modified version of Rune_Class that functions the same, save for how it
+	returns the opposite of what Rune_Class matches.
+
+	(0x06) Wildcard
+
+	Consumes one byte or one Unicode codepoint, depending on the VM mode.
+
+	(0x07) Jump
+
+	Sets the Program Counter of a VM thread to the operand, which is a u16.
+	This opcode is used to implement Alternation (coming at the end of the left
+	choice) and Repeat_Zero (to cause the thread to loop backwards).
+
+	(0x08) Split
+
+	Spawns a new thread for the X operand and causes the current thread to jump
+	to the Y operand. This opcode is used to implement Alternation, all the
+	Repeat variations, and the Optional nodes.
+
+	Splitting threads is how the virtual machine is able to execute optional
+	control flow paths, letting it evaluate different possible ways to match
+	text.
+
+	(0x09) Save
+
+	Saves the current string index to a slot on the thread dictated by the
+	operand. These values will be used later to reconstruct capture groups.
+
+	(0x0A) Assert_Start
+
+	Asserts that the thread is at the beginning of a string.
+
+	(0x0B) Assert_End
+
+	Asserts that the thread is at the end of a string.
+
+	(0x0C) Assert_Word_Boundary
+
+	Asserts that the thread is on a word boundary, which can be the start or
+	end of the text. This examines both the current rune and the next rune.
+
+	(0x0D) Assert_Non_Word_Boundary
+
+	A modified version of Assert_Word_Boundary that returns the opposite value.
+
+	(0x0E) Multiline_Open
+
+	This opcode is compiled in only when the `Multiline` flag is present, and
+	it replaces both `^` and `$` text anchors.
+
+	It asserts that either the current thread is on one of the string
+	boundaries, or it consumes a `\n` or `\r` character.
+
+	If a `\r` character is consumed, the PC will be advanced to the sibling
+	`Multiline_Close` opcode to optionally consume a `\n` character on the next
+	frame.
+
+	(0x0F) Multiline_Close
+
+	This opcode is always present after `Multiline_Open`.
+
+	It handles consuming the second half of a complete newline, if necessary.
+	For example, Windows newlines are represented by the characters `\r\n`,
+	whereas UNIX newlines are `\n` and Macintosh newlines are `\r`.
+
+	(0x10) Wait_For_Byte
+	(0x11) Wait_For_Rune
+	(0x12) Wait_For_Rune_Class
+	(0x13) Wait_For_Rune_Class_Negated
+
+	These opcodes are an optimization around restarting threads on failed
+	matches when the beginning to a pattern is predictable and the Global flag
+	is set.
+
+	They will cause the VM to wait for the next rune to match before splitting,
+	as would happen in the un-optimized version.
+
+	(0x14) Match_All_And_Escape
+
+	This opcode is an optimized version of `.*$` or `.+$` that causes the
+	active thread to immediately work on escaping the program by following all
+	Jumps out to the end.
+
+	While running through the rest of the program, the thread will trigger on
+	every Save instruction it passes to store the length of the string.
+
+	This way, any time a program hits one of these `.*$` constructs, the
+	virtual machine can exit early, vastly improving processing times.
+
+	Be aware, this opcode is not compiled in if the `Multiline` flag is on, as
+	the meaning of `$` changes with that flag.
+
+*/
+package regex_vm

+ 73 - 0
core/text/regex/virtual_machine/util.odin

@@ -0,0 +1,73 @@
+package regex_vm
+
+// Iterator state for walking a compiled program opcode by opcode.
+Opcode_Iterator :: struct {
+	code: Program, // The program being walked.
+	pc: int,       // Byte offset of the next opcode to yield.
+}
+
+// Yield the opcode at the iterator's current position and advance past it
+// and its inline operands. `ok` is false once the end of the program is
+// reached. Panics on an unknown opcode byte.
+iterate_opcodes :: proc(iter: ^Opcode_Iterator) -> (opcode: Opcode, pc: int, ok: bool) {
+	if iter.pc >= len(iter.code) {
+		return
+	}
+
+	opcode = iter.code[iter.pc]
+	pc = iter.pc
+	ok = true
+
+	// Advance by opcode size plus operand size, grouped by operand layout.
+	switch opcode {
+	case
+	.Match, .Match_And_Exit, .Wildcard,
+	.Assert_Start, .Assert_End,
+	.Assert_Word_Boundary, .Assert_Non_Word_Boundary,
+	.Multiline_Open, .Multiline_Close,
+	.Match_All_And_Escape:
+		// No operands.
+		iter.pc += size_of(Opcode)
+	case
+	.Byte, .Rune_Class, .Rune_Class_Negated, .Save,
+	.Wait_For_Byte, .Wait_For_Rune_Class, .Wait_For_Rune_Class_Negated:
+		// One u8 operand.
+		iter.pc += size_of(Opcode) + size_of(u8)
+	case .Rune, .Wait_For_Rune:
+		// One i32 (rune) operand.
+		iter.pc += size_of(Opcode) + size_of(rune)
+	case .Jump:
+		// One absolute u16 jump target.
+		iter.pc += size_of(Opcode) + size_of(u16)
+	case .Split:
+		// Two absolute u16 jump targets.
+		iter.pc += size_of(Opcode) + 2 * size_of(u16)
+	case:
+		panic("Invalid opcode found in RegEx program.")
+	}
+
+	return
+}
+
+// Return a human-readable name for `opcode`, for debug output.
+// Unknown values map to "<UNKNOWN>".
+opcode_to_name :: proc(opcode: Opcode) -> (str: string) {
+	#partial switch opcode {
+	case .Match:                       return "Match"
+	case .Match_And_Exit:              return "Match_And_Exit"
+	case .Byte:                        return "Byte"
+	case .Rune:                        return "Rune"
+	case .Rune_Class:                  return "Rune_Class"
+	case .Rune_Class_Negated:          return "Rune_Class_Negated"
+	case .Wildcard:                    return "Wildcard"
+	case .Jump:                        return "Jump"
+	case .Split:                       return "Split"
+	case .Save:                        return "Save"
+	case .Assert_Start:                return "Assert_Start"
+	case .Assert_End:                  return "Assert_End"
+	case .Assert_Word_Boundary:        return "Assert_Word_Boundary"
+	case .Assert_Non_Word_Boundary:    return "Assert_Non_Word_Boundary"
+	case .Multiline_Open:              return "Multiline_Open"
+	case .Multiline_Close:             return "Multiline_Close"
+	case .Wait_For_Byte:               return "Wait_For_Byte"
+	case .Wait_For_Rune:               return "Wait_For_Rune"
+	case .Wait_For_Rune_Class:         return "Wait_For_Rune_Class"
+	case .Wait_For_Rune_Class_Negated: return "Wait_For_Rune_Class_Negated"
+	case .Match_All_And_Escape:        return "Match_All_And_Escape"
+	}
+	return "<UNKNOWN>"
+}

+ 638 - 0
core/text/regex/virtual_machine/virtual_machine.odin

@@ -0,0 +1,638 @@
+package regex_vm
+
+@require import "core:io"
+import "core:text/regex/common"
+import "core:text/regex/parser"
+import "core:unicode/utf8"
+
+Rune_Class_Range  :: parser.Rune_Class_Range
+
+// NOTE: This structure differs intentionally from the one in `regex/parser`,
+// as this data doesn't need to be a dynamic array once it hits the VM.
+Rune_Class_Data :: struct {
+	runes: []rune,              // Individual runes matched by the class.
+	ranges: []Rune_Class_Range, // Rune ranges matched by the class.
+}
+
+// Instruction set of the regex virtual machine. Each opcode is one byte;
+// its inline operand layout is noted to the right. See `doc.odin` in this
+// package for the full semantics of each instruction.
+Opcode :: enum u8 {
+	                                    // | [ operands ]
+	Match                       = 0x00, // |
+	Match_And_Exit              = 0x01, // |
+	Byte                        = 0x02, // | u8
+	Rune                        = 0x03, // | i32
+	Rune_Class                  = 0x04, // | u8
+	Rune_Class_Negated          = 0x05, // | u8
+	Wildcard                    = 0x06, // |
+	Jump                        = 0x07, // | u16
+	Split                       = 0x08, // | u16, u16
+	Save                        = 0x09, // | u8
+	Assert_Start                = 0x0A, // |
+	Assert_End                  = 0x0B, // |
+	Assert_Word_Boundary        = 0x0C, // |
+	Assert_Non_Word_Boundary    = 0x0D, // |
+	Multiline_Open              = 0x0E, // |
+	Multiline_Close             = 0x0F, // |
+	Wait_For_Byte               = 0x10, // | u8
+	Wait_For_Rune               = 0x11, // | i32
+	Wait_For_Rune_Class         = 0x12, // | u8
+	Wait_For_Rune_Class_Negated = 0x13, // | u8
+	Match_All_And_Escape        = 0x14, // |
+}
+
+// One VM thread: a program counter plus its capture-group save slots.
+Thread :: struct {
+	pc: int,
+	saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, // Start/end string offsets, two per capture group.
+}
+
+// A compiled regex program: a flat byte stream of opcodes and inline operands.
+Program :: []Opcode
+
+// Full execution state of the virtual machine for a single `run` call.
+Machine :: struct {
+	// Program state
+	memory: string,                // The text being matched against.
+	class_data: []Rune_Class_Data, // Class tables referenced by Rune_Class opcodes.
+	code: Program,
+
+	// Thread state
+	top_thread: int,        // Number of threads queued in `next_threads`.
+	threads: [^]Thread,     // Threads running in the current frame.
+	next_threads: [^]Thread, // Threads scheduled for the next frame.
+
+	// The busy map is used to merge threads based on their program counters.
+	busy_map: []u64,
+
+	// Global state
+	string_pointer: int, // Byte offset of `current_rune` in `memory`.
+
+	current_rune: rune,
+	current_rune_size: int,
+	next_rune: rune,
+	next_rune_size: int,
+}
+
+
+// @MetaCharacter
+// NOTE: This must be kept in sync with the compiler & tokenizer.
+//
+// Reports whether `r` belongs to the `\w` word class: [0-9A-Z_a-z].
+is_word_class :: #force_inline proc "contextless" (r: rune) -> bool {
+	return ('0' <= r && r <= '9') ||
+	       ('A' <= r && r <= 'Z') ||
+	       r == '_' ||
+	       ('a' <= r && r <= 'z')
+}
+
+// Mark program counter `pc` as visited in this frame's busy map.
+// Returns false when the bit was already set — i.e. another thread already
+// occupies this PC and the caller should merge (drop) the new thread.
+set_busy_map :: #force_inline proc "contextless" (vm: ^Machine, pc: int) -> bool #no_bounds_check {
+	word := cast(u64)pc >> 6
+	mask := u64(1) << (cast(u64)pc & 0x3F)
+	if vm.busy_map[word] & mask != 0 {
+		return false
+	}
+	vm.busy_map[word] |= mask
+	return true
+}
+
+// Report whether program counter `pc` is already occupied by a thread in
+// this frame, without modifying the busy map.
+check_busy_map :: #force_inline proc "contextless" (vm: ^Machine, pc: int) -> bool #no_bounds_check {
+	word := cast(u64)pc >> 6
+	mask := u64(1) << (cast(u64)pc & 0x3F)
+	return vm.busy_map[word] & mask != 0
+}
+
+// Schedule a thread at `pc` for the next frame.
+//
+// Non-consuming opcodes (Jump, Split, Save, and the assertions) are resolved
+// eagerly here, so that only opcodes which consume input (or wait on it) ever
+// occupy a slot in `next_threads`. Threads landing on a PC already visited
+// this frame are merged away via the busy map, bounding the total thread
+// count by the program length.
+add_thread :: proc(vm: ^Machine, saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, pc: int) #no_bounds_check {
+	if check_busy_map(vm, pc) {
+		return
+	}
+
+	saved := saved
+	pc := pc
+
+	resolution_loop: for {
+		// Merge with any thread that reached this PC first.
+		if !set_busy_map(vm, pc) {
+			return
+		}
+
+		when common.ODIN_DEBUG_REGEX {
+			io.write_string(common.debug_stream, "Thread [PC:")
+			common.write_padded_hex(common.debug_stream, pc, 4)
+			io.write_string(common.debug_stream, "] thinking about ")
+			io.write_string(common.debug_stream, opcode_to_name(vm.code[pc]))
+			io.write_rune(common.debug_stream, '\n')
+		}
+
+		#partial switch vm.code[pc] {
+		case .Jump:
+			// Follow the jump target directly; no thread slot needed.
+			pc = cast(int)(cast(^u16)&vm.code[pc + size_of(Opcode)])^
+			continue
+
+		case .Split:
+			// Recurse for the X branch (higher priority), continue as the
+			// Y branch in this call.
+			jmp_x := cast(int)(cast(^u16)&vm.code[pc + size_of(Opcode)])^
+			jmp_y := cast(int)(cast(^u16)&vm.code[pc + size_of(Opcode) + size_of(u16)])^
+
+			add_thread(vm, saved, jmp_x)
+			pc = jmp_y
+			continue
+
+		case .Save:
+			// Copy-on-write: clone the save state before mutating it, as
+			// sibling threads may still share the old buffer.
+			new_saved := new([2 * common.MAX_CAPTURE_GROUPS]int)
+			new_saved ^= saved^
+			saved = new_saved
+
+			index := vm.code[pc + size_of(Opcode)]
+			sp := vm.string_pointer+vm.current_rune_size
+			saved[index] = sp
+
+			when common.ODIN_DEBUG_REGEX {
+				io.write_string(common.debug_stream, "Thread [PC:")
+				common.write_padded_hex(common.debug_stream, pc, 4)
+				io.write_string(common.debug_stream, "] saving state: (slot ")
+				io.write_int(common.debug_stream, cast(int)index)
+				io.write_string(common.debug_stream, " = ")
+				io.write_int(common.debug_stream, sp)
+				io.write_string(common.debug_stream, ")\n")
+			}
+
+			pc += size_of(Opcode) + size_of(u8)
+			continue
+
+		case .Assert_Start:
+			// Falls through to kill the thread when the assertion fails.
+			sp := vm.string_pointer+vm.current_rune_size
+			if sp == 0 {
+				pc += size_of(Opcode)
+				continue
+			}
+		case .Assert_End:
+			sp := vm.string_pointer+vm.current_rune_size
+			if sp == len(vm.memory) {
+				pc += size_of(Opcode)
+				continue
+			}
+		case .Multiline_Open:
+			sp := vm.string_pointer+vm.current_rune_size
+			if sp == 0 || sp == len(vm.memory) {
+				if vm.next_rune == '\r' || vm.next_rune == '\n' {
+					// The VM is currently on a newline at the string boundary,
+					// so consume the newline next frame.
+					when common.ODIN_DEBUG_REGEX {
+						io.write_string(common.debug_stream, "*** New thread added [PC:")
+						common.write_padded_hex(common.debug_stream, pc, 4)
+						io.write_string(common.debug_stream, "]\n")
+					}
+					vm.next_threads[vm.top_thread] = Thread{ pc = pc, saved = saved }
+					vm.top_thread += 1
+				} else {
+					// Skip the `Multiline_Close` opcode.
+					pc += 2 * size_of(Opcode)
+					continue
+				}
+			} else {
+				// Not on a string boundary.
+				// Try to consume a newline next frame in the other opcode loop.
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "*** New thread added [PC:")
+					common.write_padded_hex(common.debug_stream, pc, 4)
+					io.write_string(common.debug_stream, "]\n")
+				}
+				vm.next_threads[vm.top_thread] = Thread{ pc = pc, saved = saved }
+				vm.top_thread += 1
+			}
+		case .Assert_Word_Boundary:
+			sp := vm.string_pointer+vm.current_rune_size
+			if sp == 0 || sp == len(vm.memory) {
+				// String boundaries always count as word boundaries.
+				pc += size_of(Opcode)
+				continue
+			} else {
+				last_rune_is_wc := is_word_class(vm.current_rune)
+				this_rune_is_wc := is_word_class(vm.next_rune)
+
+				if last_rune_is_wc && !this_rune_is_wc || !last_rune_is_wc && this_rune_is_wc {
+					pc += size_of(Opcode)
+					continue
+				}
+			}
+		case .Assert_Non_Word_Boundary:
+			sp := vm.string_pointer+vm.current_rune_size
+			if sp != 0 && sp != len(vm.memory) {
+				last_rune_is_wc := is_word_class(vm.current_rune)
+				this_rune_is_wc := is_word_class(vm.next_rune)
+
+				if last_rune_is_wc && this_rune_is_wc || !last_rune_is_wc && !this_rune_is_wc {
+					pc += size_of(Opcode)
+					continue
+				}
+			}
+
+		case .Wait_For_Byte:
+			// If the upcoming rune matches, also schedule the post-match PC;
+			// either way the waiting thread itself stays queued.
+			operand := cast(rune)vm.code[pc + size_of(Opcode)]
+			if vm.next_rune == operand {
+				add_thread(vm, saved, pc + size_of(Opcode) + size_of(u8))
+			}
+
+			when common.ODIN_DEBUG_REGEX {
+				io.write_string(common.debug_stream, "*** New thread added [PC:")
+				common.write_padded_hex(common.debug_stream, pc, 4)
+				io.write_string(common.debug_stream, "]\n")
+			}
+			vm.next_threads[vm.top_thread] = Thread{ pc = pc, saved = saved }
+			vm.top_thread += 1
+
+		case .Wait_For_Rune:
+			operand := (cast(^rune)&vm.code[pc + size_of(Opcode)])^
+			if vm.next_rune == operand {
+				add_thread(vm, saved, pc + size_of(Opcode) + size_of(rune))
+			}
+
+			when common.ODIN_DEBUG_REGEX {
+				io.write_string(common.debug_stream, "*** New thread added [PC:")
+				common.write_padded_hex(common.debug_stream, pc, 4)
+				io.write_string(common.debug_stream, "]\n")
+			}
+			vm.next_threads[vm.top_thread] = Thread{ pc = pc, saved = saved }
+			vm.top_thread += 1
+
+		case .Wait_For_Rune_Class:
+			operand := cast(u8)vm.code[pc + size_of(Opcode)]
+			class_data := vm.class_data[operand]
+			next_rune := vm.next_rune
+
+			check: {
+				for r in class_data.runes {
+					if next_rune == r {
+						add_thread(vm, saved, pc + size_of(Opcode) + size_of(u8))
+						break check
+					}
+				}
+				for range in class_data.ranges {
+					if range.lower <= next_rune && next_rune <= range.upper {
+						add_thread(vm, saved, pc + size_of(Opcode) + size_of(u8))
+						break check
+					}
+				}
+			}
+			when common.ODIN_DEBUG_REGEX {
+				io.write_string(common.debug_stream, "*** New thread added [PC:")
+				common.write_padded_hex(common.debug_stream, pc, 4)
+				io.write_string(common.debug_stream, "]\n")
+			}
+			vm.next_threads[vm.top_thread] = Thread{ pc = pc, saved = saved }
+			vm.top_thread += 1
+
+		case .Wait_For_Rune_Class_Negated:
+			operand := cast(u8)vm.code[pc + size_of(Opcode)]
+			class_data := vm.class_data[operand]
+			next_rune := vm.next_rune
+
+			check_negated: {
+				for r in class_data.runes {
+					if next_rune == r {
+						break check_negated
+					}
+				}
+				for range in class_data.ranges {
+					if range.lower <= next_rune && next_rune <= range.upper {
+						break check_negated
+					}
+				}
+				add_thread(vm, saved, pc + size_of(Opcode) + size_of(u8))
+			}
+			when common.ODIN_DEBUG_REGEX {
+				io.write_string(common.debug_stream, "*** New thread added [PC:")
+				common.write_padded_hex(common.debug_stream, pc, 4)
+				io.write_string(common.debug_stream, "]\n")
+			}
+			vm.next_threads[vm.top_thread] = Thread{ pc = pc, saved = saved }
+			vm.top_thread += 1
+
+		case:
+			// A consuming opcode: queue the thread for the next frame as-is.
+			when common.ODIN_DEBUG_REGEX {
+				io.write_string(common.debug_stream, "*** New thread added [PC:")
+				common.write_padded_hex(common.debug_stream, pc, 4)
+				io.write_string(common.debug_stream, "]\n")
+			}
+			vm.next_threads[vm.top_thread] = Thread{ pc = pc, saved = saved }
+			vm.top_thread += 1
+		}
+
+		break resolution_loop
+	}
+
+	return
+}
+
+run :: proc(vm: ^Machine, $UNICODE_MODE: bool) -> (saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, ok: bool) #no_bounds_check {
+	when UNICODE_MODE {
+		vm.next_rune, vm.next_rune_size = utf8.decode_rune_in_string(vm.memory)
+	} else {
+		if len(vm.memory) > 0 {
+			vm.next_rune = cast(rune)vm.memory[0]
+			vm.next_rune_size = 1
+		}
+	}
+
+	when common.ODIN_DEBUG_REGEX {
+		io.write_string(common.debug_stream, "### Adding initial thread.\n")
+	}
+
+	{
+		starter_saved := new([2 * common.MAX_CAPTURE_GROUPS]int)
+		starter_saved ^= -1
+
+		add_thread(vm, starter_saved, 0)
+	}
+
+	// `add_thread` adds to `next_threads` by default, but we need to put this
+	// thread in the current thread buffer.
+	vm.threads, vm.next_threads = vm.next_threads, vm.threads
+
+	when common.ODIN_DEBUG_REGEX {
+		io.write_string(common.debug_stream, "### VM starting.\n")
+		defer io.write_string(common.debug_stream, "### VM finished.\n")
+	}
+
+	for {
+		for i := 0; i < len(vm.busy_map); i += 1 {
+			vm.busy_map[i] = 0
+		}
+
+		assert(vm.string_pointer <= len(vm.memory), "VM string pointer went out of bounds.")
+
+		current_rune := vm.next_rune
+		vm.current_rune = current_rune
+		vm.current_rune_size = vm.next_rune_size
+		when UNICODE_MODE {
+			vm.next_rune, vm.next_rune_size = utf8.decode_rune_in_string(vm.memory[vm.string_pointer+vm.current_rune_size:])
+		} else {
+			if vm.string_pointer+size_of(u8) < len(vm.memory) {
+				vm.next_rune = cast(rune)vm.memory[vm.string_pointer+size_of(u8)]
+				vm.next_rune_size = size_of(u8)
+			} else {
+				vm.next_rune = 0
+				vm.next_rune_size = 0
+			}
+		}
+
+		when common.ODIN_DEBUG_REGEX {
+			io.write_string(common.debug_stream, ">>> Dispatching rune: ")
+			io.write_encoded_rune(common.debug_stream, current_rune)
+			io.write_byte(common.debug_stream, '\n')
+		}
+
+		thread_count := vm.top_thread
+		vm.top_thread = 0
+		thread_loop: for i := 0; i < thread_count; i += 1 {
+			t := vm.threads[i]
+
+			when common.ODIN_DEBUG_REGEX {
+				io.write_string(common.debug_stream, "Thread [PC:")
+				common.write_padded_hex(common.debug_stream, t.pc, 4)
+				io.write_string(common.debug_stream, "] stepping on ")
+				io.write_string(common.debug_stream, opcode_to_name(vm.code[t.pc]))
+				io.write_byte(common.debug_stream, '\n')
+			}
+
+			#partial opcode: switch vm.code[t.pc] {
+			case .Match:
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "Thread matched!\n")
+				}
+				saved = t.saved
+				ok = true
+				break thread_loop
+
+			case .Match_And_Exit:
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "Thread matched! (Exiting)\n")
+				}
+				return nil, true
+
+			case .Byte:
+				operand := cast(rune)vm.code[t.pc + size_of(Opcode)]
+				if current_rune == operand {
+					add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+				}
+
+			case .Rune:
+				operand := (cast(^rune)&vm.code[t.pc + size_of(Opcode)])^
+				if current_rune == operand {
+					add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(rune))
+				}
+
+			case .Rune_Class:
+				operand := cast(u8)vm.code[t.pc + size_of(Opcode)]
+				class_data := vm.class_data[operand]
+
+				for r in class_data.runes {
+					if current_rune == r {
+						add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+						break opcode
+					}
+				}
+				for range in class_data.ranges {
+					if range.lower <= current_rune && current_rune <= range.upper {
+						add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+						break opcode
+					}
+				}
+
+			case .Rune_Class_Negated:
+				operand := cast(u8)vm.code[t.pc + size_of(Opcode)]
+				class_data := vm.class_data[operand]
+				for r in class_data.runes {
+					if current_rune == r {
+						break opcode
+					}
+				}
+				for range in class_data.ranges {
+					if range.lower <= current_rune && current_rune <= range.upper {
+						break opcode
+					}
+				}
+				add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+
+			case .Wildcard:
+				add_thread(vm, t.saved, t.pc + size_of(Opcode))
+
+			case .Multiline_Open:
+				if current_rune == '\n' {
+					// UNIX newline.
+					add_thread(vm, t.saved, t.pc + 2 * size_of(Opcode))
+				} else if current_rune == '\r' {
+					if vm.next_rune == '\n' {
+						// Windows newline. (1/2)
+						add_thread(vm, t.saved, t.pc + size_of(Opcode))
+					} else {
+						// Mac newline.
+						add_thread(vm, t.saved, t.pc + 2 * size_of(Opcode))
+					}
+				}
+			case .Multiline_Close:
+				if current_rune == '\n' {
+					// Windows newline. (2/2)
+					add_thread(vm, t.saved, t.pc + size_of(Opcode))
+				}
+
+			case .Wait_For_Byte:
+				operand := cast(rune)vm.code[t.pc + size_of(Opcode)]
+				if vm.next_rune == operand {
+					add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+				}
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "*** New thread added [PC:")
+					common.write_padded_hex(common.debug_stream, t.pc, 4)
+					io.write_string(common.debug_stream, "]\n")
+				}
+				vm.next_threads[vm.top_thread] = Thread{ pc = t.pc, saved = t.saved }
+				vm.top_thread += 1
+
+			case .Wait_For_Rune:
+				operand := (cast(^rune)&vm.code[t.pc + size_of(Opcode)])^
+				if vm.next_rune == operand {
+					add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(rune))
+				}
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "*** New thread added [PC:")
+					common.write_padded_hex(common.debug_stream, t.pc, 4)
+					io.write_string(common.debug_stream, "]\n")
+				}
+				vm.next_threads[vm.top_thread] = Thread{ pc = t.pc, saved = t.saved }
+				vm.top_thread += 1
+
+			case .Wait_For_Rune_Class:
+				operand := cast(u8)vm.code[t.pc + size_of(Opcode)]
+				class_data := vm.class_data[operand]
+				next_rune := vm.next_rune
+
+				check: {
+					for r in class_data.runes {
+						if next_rune == r {
+							add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+							break check
+						}
+					}
+					for range in class_data.ranges {
+						if range.lower <= next_rune && next_rune <= range.upper {
+							add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+							break check
+						}
+					}
+				}
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "*** New thread added [PC:")
+					common.write_padded_hex(common.debug_stream, t.pc, 4)
+					io.write_string(common.debug_stream, "]\n")
+				}
+				vm.next_threads[vm.top_thread] = Thread{ pc = t.pc, saved = t.saved }
+				vm.top_thread += 1
+
+			case .Wait_For_Rune_Class_Negated:
+				operand := cast(u8)vm.code[t.pc + size_of(Opcode)]
+				class_data := vm.class_data[operand]
+				next_rune := vm.next_rune
+
+				check_negated: {
+					for r in class_data.runes {
+						if next_rune == r {
+							break check_negated
+						}
+					}
+					for range in class_data.ranges {
+						if range.lower <= next_rune && next_rune <= range.upper {
+							break check_negated
+						}
+					}
+					add_thread(vm, t.saved, t.pc + size_of(Opcode) + size_of(u8))
+				}
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "*** New thread added [PC:")
+					common.write_padded_hex(common.debug_stream, t.pc, 4)
+					io.write_string(common.debug_stream, "]\n")
+				}
+				vm.next_threads[vm.top_thread] = Thread{ pc = t.pc, saved = t.saved }
+				vm.top_thread += 1
+
+			case .Match_All_And_Escape:
+				t.pc += size_of(Opcode)
+				// The point of this loop is to walk out of wherever this
+				// opcode lives to the end of the program, while saving the
+				// index to the length of the string at each pass on the way.
+				escape_loop: for {
+					#partial switch vm.code[t.pc] {
+					case .Match, .Match_And_Exit:
+						break escape_loop
+
+					case .Jump:
+						t.pc = cast(int)(cast(^u16)&vm.code[t.pc + size_of(Opcode)])^
+
+					case .Save:
+						index := vm.code[t.pc + size_of(Opcode)]
+						t.saved[index] = len(vm.memory)
+						t.pc += size_of(Opcode) + size_of(u8)
+
+					case .Match_All_And_Escape:
+						// Layering these is fine.
+						t.pc += size_of(Opcode)
+
+					// If the loop has to process any opcode not listed above,
+					// it means someone did something odd like `a(.*$)b`, in
+					// which case, just fail. Technically, the expression makes
+					// no sense.
+					case:
+						break opcode
+					}
+				}
+
+				saved = t.saved
+				ok = true
+				return
+
+			case:
+				when common.ODIN_DEBUG_REGEX {
+					io.write_string(common.debug_stream, "Opcode: ")
+					io.write_int(common.debug_stream, cast(int)vm.code[t.pc])
+					io.write_string(common.debug_stream, "\n")
+				}
+				panic("Invalid opcode in RegEx thread loop.")
+			}
+		}
+
+		vm.threads, vm.next_threads = vm.next_threads, vm.threads
+
+		when common.ODIN_DEBUG_REGEX {
+			io.write_string(common.debug_stream, "<<< Frame ended. (Threads: ")
+			io.write_int(common.debug_stream, vm.top_thread)
+			io.write_string(common.debug_stream, ")\n")
+		}
+
+		if vm.string_pointer == len(vm.memory) || vm.top_thread == 0 {
+			break
+		}
+
+		vm.string_pointer += vm.current_rune_size
+	}
+
+	return
+}
+
+// Walk a compiled program one instruction at a time and report how many
+// opcodes it contains.
+opcode_count :: proc(code: Program) -> (opcodes: int) {
+	it := Opcode_Iterator{ code, 0 }
+	count := 0
+	for _ in iterate_opcodes(&it) {
+		count += 1
+	}
+	return count
+}
+
+// Set up a virtual machine that will run the compiled program `code`
+// against the subject string `str`.
+//
+// The program must contain at least one instruction.
+create :: proc(code: Program, str: string) -> (vm: Machine) {
+	assert(len(code) > 0, "RegEx VM has no instructions.")
+
+	vm.code = code
+	vm.memory = str
+
+	// The busy map holds one bit per element of `code`, packed into
+	// 64-bit words; round the word count up.
+	busy_words := (len(code) + 0x3F) >> 6
+	assert(busy_words > 0)
+	vm.busy_map = make([]u64, busy_words)
+
+	// Size both thread lists to one slot per opcode, less one.
+	// NOTE(review): presumably the final match instruction never spawns a
+	// successor thread, hence the `- 1` — confirm against the VM loop.
+	thread_limit := max(1, opcode_count(vm.code) - 1)
+	vm.threads = make([^]Thread, thread_limit)
+	vm.next_threads = make([^]Thread, thread_limit)
+
+	return
+}