Browse Source

Add benchmarks for `core:text/regex`

Feoramund 1 year ago
parent
commit
be38ba6c5e
2 changed files with 259 additions and 0 deletions
  1. 1 0
      tests/benchmark/all.odin
  2. 258 0
      tests/benchmark/text/regex/benchmark_regex.odin

+ 1 - 0
tests/benchmark/all.odin

@@ -2,3 +2,4 @@ package benchmarks
 
 @(require) import "crypto"
 @(require) import "hash"
+@(require) import "text/regex"

+ 258 - 0
tests/benchmark/text/regex/benchmark_regex.odin

@@ -0,0 +1,258 @@
+package benchmark_core_text_regex
+
+import "core:fmt"
+import "core:log"
+import "core:math/rand"
+import "core:mem"
+import "core:testing"
+import "core:text/regex"
+import "core:time"
+import "core:unicode/utf8"
+
+randomize_ascii :: proc(data: []u8) {
+	for i in 0..<len(data) {
+		data[i] = ' ' + cast(u8)rand.int_max(0x7F - ' ')
+	}
+}
+
+randomize_unicode :: proc(data: []u8) {
+	for i := 0; i < len(data); /**/ {
+		check_rune_loop: for {
+			r := cast(rune)rand.int_max(utf8.MAX_RUNE)
+			if !utf8.valid_rune(r) {
+				continue
+			}
+			if utf8.rune_size(r) > len(data) - i {
+				continue
+			}
+
+			r_data, size := utf8.encode_rune(r)
+			for j in 0..<size {
+				data[i+j] = r_data[j]
+			}
+
+			i += size
+			break check_rune_loop
+		}
+	}
+}
+
+sizes := [?]int {
+	2 * mem.Kilobyte,
+	32 * mem.Kilobyte,
+	64 * mem.Kilobyte,
+	256 * mem.Kilobyte,
+	0.50 * mem.Megabyte,
+	1.00 * mem.Megabyte,
+	2.00 * mem.Megabyte,
+}
+
+@test
+expensive_for_backtrackers :: proc(t: ^testing.T) {
+	counts := [?]int {
+		8,
+		16,
+		32,
+		64,
+	}
+
+	report: string
+
+	for count in counts {
+		data := make([]u8, count)
+		pattern := make([]u8, 2 * count + count)
+		defer {
+			delete(data)
+			delete(pattern)
+		}
+		for i in 0..<2 * count {
+			pattern[i] = 'a' if i & 1 == 0 else '?'
+		}
+		for i in 2 * count..<2 * count + count {
+			pattern[i] = 'a'
+		}
+		for i in 0..<count {
+			data[i] = 'a'
+		}
+
+		rex, err := regex.create(cast(string)pattern)
+		if !testing.expect_value(t, err, nil) {
+			return
+		}
+		defer regex.destroy(rex)
+
+		str := cast(string)data
+
+		log.debug(rex, str)
+
+		start := time.now()
+		capture, ok := regex.match(rex, str)
+		done := time.since(start)
+		defer regex.destroy(capture)
+
+		if !testing.expect_value(t, ok, true) {
+			continue
+		}
+		testing.expect_value(t, capture.pos[0], [2]int{0, count})
+
+		rate := cast(int)(cast(f64)(count / 2) / (cast(f64)done / 1e9))
+		report = fmt.tprintf("%s\n        +++ [%i : %v : %M/s] Matched `a?^%ia^%i` against `a^%i`.", report, count, done, rate, count, count, count)
+	}
+	log.info(report)
+}
+
+@test
+global_capture_end_word :: proc(t: ^testing.T) {
+	EXPR :: `Hellope World!`
+
+	rex, err := regex.create(EXPR, { .Global })
+	if !testing.expect_value(t, err, nil) {
+		return
+	}
+	defer regex.destroy(rex)
+
+	report := fmt.tprintf("Matching %v over a block of random ASCII text.", rex)
+
+	for size in sizes {
+		data := make([]u8, size)
+		defer delete(data)
+		randomize_ascii(data[:])
+
+		for r, i in EXPR {
+			data[len(data) - len(EXPR) + i] = cast(u8)r
+		}
+
+		str := cast(string)data
+
+		start := time.now()
+		capture, ok := regex.match(rex, str)
+		done := time.since(start)
+		defer regex.destroy(capture)
+
+		if !testing.expect_value(t, ok, true) {
+			continue
+		}
+		testing.expect_value(t, capture.pos[0], [2]int{size - len(EXPR), size})
+
+		rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
+		report = fmt.tprintf("%s\n        +++ [%M : %v : %M/s]", report, size, done, rate)
+	}
+	log.info(report)
+}
+
+@test
+global_capture_end_word_unicode :: proc(t: ^testing.T) {
+	EXPR :: `こにちは`
+	needle := string(EXPR)
+
+	rex, err := regex.create(EXPR, { .Global, .Unicode })
+	if !testing.expect_value(t, err, nil) {
+		return
+	}
+	defer regex.destroy(rex)
+
+	report := fmt.tprintf("Matching %v over a block of random Unicode text.", rex)
+
+	for size in sizes {
+		data := make([]u8, size)
+		defer delete(data)
+		randomize_unicode(data[:size - len(needle)])
+
+		for i := 0; i < len(needle); i += 1 {
+			data[len(data) - len(needle) + i] = needle[i]
+		}
+
+		str := cast(string)data
+
+		start := time.now()
+		capture, ok := regex.match(rex, str)
+		done := time.since(start)
+		defer regex.destroy(capture)
+
+		if !testing.expect_value(t, ok, true) {
+			continue
+		}
+		testing.expect_value(t, capture.groups[0], needle)
+
+		rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
+		report = fmt.tprintf("%s\n        +++ [%M : %v : %M/s]", report, size, done, rate)
+	}
+	log.info(report)
+}
+
+
+@test
+alternations :: proc(t: ^testing.T) {
+	EXPR :: `a(?:bb|cc|dd|ee|ff)`
+
+	rex, err := regex.create(EXPR, { .No_Capture, .Global })
+	if !testing.expect_value(t, err, nil) {
+		return
+	}
+	defer regex.destroy(rex)
+
+	report := fmt.tprintf("Matching %v over a text block of only `a`s.", rex)
+
+	for size in sizes {
+		data := make([]u8, size)
+		defer delete(data)
+		for i in 0..<size {
+			data[i] = 'a'
+		}
+
+		str := cast(string)data
+
+		start := time.now()
+		_, ok := regex.match(rex, str)
+		done := time.since(start)
+
+		testing.expect_value(t, ok, false)
+
+		rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
+		report = fmt.tprintf("%s\n        +++ [%M : %v : %M/s]", report, size, done, rate)
+	}
+	log.info(report)
+}
+
+@test
+classes :: proc(t: ^testing.T) {
+	EXPR :: `[\w\d]+`
+	NEEDLE :: "0123456789abcdef"
+
+	rex, err := regex.create(EXPR, { .Global })
+	if !testing.expect_value(t, err, nil) {
+		return
+	}
+	defer regex.destroy(rex)
+
+	report := fmt.tprintf("Matching %v over a string of spaces with %q at the end.", rex, NEEDLE)
+
+	for size in sizes {
+		data := make([]u8, size)
+		defer delete(data)
+
+		for i in 0..<size {
+			data[i] = ' '
+		}
+
+		for r, i in NEEDLE {
+			data[len(data) - len(NEEDLE) + i] = cast(u8)r
+		}
+
+		str := cast(string)data
+
+		start := time.now()
+		capture, ok := regex.match(rex, str)
+		done := time.since(start)
+		defer regex.destroy(capture)
+
+		if !testing.expect_value(t, ok, true) {
+			continue
+		}
+		testing.expect_value(t, capture.pos[0], [2]int{size - len(NEEDLE), size})
+
+		rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
+		report = fmt.tprintf("%s\n        +++ [%M : %v : %M/s]", report, size, done, rate)
+	}
+	log.info(report)
+}