Browse Source

Update `package path` and fix bugs; add path.match which uses shell pattern matching

gingerBill 4 years ago
parent
commit
b42c7f9161
4 changed files with 428 additions and 59 deletions
  1. 205 0
      core/path/match.odin
  2. 215 55
      core/path/path.odin
  3. 4 2
      core/path/path_unix.odin
  4. 4 2
      core/path/path_windows.odin

+ 205 - 0
core/path/match.odin

@@ -0,0 +1,205 @@
+package path
+
+import "core:strings"
+import "core:unicode/utf8"
+
+Match_Error :: enum {
+	None,
+	Syntax_Error,
+}
+
+// match states whether "name" matches the shell pattern
+// Pattern syntax is:
+//	pattern:
+//		{term}
+//	term:
+//		'*'	        matches any sequence of non-/ characters
+//		'?'             matches any single non-/ character
+//		'[' ['^']  { character-range } ']'
+//		                character classification (cannot be empty)
+//		c               matches character c (c != '*', '?', '\\', '[')
+//		'\\' c          matches character c
+//
+//	character-range
+//		c               matches character c (c != '\\', '-', ']')
+//		'\\' c          matches character c
+//		lo '-' hi       matches character c for lo <= c <= hi
+//
+// match requires that the pattern matches the entirety of the name, not just a substring
+// The only possible error returned is .Syntax_Error
+//
+// NOTE(bill): This is effectively the pattern matching system found in Unix shells (globbing)
+//
+match :: proc(pattern, name: string) -> (matched: bool, err: Match_Error) {
+	// Shadow the parameters so both can be advanced as input is consumed
+	pattern, name := pattern, name;
+	pattern_loop: for len(pattern) > 0 {
+		star: bool;
+		chunk: string;
+		// Take the next chunk off the front of the pattern; "star" reports whether it was preceded by '*'
+		star, chunk, pattern = scan_chunk(pattern);
+		if star && chunk == "" {
+			// A trailing '*' matches the rest of the name unless the rest contains a '/'
+			return !strings.contains(name, "/"), .None;
+		}
+
+		t: string;
+		ok: bool;
+		// Try to match the chunk at the current position of the name
+		t, ok, err = match_chunk(chunk, name);
+
+		// When this is the last chunk, only accept the match if it consumed the whole name;
+		// otherwise fall through so that a preceding '*' may still absorb more of the name
+		if ok && (len(t) == 0 || len(pattern) > 0) {
+			name = t;
+			continue;
+		}
+		if err != .None {
+			// "matched" is still false here; "err" carries the error from match_chunk
+			return;
+		}
+		if star {
+			// Retry the chunk after letting '*' absorb the first i+1 bytes of the name
+			// The loop stops at a '/', so '*' never absorbs one
+			for i := 0; i < len(name) && name[i] != '/'; i += 1 {
+				t, ok, err = match_chunk(chunk, name[i+1:]);
+				if ok {
+					// The last chunk must consume the entire name, so keep scanning
+					if len(pattern) == 0 && len(t) > 0 {
+						continue;
+					}
+					name = t;
+					continue pattern_loop;
+				}
+				if err != .None {
+					return;
+				}
+			}
+		}
+
+		// The chunk could not be matched at any permitted position
+		return false, .None;
+	}
+
+	// The pattern is exhausted: it is a match only if the name is exhausted too
+	return len(name) == 0, .None;
+}
+
+
+// scan_chunk splits the next chunk off the front of "pattern"
+// Any leading run of '*' is dropped and reported through "star"
+// "chunk" is the text up to (not including) the next '*' that is outside a [...] class,
+// and "rest" is whatever follows the chunk
+// A backslash protects the byte after it from being interpreted
+@(private="file")
+scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) {
+	// Count the leading stars
+	stars := 0;
+	for stars < len(pattern) && pattern[stars] == '*' {
+		stars += 1;
+	}
+	star = stars > 0;
+	body := pattern[stars:];
+
+	inside_class := false;
+	end := 0;
+	for end < len(body) {
+		c := body[end];
+		if c == '*' && !inside_class {
+			// An unescaped '*' outside of a class terminates the chunk
+			break;
+		}
+		if c == '\\' {
+			// Step over the escaped byte, if there is one
+			if end+1 < len(body) {
+				end += 1;
+			}
+		} else if c == '[' {
+			inside_class = true;
+		} else if c == ']' {
+			inside_class = false;
+		}
+		end += 1;
+	}
+
+	chunk = body[:end];
+	rest  = body[end:];
+	return;
+}
+
+// match_chunk checks whether "chunk" matches the beginning of "s"
+// On success it returns the unmatched remainder of "s" with ok == true
+// On a mismatch it returns with ok == false and err == .None
+// A malformed chunk yields err == .Syntax_Error
+// The chunk is expected to contain no '*' that acts as a wildcard (match passes it chunks produced by scan_chunk)
+@(private="file")
+match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Match_Error) {
+	chunk, s := chunk, s;
+	for len(chunk) > 0 {
+		if len(s) == 0 {
+			// Pattern left over but nothing to match it against: mismatch (results keep their zero values)
+			return;
+		}
+		switch chunk[0] {
+		case '[':
+			// Character class: consume one rune of the input and test it against every range
+			r, w := utf8.decode_rune_in_string(s);
+			s = s[w:];
+			chunk = chunk[1:];
+			is_negated := false;
+			if len(chunk) > 0 && chunk[0] == '^' {
+				is_negated = true;
+				chunk = chunk[1:];
+			}
+			match := false;
+			range_count := 0;
+			for {
+				// ']' only closes the class once at least one range was parsed, so a class cannot be empty
+				if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 {
+					chunk = chunk[1:];
+					break;
+				}
+				lo, hi: rune;
+				if lo, chunk, err = get_escape(chunk); err != .None {
+					return;
+				}
+				// A single character is treated as the range lo..lo
+				hi = lo;
+				// get_escape reports an error when nothing follows the character, so chunk is non-empty here
+				if chunk[0] == '-' {
+					if hi, chunk, err = get_escape(chunk[1:]); err != .None {
+						return;
+					}
+				}
+
+				if lo <= r && r <= hi {
+					match = true;
+				}
+				range_count += 1;
+			}
+			// Mismatch when a plain class did not match, or when a negated class did
+			if match == is_negated {
+				return;
+			}
+
+		case '?':
+			// Any single rune except '/'
+			if s[0] == '/' {
+				return;
+			}
+			_, w := utf8.decode_rune_in_string(s);
+			s = s[w:];
+			chunk = chunk[1:];
+
+		case '\\':
+			// Escape: drop the backslash and compare the next byte literally
+			chunk = chunk[1:];
+			if len(chunk) == 0 {
+				// A trailing backslash has nothing to escape
+				err = .Syntax_Error;
+				return;
+			}
+			fallthrough;
+		case:
+			// Literal: compared byte by byte
+			if chunk[0] != s[0] {
+				return;
+			}
+			s = s[1:];
+			chunk = chunk[1:];
+
+		}
+	}
+	return s, true, .None;
+}
+
+// get_escape reads one (possibly backslash-escaped) character from the front of a
+// character class and returns it together with the text that follows it
+// .Syntax_Error is reported when:
+//	- the chunk is empty or starts with an unescaped '-' or ']'
+//	- a backslash is the last byte of the chunk
+//	- the character is not valid UTF-8
+//	- nothing follows the character (the class is never closed)
+@(private="file")
+get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Error) {
+	if len(chunk) == 0 {
+		err = .Syntax_Error;
+		return;
+	}
+	switch chunk[0] {
+	case '-', ']':
+		err = .Syntax_Error;
+		return;
+	}
+
+	body := chunk;
+	if body[0] == '\\' {
+		body = body[1:];
+		if len(body) == 0 {
+			err = .Syntax_Error;
+			return;
+		}
+	}
+
+	width: int;
+	r, width = utf8.decode_rune_in_string(body);
+	next_chunk = body[width:];
+
+	// The rune and the remainder are still returned alongside the error in these two cases
+	if (r == utf8.RUNE_ERROR && width == 1) || len(next_chunk) == 0 {
+		err = .Syntax_Error;
+	}
+
+	return;
+}

+ 215 - 55
core/path/path.odin

@@ -1,91 +1,211 @@
 package path
 
 import "core:strings"
+import "core:runtime"
 import "core:unicode/utf8"
 
-
-// returns everything preceding the last path element
-dir :: proc(path: string, new := false, allocator := context.allocator) -> string {
-	if path == "" {
-		return "";
+// is_separator_byte reports whether c is a path separator:
+// '/' on every platform, and additionally '\\' when compiling for Windows
+is_separator_byte :: proc(c: byte) -> bool {
+	if c == '/' {
+		return true;
 	}
+	return c == '\\' && ODIN_OS == "windows";
+}
+
 
-	for i := len(path) - 1; i >= 0; i -= 1 {
-		if path[i] == '/' || path[i] == '\\' {
-			if path[:i] == "" {
-				// path is root
-				return new ? strings.clone(SEPARATOR_STRING, allocator) : SEPARATOR_STRING;
-			} else {
-				return new ? strings.clone(path[:i], allocator) : path[:i];
+// is_abs checks whether the path is absolute
+// A path is considered absolute when it starts with '/', or (Windows builds only)
+// when it starts with an ASCII drive letter followed by ':' and a separator
+is_abs :: proc(path: string) -> bool {
+	// A leading '/' counts as absolute on every platform
+	if len(path) > 0 && path[0] == '/' {
+		return true;
+	}
+	when ODIN_OS == "windows" {
+		// Needs at least three bytes: drive letter, ':', separator
+		if len(path) > 2 {
+			switch path[0] {
+			case 'A'..'Z', 'a'..'z':
+				return path[1] == ':' && is_separator_byte(path[2]);
 			}
 		}
 	}
-
-	// path doesn't contain any folder structure
-	return "";
+	return false;
 }
 
-// returns the final path element
-base :: proc(path: string, new := false, allocator := context.allocator) -> string {
+
+// base returns the last element of path
+// Trailing slashes are removed
+// If the path is empty, it returns ".".
+// If the path is all slashes, it returns "/"
+// When "new" is true the result is cloned with "allocator"; otherwise it is a slice of "path" or a string literal
+base :: proc(path: string, new := false, allocator := context.allocator) -> (last_element: string) {
+	// Deferred so that every return path below gets the optional clone applied to the named result
+	defer if new {
+		last_element = strings.clone(last_element, allocator);
+	}
+
 	if path == "" {
-		return "";
+		last_element = ".";
+		return;
 	}
 
-	end := len(path) - 1;
 
-	for i := end; i >= 0; i -= 1 {
-		switch path[i] {
-		case '/', '\\':
-			if i != end {
-				return new ? strings.clone(path[i+1:], allocator) : path[i+1:];
-			} else {
-				end = i; // we don't want trailing slashes
-			}
-		}
+	path := path;
+
+	// Strip trailing separators
+	for len(path) > 0 && is_separator_byte(path[len(path)-1]) {
+		path = path[:len(path)-1];
+	}
+	// Keep only what follows the last remaining separator
+	if i := strings.last_index_any(path, OS_SEPARATORS); i >= 0 {
+		path = path[i+1:];
 	}
 
-	// path doesn't contain any folder structure, return entire path
-	return new ? strings.clone(path, allocator) : path;
+	// Nothing left means the path consisted only of separators
+	if path == "" {
+		last_element = "/";
+	} else {
+		last_element = path;
+	}
+	return;
+}
+
+// dir returns all but the last element of path, typically the path's directory.
+// The final element is dropped at the last separator (exactly as split does),
+// then the remainder is cleaned, which also removes trailing slashes
+// If the path is empty, it returns "."
+// If the path consists entirely of slashes followed by non-slash bytes, it returns a single slash
+// In any other case, the returned path does not end in a slash
+dir :: proc(path: string, allocator := context.allocator) -> string {
+	last_separator := strings.last_index_any(path, OS_SEPARATORS);
+	return clean(path[:last_separator+1], allocator);
+}
+
+
+// split cuts path right after its last separator,
+// yielding a directory component and a file name component.
+// Without any separator in path, dir is empty and file is the whole path
+// Both results are slices of path and always satisfy path = dir+file
+split :: proc(path: string) -> (dir, file: string) {
+	// With no separator the index is -1, which puts the cut at 0
+	cut := strings.last_index_any(path, OS_SEPARATORS) + 1;
+	dir  = path[:cut];
+	file = path[cut:];
+	return;
+}
+
+// split_elements splits the path elements into slices of the original path string
+// The path is split on every separator listed in OS_SEPARATORS_ARRAY
+// NOTE(review): the "true" argument presumably tells strings.split_multi to skip empty elements,
+// and the returned slice is presumably allocated with "allocator" - confirm against core:strings
+split_elements :: proc(path: string, allocator := context.allocator) -> []string {
+	return strings.split_multi(path, OS_SEPARATORS_ARRAY, true, allocator);
 }
 
-// returns the final path element, excluding the file extension if there is one
-name :: proc(path: string, new := false, allocator := context.allocator) -> string {
+// clean returns the shortest path name equivalent to path through lexical analysis only
+// It applies the following rules iteratively until done:
+//
+//	1) replace multiple slashes with one
+//	2) remove each . path name element
+//	3) remove each inner .. path name element together with the element that precedes it
+//	4) remove .. elements that begin a rooted path ("/.." becomes "/")
+//
+// The result is written with '/' separators and is always newly allocated with "allocator"
+// An empty path, or one that cleans to nothing, yields "."
+clean :: proc(path: string, allocator := context.allocator) -> string {
+	// Every allocation below (clone, lazy buffer) goes through the requested allocator
+	context.allocator = allocator;
+
 	if path == "" {
-		return "";
+		return strings.clone(".");
 	}
 
-	end := len(path) - 1;
-	dot := end;
+	// NOTE(bill): do not use is_separator_byte because window paths do not follow this convention
+	rooted := path[0] == '/';
+	n := len(path);
+
+	// The buffer reads from "path" and only allocates once the output diverges from it
+	out := &Lazy_Buffer{s = path};
+
+	// Check for ../../.. prefixes
+	// r is the read index into path; dot_dot is the write index that ".." may not back up past
+	r, dot_dot := 0, 0;
+	if rooted {
+		lazy_buffer_append(out, '/');
+		r, dot_dot = 1, 1;
+	}
 
-	for i := end; i >= 0; i -= 1 {
-		switch path[i] {
-		case '.':       dot = (dot == end ? i : dot);
-		case '/', '\\': return new ? strings.clone(path[i+1:dot], allocator) : path[i+1:dot];
+	for r < n {
+		switch {
+		case is_separator_byte(path[r]):
+			// Separator: skip it (separators are re-inserted as '/' when an element is written)
+			r += 1;
+		case path[r] == '.' && (r+1 == n || is_separator_byte(path[r+1])):
+			// "." element: skip it
+			r += 1;
+		case path[r] == '.' && path[r+1] == '.' && (r+2 == n || is_separator_byte(path[r+2])):
+			// ".." element
+			r += 2;
+			switch {
+			case out.w > dot_dot:
+				// There is a previous element to remove: rewind the write index to its separator
+				out.w -= 1;
+				for out.w > dot_dot && !is_separator_byte(lazy_buffer_index(out, out.w)) {
+					out.w -= 1;
+				}
+
+			case !rooted:
+				// Nothing to remove in a relative path: keep the ".." and forbid backing up over it
+				if out.w > 0 {
+					lazy_buffer_append(out, '/');
+				}
+				lazy_buffer_append(out, '.');
+				lazy_buffer_append(out, '.');
+				dot_dot = out.w;
+			}
+			// (a rooted path with nothing to remove matches neither case, so the ".." is dropped)
+		case:
+			// Ordinary element: write a '/' unless this is the first element, then copy the element
+			if rooted && out.w != 1 || !rooted && out.w != 0 {
+				lazy_buffer_append(out, '/');
+			}
+			for ; r < n && !is_separator_byte(path[r]); r += 1 {
+				lazy_buffer_append(out, path[r]);
+			}
 		}
 	}
 
-	// path doesn't contain any folder structure or file extensions; assumed to be a valid file name
-	return new ? strings.clone(path, allocator) : path;
+	// Nothing was written: release the buffer (if one was made) and return "."
+	if out.w == 0 {
+		delete(out.b);
+		return strings.clone(".");
+	}
+
+	return lazy_buffer_string(out);
+}
+
+// join joins any number of path elements into a single path, separating them with '/'
+// Leading empty elements are skipped and the joined text is passed through clean
+// If every element is empty (or there are none), it returns "" without allocating
+// Otherwise the returned string is allocated with "allocator"
+join :: proc(elems: ..string, allocator := context.allocator) -> string {
+	context.allocator = allocator;
+	for elem, i in elems {
+		if elem != "" {
+			joined := strings.join(elems[i:], "/");
+			// clean always returns a fresh allocation that never aliases its input,
+			// so the intermediate string has to be released here or it would leak
+			cleaned := clean(joined);
+			delete(joined);
+			return cleaned;
+		}
+	}
+	return "";
 }
 
-// returns the file extension, if there is one
+// ext returns the file name extension used by "path"
+// The extension is the suffix beginning at the final dot in the last slash separated element of "path"
+// The returned extension includes that dot, and it is empty if there is no dot
+// When "new" is true a found extension is cloned with "allocator"; otherwise it is a slice of "path"
 ext :: proc(path: string, new := false, allocator := context.allocator) -> string {
-	if path == "" {
-		return "";
+	// Scan backwards and stop at the first separator, so only the last element is searched
+	for i := len(path)-1; i >= 0 && !is_separator_byte(path[i]); i -= 1 {
+		if path[i] == '.' {
+			res := path[i:];
+			if new {
+				res = strings.clone(res, allocator);
+			}
+			return res;
+		}
+	}
+	return "";
+}
+
+// name returns the file component of path (as produced by split) without its extension
+// The extension is everything from the final dot of the file component onwards
+// When "new" is true the result is cloned with "allocator"; otherwise it is a slice of "path"
+name :: proc(path: string, new := false, allocator := context.allocator) -> (name: string) {
+	_, file := split(path);
+	name = file;
+
+	// Deferred so that every return path below gets the optional clone applied to the named result
+	defer if new {
+		name = strings.clone(name, allocator);
 	}
 
-	for i := len(path)-1; i >= 0; i -= 1 {
-		switch path[i] {
-		case '/', '\\': return "";
-		case '.':       return new ? strings.clone(path[i+1:], allocator) : path[i+1:];
+	// Scan backwards for the final dot and cut the name there
+	for i := len(file)-1; i >= 0 && !is_separator_byte(file[i]); i -= 1 {
+		if file[i] == '.' {
+			name = file[:i];
+			return;
 		}
 	}
+	return file;
 
-	// path does not include a file extension
-	return "";
 }
 
 
+
 rel :: proc{rel_between, rel_current};
 
 // returns the relative path from one path to another
@@ -177,7 +297,7 @@ rel_between :: proc(from, to: string, allocator := context.allocator) -> string
 		buffer := make([]byte, 2 + len(to), allocator);
 
 		buffer[0] = '.';
-		buffer[1] = SEPARATOR;
+		buffer[1] = '/';
 		copy(buffer[2:], to);
 
 		return string(buffer);
@@ -188,7 +308,7 @@ rel_between :: proc(from, to: string, allocator := context.allocator) -> string
 		for i in 0..<from_slashes {
 			buffer[i*3+0] = '.';
 			buffer[i*3+1] = '.';
-			buffer[i*3+2] = SEPARATOR;
+			buffer[i*3+2] = '/';
 		}
 
 		copy(buffer[from_slashes*3:], to);
@@ -205,7 +325,47 @@ rel_current :: proc(to: string, allocator := context.allocator) -> string {
 }
 
 
-// splits the path elements into slices of the original path string
-split :: proc(s: string, allocator := context.allocator) -> []string {
-	return strings.split_multi(s, []string{"\\", "/"}, true, allocator);
+
+
+
+
+
+
+/*
+	Lazy_Buffer is a lazily made path buffer
+	When it does allocate, it uses the context.allocator
+	While "b" is nil the logical contents are s[:w]; once "b" has been allocated they are b[:w]
+ */
+@(private)
+Lazy_Buffer :: struct {
+	s: string, // original string; serves as the contents for as long as "b" is nil
+	b: []byte, // nil until an appended byte differs from "s"; then holds the contents
+	w: int, // write index
+}
+
+// lazy_buffer_index returns the byte at index i of the buffer's contents,
+// reading from the original string while no byte slice has been allocated
+@(private)
+lazy_buffer_index :: proc(lb: ^Lazy_Buffer, i: int) -> byte {
+	if lb.b == nil {
+		return lb.s[i];
+	}
+	return lb.b[i];
+}
+// lazy_buffer_append writes c at the write index and advances it
+// As long as the appended bytes equal the original string nothing is allocated;
+// the first differing byte allocates len(s) bytes and copies over what was written so far
+@(private)
+lazy_buffer_append :: proc(lb: ^Lazy_Buffer, c: byte) {
+	still_lazy := lb.b == nil;
+
+	// Fast path: the byte is already present at this position of the original string
+	if still_lazy && lb.w < len(lb.s) && lb.s[lb.w] == c {
+		lb.w += 1;
+		return;
+	}
+
+	// First divergence from the original: switch over to a real buffer
+	if still_lazy {
+		lb.b = make([]byte, len(lb.s));
+		copy(lb.b, lb.s[:lb.w]);
+	}
+
+	lb.b[lb.w] = c;
+	lb.w += 1;
+}
+// lazy_buffer_string returns the buffer's contents as a string
+// If a byte slice was allocated it is returned directly (no copy);
+// otherwise the written prefix of the original string is cloned
+@(private)
+lazy_buffer_string :: proc(lb: ^Lazy_Buffer) -> string {
+	if lb.b != nil {
+		return string(lb.b[:lb.w]);
+	}
+	return strings.clone(lb.s[:lb.w]);
 }

+ 4 - 2
core/path/path_unix.odin

@@ -9,9 +9,11 @@ import "core:strings"
 
 MAX :: 4096; // @note(bp): apparently PATH_MAX is bullshit
 
-SEPARATOR        :: '/';
-SEPARATOR_STRING :: "/";
+OS_SEPARATOR        :: '/'; // the Unix path separator as a character
+OS_SEPARATOR_STRING :: "/"; // the Unix path separator as a string
 
+OS_SEPARATORS :: `/`; // every separator byte accepted on this platform (passed to strings.last_index_any)
+OS_SEPARATORS_ARRAY :: []string{`/`}; // the same separators as individual strings (passed to strings.split_multi)
 
 @(private)
 null_term :: proc(str: string) -> string {

+ 4 - 2
core/path/path_windows.odin

@@ -4,9 +4,11 @@ import "core:strings"
 import win32 "core:sys/windows"
 
 
-SEPARATOR        :: '\\';
-SEPARATOR_STRING :: "\\";
+OS_SEPARATOR        :: '\\'; // the Windows path separator as a character
+OS_SEPARATOR_STRING :: "\\"; // the Windows path separator as a string
 
+OS_SEPARATORS :: `/\`; // every separator byte accepted on this platform (passed to strings.last_index_any)
+OS_SEPARATORS_ARRAY :: []string{`/`, `\`}; // the same separators as individual strings (passed to strings.split_multi)
 
 @(private)
 null_term :: proc "contextless" (str: string) -> string {