match.odin 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. package filepath
  2. import "core:os"
  3. import "core:slice"
  4. import "core:strings"
  5. import "core:unicode/utf8"
  6. Match_Error :: enum {
  7. None,
  8. Syntax_Error,
  9. }
  10. // match states whether "name" matches the shell pattern
  11. // Pattern syntax is:
  12. // pattern:
  13. // {term}
  14. // term:
  15. // '*' matches any sequence of non-/ characters
  16. // '?' matches any single non-/ character
  17. // '[' ['^'] { character-range } ']'
  18. // character classification (cannot be empty)
  19. // c matches character c (c != '*', '?', '\\', '[')
  20. // '\\' c matches character c
  21. //
  22. // character-range
  23. // c matches character c (c != '\\', '-', ']')
  24. // '\\' c matches character c
  25. // lo '-' hi matches character c for lo <= c <= hi
  26. //
  27. // match requires that the pattern matches the entirety of the name, not just a substring
  28. // The only possible error returned is .Syntax_Error
  29. //
  30. // NOTE(bill): This is effectively the shell pattern matching system found
  31. //
  32. match :: proc(pattern, name: string) -> (matched: bool, err: Match_Error) {
  33. pattern, name := pattern, name
  34. pattern_loop: for len(pattern) > 0 {
  35. star: bool
  36. chunk: string
  37. star, chunk, pattern = scan_chunk(pattern)
  38. if star && chunk == "" {
  39. return !strings.contains(name, SEPARATOR_STRING), .None
  40. }
  41. t: string
  42. ok: bool
  43. t, ok, err = match_chunk(chunk, name)
  44. if ok && (len(t) == 0 || len(pattern) > 0) {
  45. name = t
  46. continue
  47. }
  48. if err != .None {
  49. return
  50. }
  51. if star {
  52. for i := 0; i < len(name) && name[i] != SEPARATOR; i += 1 {
  53. t, ok, err = match_chunk(chunk, name[i+1:])
  54. if ok {
  55. if len(pattern) == 0 && len(t) > 0 {
  56. continue
  57. }
  58. name = t
  59. continue pattern_loop
  60. }
  61. if err != .None {
  62. return
  63. }
  64. }
  65. }
  66. return false, .None
  67. }
  68. return len(name) == 0, .None
  69. }
  70. @(private="file")
  71. scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) {
  72. pattern := pattern
  73. for len(pattern) > 0 && pattern[0] == '*' {
  74. pattern = pattern[1:]
  75. star = true
  76. }
  77. in_range, i := false, 0
  78. scan_loop: for i = 0; i < len(pattern); i += 1 {
  79. switch pattern[i] {
  80. case '\\':
  81. when ODIN_OS != .Windows {
  82. if i+1 < len(pattern) {
  83. i += 1
  84. }
  85. }
  86. case '[':
  87. in_range = true
  88. case ']':
  89. in_range = false
  90. case '*':
  91. in_range or_break scan_loop
  92. }
  93. }
  94. return star, pattern[:i], pattern[i:]
  95. }
  96. @(private="file")
  97. match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Match_Error) {
  98. chunk, s := chunk, s
  99. for len(chunk) > 0 {
  100. if len(s) == 0 {
  101. return
  102. }
  103. switch chunk[0] {
  104. case '[':
  105. r, w := utf8.decode_rune_in_string(s)
  106. s = s[w:]
  107. chunk = chunk[1:]
  108. is_negated := false
  109. if len(chunk) > 0 && chunk[0] == '^' {
  110. is_negated = true
  111. chunk = chunk[1:]
  112. }
  113. match := false
  114. range_count := 0
  115. for {
  116. if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 {
  117. chunk = chunk[1:]
  118. break
  119. }
  120. lo, hi: rune
  121. if lo, chunk, err = get_escape(chunk); err != .None {
  122. return
  123. }
  124. hi = lo
  125. if chunk[0] == '-' {
  126. if hi, chunk, err = get_escape(chunk[1:]); err != .None {
  127. return
  128. }
  129. }
  130. if lo <= r && r <= hi {
  131. match = true
  132. }
  133. range_count += 1
  134. }
  135. if match == is_negated {
  136. return
  137. }
  138. case '?':
  139. if s[0] == SEPARATOR {
  140. return
  141. }
  142. _, w := utf8.decode_rune_in_string(s)
  143. s = s[w:]
  144. chunk = chunk[1:]
  145. case '\\':
  146. when ODIN_OS != .Windows {
  147. chunk = chunk[1:]
  148. if len(chunk) == 0 {
  149. err = .Syntax_Error
  150. return
  151. }
  152. }
  153. fallthrough
  154. case:
  155. if chunk[0] != s[0] {
  156. return
  157. }
  158. s = s[1:]
  159. chunk = chunk[1:]
  160. }
  161. }
  162. return s, true, .None
  163. }
  164. @(private="file")
  165. get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Error) {
  166. if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
  167. err = .Syntax_Error
  168. return
  169. }
  170. chunk := chunk
  171. if chunk[0] == '\\' && ODIN_OS != .Windows {
  172. chunk = chunk[1:]
  173. if len(chunk) == 0 {
  174. err = .Syntax_Error
  175. return
  176. }
  177. }
  178. w: int
  179. r, w = utf8.decode_rune_in_string(chunk)
  180. if r == utf8.RUNE_ERROR && w == 1 {
  181. err = .Syntax_Error
  182. }
  183. next_chunk = chunk[w:]
  184. if len(next_chunk) == 0 {
  185. err = .Syntax_Error
  186. }
  187. return
  188. }
  189. // glob returns the names of all files matching pattern or nil if there are no matching files
  190. // The syntax of patterns is the same as "match".
  191. // The pattern may describe hierarchical names such as /usr/*/bin (assuming '/' is a separator)
  192. //
  193. // glob ignores file system errors
  194. //
  195. glob :: proc(pattern: string, allocator := context.allocator) -> (matches: []string, err: Match_Error) {
  196. context.allocator = allocator
  197. if !has_meta(pattern) {
  198. // TODO(bill): os.lstat on here to check for error
  199. m := make([]string, 1)
  200. m[0] = pattern
  201. return m[:], .None
  202. }
  203. dir, file := split(pattern)
  204. volume_len := 0
  205. when ODIN_OS == .Windows {
  206. temp_buf: [8]byte
  207. volume_len, dir = clean_glob_path_windows(dir, temp_buf[:])
  208. } else {
  209. dir = clean_glob_path(dir)
  210. }
  211. if !has_meta(dir[volume_len:]) {
  212. m, e := _glob(dir, file, nil)
  213. return m[:], e
  214. }
  215. m: []string
  216. m, err = glob(dir)
  217. if err != .None {
  218. return
  219. }
  220. dmatches := make([dynamic]string, 0, 0)
  221. for d in m {
  222. dmatches, err = _glob(d, file, &dmatches)
  223. if err != .None {
  224. break
  225. }
  226. }
  227. if len(dmatches) > 0 {
  228. matches = dmatches[:]
  229. }
  230. return
  231. }
  232. // Internal implementation of `glob`, not meant to be used by the user. Prefer `glob`.
  233. _glob :: proc(dir, pattern: string, matches: ^[dynamic]string, allocator := context.allocator) -> (m: [dynamic]string, e: Match_Error) {
  234. context.allocator = allocator
  235. if matches != nil {
  236. m = matches^
  237. } else {
  238. m = make([dynamic]string, 0, 0)
  239. }
  240. d, derr := os.open(dir, os.O_RDONLY)
  241. if derr != 0 {
  242. return
  243. }
  244. defer os.close(d)
  245. {
  246. file_info, ferr := os.fstat(d)
  247. defer os.file_info_delete(file_info)
  248. if ferr != 0 {
  249. return
  250. }
  251. if !file_info.is_dir {
  252. return
  253. }
  254. }
  255. fis, _ := os.read_dir(d, -1)
  256. slice.sort_by(fis, proc(a, b: os.File_Info) -> bool {
  257. return a.name < b.name
  258. })
  259. defer {
  260. for fi in fis {
  261. os.file_info_delete(fi)
  262. }
  263. delete(fis)
  264. }
  265. for fi in fis {
  266. n := fi.name
  267. matched := match(pattern, n) or_return
  268. if matched {
  269. append(&m, join({dir, n}))
  270. }
  271. }
  272. return
  273. }
  274. @(private)
  275. has_meta :: proc(path: string) -> bool {
  276. when ODIN_OS == .Windows {
  277. CHARS :: `*?[`
  278. } else {
  279. CHARS :: `*?[\`
  280. }
  281. return strings.contains_any(path, CHARS)
  282. }
  283. @(private)
  284. clean_glob_path :: proc(path: string) -> string {
  285. switch path {
  286. case "":
  287. return "."
  288. case SEPARATOR_STRING:
  289. return path
  290. }
  291. return path[:len(path)-1]
  292. }
  293. @(private)
  294. clean_glob_path_windows :: proc(path: string, temp_buf: []byte) -> (prefix_len: int, cleaned: string) {
  295. vol_len := volume_name_len(path)
  296. switch {
  297. case path == "":
  298. return 0, "."
  299. case vol_len+1 == len(path) && is_separator(path[len(path)-1]): // /, \, C:\, C:/
  300. return vol_len+1, path
  301. case vol_len == len(path) && len(path) == 2: // C:
  302. copy(temp_buf[:], path)
  303. temp_buf[2] = '.'
  304. return vol_len, string(temp_buf[:3])
  305. }
  306. if vol_len >= len(path) {
  307. vol_len = len(path) -1
  308. }
  309. return vol_len, path[:len(path)-1]
  310. }