match.odin 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. #+build !wasi
  2. #+build !js
  3. package filepath
  4. import "core:os"
  5. import "core:slice"
  6. import "core:strings"
  7. import "core:unicode/utf8"
  8. Match_Error :: enum {
  9. None,
  10. Syntax_Error,
  11. }
  12. // match states whether "name" matches the shell pattern
  13. // Pattern syntax is:
  14. // pattern:
  15. // {term}
  16. // term:
  17. // '*' matches any sequence of non-/ characters
  18. // '?' matches any single non-/ character
  19. // '[' ['^'] { character-range } ']'
  20. // character classification (cannot be empty)
  21. // c matches character c (c != '*', '?', '\\', '[')
  22. // '\\' c matches character c
  23. //
  24. // character-range
  25. // c matches character c (c != '\\', '-', ']')
  26. // '\\' c matches character c
  27. // lo '-' hi matches character c for lo <= c <= hi
  28. //
  29. // match requires that the pattern matches the entirety of the name, not just a substring
  30. // The only possible error returned is .Syntax_Error
  31. //
  32. // NOTE(bill): This is effectively the shell pattern matching system found
  33. //
  34. match :: proc(pattern, name: string) -> (matched: bool, err: Match_Error) {
  35. pattern, name := pattern, name
  36. pattern_loop: for len(pattern) > 0 {
  37. star: bool
  38. chunk: string
  39. star, chunk, pattern = scan_chunk(pattern)
  40. if star && chunk == "" {
  41. return !strings.contains(name, SEPARATOR_STRING), .None
  42. }
  43. t: string
  44. ok: bool
  45. t, ok, err = match_chunk(chunk, name)
  46. if ok && (len(t) == 0 || len(pattern) > 0) {
  47. name = t
  48. continue
  49. }
  50. if err != .None {
  51. return
  52. }
  53. if star {
  54. for i := 0; i < len(name) && name[i] != SEPARATOR; i += 1 {
  55. t, ok, err = match_chunk(chunk, name[i+1:])
  56. if ok {
  57. if len(pattern) == 0 && len(t) > 0 {
  58. continue
  59. }
  60. name = t
  61. continue pattern_loop
  62. }
  63. if err != .None {
  64. return
  65. }
  66. }
  67. }
  68. return false, .None
  69. }
  70. return len(name) == 0, .None
  71. }
  72. @(private="file")
  73. scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) {
  74. pattern := pattern
  75. for len(pattern) > 0 && pattern[0] == '*' {
  76. pattern = pattern[1:]
  77. star = true
  78. }
  79. in_range, i := false, 0
  80. scan_loop: for i = 0; i < len(pattern); i += 1 {
  81. switch pattern[i] {
  82. case '\\':
  83. when ODIN_OS != .Windows {
  84. if i+1 < len(pattern) {
  85. i += 1
  86. }
  87. }
  88. case '[':
  89. in_range = true
  90. case ']':
  91. in_range = false
  92. case '*':
  93. in_range or_break scan_loop
  94. }
  95. }
  96. return star, pattern[:i], pattern[i:]
  97. }
  98. @(private="file")
  99. match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Match_Error) {
  100. chunk, s := chunk, s
  101. for len(chunk) > 0 {
  102. if len(s) == 0 {
  103. return
  104. }
  105. switch chunk[0] {
  106. case '[':
  107. r, w := utf8.decode_rune_in_string(s)
  108. s = s[w:]
  109. chunk = chunk[1:]
  110. is_negated := false
  111. if len(chunk) > 0 && chunk[0] == '^' {
  112. is_negated = true
  113. chunk = chunk[1:]
  114. }
  115. match := false
  116. range_count := 0
  117. for {
  118. if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 {
  119. chunk = chunk[1:]
  120. break
  121. }
  122. lo, hi: rune
  123. if lo, chunk, err = get_escape(chunk); err != .None {
  124. return
  125. }
  126. hi = lo
  127. if chunk[0] == '-' {
  128. if hi, chunk, err = get_escape(chunk[1:]); err != .None {
  129. return
  130. }
  131. }
  132. if lo <= r && r <= hi {
  133. match = true
  134. }
  135. range_count += 1
  136. }
  137. if match == is_negated {
  138. return
  139. }
  140. case '?':
  141. if s[0] == SEPARATOR {
  142. return
  143. }
  144. _, w := utf8.decode_rune_in_string(s)
  145. s = s[w:]
  146. chunk = chunk[1:]
  147. case '\\':
  148. when ODIN_OS != .Windows {
  149. chunk = chunk[1:]
  150. if len(chunk) == 0 {
  151. err = .Syntax_Error
  152. return
  153. }
  154. }
  155. fallthrough
  156. case:
  157. if chunk[0] != s[0] {
  158. return
  159. }
  160. s = s[1:]
  161. chunk = chunk[1:]
  162. }
  163. }
  164. return s, true, .None
  165. }
  166. @(private="file")
  167. get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Error) {
  168. if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
  169. err = .Syntax_Error
  170. return
  171. }
  172. chunk := chunk
  173. if chunk[0] == '\\' && ODIN_OS != .Windows {
  174. chunk = chunk[1:]
  175. if len(chunk) == 0 {
  176. err = .Syntax_Error
  177. return
  178. }
  179. }
  180. w: int
  181. r, w = utf8.decode_rune_in_string(chunk)
  182. if r == utf8.RUNE_ERROR && w == 1 {
  183. err = .Syntax_Error
  184. }
  185. next_chunk = chunk[w:]
  186. if len(next_chunk) == 0 {
  187. err = .Syntax_Error
  188. }
  189. return
  190. }
  191. // glob returns the names of all files matching pattern or nil if there are no matching files
  192. // The syntax of patterns is the same as "match".
  193. // The pattern may describe hierarchical names such as /usr/*/bin (assuming '/' is a separator)
  194. //
  195. // glob ignores file system errors
  196. //
  197. glob :: proc(pattern: string, allocator := context.allocator) -> (matches: []string, err: Match_Error) {
  198. context.allocator = allocator
  199. if !has_meta(pattern) {
  200. // TODO(bill): os.lstat on here to check for error
  201. m := make([]string, 1)
  202. m[0] = pattern
  203. return m[:], .None
  204. }
  205. dir, file := split(pattern)
  206. volume_len := 0
  207. when ODIN_OS == .Windows {
  208. temp_buf: [8]byte
  209. volume_len, dir = clean_glob_path_windows(dir, temp_buf[:])
  210. } else {
  211. dir = clean_glob_path(dir)
  212. }
  213. if !has_meta(dir[volume_len:]) {
  214. m, e := _glob(dir, file, nil)
  215. return m[:], e
  216. }
  217. m: []string
  218. m, err = glob(dir)
  219. if err != .None {
  220. return
  221. }
  222. defer {
  223. for s in m {
  224. delete(s)
  225. }
  226. delete(m)
  227. }
  228. dmatches := make([dynamic]string, 0, 0)
  229. for d in m {
  230. dmatches, err = _glob(d, file, &dmatches)
  231. if err != .None {
  232. break
  233. }
  234. }
  235. if len(dmatches) > 0 {
  236. matches = dmatches[:]
  237. }
  238. return
  239. }
  240. // Internal implementation of `glob`, not meant to be used by the user. Prefer `glob`.
  241. _glob :: proc(dir, pattern: string, matches: ^[dynamic]string, allocator := context.allocator) -> (m: [dynamic]string, e: Match_Error) {
  242. context.allocator = allocator
  243. if matches != nil {
  244. m = matches^
  245. } else {
  246. m = make([dynamic]string, 0, 0)
  247. }
  248. d, derr := os.open(dir, os.O_RDONLY)
  249. if derr != nil {
  250. return
  251. }
  252. defer os.close(d)
  253. {
  254. file_info, ferr := os.fstat(d)
  255. defer os.file_info_delete(file_info)
  256. if ferr != nil {
  257. return
  258. }
  259. if !file_info.is_dir {
  260. return
  261. }
  262. }
  263. fis, _ := os.read_dir(d, -1)
  264. slice.sort_by(fis, proc(a, b: os.File_Info) -> bool {
  265. return a.name < b.name
  266. })
  267. defer {
  268. for fi in fis {
  269. os.file_info_delete(fi)
  270. }
  271. delete(fis)
  272. }
  273. for fi in fis {
  274. n := fi.name
  275. matched := match(pattern, n) or_return
  276. if matched {
  277. append(&m, join({dir, n}))
  278. }
  279. }
  280. return
  281. }
  282. @(private)
  283. has_meta :: proc(path: string) -> bool {
  284. when ODIN_OS == .Windows {
  285. CHARS :: `*?[`
  286. } else {
  287. CHARS :: `*?[\`
  288. }
  289. return strings.contains_any(path, CHARS)
  290. }
  291. @(private)
  292. clean_glob_path :: proc(path: string) -> string {
  293. switch path {
  294. case "":
  295. return "."
  296. case SEPARATOR_STRING:
  297. return path
  298. }
  299. return path[:len(path)-1]
  300. }
  301. @(private)
  302. clean_glob_path_windows :: proc(path: string, temp_buf: []byte) -> (prefix_len: int, cleaned: string) {
  303. vol_len := volume_name_len(path)
  304. switch {
  305. case path == "":
  306. return 0, "."
  307. case vol_len+1 == len(path) && is_separator(path[len(path)-1]): // /, \, C:\, C:/
  308. return vol_len+1, path
  309. case vol_len == len(path) && len(path) == 2: // C:
  310. copy(temp_buf[:], path)
  311. temp_buf[2] = '.'
  312. return vol_len, string(temp_buf[:3])
  313. }
  314. if vol_len >= len(path) {
  315. vol_len = len(path) -1
  316. }
  317. return vol_len, path[:len(path)-1]
  318. }