path.odin 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. // The path/filepath package uses either forward slashes or backslashes depending on the operating system
  2. // To process paths such as URLs that depend on forward slashes regardless of the OS, use the path package
  3. package filepath
  4. import "core:strings"
  5. SEPARATOR_CHARS :: `/\`
  6. // is_separator checks whether the byte is a valid separator character
  7. is_separator :: proc(c: byte) -> bool {
  8. switch c {
  9. case '/': return true
  10. case '\\': return ODIN_OS == .Windows
  11. }
  12. return false
  13. }
  14. @(private)
  15. is_slash :: proc(c: byte) -> bool {
  16. return c == '\\' || c == '/'
  17. }
  18. // Splits path immediate following the last separator; separating the path into a directory and file.
  19. // If no separator is found, `dir` will be empty and `path` set to `path`.
  20. split :: proc(path: string) -> (dir, file: string) {
  21. vol := volume_name(path)
  22. i := len(path) - 1
  23. for i >= len(vol) && !is_separator(path[i]) {
  24. i -= 1
  25. }
  26. return path[:i+1], path[i+1:]
  27. }
  28. /*
  29. Returns leading volume name.
  30. e.g.
  31. "C:\foo\bar\baz" will return "C:" on Windows.
  32. Everything else will be "".
  33. */
  34. volume_name :: proc(path: string) -> string {
  35. return path[:volume_name_len(path)]
  36. }
  37. // Returns the length of the volume name in bytes.
  38. volume_name_len :: proc(path: string) -> int {
  39. if ODIN_OS == .Windows {
  40. if len(path) < 2 {
  41. return 0
  42. }
  43. c := path[0]
  44. if path[1] == ':' {
  45. switch c {
  46. case 'a'..='z', 'A'..='Z':
  47. return 2
  48. }
  49. }
  50. // URL: https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
  51. if l := len(path); l >= 5 && is_slash(path[0]) && is_slash(path[1]) &&
  52. !is_slash(path[2]) && path[2] != '.' {
  53. for n := 3; n < l-1; n += 1 {
  54. if is_slash(path[n]) {
  55. n += 1
  56. if !is_slash(path[n]) {
  57. if path[n] == '.' {
  58. break
  59. }
  60. }
  61. for ; n < l; n += 1 {
  62. if is_slash(path[n]) {
  63. break
  64. }
  65. }
  66. return n
  67. }
  68. break
  69. }
  70. }
  71. }
  72. return 0
  73. }
  74. /*
  75. Gets the file name and extension from a path.
  76. e.g.
  77. 'path/to/name.tar.gz' -> 'name.tar.gz'
  78. 'path/to/name.txt' -> 'name.txt'
  79. 'path/to/name' -> 'name'
  80. Returns "." if the path is an empty string.
  81. */
  82. base :: proc(path: string) -> string {
  83. if path == "" {
  84. return "."
  85. }
  86. path := path
  87. for len(path) > 0 && is_separator(path[len(path)-1]) {
  88. path = path[:len(path)-1]
  89. }
  90. path = path[volume_name_len(path):]
  91. i := len(path)-1
  92. for i >= 0 && !is_separator(path[i]) {
  93. i -= 1
  94. }
  95. if i >= 0 {
  96. path = path[i+1:]
  97. }
  98. if path == "" {
  99. return SEPARATOR_STRING
  100. }
  101. return path
  102. }
  103. /*
  104. Gets the name of a file from a path.
  105. The stem of a file is such that stem(path) + ext(path) = base(path).
  106. Only the last dot is considered when splitting the file extension.
  107. See `short_stem`.
  108. e.g.
  109. 'name.tar.gz' -> 'name.tar'
  110. 'name.txt' -> 'name'
  111. Returns an empty string if there is no stem. e.g: '.gitignore'.
  112. Returns an empty string if there's a trailing path separator.
  113. */
  114. stem :: proc(path: string) -> string {
  115. if len(path) > 0 && is_separator(path[len(path) - 1]) {
  116. // NOTE(tetra): Trailing separator
  117. return ""
  118. }
  119. // NOTE(tetra): Get the basename
  120. path := path
  121. if i := strings.last_index_any(path, SEPARATOR_CHARS); i != -1 {
  122. path = path[i+1:]
  123. }
  124. if i := strings.last_index_byte(path, '.'); i != -1 {
  125. return path[:i]
  126. }
  127. return path
  128. }
  129. /*
  130. Gets the name of a file from a path.
  131. The short stem is such that short_stem(path) + long_ext(path) = base(path).
  132. The first dot is used to split off the file extension, unlike `stem` which uses the last dot.
  133. e.g.
  134. 'name.tar.gz' -> 'name'
  135. 'name.txt' -> 'name'
  136. Returns an empty string if there is no stem. e.g: '.gitignore'.
  137. Returns an empty string if there's a trailing path separator.
  138. */
  139. short_stem :: proc(path: string) -> string {
  140. s := stem(path)
  141. if i := strings.index_byte(s, '.'); i != -1 {
  142. return s[:i]
  143. }
  144. return s
  145. }
  146. /*
  147. Gets the file extension from a path, including the dot.
  148. The file extension is such that stem(path) + ext(path) = base(path).
  149. Only the last dot is considered when splitting the file extension.
  150. See `long_ext`.
  151. e.g.
  152. 'name.tar.gz' -> '.gz'
  153. 'name.txt' -> '.txt'
  154. Returns an empty string if there is no dot.
  155. Returns an empty string if there is a trailing path separator.
  156. */
  157. ext :: proc(path: string) -> string {
  158. for i := len(path)-1; i >= 0 && !is_separator(path[i]); i -= 1 {
  159. if path[i] == '.' {
  160. return path[i:]
  161. }
  162. }
  163. return ""
  164. }
  165. /*
  166. Gets the file extension from a path, including the dot.
  167. The long file extension is such that short_stem(path) + long_ext(path) = base(path).
  168. The first dot is used to split off the file extension, unlike `ext` which uses the last dot.
  169. e.g.
  170. 'name.tar.gz' -> '.tar.gz'
  171. 'name.txt' -> '.txt'
  172. Returns an empty string if there is no dot.
  173. Returns an empty string if there is a trailing path separator.
  174. */
  175. long_ext :: proc(path: string) -> string {
  176. if len(path) > 0 && is_separator(path[len(path) - 1]) {
  177. // NOTE(tetra): Trailing separator
  178. return ""
  179. }
  180. // NOTE(tetra): Get the basename
  181. path := path
  182. if i := strings.last_index_any(path, SEPARATOR_CHARS); i != -1 {
  183. path = path[i+1:]
  184. }
  185. if i := strings.index_byte(path, '.'); i != -1 {
  186. return path[i:]
  187. }
  188. return ""
  189. }
  190. /*
  191. Returns the shortest path name equivalent to `path` through solely lexical processing.
  192. It applies the folliwng rules until none of them can be applied:
  193. * Replace multiple separators with a single one
  194. * Remove each current directory (`.`) path name element
  195. * Remove each inner parent directory (`..`) path and the preceding paths
  196. * Remove `..` that begin at the root of a path
  197. * All possible separators are replaced with the OS specific separator
  198. The return path ends in a slash only if it represents the root of a directory (`C:\` on Windows and `/` on *nix systems).
  199. If the result of the path is an empty string, the returned path with be `"."`.
  200. */
  201. clean :: proc(path: string, allocator := context.allocator) -> string {
  202. context.allocator = allocator
  203. path := path
  204. original_path := path
  205. vol_len := volume_name_len(path)
  206. path = path[vol_len:]
  207. if path == "" {
  208. if vol_len > 1 && original_path[1] != ':' {
  209. s, ok := from_slash(original_path)
  210. if !ok {
  211. s = strings.clone(s)
  212. }
  213. return s
  214. }
  215. return strings.concatenate({original_path, "."})
  216. }
  217. rooted := is_separator(path[0])
  218. n := len(path)
  219. out := &Lazy_Buffer{
  220. s = path,
  221. vol_and_path = original_path,
  222. vol_len = vol_len,
  223. }
  224. defer lazy_buffer_destroy(out)
  225. r, dot_dot := 0, 0
  226. if rooted {
  227. lazy_buffer_append(out, SEPARATOR)
  228. r, dot_dot = 1, 1
  229. }
  230. for r < n {
  231. switch {
  232. case is_separator(path[r]):
  233. r += 1
  234. case path[r] == '.' && (r+1 == n || is_separator(path[r+1])):
  235. r += 1
  236. case path[r] == '.' && path[r+1] == '.' && (r+2 == n || is_separator(path[r+2])):
  237. r += 2
  238. switch {
  239. case out.w > dot_dot:
  240. out.w -= 1
  241. for out.w > dot_dot && !is_separator(lazy_buffer_index(out, out.w)) {
  242. out.w -= 1
  243. }
  244. case !rooted:
  245. if out.w > 0 {
  246. lazy_buffer_append(out, SEPARATOR)
  247. }
  248. lazy_buffer_append(out, '.')
  249. lazy_buffer_append(out, '.')
  250. dot_dot = out.w
  251. }
  252. case:
  253. if rooted && out.w != 1 || !rooted && out.w != 0 {
  254. lazy_buffer_append(out, SEPARATOR)
  255. }
  256. for ; r < n && !is_separator(path[r]); r += 1 {
  257. lazy_buffer_append(out, path[r])
  258. }
  259. }
  260. }
  261. if out.w == 0 {
  262. lazy_buffer_append(out, '.')
  263. }
  264. s := lazy_buffer_string(out)
  265. cleaned, new_allocation := from_slash(s)
  266. if new_allocation {
  267. delete(s)
  268. }
  269. return cleaned
  270. }
  271. // Returns the result of replacing each forward slash `/` character in the path with the separate OS specific character.
  272. from_slash :: proc(path: string, allocator := context.allocator) -> (new_path: string, new_allocation: bool) {
  273. if SEPARATOR == '/' {
  274. return path, false
  275. }
  276. return strings.replace_all(path, "/", SEPARATOR_STRING, allocator)
  277. }
  278. // Returns the result of replacing each OS specific separator with a forward slash `/` character.
  279. to_slash :: proc(path: string, allocator := context.allocator) -> (new_path: string, new_allocation: bool) {
  280. if SEPARATOR == '/' {
  281. return path, false
  282. }
  283. return strings.replace_all(path, SEPARATOR_STRING, "/", allocator)
  284. }
  285. Relative_Error :: enum {
  286. None,
  287. Cannot_Relate,
  288. }
  289. /*
  290. Returns a relative path that is lexically equivalent to the `target_path` when joined with the `base_path` with an OS specific separator.
  291. e.g. `join(base_path, rel(base_path, target_path))` is equivalent to `target_path`
  292. On failure, the `Relative_Error` will be state it cannot compute the necessary relative path.
  293. */
  294. rel :: proc(base_path, target_path: string, allocator := context.allocator) -> (string, Relative_Error) {
  295. context.allocator = allocator
  296. base_clean, target_clean := clean(base_path), clean(target_path)
  297. delete_target := true
  298. defer {
  299. if delete_target {
  300. delete(target_clean)
  301. }
  302. delete(base_clean)
  303. }
  304. if strings.equal_fold(target_clean, base_clean) {
  305. return strings.clone("."), .None
  306. }
  307. base_vol, target_vol := volume_name(base_path), volume_name(target_path)
  308. base := base_clean[len(base_vol):]
  309. target := target_clean[len(target_vol):]
  310. if base == "." {
  311. base = ""
  312. }
  313. base_slashed := len(base) > 0 && base[0] == SEPARATOR
  314. target_slashed := len(target) > 0 && target[0] == SEPARATOR
  315. if base_slashed != target_slashed || !strings.equal_fold(base_vol, target_vol) {
  316. return "", .Cannot_Relate
  317. }
  318. bl, tl := len(base), len(target)
  319. b0, bi, t0, ti: int
  320. for {
  321. for bi < bl && base[bi] != SEPARATOR {
  322. bi += 1
  323. }
  324. for ti < tl && target[ti] != SEPARATOR {
  325. ti += 1
  326. }
  327. strings.equal_fold(target[t0:ti], base[b0:bi]) or_break
  328. if bi < bl {
  329. bi += 1
  330. }
  331. if ti < tl {
  332. ti += 1
  333. }
  334. b0, t0 = bi, ti
  335. }
  336. if base[b0:bi] == ".." {
  337. return "", .Cannot_Relate
  338. }
  339. if b0 != bl {
  340. seps := strings.count(base[b0:bl], SEPARATOR_STRING)
  341. size := 2 + seps*3
  342. if tl != t0 {
  343. size += 1 + tl - t0
  344. }
  345. buf := make([]byte, size)
  346. n := copy(buf, "..")
  347. for _ in 0..<seps {
  348. buf[n] = SEPARATOR
  349. copy(buf[n+1:], "..")
  350. n += 3
  351. }
  352. if t0 != tl {
  353. buf[n] = SEPARATOR
  354. copy(buf[n+1:], target[t0:])
  355. }
  356. return string(buf), .None
  357. }
  358. delete_target = false
  359. return target[t0:], .None
  360. }
  361. /*
  362. Returns all but the last element path, usually the path's directory. Once the final element has been removed,
  363. `dir` calls `clean` on the path and trailing separators are removed. If the path consists purely of separators,
  364. then `"."` is returned.
  365. */
  366. dir :: proc(path: string, allocator := context.allocator) -> string {
  367. context.allocator = allocator
  368. vol := volume_name(path)
  369. i := len(path) - 1
  370. for i >= len(vol) && !is_separator(path[i]) {
  371. i -= 1
  372. }
  373. dir := clean(path[len(vol) : i+1])
  374. defer delete(dir)
  375. if dir == "." && len(vol) > 2 {
  376. return strings.clone(vol)
  377. }
  378. return strings.concatenate({vol, dir})
  379. }
  380. // Splits the PATH-like `path` string, returning an array of its separated components (delete after use).
  381. // For Windows the separator is `;`, for Unix it's `:`.
  382. // An empty string returns nil. A non-empty string with no separators returns a 1-element array.
  383. // Any empty components will be included, e.g. `a::b` will return a 3-element array, as will `::`.
  384. // Separators within pairs of double-quotes will be ignored and stripped, e.g. `"a:b"c:d` will return []{`a:bc`, `d`}.
  385. split_list :: proc(path: string, allocator := context.allocator) -> []string {
  386. if path == "" {
  387. return nil
  388. }
  389. start: int
  390. quote: bool
  391. start, quote = 0, false
  392. count := 0
  393. for i := 0; i < len(path); i += 1 {
  394. c := path[i]
  395. switch {
  396. case c == '"':
  397. quote = !quote
  398. case c == LIST_SEPARATOR && !quote:
  399. count += 1
  400. }
  401. }
  402. start, quote = 0, false
  403. list := make([]string, count + 1, allocator)
  404. index := 0
  405. for i := 0; i < len(path); i += 1 {
  406. c := path[i]
  407. switch {
  408. case c == '"':
  409. quote = !quote
  410. case c == LIST_SEPARATOR && !quote:
  411. list[index] = path[start:i]
  412. index += 1
  413. start = i + 1
  414. }
  415. }
  416. assert(index == count)
  417. list[index] = path[start:]
  418. for s0, i in list {
  419. s, new := strings.replace_all(s0, `"`, ``, allocator)
  420. if !new {
  421. s = strings.clone(s, allocator)
  422. }
  423. list[i] = s
  424. }
  425. return list
  426. }
  427. /*
  428. Lazy_Buffer is a lazily made path buffer
  429. When it does allocate, it uses the context.allocator
  430. */
  431. @(private)
  432. Lazy_Buffer :: struct {
  433. s: string,
  434. b: []byte,
  435. w: int, // write index
  436. vol_and_path: string,
  437. vol_len: int,
  438. }
  439. @(private)
  440. lazy_buffer_index :: proc(lb: ^Lazy_Buffer, i: int) -> byte {
  441. if lb.b != nil {
  442. return lb.b[i]
  443. }
  444. return lb.s[i]
  445. }
  446. @(private)
  447. lazy_buffer_append :: proc(lb: ^Lazy_Buffer, c: byte) {
  448. if lb.b == nil {
  449. if lb.w < len(lb.s) && lb.s[lb.w] == c {
  450. lb.w += 1
  451. return
  452. }
  453. lb.b = make([]byte, len(lb.s))
  454. copy(lb.b, lb.s[:lb.w])
  455. }
  456. lb.b[lb.w] = c
  457. lb.w += 1
  458. }
  459. @(private)
  460. lazy_buffer_string :: proc(lb: ^Lazy_Buffer) -> string {
  461. if lb.b == nil {
  462. return strings.clone(lb.vol_and_path[:lb.vol_len+lb.w])
  463. }
  464. x := lb.vol_and_path[:lb.vol_len]
  465. y := string(lb.b[:lb.w])
  466. z := make([]byte, len(x)+len(y))
  467. copy(z, x)
  468. copy(z[len(x):], y)
  469. return string(z)
  470. }
  471. @(private)
  472. lazy_buffer_destroy :: proc(lb: ^Lazy_Buffer) {
  473. delete(lb.b)
  474. lb^ = {}
  475. }