parse.odin 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. package text_template_parse
  2. import "core:fmt"
  3. import "core:mem"
  4. import "core:mem/virtual"
  5. import "core:strconv"
  6. import "../scan"
  7. Error :: enum {
  8. None,
  9. Unexpected_Token,
  10. Unexpected_EOF,
  11. Expected_End,
  12. Invalid_Node,
  13. Invalid_Character,
  14. Invalid_Number,
  15. Invalid_String,
  16. Empty_Command,
  17. Missing_Value,
  18. Non_Executable_Command,
  19. Undefined_Variable,
  20. Unexpected_Operand,
  21. Invalid_For_Initialization,
  22. Too_Many_Declarations,
  23. }
  24. Tree :: struct {
  25. general_allocator: mem.Allocator,
  26. arena: virtual.Growing_Arena,
  27. name: string,
  28. tokens: []Token, // general_allocator
  29. root: ^Node_List,
  30. input: string,
  31. offset: uint,
  32. for_loop_depth: uint,
  33. vars: [dynamic]string,
  34. }
  35. @(require_results)
  36. errorf :: proc(t: ^Tree, err: Error, format: string, args: ..any) -> Error {
  37. if err != nil {
  38. fmt.eprintf(format, ..args)
  39. fmt.eprintln()
  40. }
  41. return err
  42. }
  43. @(require_results)
  44. unexpected_token :: proc(t: ^Tree, token: Token) -> Error {
  45. return errorf(t, .Unexpected_Token, "unexpected token: %s", token.value)
  46. }
  47. peek :: proc(t: ^Tree, n: uint = 0) -> Token {
  48. if t.offset+n < len(t.tokens) {
  49. return t.tokens[t.offset+n]
  50. }
  51. return Token{.EOF, "", Pos(len(t.input)), 0}
  52. }
  53. next :: proc(t: ^Tree) -> (token: Token) {
  54. if t.offset < len(t.tokens) {
  55. token = t.tokens[t.offset]
  56. t.offset += 1
  57. return
  58. }
  59. return Token{.EOF, "", Pos(len(t.input)), 0}
  60. }
  61. backup :: proc(t: ^Tree, n: uint = 1) {
  62. if n > t.offset {
  63. t.offset = 0
  64. } else {
  65. t.offset -= n
  66. }
  67. }
  68. next_non_space :: proc(t: ^Tree) -> (token: Token) {
  69. for {
  70. token = next(t)
  71. if token.kind != .Space {
  72. break
  73. }
  74. }
  75. return
  76. }
  77. peek_non_space :: proc(t: ^Tree, offset: uint = 0) -> (token: Token) {
  78. i := offset
  79. for {
  80. if t.offset+i < len(t.tokens) {
  81. token = t.tokens[t.offset+i]
  82. } else {
  83. token = Token{.EOF, "", Pos(len(t.input)), 0}
  84. }
  85. if token.kind != .Space {
  86. break
  87. }
  88. i += 1
  89. }
  90. return
  91. }
  92. peek_after_non_space :: proc(t: ^Tree) -> (token: Token) {
  93. return peek_non_space(t, 1)
  94. }
  95. expect :: proc(t: ^Tree, expected: Token_Kind, ctx: string) -> (token: Token, err: Error) {
  96. token = next_non_space(t)
  97. if token.kind != expected {
  98. err = errorf(t, .Unexpected_Token, "unexpected token, expected %s, got %s", expected, token.value)
  99. }
  100. return
  101. }
  102. parse :: proc(input: string, left_delim, right_delim: string, emit_comments: bool = false, general_allocator := context.allocator) -> (t: ^Tree, err: Error) {
  103. t = new(Tree, general_allocator)
  104. t.general_allocator = general_allocator
  105. t.vars.allocator = general_allocator
  106. t.input = input
  107. s := scan.init(&scan.Scanner{}, t.name, input, left_delim, right_delim, emit_comments)
  108. s.tokens.allocator = t.general_allocator
  109. scan.run(s)
  110. t.tokens = s.tokens[:] // general_allocator
  111. context.allocator = virtual.arena_allocator(&t.arena)
  112. t.root = new_node(Node_List)
  113. for peek(t).kind != .EOF {
  114. if peek(t).kind == .Left_Delim && peek_after_non_space(t).kind == .Declare {
  115. // TODO
  116. continue
  117. }
  118. node := text_or_action(t) or_return
  119. if node != nil {
  120. append(&t.root.nodes, node)
  121. } else {
  122. break
  123. }
  124. }
  125. return
  126. }
  127. destroy_tree :: proc(t: ^Tree) {
  128. if t != nil {
  129. virtual.arena_destroy(&t.arena)
  130. ga := t.general_allocator
  131. delete(t.tokens, ga)
  132. delete(t.vars)
  133. free(t, ga)
  134. }
  135. }
  136. text_or_action :: proc(t: ^Tree) -> (node: ^Node, err: Error) {
  137. #partial switch token := next_non_space(t); token.kind {
  138. case .Text:
  139. n := new_node(Node_Text, token.pos)
  140. n.text = token.value
  141. return n, nil
  142. case .Left_Delim:
  143. return action(t)
  144. case .Comment:
  145. n := new_node(Node_Comment, token.pos)
  146. n.text = token.value
  147. return n, nil
  148. case:
  149. return nil, unexpected_token(t, token)
  150. }
  151. return nil, nil
  152. }
  153. parse_list :: proc(t: ^Tree) -> (list: ^Node_List, next: ^Node, err: Error) {
  154. list = new_node(Node_List, peek_non_space(t).pos)
  155. for peek_non_space(t).kind != .EOF {
  156. node := text_or_action(t) or_return
  157. #partial switch n in node.variant {
  158. case ^Node_Else:
  159. next = n
  160. return
  161. case ^Node_End:
  162. next = n
  163. return
  164. }
  165. append(&list.nodes, node)
  166. }
  167. err = errorf(t, .Unexpected_EOF, "unexpected EOF")
  168. return
  169. }
  170. parse_control :: proc(t: ^Tree, allow_else_if: bool, ctx: string) -> (pipe: ^Node_Pipeline, list, else_list: ^Node_List, err: Error) {
  171. pipe = pipeline(t, ctx, .Right_Delim) or_return
  172. if ctx == "for" {
  173. t.for_loop_depth += 1
  174. }
  175. next_node: ^Node
  176. list, next_node = parse_list(t) or_return
  177. if ctx == "for" {
  178. t.for_loop_depth -= 1
  179. }
  180. #partial switch n in next_node.variant {
  181. case ^Node_End:
  182. // We are done
  183. case ^Node_Else:
  184. if allow_else_if && peek(t).kind == .If {
  185. // {{if a}}...{{else if b}}...{{end}}
  186. // is translated into
  187. // {{if a}}...{{else}}{{if b}}...{{end}}{{end}}
  188. next(t)
  189. else_list = new_node(Node_List, next_node.pos)
  190. append(&else_list.nodes, parse_if(t) or_return)
  191. break
  192. }
  193. else_list, next_node = parse_list(t) or_return
  194. if _, ok := next_node.variant.(^Node_End); !ok {
  195. errorf(t, .Expected_End, "expected end") or_return
  196. }
  197. }
  198. return
  199. }
  200. // {{if pipeline}} list {{end}}
  201. // {{if pipeline}} list {{else}} list {{end}}
  202. // {{if pipeline}} list {{else if pipeline}} list {{end}}
  203. parse_if :: proc(t: ^Tree) -> (node: ^Node_If, err: Error) {
  204. pipe, list, else_list := parse_control(t, true, "if") or_return
  205. node = new_node(Node_If, pipe.pos)
  206. node.pipe = pipe
  207. node.list = list
  208. node.else_list = else_list
  209. return
  210. }
  211. // {{for pipeline}} list {{end}}
  212. // {{for pipeline}} list {{else}} list {{end}}
  213. parse_for :: proc(t: ^Tree) -> (node: ^Node_For, err: Error) {
  214. pipe, list, else_list := parse_control(t, false, "for") or_return
  215. node = new_node(Node_For, pipe.pos)
  216. node.pipe = pipe
  217. node.list = list
  218. node.else_list = else_list
  219. return
  220. }
  221. // {{with pipeline}} list {{end}}
  222. // {{with pipeline}} list {{else}} list {{end}}
  223. parse_with :: proc(t: ^Tree) -> (node: ^Node_With, err: Error) {
  224. pipe, list, else_list := parse_control(t, false, "with") or_return
  225. node = new_node(Node_With, pipe.pos)
  226. node.pipe = pipe
  227. node.list = list
  228. node.else_list = else_list
  229. return
  230. }
  231. // {{else}}
  232. parse_else :: proc(t: ^Tree) -> (node: ^Node_Else, err: Error) {
  233. p := peek_non_space(t)
  234. if p.kind == .If {
  235. node = new_node(Node_Else, p.pos)
  236. return
  237. }
  238. token := expect(t, .Right_Delim, "else") or_return
  239. node = new_node(Node_Else, token.pos)
  240. return
  241. }
  242. // {{end}}
  243. parse_end :: proc(t: ^Tree) -> (node: ^Node_End, err: Error) {
  244. token := expect(t, .Right_Delim, "end") or_return
  245. node = new_node(Node_End, token.pos)
  246. return
  247. }
  248. action :: proc(t: ^Tree) -> (^Node, Error) {
  249. // TODO actions
  250. #partial switch token := next_non_space(t); token.kind {
  251. case .If: return parse_if(t)
  252. case .For: return parse_for(t)
  253. case .With: return parse_with(t)
  254. case .Else: return parse_else(t)
  255. case .End: return parse_end(t)
  256. case .Block:
  257. return nil, .Invalid_Node
  258. case .Break:
  259. return nil, .Invalid_Node
  260. case .Continue:
  261. return nil, .Invalid_Node
  262. case .Include:
  263. return nil, .Invalid_Node
  264. }
  265. backup(t)
  266. return pipeline(t, "command", .Right_Delim)
  267. }
  268. pipeline :: proc(t: ^Tree, ctx: string, end: Token_Kind) -> (pipe: ^Node_Pipeline, err: Error) {
  269. pipe = new_node(Node_Pipeline, peek_non_space(t).pos)
  270. decls: for v := peek_non_space(t); v.kind == .Variable; /**/ {
  271. next_non_space(t)
  272. token_after_variable := peek(t) // could be space
  273. next := peek_non_space(t)
  274. switch {
  275. case next.kind == .Assign, next.kind == .Declare:
  276. pipe.is_assign = next.kind == .Assign
  277. next_non_space(t)
  278. append(&t.vars, v.value)
  279. append(&pipe.decl, parse_variable(t, v) or_return)
  280. case next.kind == .Char && next.value == ",":
  281. next_non_space(t)
  282. append(&t.vars, v.value)
  283. append(&pipe.decl, parse_variable(t, v) or_return)
  284. if ctx == "for" && len(pipe.decl) < 2 {
  285. #partial switch peek_non_space(t).kind {
  286. case .Variable, .Right_Delim, .Right_Paren:
  287. v = peek_non_space(t)
  288. continue decls
  289. }
  290. errorf(t, .Invalid_For_Initialization, "for can only initialize variables") or_return
  291. }
  292. errorf(t, .Too_Many_Declarations, "too many declarations in %s", ctx) or_return
  293. case token_after_variable.kind == .Space:
  294. backup(t, 2)
  295. case:
  296. backup(t, 1)
  297. }
  298. break decls
  299. }
  300. for {
  301. #partial switch tok := next_non_space(t); tok.kind {
  302. case end:
  303. if len(pipe.cmds) == 0 {
  304. errorf(t, .Missing_Value, "missing value for %s", ctx) or_return
  305. }
  306. for c, i in pipe.cmds[1:] {
  307. #partial switch n in c.variant {
  308. case ^Node_Bool, ^Node_Dot, ^Node_Nil, ^Node_Number, ^Node_String:
  309. errorf(t, .Non_Executable_Command, "non executable command in pipeline stage for %d", i+2) or_return
  310. }
  311. }
  312. return
  313. case .Bool, .Char, .Dot, .Field, .Identifier, .Operator, .Number, .Nil, .Raw_String, .String, .Variable, .Left_Paren:
  314. backup(t)
  315. append(&pipe.cmds, command(t) or_return)
  316. case:
  317. err = unexpected_token(t, tok)
  318. return
  319. }
  320. }
  321. }
  322. command :: proc(t: ^Tree) -> (cmd: ^Node_Command, err: Error) {
  323. cmd = new_node(Node_Command, peek_non_space(t).pos)
  324. loop: for {
  325. op := operand(t) or_return
  326. if op != nil {
  327. append(&cmd.args, op)
  328. }
  329. #partial switch token := next(t); token.kind {
  330. case .Space:
  331. continue loop
  332. case .Right_Delim, .Right_Paren:
  333. backup(t)
  334. case .Pipe:
  335. break loop
  336. case:
  337. errorf(t, .Unexpected_Operand, "unexpected operand %s", token.value) or_return
  338. }
  339. break loop
  340. }
  341. if len(cmd.args) == 0 {
  342. err = errorf(t, .Empty_Command, "empty command")
  343. }
  344. return
  345. }
  346. operand :: proc(t: ^Tree) -> (node: ^Node, err: Error) {
  347. node = term(t) or_return
  348. if node == nil {
  349. return
  350. }
  351. if p := peek(t); p.kind == .Field {
  352. chain := new_node(Node_Chain, p.pos)
  353. chain.node = node
  354. for peek(t).kind == .Field {
  355. chain_add(chain, next(t).value)
  356. }
  357. #partial switch n in node.variant {
  358. case ^Node_Field:
  359. f := new_node(Node_Field, chain.pos)
  360. resize(&chain.fields, len(chain.fields)+len(n.idents))
  361. copy(chain.fields[len(n.idents):], chain.fields[:])
  362. copy(chain.fields[:], n.idents)
  363. f.idents = chain.fields[:]
  364. node = f
  365. case:
  366. node = chain
  367. }
  368. }
  369. return
  370. }
  371. // literal (number, string, nil, boolean)
  372. // function (identifier)
  373. // operator (function-like thing)
  374. // .
  375. // .field
  376. // $
  377. // $
  378. // '(' pipeline ')'
  379. term :: proc(t: ^Tree) -> (^Node, Error) {
  380. #partial switch token := next_non_space(t); token.kind {
  381. case .Identifier:
  382. n := new_node(Node_Identifier, token.pos)
  383. n.ident = token.value
  384. return n, nil
  385. case .Operator:
  386. n := new_node(Node_Operator, token.pos)
  387. n.value = token.value
  388. return n, nil
  389. case .Dot: return new_node(Node_Dot, token.pos), nil
  390. case .Nil: return new_node(Node_Nil, token.pos), nil
  391. case .Variable:
  392. return parse_variable(t, token)
  393. case .Field:
  394. f := new_node(Node_Field, token.pos)
  395. f.idents = make([]string, 1)
  396. f.idents[0] = token.value[1:]
  397. return f, nil
  398. case .Bool:
  399. b := new_node(Node_Bool, token.pos)
  400. b.ok = token.value == "true"
  401. return b, nil
  402. case .Char, .Number:
  403. return parse_number(t, token)
  404. case .String, .Raw_String:
  405. text, _, ok := strconv.unquote_string(token.value)
  406. if !ok {
  407. return nil, errorf(t, .Invalid_String, "invalid string literal: %s", token.value)
  408. }
  409. n := new_node(Node_String, token.pos)
  410. n.quoted = token.value
  411. n.text = text
  412. return n, nil
  413. case .Left_Paren:
  414. return pipeline(t, "parenthesized pipeline", .Right_Paren)
  415. }
  416. backup(t)
  417. return nil, nil
  418. }
  419. parse_number :: proc(t: ^Tree, token: Token) -> (^Node_Number, Error) {
  420. text := token.value
  421. n := new_node(Node_Number, token.pos)
  422. n.text = text
  423. if token.kind == .Char {
  424. r, _, tail, ok := strconv.unquote_char(text[:], text[0])
  425. if !ok || tail != "" {
  426. return nil, errorf(t, .Invalid_Character, "invalid character literal: %s", text)
  427. }
  428. n.i = i64(r)
  429. n.u = u64(r)
  430. n.f = f64(r)
  431. return n, nil
  432. }
  433. if u, ok := strconv.parse_u64(text); ok {
  434. n.u = u
  435. }
  436. if i, ok := strconv.parse_i64(text); ok {
  437. n.i = i
  438. if i == 0 {
  439. n.u = 0
  440. }
  441. }
  442. if n.u == nil && n.i == nil {
  443. if f, ok := strconv.parse_f64(text); ok {
  444. n.f = f
  445. }
  446. }
  447. if n.u == nil && n.i == nil && n.f == nil {
  448. return nil, errorf(t, .Invalid_Number, "invalid number syntax: %q", text)
  449. }
  450. return n, nil
  451. }
  452. parse_variable :: proc(t: ^Tree, token: Token) -> (^Node_Variable, Error) {
  453. v := new_node(Node_Variable, token.pos)
  454. v.name = token.value
  455. for var in t.vars {
  456. if var == v.name {
  457. return v, nil
  458. }
  459. }
  460. return nil, errorf(t, .Undefined_Variable, "undefined variable %q", v.name)
  461. }