parser.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
/*
Package parser implements a parser for JavaScript.

	import (
		"github.com/dop251/goja/parser"
	)

Parse and return an AST

	filename := "" // A filename is optional
	src := `
		// Sample xyzzy example
		(function(){
			if (3.14159 > 0) {
				console.log("Hello, World.");
				return;
			}

			var xyzzy = NaN;
			console.log("Nothing happens.");
			return xyzzy;
		})();
	`

	// Parse some JavaScript, yielding a *ast.Program and/or an ErrorList
	program, err := parser.ParseFile(nil, filename, src, 0)

Warning

The parser and AST interfaces are still works-in-progress (particularly where
node types are concerned) and may change in the future.
*/
  26. package parser
  27. import (
  28. "bytes"
  29. "errors"
  30. "io"
  31. "io/ioutil"
  32. "github.com/dop251/goja/ast"
  33. "github.com/dop251/goja/file"
  34. "github.com/dop251/goja/token"
  35. "github.com/dop251/goja/unistring"
  36. )
  37. // A Mode value is a set of flags (or 0). They control optional parser functionality.
  38. type Mode uint
  39. const (
  40. IgnoreRegExpErrors Mode = 1 << iota // Ignore RegExp compatibility errors (allow backtracking)
  41. )
  42. type _parser struct {
  43. str string
  44. length int
  45. base int
  46. chr rune // The current character
  47. chrOffset int // The offset of current character
  48. offset int // The offset after current character (may be greater than 1)
  49. idx file.Idx // The index of token
  50. token token.Token // The token
  51. literal string // The literal of the token, if any
  52. parsedLiteral unistring.String
  53. scope *_scope
  54. insertSemicolon bool // If we see a newline, then insert an implicit semicolon
  55. implicitSemicolon bool // An implicit semicolon exists
  56. errors ErrorList
  57. recover struct {
  58. // Scratch when trying to seek to the next statement, etc.
  59. idx file.Idx
  60. count int
  61. }
  62. mode Mode
  63. file *file.File
  64. }
  65. func _newParser(filename, src string, base int) *_parser {
  66. return &_parser{
  67. chr: ' ', // This is set so we can start scanning by skipping whitespace
  68. str: src,
  69. length: len(src),
  70. base: base,
  71. file: file.NewFile(filename, src, base),
  72. }
  73. }
  74. func newParser(filename, src string) *_parser {
  75. return _newParser(filename, src, 1)
  76. }
  77. func ReadSource(filename string, src interface{}) ([]byte, error) {
  78. if src != nil {
  79. switch src := src.(type) {
  80. case string:
  81. return []byte(src), nil
  82. case []byte:
  83. return src, nil
  84. case *bytes.Buffer:
  85. if src != nil {
  86. return src.Bytes(), nil
  87. }
  88. case io.Reader:
  89. var bfr bytes.Buffer
  90. if _, err := io.Copy(&bfr, src); err != nil {
  91. return nil, err
  92. }
  93. return bfr.Bytes(), nil
  94. }
  95. return nil, errors.New("invalid source")
  96. }
  97. return ioutil.ReadFile(filename)
  98. }
  99. // ParseFile parses the source code of a single JavaScript/ECMAScript source file and returns
  100. // the corresponding ast.Program node.
  101. //
  102. // If fileSet == nil, ParseFile parses source without a FileSet.
  103. // If fileSet != nil, ParseFile first adds filename and src to fileSet.
  104. //
  105. // The filename argument is optional and is used for labelling errors, etc.
  106. //
  107. // src may be a string, a byte slice, a bytes.Buffer, or an io.Reader, but it MUST always be in UTF-8.
  108. //
  109. // // Parse some JavaScript, yielding a *ast.Program and/or an ErrorList
  110. // program, err := parser.ParseFile(nil, "", `if (abc > 1) {}`, 0)
  111. //
  112. func ParseFile(fileSet *file.FileSet, filename string, src interface{}, mode Mode) (*ast.Program, error) {
  113. str, err := ReadSource(filename, src)
  114. if err != nil {
  115. return nil, err
  116. }
  117. {
  118. str := string(str)
  119. base := 1
  120. if fileSet != nil {
  121. base = fileSet.AddFile(filename, str)
  122. }
  123. parser := _newParser(filename, str, base)
  124. parser.mode = mode
  125. return parser.parse()
  126. }
  127. }
  128. // ParseFunction parses a given parameter list and body as a function and returns the
  129. // corresponding ast.FunctionLiteral node.
  130. //
  131. // The parameter list, if any, should be a comma-separated list of identifiers.
  132. //
  133. func ParseFunction(parameterList, body string) (*ast.FunctionLiteral, error) {
  134. src := "(function(" + parameterList + ") {\n" + body + "\n})"
  135. parser := _newParser("", src, 1)
  136. program, err := parser.parse()
  137. if err != nil {
  138. return nil, err
  139. }
  140. return program.Body[0].(*ast.ExpressionStatement).Expression.(*ast.FunctionLiteral), nil
  141. }
  142. func (self *_parser) slice(idx0, idx1 file.Idx) string {
  143. from := int(idx0) - self.base
  144. to := int(idx1) - self.base
  145. if from >= 0 && to <= len(self.str) {
  146. return self.str[from:to]
  147. }
  148. return ""
  149. }
  150. func (self *_parser) parse() (*ast.Program, error) {
  151. self.next()
  152. program := self.parseProgram()
  153. if false {
  154. self.errors.Sort()
  155. }
  156. return program, self.errors.Err()
  157. }
  158. func (self *_parser) next() {
  159. self.token, self.literal, self.parsedLiteral, self.idx = self.scan()
  160. }
  161. func (self *_parser) optionalSemicolon() {
  162. if self.token == token.SEMICOLON {
  163. self.next()
  164. return
  165. }
  166. if self.implicitSemicolon {
  167. self.implicitSemicolon = false
  168. return
  169. }
  170. if self.token != token.EOF && self.token != token.RIGHT_BRACE {
  171. self.expect(token.SEMICOLON)
  172. }
  173. }
  174. func (self *_parser) semicolon() {
  175. if self.token != token.RIGHT_PARENTHESIS && self.token != token.RIGHT_BRACE {
  176. if self.implicitSemicolon {
  177. self.implicitSemicolon = false
  178. return
  179. }
  180. self.expect(token.SEMICOLON)
  181. }
  182. }
  183. func (self *_parser) idxOf(offset int) file.Idx {
  184. return file.Idx(self.base + offset)
  185. }
  186. func (self *_parser) expect(value token.Token) file.Idx {
  187. idx := self.idx
  188. if self.token != value {
  189. self.errorUnexpectedToken(self.token)
  190. }
  191. self.next()
  192. return idx
  193. }
  194. func lineCount(str string) (int, int) {
  195. line, last := 0, -1
  196. pair := false
  197. for index, chr := range str {
  198. switch chr {
  199. case '\r':
  200. line += 1
  201. last = index
  202. pair = true
  203. continue
  204. case '\n':
  205. if !pair {
  206. line += 1
  207. }
  208. last = index
  209. case '\u2028', '\u2029':
  210. line += 1
  211. last = index + 2
  212. }
  213. pair = false
  214. }
  215. return line, last
  216. }
  217. func (self *_parser) position(idx file.Idx) file.Position {
  218. position := file.Position{}
  219. offset := int(idx) - self.base
  220. str := self.str[:offset]
  221. position.Filename = self.file.Name()
  222. line, last := lineCount(str)
  223. position.Line = 1 + line
  224. if last >= 0 {
  225. position.Column = offset - last
  226. } else {
  227. position.Column = 1 + len(str)
  228. }
  229. return position
  230. }