lexer.go 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179
  1. package parser
  2. import (
  3. "errors"
  4. "fmt"
  5. "strconv"
  6. "strings"
  7. "unicode"
  8. "unicode/utf16"
  9. "unicode/utf8"
  10. "golang.org/x/text/unicode/rangetable"
  11. "github.com/dop251/goja/file"
  12. "github.com/dop251/goja/token"
  13. "github.com/dop251/goja/unistring"
  14. )
  // Unicode range tables used to classify identifier characters outside ASCII.
  var (
  	// unicodeRangeIdNeg merges Pattern_Syntax and Pattern_White_Space. The
  	// identifier predicates in this file reject any rune found in this table.
  	unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space)
  	// unicodeRangeIdStartPos merges the tables whose members may begin an
  	// identifier: Letter, Nl and Other_ID_Start.
  	unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start)
  	// unicodeRangeIdContPos extends the start table with Mn, Mc, Nd, Pc and
  	// Other_ID_Continue for runes that may continue an identifier.
  	unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue)
  )
  // isDecimalDigit reports whether chr is one of the ASCII digits '0' through '9'.
  func isDecimalDigit(chr rune) bool {
  	if chr < '0' {
  		return false
  	}
  	return chr <= '9'
  }
  23. func IsIdentifier(s string) bool {
  24. if s == "" {
  25. return false
  26. }
  27. r, size := utf8.DecodeRuneInString(s)
  28. if !isIdentifierStart(r) {
  29. return false
  30. }
  31. for _, r := range s[size:] {
  32. if !isIdentifierPart(r) {
  33. return false
  34. }
  35. }
  36. return true
  37. }
  // digitValue returns the numeric value of chr interpreted as a hexadecimal
  // digit (0-15, letters in either case). Any other rune yields 16, which is
  // larger than every legal digit value.
  func digitValue(chr rune) int {
  	if chr >= '0' && chr <= '9' {
  		return int(chr - '0')
  	}
  	if chr >= 'a' && chr <= 'f' {
  		return int(chr-'a') + 10
  	}
  	if chr >= 'A' && chr <= 'F' {
  		return int(chr-'A') + 10
  	}
  	return 16
  }
  49. func isDigit(chr rune, base int) bool {
  50. return digitValue(chr) < base
  51. }
  52. func isIdStartUnicode(r rune) bool {
  53. return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r)
  54. }
  55. func isIdPartUnicode(r rune) bool {
  56. return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D'
  57. }
  58. func isIdentifierStart(chr rune) bool {
  59. return chr == '$' || chr == '_' || chr == '\\' ||
  60. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  61. chr >= utf8.RuneSelf && isIdStartUnicode(chr)
  62. }
  63. func isIdentifierPart(chr rune) bool {
  64. return chr == '$' || chr == '_' || chr == '\\' ||
  65. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  66. '0' <= chr && chr <= '9' ||
  67. chr >= utf8.RuneSelf && isIdPartUnicode(chr)
  68. }
  69. func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) {
  70. offset := self.chrOffset
  71. hasEscape := false
  72. isUnicode := false
  73. length := 0
  74. for isIdentifierPart(self.chr) {
  75. r := self.chr
  76. length++
  77. if r == '\\' {
  78. hasEscape = true
  79. distance := self.chrOffset - offset
  80. self.read()
  81. if self.chr != 'u' {
  82. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  83. }
  84. var value rune
  85. if self._peek() == '{' {
  86. self.read()
  87. value = -1
  88. for value <= utf8.MaxRune {
  89. self.read()
  90. if self.chr == '}' {
  91. break
  92. }
  93. decimal, ok := hex2decimal(byte(self.chr))
  94. if !ok {
  95. return "", "", false, "Invalid Unicode escape sequence"
  96. }
  97. if value == -1 {
  98. value = decimal
  99. } else {
  100. value = value<<4 | decimal
  101. }
  102. }
  103. if value == -1 {
  104. return "", "", false, "Invalid Unicode escape sequence"
  105. }
  106. } else {
  107. for j := 0; j < 4; j++ {
  108. self.read()
  109. decimal, ok := hex2decimal(byte(self.chr))
  110. if !ok {
  111. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  112. }
  113. value = value<<4 | decimal
  114. }
  115. }
  116. if value == '\\' {
  117. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  118. } else if distance == 0 {
  119. if !isIdentifierStart(value) {
  120. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  121. }
  122. } else if distance > 0 {
  123. if !isIdentifierPart(value) {
  124. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  125. }
  126. }
  127. r = value
  128. }
  129. if r >= utf8.RuneSelf {
  130. isUnicode = true
  131. if r > 0xFFFF {
  132. length++
  133. }
  134. }
  135. self.read()
  136. }
  137. literal := self.str[offset:self.chrOffset]
  138. var parsed unistring.String
  139. if hasEscape || isUnicode {
  140. var err string
  141. // TODO strict
  142. parsed, err = parseStringLiteral(literal, length, isUnicode, false)
  143. if err != "" {
  144. return "", "", false, err
  145. }
  146. } else {
  147. parsed = unistring.String(literal)
  148. }
  149. return literal, parsed, hasEscape, ""
  150. }
  // isLineWhiteSpace reports whether chr is white space that does not end a
  // line (section 7.2). Line terminators and U+0085 are never white space here;
  // tab, vertical tab, form feed, space, U+00A0 and U+FEFF always are; every
  // other rune is decided by unicode.IsSpace.
  func isLineWhiteSpace(chr rune) bool {
  	if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr == '\u0085' {
  		return false
  	}
  	if chr == '\t' || chr == '\v' || chr == '\f' || chr == ' ' || chr == '\u00a0' || chr == '\ufeff' {
  		return true
  	}
  	return unicode.IsSpace(chr)
  }
  // isLineTerminator reports whether chr is one of the four line terminators
  // (section 7.3): line feed, carriage return, U+2028 or U+2029.
  func isLineTerminator(chr rune) bool {
  	return chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029'
  }
  171. type parserState struct {
  172. idx file.Idx
  173. tok token.Token
  174. literal string
  175. parsedLiteral unistring.String
  176. implicitSemicolon, insertSemicolon bool
  177. chr rune
  178. chrOffset, offset int
  179. errorCount int
  180. }
  181. func (self *_parser) mark(state *parserState) *parserState {
  182. if state == nil {
  183. state = &parserState{}
  184. }
  185. state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset =
  186. self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  187. state.errorCount = len(self.errors)
  188. return state
  189. }
  190. func (self *_parser) restore(state *parserState) {
  191. self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset =
  192. state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset
  193. self.errors = self.errors[:state.errorCount]
  194. }
  195. func (self *_parser) peek() token.Token {
  196. implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  197. tok, _, _, _ := self.scan()
  198. self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset
  199. return tok
  200. }
  201. func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {
  202. self.implicitSemicolon = false
  203. for {
  204. self.skipWhiteSpace()
  205. idx = self.idxOf(self.chrOffset)
  206. insertSemicolon := false
  207. switch chr := self.chr; {
  208. case isIdentifierStart(chr):
  209. var err string
  210. var hasEscape bool
  211. literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
  212. if err != "" {
  213. tkn = token.ILLEGAL
  214. break
  215. }
  216. if len(parsedLiteral) > 1 {
  217. // Keywords are longer than 1 character, avoid lookup otherwise
  218. var strict bool
  219. tkn, strict = token.IsKeyword(string(parsedLiteral))
  220. if hasEscape {
  221. self.insertSemicolon = true
  222. if tkn == 0 || self.isBindingId(tkn) {
  223. tkn = token.IDENTIFIER
  224. } else {
  225. tkn = token.ESCAPED_RESERVED_WORD
  226. }
  227. return
  228. }
  229. switch tkn {
  230. case 0: // Not a keyword
  231. // no-op
  232. case token.KEYWORD:
  233. if strict {
  234. // TODO If strict and in strict mode, then this is not a break
  235. break
  236. }
  237. return
  238. case
  239. token.BOOLEAN,
  240. token.NULL,
  241. token.THIS,
  242. token.BREAK,
  243. token.THROW, // A newline after a throw is not allowed, but we need to detect it
  244. token.YIELD,
  245. token.RETURN,
  246. token.CONTINUE,
  247. token.DEBUGGER:
  248. self.insertSemicolon = true
  249. return
  250. case token.ASYNC:
  251. // async only has special meaning if not followed by a LineTerminator
  252. if self.skipWhiteSpaceCheckLineTerminator() {
  253. self.insertSemicolon = true
  254. tkn = token.IDENTIFIER
  255. }
  256. return
  257. default:
  258. return
  259. }
  260. }
  261. self.insertSemicolon = true
  262. tkn = token.IDENTIFIER
  263. return
  264. case '0' <= chr && chr <= '9':
  265. self.insertSemicolon = true
  266. tkn, literal = self.scanNumericLiteral(false)
  267. return
  268. default:
  269. self.read()
  270. switch chr {
  271. case -1:
  272. if self.insertSemicolon {
  273. self.insertSemicolon = false
  274. self.implicitSemicolon = true
  275. }
  276. tkn = token.EOF
  277. case '\r', '\n', '\u2028', '\u2029':
  278. self.insertSemicolon = false
  279. self.implicitSemicolon = true
  280. continue
  281. case ':':
  282. tkn = token.COLON
  283. case '.':
  284. if digitValue(self.chr) < 10 {
  285. insertSemicolon = true
  286. tkn, literal = self.scanNumericLiteral(true)
  287. } else {
  288. if self.chr == '.' {
  289. self.read()
  290. if self.chr == '.' {
  291. self.read()
  292. tkn = token.ELLIPSIS
  293. } else {
  294. tkn = token.ILLEGAL
  295. }
  296. } else {
  297. tkn = token.PERIOD
  298. }
  299. }
  300. case ',':
  301. tkn = token.COMMA
  302. case ';':
  303. tkn = token.SEMICOLON
  304. case '(':
  305. tkn = token.LEFT_PARENTHESIS
  306. case ')':
  307. tkn = token.RIGHT_PARENTHESIS
  308. insertSemicolon = true
  309. case '[':
  310. tkn = token.LEFT_BRACKET
  311. case ']':
  312. tkn = token.RIGHT_BRACKET
  313. insertSemicolon = true
  314. case '{':
  315. tkn = token.LEFT_BRACE
  316. case '}':
  317. tkn = token.RIGHT_BRACE
  318. insertSemicolon = true
  319. case '+':
  320. tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
  321. if tkn == token.INCREMENT {
  322. insertSemicolon = true
  323. }
  324. case '-':
  325. tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
  326. if tkn == token.DECREMENT {
  327. insertSemicolon = true
  328. }
  329. case '*':
  330. if self.chr == '*' {
  331. self.read()
  332. tkn = self.switch2(token.EXPONENT, token.EXPONENT_ASSIGN)
  333. } else {
  334. tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
  335. }
  336. case '/':
  337. if self.chr == '/' {
  338. self.skipSingleLineComment()
  339. continue
  340. } else if self.chr == '*' {
  341. if self.skipMultiLineComment() {
  342. self.insertSemicolon = false
  343. self.implicitSemicolon = true
  344. }
  345. continue
  346. } else {
  347. // Could be division, could be RegExp literal
  348. tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
  349. insertSemicolon = true
  350. }
  351. case '%':
  352. tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
  353. case '^':
  354. tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
  355. case '<':
  356. tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
  357. case '>':
  358. tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
  359. case '=':
  360. if self.chr == '>' {
  361. self.read()
  362. if self.implicitSemicolon {
  363. tkn = token.ILLEGAL
  364. } else {
  365. tkn = token.ARROW
  366. }
  367. } else {
  368. tkn = self.switch2(token.ASSIGN, token.EQUAL)
  369. if tkn == token.EQUAL && self.chr == '=' {
  370. self.read()
  371. tkn = token.STRICT_EQUAL
  372. }
  373. }
  374. case '!':
  375. tkn = self.switch2(token.NOT, token.NOT_EQUAL)
  376. if tkn == token.NOT_EQUAL && self.chr == '=' {
  377. self.read()
  378. tkn = token.STRICT_NOT_EQUAL
  379. }
  380. case '&':
  381. tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
  382. case '|':
  383. tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
  384. case '~':
  385. tkn = token.BITWISE_NOT
  386. case '?':
  387. if self.chr == '.' && !isDecimalDigit(self._peek()) {
  388. self.read()
  389. tkn = token.QUESTION_DOT
  390. } else if self.chr == '?' {
  391. self.read()
  392. tkn = token.COALESCE
  393. } else {
  394. tkn = token.QUESTION_MARK
  395. }
  396. case '"', '\'':
  397. insertSemicolon = true
  398. tkn = token.STRING
  399. var err string
  400. literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
  401. if err != "" {
  402. tkn = token.ILLEGAL
  403. }
  404. case '`':
  405. tkn = token.BACKTICK
  406. case '#':
  407. if self.chrOffset == 1 && self.chr == '!' {
  408. self.skipSingleLineComment()
  409. continue
  410. }
  411. var err string
  412. literal, parsedLiteral, _, err = self.scanIdentifier()
  413. if err != "" || literal == "" {
  414. tkn = token.ILLEGAL
  415. break
  416. }
  417. self.insertSemicolon = true
  418. tkn = token.PRIVATE_IDENTIFIER
  419. return
  420. default:
  421. self.errorUnexpected(idx, chr)
  422. tkn = token.ILLEGAL
  423. }
  424. }
  425. self.insertSemicolon = insertSemicolon
  426. return
  427. }
  428. }
  429. func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
  430. if self.chr == '=' {
  431. self.read()
  432. return tkn1
  433. }
  434. return tkn0
  435. }
  436. func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  437. if self.chr == '=' {
  438. self.read()
  439. return tkn1
  440. }
  441. if self.chr == chr2 {
  442. self.read()
  443. return tkn2
  444. }
  445. return tkn0
  446. }
  447. func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  448. if self.chr == '=' {
  449. self.read()
  450. return tkn1
  451. }
  452. if self.chr == chr2 {
  453. self.read()
  454. if self.chr == '=' {
  455. self.read()
  456. return tkn3
  457. }
  458. return tkn2
  459. }
  460. return tkn0
  461. }
  462. func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  463. if self.chr == '=' {
  464. self.read()
  465. return tkn1
  466. }
  467. if self.chr == chr2 {
  468. self.read()
  469. if self.chr == '=' {
  470. self.read()
  471. return tkn3
  472. }
  473. if self.chr == chr3 {
  474. self.read()
  475. if self.chr == '=' {
  476. self.read()
  477. return tkn5
  478. }
  479. return tkn4
  480. }
  481. return tkn2
  482. }
  483. return tkn0
  484. }
  485. func (self *_parser) _peek() rune {
  486. if self.offset < self.length {
  487. return rune(self.str[self.offset])
  488. }
  489. return -1
  490. }
  491. func (self *_parser) read() {
  492. if self.offset < self.length {
  493. self.chrOffset = self.offset
  494. chr, width := rune(self.str[self.offset]), 1
  495. if chr >= utf8.RuneSelf { // !ASCII
  496. chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
  497. if chr == utf8.RuneError && width == 1 {
  498. self.error(self.chrOffset, "Invalid UTF-8 character")
  499. }
  500. }
  501. self.offset += width
  502. self.chr = chr
  503. } else {
  504. self.chrOffset = self.length
  505. self.chr = -1 // EOF
  506. }
  507. }
  508. func (self *_parser) skipSingleLineComment() {
  509. for self.chr != -1 {
  510. self.read()
  511. if isLineTerminator(self.chr) {
  512. return
  513. }
  514. }
  515. }
  516. func (self *_parser) skipMultiLineComment() (hasLineTerminator bool) {
  517. self.read()
  518. for self.chr >= 0 {
  519. chr := self.chr
  520. if chr == '\r' || chr == '\n' || chr == '\u2028' || chr == '\u2029' {
  521. hasLineTerminator = true
  522. break
  523. }
  524. self.read()
  525. if chr == '*' && self.chr == '/' {
  526. self.read()
  527. return
  528. }
  529. }
  530. for self.chr >= 0 {
  531. chr := self.chr
  532. self.read()
  533. if chr == '*' && self.chr == '/' {
  534. self.read()
  535. return
  536. }
  537. }
  538. self.errorUnexpected(0, self.chr)
  539. return
  540. }
  541. func (self *_parser) skipWhiteSpaceCheckLineTerminator() bool {
  542. for {
  543. switch self.chr {
  544. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  545. self.read()
  546. continue
  547. case '\r':
  548. if self._peek() == '\n' {
  549. self.read()
  550. }
  551. fallthrough
  552. case '\u2028', '\u2029', '\n':
  553. return true
  554. }
  555. if self.chr >= utf8.RuneSelf {
  556. if unicode.IsSpace(self.chr) {
  557. self.read()
  558. continue
  559. }
  560. }
  561. break
  562. }
  563. return false
  564. }
  565. func (self *_parser) skipWhiteSpace() {
  566. for {
  567. switch self.chr {
  568. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  569. self.read()
  570. continue
  571. case '\r':
  572. if self._peek() == '\n' {
  573. self.read()
  574. }
  575. fallthrough
  576. case '\u2028', '\u2029', '\n':
  577. if self.insertSemicolon {
  578. return
  579. }
  580. self.read()
  581. continue
  582. }
  583. if self.chr >= utf8.RuneSelf {
  584. if unicode.IsSpace(self.chr) {
  585. self.read()
  586. continue
  587. }
  588. }
  589. break
  590. }
  591. }
  592. func (self *_parser) scanMantissa(base int) {
  593. for digitValue(self.chr) < base {
  594. self.read()
  595. }
  596. }
  // scanEscape consumes the body of an escape sequence; the current character
  // is the one following the backslash. quote is the delimiter of the enclosing
  // literal: digit reading stops when it is reached.
  //
  // It returns how much the escape adds to the caller's length counter (2 when
  // the resulting value is above 0xFFFF, 2 for an escaped "\r\n" pair, 1
  // otherwise) and whether the resulting value is outside ASCII.
  // No validation errors are produced here.
  func (self *_parser) scanEscape(quote rune) (int, bool) {
  	// length is the maximum number of digits to read (0 means "until '}'"),
  	// base their radix. base stays 0 for escapes that carry no digits.
  	var length, base uint32
  	chr := self.chr
  	switch chr {
  	case '0', '1', '2', '3', '4', '5', '6', '7':
  		// Octal:
  		// The first digit is not consumed here; the loop below reads it.
  		length, base = 3, 8
  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
  		self.read()
  		return 1, false
  	case '\r':
  		self.read()
  		if self.chr == '\n' {
  			self.read()
  			return 2, false
  		}
  		return 1, false
  	case '\n':
  		self.read()
  		return 1, false
  	case '\u2028', '\u2029':
  		self.read()
  		return 1, true
  	case 'x':
  		self.read()
  		length, base = 2, 16
  	case 'u':
  		self.read()
  		if self.chr == '{' {
  			// \u{X...}: variable number of digits, terminated by '}'.
  			self.read()
  			length, base = 0, 16
  		} else {
  			length, base = 4, 16
  		}
  	default:
  		self.read() // Always make progress
  	}

  	if base > 0 {
  		var value uint32
  		if length > 0 {
  			// Fixed-width form: read up to length digits, stopping early at
  			// the quote, at end of input or at a non-digit.
  			for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
  				digit := uint32(digitValue(self.chr))
  				if digit >= base {
  					break
  				}
  				value = value*base + digit
  				self.read()
  			}
  		} else {
  			// Braced form: read digits until '}' (which is consumed), the
  			// quote, end of input, a non-digit, or the value reaching
  			// utf8.MaxRune.
  			for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
  				if self.chr == '}' {
  					self.read()
  					break
  				}
  				digit := uint32(digitValue(self.chr))
  				if digit >= base {
  					break
  				}
  				value = value*base + digit
  				self.read()
  			}
  		}
  		chr = rune(value)
  	}
  	// chr is now either the decoded value or the escaped character itself.
  	if chr >= utf8.RuneSelf {
  		if chr > 0xFFFF {
  			return 2, true
  		}
  		return 1, true
  	}
  	return 1, false
  }
  669. func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) {
  670. // " ' /
  671. quote := rune(self.str[offset])
  672. length := 0
  673. isUnicode := false
  674. for self.chr != quote {
  675. chr := self.chr
  676. if chr == '\n' || chr == '\r' || chr < 0 {
  677. goto newline
  678. }
  679. if quote == '/' && (self.chr == '\u2028' || self.chr == '\u2029') {
  680. goto newline
  681. }
  682. self.read()
  683. if chr == '\\' {
  684. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  685. if quote == '/' {
  686. goto newline
  687. }
  688. self.scanNewline()
  689. } else {
  690. l, u := self.scanEscape(quote)
  691. length += l
  692. if u {
  693. isUnicode = true
  694. }
  695. }
  696. continue
  697. } else if chr == '[' && quote == '/' {
  698. // Allow a slash (/) in a bracket character class ([...])
  699. // TODO Fix this, this is hacky...
  700. quote = -1
  701. } else if chr == ']' && quote == -1 {
  702. quote = '/'
  703. }
  704. if chr >= utf8.RuneSelf {
  705. isUnicode = true
  706. if chr > 0xFFFF {
  707. length++
  708. }
  709. }
  710. length++
  711. }
  712. // " ' /
  713. self.read()
  714. literal = self.str[offset:self.chrOffset]
  715. if parse {
  716. // TODO strict
  717. parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
  718. }
  719. return
  720. newline:
  721. self.scanNewline()
  722. errStr := "String not terminated"
  723. if quote == '/' {
  724. errStr = "Invalid regular expression: missing /"
  725. self.error(self.idxOf(offset), errStr)
  726. }
  727. return "", "", errStr
  728. }
  729. func (self *_parser) scanNewline() {
  730. if self.chr == '\u2028' || self.chr == '\u2029' {
  731. self.read()
  732. return
  733. }
  734. if self.chr == '\r' {
  735. self.read()
  736. if self.chr != '\n' {
  737. return
  738. }
  739. }
  740. self.read()
  741. }
  // parseTemplateCharacters scans one run of template literal text, starting at
  // the current character and ending at a closing backtick or at "${".
  //
  // Results:
  //   - literal:  the raw source text of the run (delimiters excluded), with
  //     line endings normalised by normaliseCRLF when a '\r' was seen.
  //   - parsed:   the value produced by parseStringLiteral, unless parseErr is set.
  //   - finished: true when the closing backtick (or end of input) was reached,
  //     false when the run ended at "${".
  //   - parseErr: a message describing an invalid escape in the run.
  //   - err:      set to err_UnexpectedEndOfInput when input ends before the
  //     run is terminated.
  func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) {
  	offset := self.chrOffset
  	var end int
  	// length is the size hint passed to parseStringLiteral.
  	length := 0
  	isUnicode := false
  	hasCR := false
  	for {
  		chr := self.chr
  		if chr < 0 {
  			goto unterminated
  		}
  		self.read()
  		if chr == '`' {
  			finished = true
  			// The backtick has been consumed; exclude it from the literal.
  			end = self.chrOffset - 1
  			break
  		}
  		if chr == '\\' {
  			if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  				// Backslash followed by a line break (or end of input).
  				if self.chr == '\r' {
  					hasCR = true
  				}
  				self.scanNewline()
  			} else {
  				if self.chr == '8' || self.chr == '9' {
  					// Only the first such error is kept.
  					if parseErr == "" {
  						parseErr = "\\8 and \\9 are not allowed in template strings."
  					}
  				}
  				l, u := self.scanEscape('`')
  				length += l
  				if u {
  					isUnicode = true
  				}
  			}
  			continue
  		}
  		if chr == '$' && self.chr == '{' {
  			self.read()
  			// Both '$' and '{' have been consumed; exclude them.
  			end = self.chrOffset - 2
  			break
  		}
  		if chr >= utf8.RuneSelf {
  			isUnicode = true
  			if chr > 0xFFFF {
  				length++
  			}
  		} else if chr == '\r' {
  			hasCR = true
  			// "\r\n" is normalised to a single '\n' below, so the '\r' must
  			// not be counted; the decrement cancels the increment that follows.
  			if self.chr == '\n' {
  				length--
  			}
  		}
  		length++
  	}
  	literal = self.str[offset:end]
  	if hasCR {
  		literal = normaliseCRLF(literal)
  	}
  	if parseErr == "" {
  		parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
  	}
  	self.insertSemicolon = true
  	return
  unterminated:
  	err = err_UnexpectedEndOfInput
  	finished = true
  	return
  }
  // normaliseCRLF returns s with every "\r\n" pair and every remaining lone
  // '\r' replaced by a single '\n'.
  func normaliseCRLF(s string) string {
  	// Collapse the pairs first so that their '\r' is not converted on its own.
  	collapsed := strings.ReplaceAll(s, "\r\n", "\n")
  	return strings.ReplaceAll(collapsed, "\r", "\n")
  }
  // hex2decimal converts a single hexadecimal digit (either case) to its
  // numeric value. ok is false, and value 0, when chr is not a hex digit.
  func hex2decimal(chr byte) (value rune, ok bool) {
  	c := rune(chr)
  	if '0' <= c && c <= '9' {
  		return c - '0', true
  	}
  	if 'a' <= c && c <= 'f' {
  		return c - 'a' + 10, true
  	}
  	if 'A' <= c && c <= 'F' {
  		return c - 'A' + 10, true
  	}
  	return 0, false
  }
  // parseNumberLiteral converts the source text of a numeric literal to a Go
  // value.
  //
  // Literals that fit in an int64 are returned as int64. Everything else is
  // returned as float64: decimal literals via strconv.ParseFloat (out-of-range
  // values become ±Inf), and hexadecimal, octal, binary and legacy-octal
  // integers that overflow int64 by evaluating their digits in floating point.
  // Any other input yields a nil value and an "Illegal numeric literal" error.
  func parseNumberLiteral(literal string) (value interface{}, err error) {
  	// TODO Is Uint okay? What about -MAX_UINT
  	intValue, intErr := strconv.ParseInt(literal, 0, 64)
  	if intErr == nil {
  		return intValue, nil
  	}

  	// Could just be a very large number (e.g. 0x8000000000000000). This has
  	// to be tried before ParseFloat, which rejects the 0x/0o/0b forms and
  	// would read an overflowing legacy octal literal as decimal.
  	if errors.Is(intErr, strconv.ErrRange) {
  		if floatValue, ok := radixLiteralValue(literal); ok {
  			return floatValue, nil
  		}
  	}

  	floatValue, floatErr := strconv.ParseFloat(literal, 64)
  	if floatErr == nil || errors.Is(floatErr, strconv.ErrRange) {
  		// ErrRange: Infinity, etc.
  		return floatValue, nil
  	}

  	return nil, errors.New("Illegal numeric literal")
  }

  // radixLiteralValue evaluates a non-decimal integer literal ("0x..", "0o..",
  // "0b.." or a legacy octal "0" followed by octal digits) as a float64. ok is
  // false when literal is not entirely of one of those forms.
  func radixLiteralValue(literal string) (value float64, ok bool) {
  	if len(literal) < 2 || literal[0] != '0' {
  		return 0, false
  	}

  	// Default: legacy octal, where the digits follow the leading zero directly.
  	base, digits := 8.0, literal[1:]
  	switch literal[1] {
  	case 'x', 'X':
  		base, digits = 16, literal[2:]
  	case 'o', 'O':
  		base, digits = 8, literal[2:]
  	case 'b', 'B':
  		base, digits = 2, literal[2:]
  	}
  	if digits == "" {
  		return 0, false
  	}

  	for i := 0; i < len(digits); i++ {
  		var digit float64
  		switch c := digits[i]; {
  		case '0' <= c && c <= '9':
  			digit = float64(c - '0')
  		case 'a' <= c && c <= 'f':
  			digit = float64(c-'a') + 10
  		case 'A' <= c && c <= 'F':
  			digit = float64(c-'A') + 10
  		default:
  			return 0, false
  		}
  		if digit >= base {
  			return 0, false
  		}
  		value = value*base + digit
  	}
  	return value, true
  }
  // parseStringLiteral resolves the escape sequences in literal (the text
  // between the delimiters) and returns the resulting string together with an
  // error message that is empty on success.
  //
  // length is the expected size of the result as counted by the scanner:
  // bytes when unicode is false, uint16 units when unicode is true. The
  // function panics if the decoded result does not have exactly that size.
  // unicode selects the []uint16 representation (prefixed with unistring.BOM)
  // instead of a plain byte string. strict makes octal escapes an error.
  func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) {
  	var sb strings.Builder
  	var chars []uint16
  	if unicode {
  		chars = make([]uint16, 1, length+1)
  		chars[0] = unistring.BOM
  	} else {
  		sb.Grow(length)
  	}
  	str := literal
  	for len(str) > 0 {
  		switch chr := str[0]; {
  		// We do not explicitly handle the case of the quote
  		// value, which can be: " ' /
  		// This assumes we're already passed a partially well-formed literal
  		case chr >= utf8.RuneSelf:
  			// Non-ASCII character: always appended to chars, regardless of
  			// the unicode flag.
  			chr, size := utf8.DecodeRuneInString(str)
  			if chr <= 0xFFFF {
  				chars = append(chars, uint16(chr))
  			} else {
  				first, second := utf16.EncodeRune(chr)
  				chars = append(chars, uint16(first), uint16(second))
  			}
  			str = str[size:]
  			continue
  		case chr != '\\':
  			// Ordinary ASCII character.
  			if unicode {
  				chars = append(chars, uint16(chr))
  			} else {
  				sb.WriteByte(chr)
  			}
  			str = str[1:]
  			continue
  		}

  		// From here on str starts with a backslash.
  		if len(str) <= 1 {
  			panic("len(str) <= 1")
  		}
  		chr := str[1]
  		var value rune
  		if chr >= utf8.RuneSelf {
  			// Escaped non-ASCII character: it stands for itself, except that
  			// an escaped U+2028/U+2029 is a line continuation and adds nothing.
  			str = str[1:]
  			var size int
  			value, size = utf8.DecodeRuneInString(str)
  			str = str[size:] // \ + <character>
  			if value == '\u2028' || value == '\u2029' {
  				continue
  			}
  		} else {
  			str = str[2:] // \<character>
  			switch chr {
  			case 'b':
  				value = '\b'
  			case 'f':
  				value = '\f'
  			case 'n':
  				value = '\n'
  			case 'r':
  				value = '\r'
  			case 't':
  				value = '\t'
  			case 'v':
  				value = '\v'
  			case 'x', 'u':
  				// size is the number of hex digits for the fixed-width forms
  				// (\xHH, \uHHHH) and stays 0 for the braced form \u{H...}.
  				size := 0
  				switch chr {
  				case 'x':
  					size = 2
  				case 'u':
  					if str == "" || str[0] != '{' {
  						size = 4
  					}
  				}
  				if size > 0 {
  					if len(str) < size {
  						return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
  					}
  					for j := 0; j < size; j++ {
  						decimal, ok := hex2decimal(str[j])
  						if !ok {
  							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size])
  						}
  						value = value<<4 | decimal
  					}
  				} else {
  					// Braced form: skip '{', then accumulate digits until '}'.
  					// size ends up as the number of bytes consumed, including
  					// the closing brace.
  					str = str[1:]
  					var val rune
  					value = -1 // stays -1 if no closing brace is found
  					for ; size < len(str); size++ {
  						if str[size] == '}' {
  							if size == 0 {
  								return "", fmt.Sprintf("invalid escape: \\%s", string(chr))
  							}
  							size++
  							value = val
  							break
  						}
  						decimal, ok := hex2decimal(str[size])
  						if !ok {
  							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1])
  						}
  						val = val<<4 | decimal
  						if val > utf8.MaxRune {
  							return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1])
  						}
  					}
  					if value == -1 {
  						return "", fmt.Sprintf("unterminated \\u{: %q", str)
  					}
  				}
  				str = str[size:]
  				if chr == 'x' {
  					break
  				}
  				if value > utf8.MaxRune {
  					panic("value > utf8.MaxRune")
  				}
  			case '0':
  				// "\0" not followed by an octal digit is the NUL character;
  				// otherwise it is treated as an octal escape below.
  				if len(str) == 0 || '0' > str[0] || str[0] > '7' {
  					value = 0
  					break
  				}
  				fallthrough
  			case '1', '2', '3', '4', '5', '6', '7':
  				if strict {
  					return "", "Octal escape sequences are not allowed in this context"
  				}
  				// Up to two further octal digits follow the first one.
  				value = rune(chr) - '0'
  				j := 0
  				for ; j < 2; j++ {
  					if len(str) < j+1 {
  						break
  					}
  					chr := str[j]
  					if '0' > chr || chr > '7' {
  						break
  					}
  					decimal := rune(str[j]) - '0'
  					value = (value << 3) | decimal
  				}
  				str = str[j:]
  			case '\\':
  				value = '\\'
  			case '\'', '"':
  				value = rune(chr)
  			case '\r':
  				// Line continuation: "\r\n" counts as one line break.
  				if len(str) > 0 {
  					if str[0] == '\n' {
  						str = str[1:]
  					}
  				}
  				fallthrough
  			case '\n':
  				// Line continuation: contributes nothing to the value.
  				continue
  			default:
  				// Any other escaped character stands for itself.
  				value = rune(chr)
  			}
  		}
  		if unicode {
  			if value <= 0xFFFF {
  				chars = append(chars, uint16(value))
  			} else {
  				first, second := utf16.EncodeRune(value)
  				chars = append(chars, uint16(first), uint16(second))
  			}
  		} else {
  			if value >= utf8.RuneSelf {
  				return "", "Unexpected unicode character"
  			}
  			sb.WriteByte(byte(value))
  		}
  	}

  	// Sanity check against the length computed by the scanner.
  	if unicode {
  		if len(chars) != length+1 {
  			panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
  		}
  		return unistring.FromUtf16(chars), ""
  	}
  	if sb.Len() != length {
  		panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
  	}
  	return unistring.String(sb.String()), ""
  }
  // scanNumericLiteral scans a numeric literal and returns its token kind and
  // source text. decimalPoint is true when the caller has already consumed a
  // leading '.', in which case the reader is on the first fraction digit.
  // token.ILLEGAL is returned, with the text scanned so far, for a radix prefix
  // without digits, an exponent without digits, or a literal immediately
  // followed by an identifier start or a decimal digit.
  func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
  	offset := self.chrOffset
  	tkn := token.NUMBER
  	if decimalPoint {
  		// Include the already-consumed '.' in the literal text.
  		offset--
  		self.scanMantissa(10)
  	} else {
  		if self.chr == '0' {
  			self.read()
  			base := 0
  			switch self.chr {
  			case 'x', 'X':
  				base = 16
  			case 'o', 'O':
  				base = 8
  			case 'b', 'B':
  				base = 2
  			case '.', 'e', 'E':
  				// no-op
  				// "0." / "0e": handled by the fraction and exponent code below.
  			default:
  				// legacy octal
  				self.scanMantissa(8)
  				goto end
  			}
  			if base > 0 {
  				// Skip the radix letter; at least one digit must follow.
  				self.read()
  				if !isDigit(self.chr, base) {
  					return token.ILLEGAL, self.str[offset:self.chrOffset]
  				}
  				self.scanMantissa(base)
  				goto end
  			}
  		} else {
  			self.scanMantissa(10)
  		}
  		if self.chr == '.' {
  			self.read()
  			self.scanMantissa(10)
  		}
  	}
  	if self.chr == 'e' || self.chr == 'E' {
  		self.read()
  		if self.chr == '-' || self.chr == '+' {
  			self.read()
  		}
  		if isDecimalDigit(self.chr) {
  			self.read()
  			self.scanMantissa(10)
  		} else {
  			return token.ILLEGAL, self.str[offset:self.chrOffset]
  		}
  	}
  end:
  	// A number must not run directly into an identifier or another digit.
  	if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
  		return token.ILLEGAL, self.str[offset:self.chrOffset]
  	}
  	return tkn, self.str[offset:self.chrOffset]
  }