lexer.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210
  1. package parser
  2. import (
  3. "errors"
  4. "fmt"
  5. "math/big"
  6. "strconv"
  7. "strings"
  8. "unicode"
  9. "unicode/utf16"
  10. "unicode/utf8"
  11. "golang.org/x/text/unicode/rangetable"
  12. "github.com/dop251/goja/file"
  13. "github.com/dop251/goja/token"
  14. "github.com/dop251/goja/unistring"
  15. )
var (
	// unicodeRangeIdNeg is the set of code points that are excluded from
	// identifiers even when they appear in one of the positive tables below:
	// the union of Pattern_Syntax and Pattern_White_Space.
	unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space)
	// unicodeRangeIdStartPos is the set of non-ASCII candidates for the first
	// character of an identifier: letters, letter numbers (Nl) and
	// Other_ID_Start.
	unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start)
	// unicodeRangeIdContPos is the set of non-ASCII candidates for subsequent
	// identifier characters: everything in unicodeRangeIdStartPos plus
	// combining marks (Mn, Mc), decimal digits (Nd), connector punctuation
	// (Pc) and Other_ID_Continue.
	unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue)
)
  21. func isDecimalDigit(chr rune) bool {
  22. return '0' <= chr && chr <= '9'
  23. }
  24. func IsIdentifier(s string) bool {
  25. if s == "" {
  26. return false
  27. }
  28. r, size := utf8.DecodeRuneInString(s)
  29. if !isIdentifierStart(r) {
  30. return false
  31. }
  32. for _, r := range s[size:] {
  33. if !isIdentifierPart(r) {
  34. return false
  35. }
  36. }
  37. return true
  38. }
  39. func digitValue(chr rune) int {
  40. switch {
  41. case '0' <= chr && chr <= '9':
  42. return int(chr - '0')
  43. case 'a' <= chr && chr <= 'f':
  44. return int(chr - 'a' + 10)
  45. case 'A' <= chr && chr <= 'F':
  46. return int(chr - 'A' + 10)
  47. }
  48. return 16 // Larger than any legal digit value
  49. }
  50. func isDigit(chr rune, base int) bool {
  51. return digitValue(chr) < base
  52. }
  53. func isIdStartUnicode(r rune) bool {
  54. return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r)
  55. }
  56. func isIdPartUnicode(r rune) bool {
  57. return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D'
  58. }
  59. func isIdentifierStart(chr rune) bool {
  60. return chr == '$' || chr == '_' || chr == '\\' ||
  61. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  62. chr >= utf8.RuneSelf && isIdStartUnicode(chr)
  63. }
  64. func isIdentifierPart(chr rune) bool {
  65. return chr == '$' || chr == '_' || chr == '\\' ||
  66. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  67. '0' <= chr && chr <= '9' ||
  68. chr >= utf8.RuneSelf && isIdPartUnicode(chr)
  69. }
  70. func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) {
  71. offset := self.chrOffset
  72. hasEscape := false
  73. isUnicode := false
  74. length := 0
  75. for isIdentifierPart(self.chr) {
  76. r := self.chr
  77. length++
  78. if r == '\\' {
  79. hasEscape = true
  80. distance := self.chrOffset - offset
  81. self.read()
  82. if self.chr != 'u' {
  83. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  84. }
  85. var value rune
  86. if self._peek() == '{' {
  87. self.read()
  88. value = -1
  89. for value <= utf8.MaxRune {
  90. self.read()
  91. if self.chr == '}' {
  92. break
  93. }
  94. decimal, ok := hex2decimal(byte(self.chr))
  95. if !ok {
  96. return "", "", false, "Invalid Unicode escape sequence"
  97. }
  98. if value == -1 {
  99. value = decimal
  100. } else {
  101. value = value<<4 | decimal
  102. }
  103. }
  104. if value == -1 {
  105. return "", "", false, "Invalid Unicode escape sequence"
  106. }
  107. } else {
  108. for j := 0; j < 4; j++ {
  109. self.read()
  110. decimal, ok := hex2decimal(byte(self.chr))
  111. if !ok {
  112. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  113. }
  114. value = value<<4 | decimal
  115. }
  116. }
  117. if value == '\\' {
  118. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  119. } else if distance == 0 {
  120. if !isIdentifierStart(value) {
  121. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  122. }
  123. } else if distance > 0 {
  124. if !isIdentifierPart(value) {
  125. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  126. }
  127. }
  128. r = value
  129. }
  130. if r >= utf8.RuneSelf {
  131. isUnicode = true
  132. if r > 0xFFFF {
  133. length++
  134. }
  135. }
  136. self.read()
  137. }
  138. literal := self.str[offset:self.chrOffset]
  139. var parsed unistring.String
  140. if hasEscape || isUnicode {
  141. var err string
  142. // TODO strict
  143. parsed, err = parseStringLiteral(literal, length, isUnicode, false)
  144. if err != "" {
  145. return "", "", false, err
  146. }
  147. } else {
  148. parsed = unistring.String(literal)
  149. }
  150. return literal, parsed, hasEscape, ""
  151. }
  152. // 7.2
  153. func isLineWhiteSpace(chr rune) bool {
  154. switch chr {
  155. case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
  156. return true
  157. case '\u000a', '\u000d', '\u2028', '\u2029':
  158. return false
  159. case '\u0085':
  160. return false
  161. }
  162. return unicode.IsSpace(chr)
  163. }
  164. // 7.3
  165. func isLineTerminator(chr rune) bool {
  166. switch chr {
  167. case '\u000a', '\u000d', '\u2028', '\u2029':
  168. return true
  169. }
  170. return false
  171. }
  172. type parserState struct {
  173. idx file.Idx
  174. tok token.Token
  175. literal string
  176. parsedLiteral unistring.String
  177. implicitSemicolon, insertSemicolon bool
  178. chr rune
  179. chrOffset, offset int
  180. errorCount int
  181. }
  182. func (self *_parser) mark(state *parserState) *parserState {
  183. if state == nil {
  184. state = &parserState{}
  185. }
  186. state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset =
  187. self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  188. state.errorCount = len(self.errors)
  189. return state
  190. }
  191. func (self *_parser) restore(state *parserState) {
  192. self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset =
  193. state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset
  194. self.errors = self.errors[:state.errorCount]
  195. }
  196. func (self *_parser) peek() token.Token {
  197. implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  198. tok, _, _, _ := self.scan()
  199. self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset
  200. return tok
  201. }
// scan reads the next token from the input.
//
// It returns the token kind (tkn), the raw source text for tokens that carry
// one (literal), the decoded value for identifiers and strings
// (parsedLiteral), and the position at which the token starts (idx).
//
// Side effects: the reader is advanced past the token; self.implicitSemicolon
// is cleared on entry and set when a line terminator (or a multi-line comment
// containing one) is passed, or when EOF is reached while a semicolon could be
// inserted; self.insertSemicolon is set according to whether the returned
// token may be followed by an automatically inserted semicolon.
func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {
	self.implicitSemicolon = false
	// The loop restarts (via continue) after newlines and comments; every
	// other path returns a token.
	for {
		self.skipWhiteSpace()
		idx = self.idxOf(self.chrOffset)
		// Local flag, copied to self.insertSemicolon just before the common
		// return at the bottom of the loop. Paths that return early set the
		// field directly.
		insertSemicolon := false
		switch chr := self.chr; {
		case isIdentifierStart(chr):
			var err string
			var hasEscape bool
			literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
			if err != "" {
				tkn = token.ILLEGAL
				// Leaves the switch; the token is returned by the common
				// return below.
				break
			}
			if len(parsedLiteral) > 1 {
				// Keywords are longer than 1 character, avoid lookup otherwise
				var strict bool
				tkn, strict = token.IsKeyword(string(parsedLiteral))
				if hasEscape {
					// An identifier written with escapes is never returned as
					// a keyword token: it is either a plain identifier or an
					// escaped reserved word.
					self.insertSemicolon = true
					if tkn == 0 || self.isBindingId(tkn) {
						tkn = token.IDENTIFIER
					} else {
						tkn = token.ESCAPED_RESERVED_WORD
					}
					return
				}
				switch tkn {
				case 0: // Not a keyword
					// no-op
				case token.KEYWORD:
					if strict {
						// TODO If strict and in strict mode, then this is not a break
						break
					}
					return
				case
					token.BOOLEAN,
					token.NULL,
					token.THIS,
					token.BREAK,
					token.THROW, // A newline after a throw is not allowed, but we need to detect it
					token.YIELD,
					token.RETURN,
					token.CONTINUE,
					token.DEBUGGER:
					self.insertSemicolon = true
					return
				case token.ASYNC:
					// async only has special meaning if not followed by a LineTerminator
					if self.skipWhiteSpaceCheckLineTerminator() {
						self.insertSemicolon = true
						tkn = token.IDENTIFIER
					}
					return
				default:
					return
				}
			}
			// Not a keyword (or a strict-only keyword): plain identifier.
			self.insertSemicolon = true
			tkn = token.IDENTIFIER
			return
		case '0' <= chr && chr <= '9':
			self.insertSemicolon = true
			tkn, literal = self.scanNumericLiteral(false)
			return
		default:
			// Punctuation and everything else: chr holds the character being
			// classified, self.chr is now the character after it.
			self.read()
			switch chr {
			case -1:
				if self.insertSemicolon {
					self.insertSemicolon = false
					self.implicitSemicolon = true
				}
				tkn = token.EOF
			case '\r', '\n', '\u2028', '\u2029':
				// Reached only when skipWhiteSpace stopped at the newline.
				self.insertSemicolon = false
				self.implicitSemicolon = true
				continue
			case ':':
				tkn = token.COLON
			case '.':
				// A '.' directly followed by a decimal digit starts a numeric
				// literal such as .5
				if digitValue(self.chr) < 10 {
					insertSemicolon = true
					tkn, literal = self.scanNumericLiteral(true)
				} else {
					if self.chr == '.' {
						self.read()
						if self.chr == '.' {
							self.read()
							tkn = token.ELLIPSIS
						} else {
							// Exactly two dots.
							tkn = token.ILLEGAL
						}
					} else {
						tkn = token.PERIOD
					}
				}
			case ',':
				tkn = token.COMMA
			case ';':
				tkn = token.SEMICOLON
			case '(':
				tkn = token.LEFT_PARENTHESIS
			case ')':
				tkn = token.RIGHT_PARENTHESIS
				insertSemicolon = true
			case '[':
				tkn = token.LEFT_BRACKET
			case ']':
				tkn = token.RIGHT_BRACKET
				insertSemicolon = true
			case '{':
				tkn = token.LEFT_BRACE
			case '}':
				tkn = token.RIGHT_BRACE
				insertSemicolon = true
			case '+':
				tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
				if tkn == token.INCREMENT {
					insertSemicolon = true
				}
			case '-':
				tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
				if tkn == token.DECREMENT {
					insertSemicolon = true
				}
			case '*':
				if self.chr == '*' {
					self.read()
					tkn = self.switch2(token.EXPONENT, token.EXPONENT_ASSIGN)
				} else {
					tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
				}
			case '/':
				if self.chr == '/' {
					self.skipSingleLineComment()
					continue
				} else if self.chr == '*' {
					// A multi-line comment that spans a line terminator is
					// treated like a newline.
					if self.skipMultiLineComment() {
						self.insertSemicolon = false
						self.implicitSemicolon = true
					}
					continue
				} else {
					// Could be division, could be RegExp literal
					tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
					insertSemicolon = true
				}
			case '%':
				tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
			case '^':
				tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
			case '<':
				tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
			case '>':
				tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
			case '=':
				if self.chr == '>' {
					self.read()
					// "=>" is rejected when a line terminator was passed
					// earlier in this call (implicitSemicolon is set).
					if self.implicitSemicolon {
						tkn = token.ILLEGAL
					} else {
						tkn = token.ARROW
					}
				} else {
					tkn = self.switch2(token.ASSIGN, token.EQUAL)
					if tkn == token.EQUAL && self.chr == '=' {
						self.read()
						tkn = token.STRICT_EQUAL
					}
				}
			case '!':
				tkn = self.switch2(token.NOT, token.NOT_EQUAL)
				if tkn == token.NOT_EQUAL && self.chr == '=' {
					self.read()
					tkn = token.STRICT_NOT_EQUAL
				}
			case '&':
				tkn = self.switch4(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND, token.LOGICAL_AND_ASSIGN)
			case '|':
				tkn = self.switch4(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR, token.LOGICAL_OR_ASSIGN)
			case '~':
				tkn = token.BITWISE_NOT
			case '?':
				// "?." is only taken as one token when the character after
				// the dot is not a decimal digit.
				if self.chr == '.' && !isDecimalDigit(self._peek()) {
					self.read()
					tkn = token.QUESTION_DOT
				} else if self.chr == '?' {
					self.read()
					if self.chr == '=' {
						self.read()
						tkn = token.COALESCE_ASSIGN
					} else {
						tkn = token.COALESCE
					}
				} else {
					tkn = token.QUESTION_MARK
				}
			case '"', '\'':
				insertSemicolon = true
				tkn = token.STRING
				var err string
				// chrOffset-1 is the offset of the opening quote.
				literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
				if err != "" {
					tkn = token.ILLEGAL
				}
			case '`':
				tkn = token.BACKTICK
			case '#':
				// "#!" with the '#' at offset 0 is skipped like a single-line
				// comment.
				if self.chrOffset == 1 && self.chr == '!' {
					self.skipSingleLineComment()
					continue
				}
				var err string
				literal, parsedLiteral, _, err = self.scanIdentifier()
				if err != "" || literal == "" {
					tkn = token.ILLEGAL
					break
				}
				self.insertSemicolon = true
				tkn = token.PRIVATE_IDENTIFIER
				return
			default:
				self.errorUnexpected(idx, chr)
				tkn = token.ILLEGAL
			}
		}
		self.insertSemicolon = insertSemicolon
		return
	}
}
  435. func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
  436. if self.chr == '=' {
  437. self.read()
  438. return tkn1
  439. }
  440. return tkn0
  441. }
  442. func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  443. if self.chr == '=' {
  444. self.read()
  445. return tkn1
  446. }
  447. if self.chr == chr2 {
  448. self.read()
  449. return tkn2
  450. }
  451. return tkn0
  452. }
  453. func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  454. if self.chr == '=' {
  455. self.read()
  456. return tkn1
  457. }
  458. if self.chr == chr2 {
  459. self.read()
  460. if self.chr == '=' {
  461. self.read()
  462. return tkn3
  463. }
  464. return tkn2
  465. }
  466. return tkn0
  467. }
  468. func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  469. if self.chr == '=' {
  470. self.read()
  471. return tkn1
  472. }
  473. if self.chr == chr2 {
  474. self.read()
  475. if self.chr == '=' {
  476. self.read()
  477. return tkn3
  478. }
  479. if self.chr == chr3 {
  480. self.read()
  481. if self.chr == '=' {
  482. self.read()
  483. return tkn5
  484. }
  485. return tkn4
  486. }
  487. return tkn2
  488. }
  489. return tkn0
  490. }
  491. func (self *_parser) _peek() rune {
  492. if self.offset < self.length {
  493. return rune(self.str[self.offset])
  494. }
  495. return -1
  496. }
  497. func (self *_parser) read() {
  498. if self.offset < self.length {
  499. self.chrOffset = self.offset
  500. chr, width := rune(self.str[self.offset]), 1
  501. if chr >= utf8.RuneSelf { // !ASCII
  502. chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
  503. if chr == utf8.RuneError && width == 1 {
  504. self.error(self.chrOffset, "Invalid UTF-8 character")
  505. }
  506. }
  507. self.offset += width
  508. self.chr = chr
  509. } else {
  510. self.chrOffset = self.length
  511. self.chr = -1 // EOF
  512. }
  513. }
  514. func (self *_parser) skipSingleLineComment() {
  515. for self.chr != -1 {
  516. self.read()
  517. if isLineTerminator(self.chr) {
  518. return
  519. }
  520. }
  521. }
  522. func (self *_parser) skipMultiLineComment() (hasLineTerminator bool) {
  523. self.read()
  524. for self.chr >= 0 {
  525. chr := self.chr
  526. if chr == '\r' || chr == '\n' || chr == '\u2028' || chr == '\u2029' {
  527. hasLineTerminator = true
  528. break
  529. }
  530. self.read()
  531. if chr == '*' && self.chr == '/' {
  532. self.read()
  533. return
  534. }
  535. }
  536. for self.chr >= 0 {
  537. chr := self.chr
  538. self.read()
  539. if chr == '*' && self.chr == '/' {
  540. self.read()
  541. return
  542. }
  543. }
  544. self.errorUnexpected(0, self.chr)
  545. return
  546. }
  547. func (self *_parser) skipWhiteSpaceCheckLineTerminator() bool {
  548. for {
  549. switch self.chr {
  550. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  551. self.read()
  552. continue
  553. case '\r':
  554. if self._peek() == '\n' {
  555. self.read()
  556. }
  557. fallthrough
  558. case '\u2028', '\u2029', '\n':
  559. return true
  560. }
  561. if self.chr >= utf8.RuneSelf {
  562. if unicode.IsSpace(self.chr) {
  563. self.read()
  564. continue
  565. }
  566. }
  567. break
  568. }
  569. return false
  570. }
  571. func (self *_parser) skipWhiteSpace() {
  572. for {
  573. switch self.chr {
  574. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  575. self.read()
  576. continue
  577. case '\r':
  578. if self._peek() == '\n' {
  579. self.read()
  580. }
  581. fallthrough
  582. case '\u2028', '\u2029', '\n':
  583. if self.insertSemicolon {
  584. return
  585. }
  586. self.read()
  587. continue
  588. }
  589. if self.chr >= utf8.RuneSelf {
  590. if unicode.IsSpace(self.chr) {
  591. self.read()
  592. continue
  593. }
  594. }
  595. break
  596. }
  597. }
  598. func (self *_parser) scanMantissa(base int, allowSeparator bool) {
  599. for digitValue(self.chr) < base || (allowSeparator && self.chr == '_') {
  600. afterUnderscore := self.chr == '_'
  601. self.read()
  602. if afterUnderscore && !isDigit(self.chr, base) {
  603. self.error(self.chrOffset, "Only one underscore is allowed as numeric separator")
  604. }
  605. }
  606. }
  607. func (self *_parser) scanEscape(quote rune) (int, bool) {
  608. var length, base uint32
  609. chr := self.chr
  610. switch chr {
  611. case '0', '1', '2', '3', '4', '5', '6', '7':
  612. // Octal:
  613. length, base = 3, 8
  614. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
  615. self.read()
  616. return 1, false
  617. case '\r':
  618. self.read()
  619. if self.chr == '\n' {
  620. self.read()
  621. return 2, false
  622. }
  623. return 1, false
  624. case '\n':
  625. self.read()
  626. return 1, false
  627. case '\u2028', '\u2029':
  628. self.read()
  629. return 1, true
  630. case 'x':
  631. self.read()
  632. length, base = 2, 16
  633. case 'u':
  634. self.read()
  635. if self.chr == '{' {
  636. self.read()
  637. length, base = 0, 16
  638. } else {
  639. length, base = 4, 16
  640. }
  641. default:
  642. self.read() // Always make progress
  643. }
  644. if base > 0 {
  645. var value uint32
  646. if length > 0 {
  647. for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
  648. digit := uint32(digitValue(self.chr))
  649. if digit >= base {
  650. break
  651. }
  652. value = value*base + digit
  653. self.read()
  654. }
  655. } else {
  656. for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
  657. if self.chr == '}' {
  658. self.read()
  659. break
  660. }
  661. digit := uint32(digitValue(self.chr))
  662. if digit >= base {
  663. break
  664. }
  665. value = value*base + digit
  666. self.read()
  667. }
  668. }
  669. chr = rune(value)
  670. }
  671. if chr >= utf8.RuneSelf {
  672. if chr > 0xFFFF {
  673. return 2, true
  674. }
  675. return 1, true
  676. }
  677. return 1, false
  678. }
  679. func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) {
  680. // " ' /
  681. quote := rune(self.str[offset])
  682. length := 0
  683. isUnicode := false
  684. for self.chr != quote {
  685. chr := self.chr
  686. if chr == '\n' || chr == '\r' || chr < 0 {
  687. goto newline
  688. }
  689. if quote == '/' && (self.chr == '\u2028' || self.chr == '\u2029') {
  690. goto newline
  691. }
  692. self.read()
  693. if chr == '\\' {
  694. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  695. if quote == '/' {
  696. goto newline
  697. }
  698. self.scanNewline()
  699. } else {
  700. l, u := self.scanEscape(quote)
  701. length += l
  702. if u {
  703. isUnicode = true
  704. }
  705. }
  706. continue
  707. } else if chr == '[' && quote == '/' {
  708. // Allow a slash (/) in a bracket character class ([...])
  709. // TODO Fix this, this is hacky...
  710. quote = -1
  711. } else if chr == ']' && quote == -1 {
  712. quote = '/'
  713. }
  714. if chr >= utf8.RuneSelf {
  715. isUnicode = true
  716. if chr > 0xFFFF {
  717. length++
  718. }
  719. }
  720. length++
  721. }
  722. // " ' /
  723. self.read()
  724. literal = self.str[offset:self.chrOffset]
  725. if parse {
  726. // TODO strict
  727. parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
  728. }
  729. return
  730. newline:
  731. self.scanNewline()
  732. errStr := "String not terminated"
  733. if quote == '/' {
  734. errStr = "Invalid regular expression: missing /"
  735. self.error(self.idxOf(offset), errStr)
  736. }
  737. return "", "", errStr
  738. }
  739. func (self *_parser) scanNewline() {
  740. if self.chr == '\u2028' || self.chr == '\u2029' {
  741. self.read()
  742. return
  743. }
  744. if self.chr == '\r' {
  745. self.read()
  746. if self.chr != '\n' {
  747. return
  748. }
  749. }
  750. self.read()
  751. }
  752. func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) {
  753. offset := self.chrOffset
  754. var end int
  755. length := 0
  756. isUnicode := false
  757. hasCR := false
  758. for {
  759. chr := self.chr
  760. if chr < 0 {
  761. goto unterminated
  762. }
  763. self.read()
  764. if chr == '`' {
  765. finished = true
  766. end = self.chrOffset - 1
  767. break
  768. }
  769. if chr == '\\' {
  770. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  771. if self.chr == '\r' {
  772. hasCR = true
  773. }
  774. self.scanNewline()
  775. } else {
  776. if self.chr == '8' || self.chr == '9' {
  777. if parseErr == "" {
  778. parseErr = "\\8 and \\9 are not allowed in template strings."
  779. }
  780. }
  781. l, u := self.scanEscape('`')
  782. length += l
  783. if u {
  784. isUnicode = true
  785. }
  786. }
  787. continue
  788. }
  789. if chr == '$' && self.chr == '{' {
  790. self.read()
  791. end = self.chrOffset - 2
  792. break
  793. }
  794. if chr >= utf8.RuneSelf {
  795. isUnicode = true
  796. if chr > 0xFFFF {
  797. length++
  798. }
  799. } else if chr == '\r' {
  800. hasCR = true
  801. if self.chr == '\n' {
  802. length--
  803. }
  804. }
  805. length++
  806. }
  807. literal = self.str[offset:end]
  808. if hasCR {
  809. literal = normaliseCRLF(literal)
  810. }
  811. if parseErr == "" {
  812. parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
  813. }
  814. self.insertSemicolon = true
  815. return
  816. unterminated:
  817. err = err_UnexpectedEndOfInput
  818. finished = true
  819. return
  820. }
  821. func normaliseCRLF(s string) string {
  822. var buf strings.Builder
  823. buf.Grow(len(s))
  824. for i := 0; i < len(s); i++ {
  825. if s[i] == '\r' {
  826. buf.WriteByte('\n')
  827. if i < len(s)-1 && s[i+1] == '\n' {
  828. i++
  829. }
  830. } else {
  831. buf.WriteByte(s[i])
  832. }
  833. }
  834. return buf.String()
  835. }
  836. func hex2decimal(chr byte) (value rune, ok bool) {
  837. {
  838. chr := rune(chr)
  839. switch {
  840. case '0' <= chr && chr <= '9':
  841. return chr - '0', true
  842. case 'a' <= chr && chr <= 'f':
  843. return chr - 'a' + 10, true
  844. case 'A' <= chr && chr <= 'F':
  845. return chr - 'A' + 10, true
  846. }
  847. return
  848. }
  849. }
  850. func parseNumberLiteral(literal string) (value interface{}, err error) {
  851. // TODO Is Uint okay? What about -MAX_UINT
  852. value, err = strconv.ParseInt(literal, 0, 64)
  853. if err == nil {
  854. return
  855. }
  856. parseIntErr := err // Save this first error, just in case
  857. value, err = strconv.ParseFloat(literal, 64)
  858. if err == nil {
  859. return
  860. } else if err.(*strconv.NumError).Err == strconv.ErrRange {
  861. // Infinity, etc.
  862. return value, nil
  863. }
  864. err = parseIntErr
  865. if err.(*strconv.NumError).Err == strconv.ErrRange {
  866. if len(literal) > 2 &&
  867. literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') &&
  868. literal[len(literal)-1] != 'n' {
  869. // Could just be a very large number (e.g. 0x8000000000000000)
  870. var value float64
  871. literal = literal[2:]
  872. for _, chr := range literal {
  873. digit := digitValue(chr)
  874. if digit >= 16 {
  875. goto error
  876. }
  877. value = value*16 + float64(digit)
  878. }
  879. return value, nil
  880. }
  881. }
  882. if len(literal) > 1 && literal[len(literal)-1] == 'n' {
  883. if literal[0] == '0' {
  884. if len(literal) > 2 && isDecimalDigit(rune(literal[1])) {
  885. goto error
  886. }
  887. }
  888. // Parse as big.Int
  889. bigInt := new(big.Int)
  890. _, ok := bigInt.SetString(literal[:len(literal)-1], 0)
  891. if !ok {
  892. goto error
  893. }
  894. return bigInt, nil
  895. }
  896. error:
  897. return nil, errors.New("Illegal numeric literal")
  898. }
  899. func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) {
  900. var sb strings.Builder
  901. var chars []uint16
  902. if unicode {
  903. chars = make([]uint16, 1, length+1)
  904. chars[0] = unistring.BOM
  905. } else {
  906. sb.Grow(length)
  907. }
  908. str := literal
  909. for len(str) > 0 {
  910. switch chr := str[0]; {
  911. // We do not explicitly handle the case of the quote
  912. // value, which can be: " ' /
  913. // This assumes we're already passed a partially well-formed literal
  914. case chr >= utf8.RuneSelf:
  915. chr, size := utf8.DecodeRuneInString(str)
  916. if chr <= 0xFFFF {
  917. chars = append(chars, uint16(chr))
  918. } else {
  919. first, second := utf16.EncodeRune(chr)
  920. chars = append(chars, uint16(first), uint16(second))
  921. }
  922. str = str[size:]
  923. continue
  924. case chr != '\\':
  925. if unicode {
  926. chars = append(chars, uint16(chr))
  927. } else {
  928. sb.WriteByte(chr)
  929. }
  930. str = str[1:]
  931. continue
  932. }
  933. if len(str) <= 1 {
  934. panic("len(str) <= 1")
  935. }
  936. chr := str[1]
  937. var value rune
  938. if chr >= utf8.RuneSelf {
  939. str = str[1:]
  940. var size int
  941. value, size = utf8.DecodeRuneInString(str)
  942. str = str[size:] // \ + <character>
  943. if value == '\u2028' || value == '\u2029' {
  944. continue
  945. }
  946. } else {
  947. str = str[2:] // \<character>
  948. switch chr {
  949. case 'b':
  950. value = '\b'
  951. case 'f':
  952. value = '\f'
  953. case 'n':
  954. value = '\n'
  955. case 'r':
  956. value = '\r'
  957. case 't':
  958. value = '\t'
  959. case 'v':
  960. value = '\v'
  961. case 'x', 'u':
  962. size := 0
  963. switch chr {
  964. case 'x':
  965. size = 2
  966. case 'u':
  967. if str == "" || str[0] != '{' {
  968. size = 4
  969. }
  970. }
  971. if size > 0 {
  972. if len(str) < size {
  973. return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
  974. }
  975. for j := 0; j < size; j++ {
  976. decimal, ok := hex2decimal(str[j])
  977. if !ok {
  978. return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size])
  979. }
  980. value = value<<4 | decimal
  981. }
  982. } else {
  983. str = str[1:]
  984. var val rune
  985. value = -1
  986. for ; size < len(str); size++ {
  987. if str[size] == '}' {
  988. if size == 0 {
  989. return "", fmt.Sprintf("invalid escape: \\%s", string(chr))
  990. }
  991. size++
  992. value = val
  993. break
  994. }
  995. decimal, ok := hex2decimal(str[size])
  996. if !ok {
  997. return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1])
  998. }
  999. val = val<<4 | decimal
  1000. if val > utf8.MaxRune {
  1001. return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1])
  1002. }
  1003. }
  1004. if value == -1 {
  1005. return "", fmt.Sprintf("unterminated \\u{: %q", str)
  1006. }
  1007. }
  1008. str = str[size:]
  1009. if chr == 'x' {
  1010. break
  1011. }
  1012. if value > utf8.MaxRune {
  1013. panic("value > utf8.MaxRune")
  1014. }
  1015. case '0':
  1016. if len(str) == 0 || '0' > str[0] || str[0] > '7' {
  1017. value = 0
  1018. break
  1019. }
  1020. fallthrough
  1021. case '1', '2', '3', '4', '5', '6', '7':
  1022. if strict {
  1023. return "", "Octal escape sequences are not allowed in this context"
  1024. }
  1025. value = rune(chr) - '0'
  1026. j := 0
  1027. for ; j < 2; j++ {
  1028. if len(str) < j+1 {
  1029. break
  1030. }
  1031. chr := str[j]
  1032. if '0' > chr || chr > '7' {
  1033. break
  1034. }
  1035. decimal := rune(str[j]) - '0'
  1036. value = (value << 3) | decimal
  1037. }
  1038. str = str[j:]
  1039. case '\\':
  1040. value = '\\'
  1041. case '\'', '"':
  1042. value = rune(chr)
  1043. case '\r':
  1044. if len(str) > 0 {
  1045. if str[0] == '\n' {
  1046. str = str[1:]
  1047. }
  1048. }
  1049. fallthrough
  1050. case '\n':
  1051. continue
  1052. default:
  1053. value = rune(chr)
  1054. }
  1055. }
  1056. if unicode {
  1057. if value <= 0xFFFF {
  1058. chars = append(chars, uint16(value))
  1059. } else {
  1060. first, second := utf16.EncodeRune(value)
  1061. chars = append(chars, uint16(first), uint16(second))
  1062. }
  1063. } else {
  1064. if value >= utf8.RuneSelf {
  1065. return "", "Unexpected unicode character"
  1066. }
  1067. sb.WriteByte(byte(value))
  1068. }
  1069. }
  1070. if unicode {
  1071. if len(chars) != length+1 {
  1072. panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
  1073. }
  1074. return unistring.FromUtf16(chars), ""
  1075. }
  1076. if sb.Len() != length {
  1077. panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
  1078. }
  1079. return unistring.String(sb.String()), ""
  1080. }
  1081. func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
  1082. offset := self.chrOffset
  1083. tkn := token.NUMBER
  1084. if decimalPoint {
  1085. offset--
  1086. self.scanMantissa(10, true)
  1087. } else {
  1088. if self.chr == '0' {
  1089. self.read()
  1090. base := 0
  1091. switch self.chr {
  1092. case 'x', 'X':
  1093. base = 16
  1094. case 'o', 'O':
  1095. base = 8
  1096. case 'b', 'B':
  1097. base = 2
  1098. case '.', 'e', 'E':
  1099. // no-op
  1100. default:
  1101. // legacy octal
  1102. self.scanMantissa(8, false)
  1103. goto end
  1104. }
  1105. if base > 0 {
  1106. self.read()
  1107. if !isDigit(self.chr, base) {
  1108. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1109. }
  1110. self.scanMantissa(base, true)
  1111. goto end
  1112. }
  1113. } else {
  1114. self.scanMantissa(10, true)
  1115. }
  1116. if self.chr == '.' {
  1117. self.read()
  1118. self.scanMantissa(10, true)
  1119. }
  1120. }
  1121. if self.chr == 'e' || self.chr == 'E' {
  1122. self.read()
  1123. if self.chr == '-' || self.chr == '+' {
  1124. self.read()
  1125. }
  1126. if isDecimalDigit(self.chr) {
  1127. self.read()
  1128. self.scanMantissa(10, true)
  1129. } else {
  1130. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1131. }
  1132. }
  1133. end:
  1134. if self.chr == 'n' || self.chr == 'N' {
  1135. self.read()
  1136. return tkn, self.str[offset:self.chrOffset]
  1137. }
  1138. if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
  1139. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1140. }
  1141. return tkn, self.str[offset:self.chrOffset]
  1142. }