lexer.go 24 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111
  1. package parser
  2. import (
  3. "errors"
  4. "fmt"
  5. "regexp"
  6. "strconv"
  7. "strings"
  8. "unicode"
  9. "unicode/utf16"
  10. "unicode/utf8"
  11. "github.com/dop251/goja/file"
  12. "github.com/dop251/goja/token"
  13. "github.com/dop251/goja/unistring"
  14. )
  15. var matchIdentifier = regexp.MustCompile(`^[$_\p{L}][$_\p{L}\d}]*$`)
  16. func isDecimalDigit(chr rune) bool {
  17. return '0' <= chr && chr <= '9'
  18. }
  19. func IsIdentifier(s string) bool {
  20. return matchIdentifier.MatchString(s)
  21. }
  22. func digitValue(chr rune) int {
  23. switch {
  24. case '0' <= chr && chr <= '9':
  25. return int(chr - '0')
  26. case 'a' <= chr && chr <= 'f':
  27. return int(chr - 'a' + 10)
  28. case 'A' <= chr && chr <= 'F':
  29. return int(chr - 'A' + 10)
  30. }
  31. return 16 // Larger than any legal digit value
  32. }
  33. func isDigit(chr rune, base int) bool {
  34. return digitValue(chr) < base
  35. }
  36. func isIdentifierStart(chr rune) bool {
  37. return chr == '$' || chr == '_' || chr == '\\' ||
  38. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  39. chr >= utf8.RuneSelf && unicode.IsLetter(chr)
  40. }
  41. func isIdentifierPart(chr rune) bool {
  42. return chr == '$' || chr == '_' || chr == '\\' ||
  43. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  44. '0' <= chr && chr <= '9' ||
  45. chr >= utf8.RuneSelf && (unicode.IsLetter(chr) || unicode.IsDigit(chr))
  46. }
  47. func (self *_parser) scanIdentifier() (string, unistring.String, bool, error) {
  48. offset := self.chrOffset
  49. hasEscape := false
  50. isUnicode := false
  51. length := 0
  52. for isIdentifierPart(self.chr) {
  53. r := self.chr
  54. length++
  55. if r == '\\' {
  56. hasEscape = true
  57. distance := self.chrOffset - offset
  58. self.read()
  59. if self.chr != 'u' {
  60. return "", "", false, fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  61. }
  62. var value rune
  63. for j := 0; j < 4; j++ {
  64. self.read()
  65. decimal, ok := hex2decimal(byte(self.chr))
  66. if !ok {
  67. return "", "", false, fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  68. }
  69. value = value<<4 | decimal
  70. }
  71. if value == '\\' {
  72. return "", "", false, fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
  73. } else if distance == 0 {
  74. if !isIdentifierStart(value) {
  75. return "", "", false, fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
  76. }
  77. } else if distance > 0 {
  78. if !isIdentifierPart(value) {
  79. return "", "", false, fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
  80. }
  81. }
  82. r = value
  83. }
  84. if r >= utf8.RuneSelf {
  85. isUnicode = true
  86. if r > 0xFFFF {
  87. length++
  88. }
  89. }
  90. self.read()
  91. }
  92. literal := self.str[offset:self.chrOffset]
  93. var parsed unistring.String
  94. if hasEscape || isUnicode {
  95. var err error
  96. // TODO strict
  97. parsed, err = parseStringLiteral(literal, length, isUnicode, false)
  98. if err != nil {
  99. return "", "", false, err
  100. }
  101. } else {
  102. parsed = unistring.String(literal)
  103. }
  104. return literal, parsed, hasEscape, nil
  105. }
  106. // 7.2
  107. func isLineWhiteSpace(chr rune) bool {
  108. switch chr {
  109. case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
  110. return true
  111. case '\u000a', '\u000d', '\u2028', '\u2029':
  112. return false
  113. case '\u0085':
  114. return false
  115. }
  116. return unicode.IsSpace(chr)
  117. }
  118. // 7.3
  119. func isLineTerminator(chr rune) bool {
  120. switch chr {
  121. case '\u000a', '\u000d', '\u2028', '\u2029':
  122. return true
  123. }
  124. return false
  125. }
  126. func isId(tkn token.Token) bool {
  127. switch tkn {
  128. case token.KEYWORD,
  129. token.BOOLEAN,
  130. token.NULL,
  131. token.THIS,
  132. token.IF,
  133. token.IN,
  134. token.OF,
  135. token.DO,
  136. token.VAR,
  137. token.LET,
  138. token.FOR,
  139. token.NEW,
  140. token.TRY,
  141. token.ELSE,
  142. token.CASE,
  143. token.VOID,
  144. token.WITH,
  145. token.CONST,
  146. token.WHILE,
  147. token.BREAK,
  148. token.CATCH,
  149. token.THROW,
  150. token.RETURN,
  151. token.TYPEOF,
  152. token.DELETE,
  153. token.SWITCH,
  154. token.DEFAULT,
  155. token.FINALLY,
  156. token.FUNCTION,
  157. token.CONTINUE,
  158. token.DEBUGGER,
  159. token.INSTANCEOF:
  160. return true
  161. }
  162. return false
  163. }
  164. type parserState struct {
  165. tok token.Token
  166. literal string
  167. parsedLiteral unistring.String
  168. implicitSemicolon, insertSemicolon bool
  169. chr rune
  170. chrOffset, offset int
  171. errorCount int
  172. }
  173. func (self *_parser) mark(state *parserState) *parserState {
  174. if state == nil {
  175. state = &parserState{}
  176. }
  177. state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset =
  178. self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  179. state.errorCount = len(self.errors)
  180. return state
  181. }
  182. func (self *_parser) restore(state *parserState) {
  183. self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset =
  184. state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset
  185. self.errors = self.errors[:state.errorCount]
  186. }
  187. func (self *_parser) peek() token.Token {
  188. implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  189. tok, _, _, _ := self.scan()
  190. self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset
  191. return tok
  192. }
  193. func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {
  194. self.implicitSemicolon = false
  195. for {
  196. self.skipWhiteSpace()
  197. idx = self.idxOf(self.chrOffset)
  198. insertSemicolon := false
  199. switch chr := self.chr; {
  200. case isIdentifierStart(chr):
  201. var err error
  202. var hasEscape bool
  203. literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
  204. if err != nil {
  205. tkn = token.ILLEGAL
  206. break
  207. }
  208. if len(parsedLiteral) > 1 {
  209. // Keywords are longer than 1 character, avoid lookup otherwise
  210. var strict bool
  211. tkn, strict = token.IsKeyword(string(parsedLiteral))
  212. switch tkn {
  213. case 0: // Not a keyword
  214. if parsedLiteral == "true" || parsedLiteral == "false" {
  215. if hasEscape {
  216. tkn = token.STRING
  217. return
  218. }
  219. self.insertSemicolon = true
  220. tkn = token.BOOLEAN
  221. return
  222. } else if parsedLiteral == "null" {
  223. if hasEscape {
  224. tkn = token.STRING
  225. return
  226. }
  227. self.insertSemicolon = true
  228. tkn = token.NULL
  229. return
  230. }
  231. case token.KEYWORD:
  232. if hasEscape {
  233. tkn = token.STRING
  234. return
  235. }
  236. tkn = token.KEYWORD
  237. if strict {
  238. // TODO If strict and in strict mode, then this is not a break
  239. break
  240. }
  241. return
  242. case
  243. token.THIS,
  244. token.BREAK,
  245. token.THROW, // A newline after a throw is not allowed, but we need to detect it
  246. token.RETURN,
  247. token.CONTINUE,
  248. token.DEBUGGER:
  249. if hasEscape {
  250. tkn = token.STRING
  251. return
  252. }
  253. self.insertSemicolon = true
  254. return
  255. default:
  256. if hasEscape {
  257. tkn = token.STRING
  258. }
  259. return
  260. }
  261. }
  262. self.insertSemicolon = true
  263. tkn = token.IDENTIFIER
  264. return
  265. case '0' <= chr && chr <= '9':
  266. self.insertSemicolon = true
  267. tkn, literal = self.scanNumericLiteral(false)
  268. return
  269. default:
  270. self.read()
  271. switch chr {
  272. case -1:
  273. if self.insertSemicolon {
  274. self.insertSemicolon = false
  275. self.implicitSemicolon = true
  276. }
  277. tkn = token.EOF
  278. case '\r', '\n', '\u2028', '\u2029':
  279. self.insertSemicolon = false
  280. self.implicitSemicolon = true
  281. continue
  282. case ':':
  283. tkn = token.COLON
  284. case '.':
  285. if digitValue(self.chr) < 10 {
  286. insertSemicolon = true
  287. tkn, literal = self.scanNumericLiteral(true)
  288. } else {
  289. if self.chr == '.' {
  290. self.read()
  291. if self.chr == '.' {
  292. self.read()
  293. tkn = token.ELLIPSIS
  294. } else {
  295. tkn = token.ILLEGAL
  296. }
  297. } else {
  298. tkn = token.PERIOD
  299. }
  300. }
  301. case ',':
  302. tkn = token.COMMA
  303. case ';':
  304. tkn = token.SEMICOLON
  305. case '(':
  306. tkn = token.LEFT_PARENTHESIS
  307. case ')':
  308. tkn = token.RIGHT_PARENTHESIS
  309. insertSemicolon = true
  310. case '[':
  311. tkn = token.LEFT_BRACKET
  312. case ']':
  313. tkn = token.RIGHT_BRACKET
  314. insertSemicolon = true
  315. case '{':
  316. tkn = token.LEFT_BRACE
  317. case '}':
  318. tkn = token.RIGHT_BRACE
  319. insertSemicolon = true
  320. case '+':
  321. tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
  322. if tkn == token.INCREMENT {
  323. insertSemicolon = true
  324. }
  325. case '-':
  326. tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
  327. if tkn == token.DECREMENT {
  328. insertSemicolon = true
  329. }
  330. case '*':
  331. tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
  332. case '/':
  333. if self.chr == '/' {
  334. self.skipSingleLineComment()
  335. continue
  336. } else if self.chr == '*' {
  337. self.skipMultiLineComment()
  338. continue
  339. } else {
  340. // Could be division, could be RegExp literal
  341. tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
  342. insertSemicolon = true
  343. }
  344. case '%':
  345. tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
  346. case '^':
  347. tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
  348. case '<':
  349. tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
  350. case '>':
  351. tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
  352. case '=':
  353. if self.chr == '>' {
  354. self.read()
  355. if self.implicitSemicolon {
  356. tkn = token.ILLEGAL
  357. } else {
  358. tkn = token.ARROW
  359. }
  360. } else {
  361. tkn = self.switch2(token.ASSIGN, token.EQUAL)
  362. if tkn == token.EQUAL && self.chr == '=' {
  363. self.read()
  364. tkn = token.STRICT_EQUAL
  365. }
  366. }
  367. case '!':
  368. tkn = self.switch2(token.NOT, token.NOT_EQUAL)
  369. if tkn == token.NOT_EQUAL && self.chr == '=' {
  370. self.read()
  371. tkn = token.STRICT_NOT_EQUAL
  372. }
  373. case '&':
  374. tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
  375. case '|':
  376. tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
  377. case '~':
  378. tkn = token.BITWISE_NOT
  379. case '?':
  380. tkn = token.QUESTION_MARK
  381. case '"', '\'':
  382. insertSemicolon = true
  383. tkn = token.STRING
  384. var err error
  385. literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
  386. if err != nil {
  387. tkn = token.ILLEGAL
  388. }
  389. case '`':
  390. tkn = token.BACKTICK
  391. default:
  392. self.errorUnexpected(idx, chr)
  393. tkn = token.ILLEGAL
  394. }
  395. }
  396. self.insertSemicolon = insertSemicolon
  397. return
  398. }
  399. }
  400. func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
  401. if self.chr == '=' {
  402. self.read()
  403. return tkn1
  404. }
  405. return tkn0
  406. }
  407. func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  408. if self.chr == '=' {
  409. self.read()
  410. return tkn1
  411. }
  412. if self.chr == chr2 {
  413. self.read()
  414. return tkn2
  415. }
  416. return tkn0
  417. }
  418. func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  419. if self.chr == '=' {
  420. self.read()
  421. return tkn1
  422. }
  423. if self.chr == chr2 {
  424. self.read()
  425. if self.chr == '=' {
  426. self.read()
  427. return tkn3
  428. }
  429. return tkn2
  430. }
  431. return tkn0
  432. }
  433. func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  434. if self.chr == '=' {
  435. self.read()
  436. return tkn1
  437. }
  438. if self.chr == chr2 {
  439. self.read()
  440. if self.chr == '=' {
  441. self.read()
  442. return tkn3
  443. }
  444. if self.chr == chr3 {
  445. self.read()
  446. if self.chr == '=' {
  447. self.read()
  448. return tkn5
  449. }
  450. return tkn4
  451. }
  452. return tkn2
  453. }
  454. return tkn0
  455. }
  456. func (self *_parser) _peek() rune {
  457. if self.offset < self.length {
  458. return rune(self.str[self.offset])
  459. }
  460. return -1
  461. }
  462. func (self *_parser) read() {
  463. if self.offset < self.length {
  464. self.chrOffset = self.offset
  465. chr, width := rune(self.str[self.offset]), 1
  466. if chr >= utf8.RuneSelf { // !ASCII
  467. chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
  468. if chr == utf8.RuneError && width == 1 {
  469. self.error(self.chrOffset, "Invalid UTF-8 character")
  470. }
  471. }
  472. self.offset += width
  473. self.chr = chr
  474. } else {
  475. self.chrOffset = self.length
  476. self.chr = -1 // EOF
  477. }
  478. }
  479. func (self *_parser) skipSingleLineComment() {
  480. for self.chr != -1 {
  481. self.read()
  482. if isLineTerminator(self.chr) {
  483. return
  484. }
  485. }
  486. }
  487. func (self *_parser) skipMultiLineComment() {
  488. self.read()
  489. for self.chr >= 0 {
  490. chr := self.chr
  491. self.read()
  492. if chr == '*' && self.chr == '/' {
  493. self.read()
  494. return
  495. }
  496. }
  497. self.errorUnexpected(0, self.chr)
  498. }
  499. func (self *_parser) skipWhiteSpace() {
  500. for {
  501. switch self.chr {
  502. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  503. self.read()
  504. continue
  505. case '\r':
  506. if self._peek() == '\n' {
  507. self.read()
  508. }
  509. fallthrough
  510. case '\u2028', '\u2029', '\n':
  511. if self.insertSemicolon {
  512. return
  513. }
  514. self.read()
  515. continue
  516. }
  517. if self.chr >= utf8.RuneSelf {
  518. if unicode.IsSpace(self.chr) {
  519. self.read()
  520. continue
  521. }
  522. }
  523. break
  524. }
  525. }
  526. func (self *_parser) skipLineWhiteSpace() {
  527. for isLineWhiteSpace(self.chr) {
  528. self.read()
  529. }
  530. }
  531. func (self *_parser) scanMantissa(base int) {
  532. for digitValue(self.chr) < base {
  533. self.read()
  534. }
  535. }
  536. func (self *_parser) scanEscape(quote rune) (int, bool) {
  537. var length, base uint32
  538. chr := self.chr
  539. switch chr {
  540. case '0', '1', '2', '3', '4', '5', '6', '7':
  541. // Octal:
  542. length, base = 3, 8
  543. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
  544. self.read()
  545. return 1, false
  546. case '\r':
  547. self.read()
  548. if self.chr == '\n' {
  549. self.read()
  550. return 2, false
  551. }
  552. return 1, false
  553. case '\n':
  554. self.read()
  555. return 1, false
  556. case '\u2028', '\u2029':
  557. self.read()
  558. return 1, true
  559. case 'x':
  560. self.read()
  561. length, base = 2, 16
  562. case 'u':
  563. self.read()
  564. if self.chr == '{' {
  565. self.read()
  566. length, base = 0, 16
  567. } else {
  568. length, base = 4, 16
  569. }
  570. default:
  571. self.read() // Always make progress
  572. }
  573. if base > 0 {
  574. var value uint32
  575. if length > 0 {
  576. for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
  577. digit := uint32(digitValue(self.chr))
  578. if digit >= base {
  579. break
  580. }
  581. value = value*base + digit
  582. self.read()
  583. }
  584. } else {
  585. for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
  586. if self.chr == '}' {
  587. self.read()
  588. break
  589. }
  590. digit := uint32(digitValue(self.chr))
  591. if digit >= base {
  592. break
  593. }
  594. value = value*base + digit
  595. self.read()
  596. }
  597. }
  598. chr = rune(value)
  599. }
  600. if chr >= utf8.RuneSelf {
  601. if chr > 0xFFFF {
  602. return 2, true
  603. }
  604. return 1, true
  605. }
  606. return 1, false
  607. }
  608. func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err error) {
  609. // " ' /
  610. quote := rune(self.str[offset])
  611. length := 0
  612. isUnicode := false
  613. for self.chr != quote {
  614. chr := self.chr
  615. if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
  616. goto newline
  617. }
  618. self.read()
  619. if chr == '\\' {
  620. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  621. if quote == '/' {
  622. goto newline
  623. }
  624. self.scanNewline()
  625. } else {
  626. l, u := self.scanEscape(quote)
  627. length += l
  628. if u {
  629. isUnicode = true
  630. }
  631. }
  632. continue
  633. } else if chr == '[' && quote == '/' {
  634. // Allow a slash (/) in a bracket character class ([...])
  635. // TODO Fix this, this is hacky...
  636. quote = -1
  637. } else if chr == ']' && quote == -1 {
  638. quote = '/'
  639. }
  640. if chr >= utf8.RuneSelf {
  641. isUnicode = true
  642. if chr > 0xFFFF {
  643. length++
  644. }
  645. }
  646. length++
  647. }
  648. // " ' /
  649. self.read()
  650. literal = self.str[offset:self.chrOffset]
  651. if parse {
  652. // TODO strict
  653. parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
  654. }
  655. return
  656. newline:
  657. self.scanNewline()
  658. errStr := "String not terminated"
  659. if quote == '/' {
  660. errStr = "Invalid regular expression: missing /"
  661. self.error(self.idxOf(offset), errStr)
  662. }
  663. return "", "", errors.New(errStr)
  664. }
  665. func (self *_parser) scanNewline() {
  666. if self.chr == '\r' {
  667. self.read()
  668. if self.chr != '\n' {
  669. return
  670. }
  671. }
  672. self.read()
  673. }
  674. func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err error) {
  675. offset := self.chrOffset
  676. var end int
  677. length := 0
  678. isUnicode := false
  679. hasCR := false
  680. for {
  681. chr := self.chr
  682. if chr < 0 {
  683. goto unterminated
  684. }
  685. self.read()
  686. if chr == '`' {
  687. finished = true
  688. end = self.chrOffset - 1
  689. break
  690. }
  691. if chr == '\\' {
  692. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  693. if self.chr == '\r' {
  694. hasCR = true
  695. }
  696. self.scanNewline()
  697. } else {
  698. l, u := self.scanEscape('`')
  699. length += l
  700. if u {
  701. isUnicode = true
  702. }
  703. }
  704. continue
  705. }
  706. if chr == '$' && self.chr == '{' {
  707. self.read()
  708. end = self.chrOffset - 2
  709. break
  710. }
  711. if chr >= utf8.RuneSelf {
  712. isUnicode = true
  713. if chr > 0xFFFF {
  714. length++
  715. }
  716. } else if chr == '\r' {
  717. hasCR = true
  718. if self.chr == '\n' {
  719. length--
  720. }
  721. }
  722. length++
  723. }
  724. literal = self.str[offset:end]
  725. if hasCR {
  726. literal = normaliseCRLF(literal)
  727. }
  728. parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
  729. self.insertSemicolon = true
  730. return
  731. unterminated:
  732. err = errors.New(err_UnexpectedEndOfInput)
  733. return
  734. }
  735. func normaliseCRLF(s string) string {
  736. var buf strings.Builder
  737. buf.Grow(len(s))
  738. for i := 0; i < len(s); i++ {
  739. if s[i] == '\r' {
  740. buf.WriteByte('\n')
  741. if i < len(s)-1 && s[i+1] == '\n' {
  742. i++
  743. }
  744. } else {
  745. buf.WriteByte(s[i])
  746. }
  747. }
  748. return buf.String()
  749. }
  750. func hex2decimal(chr byte) (value rune, ok bool) {
  751. {
  752. chr := rune(chr)
  753. switch {
  754. case '0' <= chr && chr <= '9':
  755. return chr - '0', true
  756. case 'a' <= chr && chr <= 'f':
  757. return chr - 'a' + 10, true
  758. case 'A' <= chr && chr <= 'F':
  759. return chr - 'A' + 10, true
  760. }
  761. return
  762. }
  763. }
  764. func parseNumberLiteral(literal string) (value interface{}, err error) {
  765. // TODO Is Uint okay? What about -MAX_UINT
  766. value, err = strconv.ParseInt(literal, 0, 64)
  767. if err == nil {
  768. return
  769. }
  770. parseIntErr := err // Save this first error, just in case
  771. value, err = strconv.ParseFloat(literal, 64)
  772. if err == nil {
  773. return
  774. } else if err.(*strconv.NumError).Err == strconv.ErrRange {
  775. // Infinity, etc.
  776. return value, nil
  777. }
  778. err = parseIntErr
  779. if err.(*strconv.NumError).Err == strconv.ErrRange {
  780. if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
  781. // Could just be a very large number (e.g. 0x8000000000000000)
  782. var value float64
  783. literal = literal[2:]
  784. for _, chr := range literal {
  785. digit := digitValue(chr)
  786. if digit >= 16 {
  787. goto error
  788. }
  789. value = value*16 + float64(digit)
  790. }
  791. return value, nil
  792. }
  793. }
  794. error:
  795. return nil, errors.New("Illegal numeric literal")
  796. }
  797. func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, error) {
  798. var sb strings.Builder
  799. var chars []uint16
  800. if unicode {
  801. chars = make([]uint16, 1, length+1)
  802. chars[0] = unistring.BOM
  803. } else {
  804. sb.Grow(length)
  805. }
  806. str := literal
  807. for len(str) > 0 {
  808. switch chr := str[0]; {
  809. // We do not explicitly handle the case of the quote
  810. // value, which can be: " ' /
  811. // This assumes we're already passed a partially well-formed literal
  812. case chr >= utf8.RuneSelf:
  813. chr, size := utf8.DecodeRuneInString(str)
  814. if chr <= 0xFFFF {
  815. chars = append(chars, uint16(chr))
  816. } else {
  817. first, second := utf16.EncodeRune(chr)
  818. chars = append(chars, uint16(first), uint16(second))
  819. }
  820. str = str[size:]
  821. continue
  822. case chr != '\\':
  823. if unicode {
  824. chars = append(chars, uint16(chr))
  825. } else {
  826. sb.WriteByte(chr)
  827. }
  828. str = str[1:]
  829. continue
  830. }
  831. if len(str) <= 1 {
  832. panic("len(str) <= 1")
  833. }
  834. chr := str[1]
  835. var value rune
  836. if chr >= utf8.RuneSelf {
  837. str = str[1:]
  838. var size int
  839. value, size = utf8.DecodeRuneInString(str)
  840. str = str[size:] // \ + <character>
  841. if value == '\u2028' || value == '\u2029' {
  842. continue
  843. }
  844. } else {
  845. str = str[2:] // \<character>
  846. switch chr {
  847. case 'b':
  848. value = '\b'
  849. case 'f':
  850. value = '\f'
  851. case 'n':
  852. value = '\n'
  853. case 'r':
  854. value = '\r'
  855. case 't':
  856. value = '\t'
  857. case 'v':
  858. value = '\v'
  859. case 'x', 'u':
  860. size := 0
  861. switch chr {
  862. case 'x':
  863. size = 2
  864. case 'u':
  865. if str == "" || str[0] != '{' {
  866. size = 4
  867. }
  868. }
  869. if size > 0 {
  870. if len(str) < size {
  871. return "", fmt.Errorf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
  872. }
  873. for j := 0; j < size; j++ {
  874. decimal, ok := hex2decimal(str[j])
  875. if !ok {
  876. return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size])
  877. }
  878. value = value<<4 | decimal
  879. }
  880. } else {
  881. str = str[1:]
  882. var val rune
  883. value = -1
  884. for ; size < len(str); size++ {
  885. if str[size] == '}' {
  886. if size == 0 {
  887. return "", fmt.Errorf("invalid escape: \\%s", string(chr))
  888. }
  889. size++
  890. value = val
  891. break
  892. }
  893. decimal, ok := hex2decimal(str[size])
  894. if !ok {
  895. return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size+1])
  896. }
  897. val = val<<4 | decimal
  898. if val > utf8.MaxRune {
  899. return "", fmt.Errorf("undefined Unicode code-point: %q", str[:size+1])
  900. }
  901. }
  902. if value == -1 {
  903. return "", fmt.Errorf("unterminated \\u{: %q", str)
  904. }
  905. }
  906. str = str[size:]
  907. if chr == 'x' {
  908. break
  909. }
  910. if value > utf8.MaxRune {
  911. panic("value > utf8.MaxRune")
  912. }
  913. case '0':
  914. if len(str) == 0 || '0' > str[0] || str[0] > '7' {
  915. value = 0
  916. break
  917. }
  918. fallthrough
  919. case '1', '2', '3', '4', '5', '6', '7':
  920. if strict {
  921. return "", errors.New("Octal escape sequences are not allowed in this context")
  922. }
  923. value = rune(chr) - '0'
  924. j := 0
  925. for ; j < 2; j++ {
  926. if len(str) < j+1 {
  927. break
  928. }
  929. chr := str[j]
  930. if '0' > chr || chr > '7' {
  931. break
  932. }
  933. decimal := rune(str[j]) - '0'
  934. value = (value << 3) | decimal
  935. }
  936. str = str[j:]
  937. case '\\':
  938. value = '\\'
  939. case '\'', '"':
  940. value = rune(chr)
  941. case '\r':
  942. if len(str) > 0 {
  943. if str[0] == '\n' {
  944. str = str[1:]
  945. }
  946. }
  947. fallthrough
  948. case '\n':
  949. continue
  950. default:
  951. value = rune(chr)
  952. }
  953. }
  954. if unicode {
  955. if value <= 0xFFFF {
  956. chars = append(chars, uint16(value))
  957. } else {
  958. first, second := utf16.EncodeRune(value)
  959. chars = append(chars, uint16(first), uint16(second))
  960. }
  961. } else {
  962. if value >= utf8.RuneSelf {
  963. return "", fmt.Errorf("Unexpected unicode character")
  964. }
  965. sb.WriteByte(byte(value))
  966. }
  967. }
  968. if unicode {
  969. if len(chars) != length+1 {
  970. panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
  971. }
  972. return unistring.FromUtf16(chars), nil
  973. }
  974. if sb.Len() != length {
  975. panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
  976. }
  977. return unistring.String(sb.String()), nil
  978. }
  979. func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
  980. offset := self.chrOffset
  981. tkn := token.NUMBER
  982. if decimalPoint {
  983. offset--
  984. self.scanMantissa(10)
  985. } else {
  986. if self.chr == '0' {
  987. self.read()
  988. base := 0
  989. switch self.chr {
  990. case 'x', 'X':
  991. base = 16
  992. case 'o', 'O':
  993. base = 8
  994. case 'b', 'B':
  995. base = 2
  996. case '.', 'e', 'E':
  997. // no-op
  998. default:
  999. // legacy octal
  1000. self.scanMantissa(8)
  1001. goto end
  1002. }
  1003. if base > 0 {
  1004. self.read()
  1005. if !isDigit(self.chr, base) {
  1006. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1007. }
  1008. self.scanMantissa(base)
  1009. goto end
  1010. }
  1011. } else {
  1012. self.scanMantissa(10)
  1013. }
  1014. if self.chr == '.' {
  1015. self.read()
  1016. self.scanMantissa(10)
  1017. }
  1018. }
  1019. if self.chr == 'e' || self.chr == 'E' {
  1020. self.read()
  1021. if self.chr == '-' || self.chr == '+' {
  1022. self.read()
  1023. }
  1024. if isDecimalDigit(self.chr) {
  1025. self.read()
  1026. self.scanMantissa(10)
  1027. } else {
  1028. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1029. }
  1030. }
  1031. end:
  1032. if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
  1033. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1034. }
  1035. return tkn, self.str[offset:self.chrOffset]
  1036. }