lexer.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154
  1. package parser
  2. import (
  3. "errors"
  4. "fmt"
  5. "strconv"
  6. "strings"
  7. "unicode"
  8. "unicode/utf16"
  9. "unicode/utf8"
  10. "golang.org/x/text/unicode/rangetable"
  11. "github.com/dop251/goja/file"
  12. "github.com/dop251/goja/token"
  13. "github.com/dop251/goja/unistring"
  14. )
  // Unicode range tables used to classify non-ASCII identifier characters.
  var (
  	// unicodeRangeIdNeg lists code points that are never part of an
  	// identifier: Pattern_Syntax and Pattern_White_Space.
  	unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space)
  	// unicodeRangeIdStartPos lists candidate identifier start characters:
  	// Letter, Nl and Other_ID_Start.
  	unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start)
  	// unicodeRangeIdContPos lists candidate identifier continuation
  	// characters: everything in unicodeRangeIdStartPos plus Mn, Mc, Nd, Pc
  	// and Other_ID_Continue.
  	unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue)
  )
  // isDecimalDigit reports whether chr is one of the ASCII digits '0' to '9'.
  func isDecimalDigit(chr rune) bool {
  	if chr < '0' {
  		return false
  	}
  	return chr <= '9'
  }
  23. func IsIdentifier(s string) bool {
  24. if s == "" {
  25. return false
  26. }
  27. r, size := utf8.DecodeRuneInString(s)
  28. if !isIdentifierStart(r) {
  29. return false
  30. }
  31. for _, r := range s[size:] {
  32. if !isIdentifierPart(r) {
  33. return false
  34. }
  35. }
  36. return true
  37. }
  // digitValue returns the numeric value of chr interpreted as a digit in
  // bases up to 16 ('0'-'9', 'a'-'f', 'A'-'F'). Any other rune yields 16,
  // which is larger than any legal digit value.
  func digitValue(chr rune) int {
  	if chr >= '0' && chr <= '9' {
  		return int(chr - '0')
  	}
  	if chr >= 'a' && chr <= 'f' {
  		return int(chr-'a') + 10
  	}
  	if chr >= 'A' && chr <= 'F' {
  		return int(chr-'A') + 10
  	}
  	return 16
  }
  49. func isDigit(chr rune, base int) bool {
  50. return digitValue(chr) < base
  51. }
  52. func isIdStartUnicode(r rune) bool {
  53. return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r)
  54. }
  55. func isIdPartUnicode(r rune) bool {
  56. return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D'
  57. }
  58. func isIdentifierStart(chr rune) bool {
  59. return chr == '$' || chr == '_' || chr == '\\' ||
  60. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  61. chr >= utf8.RuneSelf && isIdStartUnicode(chr)
  62. }
  63. func isIdentifierPart(chr rune) bool {
  64. return chr == '$' || chr == '_' || chr == '\\' ||
  65. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  66. '0' <= chr && chr <= '9' ||
  67. chr >= utf8.RuneSelf && isIdPartUnicode(chr)
  68. }
  69. func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) {
  70. offset := self.chrOffset
  71. hasEscape := false
  72. isUnicode := false
  73. length := 0
  74. for isIdentifierPart(self.chr) {
  75. r := self.chr
  76. length++
  77. if r == '\\' {
  78. hasEscape = true
  79. distance := self.chrOffset - offset
  80. self.read()
  81. if self.chr != 'u' {
  82. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  83. }
  84. var value rune
  85. if self._peek() == '{' {
  86. self.read()
  87. value = -1
  88. for value <= utf8.MaxRune {
  89. self.read()
  90. if self.chr == '}' {
  91. break
  92. }
  93. decimal, ok := hex2decimal(byte(self.chr))
  94. if !ok {
  95. return "", "", false, "Invalid Unicode escape sequence"
  96. }
  97. if value == -1 {
  98. value = decimal
  99. } else {
  100. value = value<<4 | decimal
  101. }
  102. }
  103. if value == -1 {
  104. return "", "", false, "Invalid Unicode escape sequence"
  105. }
  106. } else {
  107. for j := 0; j < 4; j++ {
  108. self.read()
  109. decimal, ok := hex2decimal(byte(self.chr))
  110. if !ok {
  111. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  112. }
  113. value = value<<4 | decimal
  114. }
  115. }
  116. if value == '\\' {
  117. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  118. } else if distance == 0 {
  119. if !isIdentifierStart(value) {
  120. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  121. }
  122. } else if distance > 0 {
  123. if !isIdentifierPart(value) {
  124. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  125. }
  126. }
  127. r = value
  128. }
  129. if r >= utf8.RuneSelf {
  130. isUnicode = true
  131. if r > 0xFFFF {
  132. length++
  133. }
  134. }
  135. self.read()
  136. }
  137. literal := self.str[offset:self.chrOffset]
  138. var parsed unistring.String
  139. if hasEscape || isUnicode {
  140. var err string
  141. // TODO strict
  142. parsed, err = parseStringLiteral(literal, length, isUnicode, false)
  143. if err != "" {
  144. return "", "", false, err
  145. }
  146. } else {
  147. parsed = unistring.String(literal)
  148. }
  149. return literal, parsed, hasEscape, ""
  150. }
  // 7.2
  // isLineWhiteSpace reports whether chr is white space that does not end a
  // line. Line terminators and U+0085 are rejected; tab, vertical tab, form
  // feed, space, no-break space and the byte order mark are accepted; any
  // other rune is accepted when unicode.IsSpace accepts it.
  func isLineWhiteSpace(chr rune) bool {
  	if chr == '\u000a' || chr == '\u000d' || chr == '\u2028' || chr == '\u2029' || chr == '\u0085' {
  		return false
  	}
  	if chr == '\u0009' || chr == '\u000b' || chr == '\u000c' || chr == '\u0020' || chr == '\u00a0' || chr == '\ufeff' {
  		return true
  	}
  	return unicode.IsSpace(chr)
  }
  // 7.3
  // isLineTerminator reports whether chr is LF, CR, U+2028 or U+2029.
  func isLineTerminator(chr rune) bool {
  	return chr == '\u000a' || chr == '\u000d' || chr == '\u2028' || chr == '\u2029'
  }
  171. func isId(tkn token.Token) bool {
  172. switch tkn {
  173. case token.KEYWORD,
  174. token.BOOLEAN,
  175. token.NULL,
  176. token.THIS,
  177. token.IF,
  178. token.IN,
  179. token.OF,
  180. token.DO,
  181. token.VAR,
  182. token.LET,
  183. token.FOR,
  184. token.NEW,
  185. token.TRY,
  186. token.ELSE,
  187. token.CASE,
  188. token.VOID,
  189. token.WITH,
  190. token.CONST,
  191. token.WHILE,
  192. token.BREAK,
  193. token.CATCH,
  194. token.THROW,
  195. token.RETURN,
  196. token.TYPEOF,
  197. token.DELETE,
  198. token.SWITCH,
  199. token.DEFAULT,
  200. token.FINALLY,
  201. token.FUNCTION,
  202. token.CONTINUE,
  203. token.DEBUGGER,
  204. token.INSTANCEOF:
  205. return true
  206. }
  207. return false
  208. }
  // parserState is a snapshot of the scanner-related fields of _parser. It is
  // filled in by mark and written back by restore.
  type parserState struct {
  	tok                                token.Token      // copy of the parser's token field
  	literal                            string           // copy of the parser's literal field
  	parsedLiteral                      unistring.String // copy of the parser's parsedLiteral field
  	implicitSemicolon, insertSemicolon bool             // copies of the semicolon-insertion flags
  	chr                                rune             // copy of the parser's current character
  	chrOffset, offset                  int              // copies of the parser's chrOffset and offset
  	errorCount                         int              // len(errors) when the snapshot was taken; restore truncates to it
  }
  218. func (self *_parser) mark(state *parserState) *parserState {
  219. if state == nil {
  220. state = &parserState{}
  221. }
  222. state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset =
  223. self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  224. state.errorCount = len(self.errors)
  225. return state
  226. }
  227. func (self *_parser) restore(state *parserState) {
  228. self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset =
  229. state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset
  230. self.errors = self.errors[:state.errorCount]
  231. }
  232. func (self *_parser) peek() token.Token {
  233. implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  234. tok, _, _, _ := self.scan()
  235. self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset
  236. return tok
  237. }
  // scan reads the next token from the input.
  //
  // It returns the token kind, the raw source text for identifiers, numbers
  // and strings (empty for other tokens), the parsed literal for identifiers
  // and strings, and the index of the token's first character.
  //
  // Besides advancing the read position, scan maintains two flags:
  // insertSemicolon records whether the token just produced may be followed by
  // an automatically inserted semicolon, and implicitSemicolon is set when a
  // line terminator (or EOF after such a token) was crossed before the
  // returned token. Comments and line terminators are consumed by looping.
  func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {
  	self.implicitSemicolon = false
  	for {
  		self.skipWhiteSpace()
  		idx = self.idxOf(self.chrOffset)
  		// Local flag; copied to self.insertSemicolon on the common exit path
  		// at the bottom of the loop. Branches that return early set the field
  		// directly.
  		insertSemicolon := false
  		switch chr := self.chr; {
  		case isIdentifierStart(chr):
  			var err string
  			var hasEscape bool
  			literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
  			if err != "" {
  				// The error text is dropped here; only the ILLEGAL token is reported.
  				tkn = token.ILLEGAL
  				break
  			}
  			if len(parsedLiteral) > 1 {
  				// Keywords are longer than 1 character, avoid lookup otherwise
  				var strict bool
  				tkn, strict = token.IsKeyword(string(parsedLiteral))
  				if hasEscape {
  					// An identifier written with escapes is never returned as a
  					// specific keyword token: reserved words (other than let) and
  					// true/false/null become the generic KEYWORD, everything else
  					// an IDENTIFIER.
  					self.insertSemicolon = true
  					if tkn != 0 && tkn != token.LET || parsedLiteral == "true" || parsedLiteral == "false" || parsedLiteral == "null" {
  						tkn = token.KEYWORD
  					} else {
  						tkn = token.IDENTIFIER
  					}
  					return
  				}
  				switch tkn {
  				case 0: // Not a keyword
  					if parsedLiteral == "true" || parsedLiteral == "false" {
  						self.insertSemicolon = true
  						tkn = token.BOOLEAN
  						return
  					} else if parsedLiteral == "null" {
  						self.insertSemicolon = true
  						tkn = token.NULL
  						return
  					}
  				case token.KEYWORD:
  					if strict {
  						// TODO If strict and in strict mode, then this is not a break
  						// Leaves the inner switch so the word is returned as an IDENTIFIER below.
  						break
  					}
  					return
  				case
  					token.THIS,
  					token.BREAK,
  					token.THROW, // A newline after a throw is not allowed, but we need to detect it
  					token.RETURN,
  					token.CONTINUE,
  					token.DEBUGGER:
  					self.insertSemicolon = true
  					return
  				default:
  					// Any other keyword token: returned as is, insertSemicolon untouched.
  					return
  				}
  			}
  			self.insertSemicolon = true
  			tkn = token.IDENTIFIER
  			return
  		case '0' <= chr && chr <= '9':
  			self.insertSemicolon = true
  			tkn, literal = self.scanNumericLiteral(false)
  			return
  		default:
  			// Consume chr; self.chr now holds the character after it.
  			self.read()
  			switch chr {
  			case -1:
  				// End of input.
  				if self.insertSemicolon {
  					self.insertSemicolon = false
  					self.implicitSemicolon = true
  				}
  				tkn = token.EOF
  			case '\r', '\n', '\u2028', '\u2029':
  				// Record the line break and keep scanning for the next real token.
  				self.insertSemicolon = false
  				self.implicitSemicolon = true
  				continue
  			case ':':
  				tkn = token.COLON
  			case '.':
  				if digitValue(self.chr) < 10 {
  					// ".5" style number; the '.' has already been consumed.
  					insertSemicolon = true
  					tkn, literal = self.scanNumericLiteral(true)
  				} else {
  					if self.chr == '.' {
  						self.read()
  						if self.chr == '.' {
  							self.read()
  							tkn = token.ELLIPSIS
  						} else {
  							// Exactly two dots is not a token.
  							tkn = token.ILLEGAL
  						}
  					} else {
  						tkn = token.PERIOD
  					}
  				}
  			case ',':
  				tkn = token.COMMA
  			case ';':
  				tkn = token.SEMICOLON
  			case '(':
  				tkn = token.LEFT_PARENTHESIS
  			case ')':
  				tkn = token.RIGHT_PARENTHESIS
  				insertSemicolon = true
  			case '[':
  				tkn = token.LEFT_BRACKET
  			case ']':
  				tkn = token.RIGHT_BRACKET
  				insertSemicolon = true
  			case '{':
  				tkn = token.LEFT_BRACE
  			case '}':
  				tkn = token.RIGHT_BRACE
  				insertSemicolon = true
  			case '+':
  				tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
  				if tkn == token.INCREMENT {
  					insertSemicolon = true
  				}
  			case '-':
  				tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
  				if tkn == token.DECREMENT {
  					insertSemicolon = true
  				}
  			case '*':
  				tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
  			case '/':
  				if self.chr == '/' {
  					self.skipSingleLineComment()
  					continue
  				} else if self.chr == '*' {
  					self.skipMultiLineComment()
  					continue
  				} else {
  					// Could be division, could be RegExp literal
  					tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
  					insertSemicolon = true
  				}
  			case '%':
  				tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
  			case '^':
  				tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
  			case '<':
  				tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
  			case '>':
  				tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
  			case '=':
  				if self.chr == '>' {
  					self.read()
  					// "=>" preceded by a line break is reported as ILLEGAL.
  					if self.implicitSemicolon {
  						tkn = token.ILLEGAL
  					} else {
  						tkn = token.ARROW
  					}
  				} else {
  					tkn = self.switch2(token.ASSIGN, token.EQUAL)
  					if tkn == token.EQUAL && self.chr == '=' {
  						self.read()
  						tkn = token.STRICT_EQUAL
  					}
  				}
  			case '!':
  				tkn = self.switch2(token.NOT, token.NOT_EQUAL)
  				if tkn == token.NOT_EQUAL && self.chr == '=' {
  					self.read()
  					tkn = token.STRICT_NOT_EQUAL
  				}
  			case '&':
  				tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
  			case '|':
  				tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
  			case '~':
  				tkn = token.BITWISE_NOT
  			case '?':
  				tkn = token.QUESTION_MARK
  			case '"', '\'':
  				insertSemicolon = true
  				tkn = token.STRING
  				var err string
  				// chrOffset-1 is the position of the opening quote consumed above.
  				literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
  				if err != "" {
  					tkn = token.ILLEGAL
  				}
  			case '`':
  				tkn = token.BACKTICK
  			default:
  				self.errorUnexpected(idx, chr)
  				tkn = token.ILLEGAL
  			}
  		}
  		self.insertSemicolon = insertSemicolon
  		return
  	}
  }
  434. func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
  435. if self.chr == '=' {
  436. self.read()
  437. return tkn1
  438. }
  439. return tkn0
  440. }
  441. func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  442. if self.chr == '=' {
  443. self.read()
  444. return tkn1
  445. }
  446. if self.chr == chr2 {
  447. self.read()
  448. return tkn2
  449. }
  450. return tkn0
  451. }
  452. func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  453. if self.chr == '=' {
  454. self.read()
  455. return tkn1
  456. }
  457. if self.chr == chr2 {
  458. self.read()
  459. if self.chr == '=' {
  460. self.read()
  461. return tkn3
  462. }
  463. return tkn2
  464. }
  465. return tkn0
  466. }
  467. func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  468. if self.chr == '=' {
  469. self.read()
  470. return tkn1
  471. }
  472. if self.chr == chr2 {
  473. self.read()
  474. if self.chr == '=' {
  475. self.read()
  476. return tkn3
  477. }
  478. if self.chr == chr3 {
  479. self.read()
  480. if self.chr == '=' {
  481. self.read()
  482. return tkn5
  483. }
  484. return tkn4
  485. }
  486. return tkn2
  487. }
  488. return tkn0
  489. }
  490. func (self *_parser) _peek() rune {
  491. if self.offset < self.length {
  492. return rune(self.str[self.offset])
  493. }
  494. return -1
  495. }
  496. func (self *_parser) read() {
  497. if self.offset < self.length {
  498. self.chrOffset = self.offset
  499. chr, width := rune(self.str[self.offset]), 1
  500. if chr >= utf8.RuneSelf { // !ASCII
  501. chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
  502. if chr == utf8.RuneError && width == 1 {
  503. self.error(self.chrOffset, "Invalid UTF-8 character")
  504. }
  505. }
  506. self.offset += width
  507. self.chr = chr
  508. } else {
  509. self.chrOffset = self.length
  510. self.chr = -1 // EOF
  511. }
  512. }
  513. func (self *_parser) skipSingleLineComment() {
  514. for self.chr != -1 {
  515. self.read()
  516. if isLineTerminator(self.chr) {
  517. return
  518. }
  519. }
  520. }
  521. func (self *_parser) skipMultiLineComment() {
  522. self.read()
  523. for self.chr >= 0 {
  524. chr := self.chr
  525. self.read()
  526. if chr == '*' && self.chr == '/' {
  527. self.read()
  528. return
  529. }
  530. }
  531. self.errorUnexpected(0, self.chr)
  532. }
  533. func (self *_parser) skipWhiteSpace() {
  534. for {
  535. switch self.chr {
  536. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  537. self.read()
  538. continue
  539. case '\r':
  540. if self._peek() == '\n' {
  541. self.read()
  542. }
  543. fallthrough
  544. case '\u2028', '\u2029', '\n':
  545. if self.insertSemicolon {
  546. return
  547. }
  548. self.read()
  549. continue
  550. }
  551. if self.chr >= utf8.RuneSelf {
  552. if unicode.IsSpace(self.chr) {
  553. self.read()
  554. continue
  555. }
  556. }
  557. break
  558. }
  559. }
  560. func (self *_parser) skipLineWhiteSpace() {
  561. for isLineWhiteSpace(self.chr) {
  562. self.read()
  563. }
  564. }
  565. func (self *_parser) scanMantissa(base int) {
  566. for digitValue(self.chr) < base {
  567. self.read()
  568. }
  569. }
  // scanEscape consumes the remainder of an escape sequence; the backslash
  // itself has already been read and self.chr is the character after it.
  // quote is the delimiter of the enclosing literal; digit scanning stops at
  // it.
  //
  // It returns the amount the callers add to their running length (2 when
  // the decoded value is above 0xFFFF, and for a "\r\n" pair; 1 otherwise)
  // and whether the escape requires the unicode representation (decoded
  // value >= utf8.RuneSelf, or U+2028/U+2029). The decoded value itself is
  // not returned; the literal is decoded again later by parseStringLiteral.
  func (self *_parser) scanEscape(quote rune) (int, bool) {
  	// length is the maximum number of digits to read (0 means "until the
  	// closing brace"); base == 0 means the escape has no numeric part.
  	var length, base uint32
  	chr := self.chr
  	switch chr {
  	case '0', '1', '2', '3', '4', '5', '6', '7':
  		// Octal:
  		length, base = 3, 8
  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
  		self.read()
  		return 1, false
  	case '\r':
  		self.read()
  		if self.chr == '\n' {
  			self.read()
  			return 2, false
  		}
  		return 1, false
  	case '\n':
  		self.read()
  		return 1, false
  	case '\u2028', '\u2029':
  		self.read()
  		return 1, true
  	case 'x':
  		self.read()
  		length, base = 2, 16
  	case 'u':
  		self.read()
  		if self.chr == '{' {
  			// \u{...}: variable number of hex digits.
  			self.read()
  			length, base = 0, 16
  		} else {
  			length, base = 4, 16
  		}
  	default:
  		self.read() // Always make progress
  	}
  	if base > 0 {
  		var value uint32
  		if length > 0 {
  			// Fixed-width form: read at most length digits, stopping early at
  			// the quote, at EOF or at the first non-digit.
  			for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
  				digit := uint32(digitValue(self.chr))
  				if digit >= base {
  					break
  				}
  				value = value*base + digit
  				self.read()
  			}
  		} else {
  			// Braced form: read digits until '}', the quote, EOF, a non-digit
  			// or until the value reaches utf8.MaxRune.
  			for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
  				if self.chr == '}' {
  					self.read()
  					break
  				}
  				digit := uint32(digitValue(self.chr))
  				if digit >= base {
  					break
  				}
  				value = value*base + digit
  				self.read()
  			}
  		}
  		chr = rune(value)
  	}
  	if chr >= utf8.RuneSelf {
  		if chr > 0xFFFF {
  			return 2, true
  		}
  		return 1, true
  	}
  	return 1, false
  }
  642. func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) {
  643. // " ' /
  644. quote := rune(self.str[offset])
  645. length := 0
  646. isUnicode := false
  647. for self.chr != quote {
  648. chr := self.chr
  649. if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
  650. goto newline
  651. }
  652. self.read()
  653. if chr == '\\' {
  654. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  655. if quote == '/' {
  656. goto newline
  657. }
  658. self.scanNewline()
  659. } else {
  660. l, u := self.scanEscape(quote)
  661. length += l
  662. if u {
  663. isUnicode = true
  664. }
  665. }
  666. continue
  667. } else if chr == '[' && quote == '/' {
  668. // Allow a slash (/) in a bracket character class ([...])
  669. // TODO Fix this, this is hacky...
  670. quote = -1
  671. } else if chr == ']' && quote == -1 {
  672. quote = '/'
  673. }
  674. if chr >= utf8.RuneSelf {
  675. isUnicode = true
  676. if chr > 0xFFFF {
  677. length++
  678. }
  679. }
  680. length++
  681. }
  682. // " ' /
  683. self.read()
  684. literal = self.str[offset:self.chrOffset]
  685. if parse {
  686. // TODO strict
  687. parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
  688. }
  689. return
  690. newline:
  691. self.scanNewline()
  692. errStr := "String not terminated"
  693. if quote == '/' {
  694. errStr = "Invalid regular expression: missing /"
  695. self.error(self.idxOf(offset), errStr)
  696. }
  697. return "", "", errStr
  698. }
  699. func (self *_parser) scanNewline() {
  700. if self.chr == '\r' {
  701. self.read()
  702. if self.chr != '\n' {
  703. return
  704. }
  705. }
  706. self.read()
  707. }
  // parseTemplateCharacters scans one run of template literal text, starting
  // at the current character and ending at the closing backtick or at the
  // start of a "${" substitution.
  //
  // Results:
  //   - literal:  the raw text of the run, without the terminating "`" or
  //     "${", and with CR and CRLF replaced by LF;
  //   - parsed:   the decoded text, set only when parseErr is empty;
  //   - finished: true when the run ended at the closing backtick;
  //   - parseErr: a problem with the escapes in the run (the raw literal is
  //     still returned);
  //   - err:      err_UnexpectedEndOfInput when the input ends before the run
  //     is terminated; no other result is set in that case.
  func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) {
  	offset := self.chrOffset
  	var end int
  	// length counts the UTF-16 code units of the decoded text and is checked
  	// by parseStringLiteral.
  	length := 0
  	isUnicode := false
  	hasCR := false
  	for {
  		chr := self.chr
  		if chr < 0 {
  			goto unterminated
  		}
  		self.read()
  		if chr == '`' {
  			finished = true
  			end = self.chrOffset - 1
  			break
  		}
  		if chr == '\\' {
  			if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  				// Backslash followed by a line break: contributes nothing to
  				// the decoded text.
  				if self.chr == '\r' {
  					hasCR = true
  				}
  				self.scanNewline()
  			} else {
  				if self.chr == '8' || self.chr == '9' {
  					// Keep only the first escape problem.
  					if parseErr == "" {
  						parseErr = "\\8 and \\9 are not allowed in template strings."
  					}
  				}
  				l, u := self.scanEscape('`')
  				length += l
  				if u {
  					isUnicode = true
  				}
  			}
  			continue
  		}
  		if chr == '$' && self.chr == '{' {
  			self.read()
  			// Exclude both characters of "${" from the literal.
  			end = self.chrOffset - 2
  			break
  		}
  		if chr >= utf8.RuneSelf {
  			isUnicode = true
  			if chr > 0xFFFF {
  				length++
  			}
  		} else if chr == '\r' {
  			hasCR = true
  			if self.chr == '\n' {
  				// CRLF becomes a single LF after normalisation, so cancel the
  				// increment below; the '\n' is counted on the next iteration.
  				length--
  			}
  		}
  		length++
  	}
  	literal = self.str[offset:end]
  	if hasCR {
  		literal = normaliseCRLF(literal)
  	}
  	if parseErr == "" {
  		parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
  	}
  	self.insertSemicolon = true
  	return
  unterminated:
  	err = err_UnexpectedEndOfInput
  	return
  }
  // normaliseCRLF returns s with every "\r\n" pair and every lone '\r'
  // replaced by a single '\n'.
  func normaliseCRLF(s string) string {
  	var out strings.Builder
  	out.Grow(len(s))
  	// skipLF is set right after a '\r' has been written as '\n', so that a
  	// directly following '\n' is dropped.
  	skipLF := false
  	for i := 0; i < len(s); i++ {
  		c := s[i]
  		if skipLF && c == '\n' {
  			skipLF = false
  			continue
  		}
  		skipLF = c == '\r'
  		if skipLF {
  			c = '\n'
  		}
  		out.WriteByte(c)
  	}
  	return out.String()
  }
  // hex2decimal converts an ASCII hexadecimal digit to its numeric value. ok
  // is false, and value 0, when chr is not a hexadecimal digit.
  func hex2decimal(chr byte) (value rune, ok bool) {
  	switch {
  	case chr >= '0' && chr <= '9':
  		value = rune(chr - '0')
  	case chr >= 'a' && chr <= 'f':
  		value = rune(chr-'a') + 10
  	case chr >= 'A' && chr <= 'F':
  		value = rune(chr-'A') + 10
  	default:
  		return 0, false
  	}
  	return value, true
  }
  805. func parseNumberLiteral(literal string) (value interface{}, err error) {
  806. // TODO Is Uint okay? What about -MAX_UINT
  807. value, err = strconv.ParseInt(literal, 0, 64)
  808. if err == nil {
  809. return
  810. }
  811. parseIntErr := err // Save this first error, just in case
  812. value, err = strconv.ParseFloat(literal, 64)
  813. if err == nil {
  814. return
  815. } else if err.(*strconv.NumError).Err == strconv.ErrRange {
  816. // Infinity, etc.
  817. return value, nil
  818. }
  819. err = parseIntErr
  820. if err.(*strconv.NumError).Err == strconv.ErrRange {
  821. if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
  822. // Could just be a very large number (e.g. 0x8000000000000000)
  823. var value float64
  824. literal = literal[2:]
  825. for _, chr := range literal {
  826. digit := digitValue(chr)
  827. if digit >= 16 {
  828. goto error
  829. }
  830. value = value*16 + float64(digit)
  831. }
  832. return value, nil
  833. }
  834. }
  835. error:
  836. return nil, errors.New("Illegal numeric literal")
  837. }
  // parseStringLiteral decodes the body of a string-like literal (without its
  // delimiters), resolving escape sequences and dropping line continuations.
  //
  // length is the size the caller expects the result to have: UTF-16 code
  // units when unicode is true, bytes otherwise. When unicode is true the
  // result is built as UTF-16 with a leading unistring.BOM; otherwise it is
  // built byte by byte and any decoded value >= utf8.RuneSelf is an error.
  // strict rejects octal escapes.
  //
  // It returns the decoded string and an error message (empty on success).
  // It panics when the literal ends in a lone backslash or when the produced
  // length differs from length, since both indicate that the scanner and this
  // function disagree.
  //
  // Note that the parameter named unicode shadows the unicode package inside
  // this function.
  func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) {
  	var sb strings.Builder
  	var chars []uint16
  	if unicode {
  		chars = make([]uint16, 1, length+1)
  		chars[0] = unistring.BOM
  	} else {
  		sb.Grow(length)
  	}
  	str := literal
  	for len(str) > 0 {
  		switch chr := str[0]; {
  		// We do not explicitly handle the case of the quote
  		// value, which can be: " ' /
  		// This assumes we're already passed a partially well-formed literal
  		case chr >= utf8.RuneSelf:
  			// Non-ASCII source character: copy it as one or two UTF-16 units.
  			chr, size := utf8.DecodeRuneInString(str)
  			if chr <= 0xFFFF {
  				chars = append(chars, uint16(chr))
  			} else {
  				first, second := utf16.EncodeRune(chr)
  				chars = append(chars, uint16(first), uint16(second))
  			}
  			str = str[size:]
  			continue
  		case chr != '\\':
  			// Plain ASCII character.
  			if unicode {
  				chars = append(chars, uint16(chr))
  			} else {
  				sb.WriteByte(chr)
  			}
  			str = str[1:]
  			continue
  		}
  		// From here on str starts with a backslash.
  		if len(str) <= 1 {
  			panic("len(str) <= 1")
  		}
  		chr := str[1]
  		var value rune
  		if chr >= utf8.RuneSelf {
  			// Backslash followed by a non-ASCII character: the character
  			// stands for itself, except U+2028/U+2029 which form a line
  			// continuation.
  			str = str[1:]
  			var size int
  			value, size = utf8.DecodeRuneInString(str)
  			str = str[size:] // \ + <character>
  			if value == '\u2028' || value == '\u2029' {
  				continue
  			}
  		} else {
  			str = str[2:] // \<character>
  			switch chr {
  			case 'b':
  				value = '\b'
  			case 'f':
  				value = '\f'
  			case 'n':
  				value = '\n'
  			case 'r':
  				value = '\r'
  			case 't':
  				value = '\t'
  			case 'v':
  				value = '\v'
  			case 'x', 'u':
  				// size is the number of hex digits to read; 0 selects the
  				// braced \u{...} form.
  				size := 0
  				switch chr {
  				case 'x':
  					size = 2
  				case 'u':
  					if str == "" || str[0] != '{' {
  						size = 4
  					}
  				}
  				if size > 0 {
  					if len(str) < size {
  						return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
  					}
  					for j := 0; j < size; j++ {
  						decimal, ok := hex2decimal(str[j])
  						if !ok {
  							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size])
  						}
  						value = value<<4 | decimal
  					}
  				} else {
  					// Skip the '{' and accumulate digits up to the '}'.
  					str = str[1:]
  					var val rune
  					// -1 marks "closing brace not seen yet".
  					value = -1
  					for ; size < len(str); size++ {
  						if str[size] == '}' {
  							if size == 0 {
  								// "\u{}" has no digits.
  								return "", fmt.Sprintf("invalid escape: \\%s", string(chr))
  							}
  							size++
  							value = val
  							break
  						}
  						decimal, ok := hex2decimal(str[size])
  						if !ok {
  							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1])
  						}
  						val = val<<4 | decimal
  						if val > utf8.MaxRune {
  							return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1])
  						}
  					}
  					if value == -1 {
  						return "", fmt.Sprintf("unterminated \\u{: %q", str)
  					}
  				}
  				// Drop the consumed digits (and the closing brace, if any).
  				str = str[size:]
  				if chr == 'x' {
  					break
  				}
  				if value > utf8.MaxRune {
  					panic("value > utf8.MaxRune")
  				}
  			case '0':
  				// "\0" not followed by an octal digit is the NUL character;
  				// otherwise it starts an octal escape.
  				if len(str) == 0 || '0' > str[0] || str[0] > '7' {
  					value = 0
  					break
  				}
  				fallthrough
  			case '1', '2', '3', '4', '5', '6', '7':
  				if strict {
  					return "", "Octal escape sequences are not allowed in this context"
  				}
  				value = rune(chr) - '0'
  				// Up to two more octal digits.
  				j := 0
  				for ; j < 2; j++ {
  					if len(str) < j+1 {
  						break
  					}
  					chr := str[j]
  					if '0' > chr || chr > '7' {
  						break
  					}
  					decimal := rune(str[j]) - '0'
  					value = (value << 3) | decimal
  				}
  				str = str[j:]
  			case '\\':
  				value = '\\'
  			case '\'', '"':
  				value = rune(chr)
  			case '\r':
  				// Line continuation; swallow the '\n' of a "\r\n" pair too.
  				if len(str) > 0 {
  					if str[0] == '\n' {
  						str = str[1:]
  					}
  				}
  				fallthrough
  			case '\n':
  				// Line continuation contributes nothing to the result.
  				continue
  			default:
  				// Any other escaped character stands for itself.
  				value = rune(chr)
  			}
  		}
  		if unicode {
  			if value <= 0xFFFF {
  				chars = append(chars, uint16(value))
  			} else {
  				first, second := utf16.EncodeRune(value)
  				chars = append(chars, uint16(first), uint16(second))
  			}
  		} else {
  			if value >= utf8.RuneSelf {
  				return "", "Unexpected unicode character"
  			}
  			sb.WriteByte(byte(value))
  		}
  	}
  	if unicode {
  		// +1 accounts for the leading BOM.
  		if len(chars) != length+1 {
  			panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
  		}
  		return unistring.FromUtf16(chars), ""
  	}
  	if sb.Len() != length {
  		panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
  	}
  	return unistring.String(sb.String()), ""
  }
  // scanNumericLiteral scans a numeric literal and returns its token and raw
  // source text.
  //
  // decimalPoint is true when the caller has already consumed a leading '.'
  // (a literal such as ".5"); the returned text then starts at that '.'.
  // Otherwise the current character is the first digit.
  //
  // The token is token.NUMBER on success. It is token.ILLEGAL when a radix
  // prefix is not followed by a digit of that radix, when an exponent has no
  // digits, or when the literal is directly followed by an identifier start
  // character or a decimal digit; the text scanned so far is still returned.
  func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
  	offset := self.chrOffset
  	tkn := token.NUMBER
  	if decimalPoint {
  		// Step back onto the '.' that the caller consumed.
  		offset--
  		self.scanMantissa(10)
  	} else {
  		if self.chr == '0' {
  			self.read()
  			base := 0
  			switch self.chr {
  			case 'x', 'X':
  				base = 16
  			case 'o', 'O':
  				base = 8
  			case 'b', 'B':
  				base = 2
  			case '.', 'e', 'E':
  				// no-op
  			default:
  				// legacy octal
  				self.scanMantissa(8)
  				goto end
  			}
  			if base > 0 {
  				// Skip the radix letter; at least one digit must follow.
  				self.read()
  				if !isDigit(self.chr, base) {
  					return token.ILLEGAL, self.str[offset:self.chrOffset]
  				}
  				self.scanMantissa(base)
  				goto end
  			}
  		} else {
  			self.scanMantissa(10)
  		}
  		if self.chr == '.' {
  			self.read()
  			self.scanMantissa(10)
  		}
  	}
  	if self.chr == 'e' || self.chr == 'E' {
  		self.read()
  		if self.chr == '-' || self.chr == '+' {
  			self.read()
  		}
  		if isDecimalDigit(self.chr) {
  			self.read()
  			self.scanMantissa(10)
  		} else {
  			return token.ILLEGAL, self.str[offset:self.chrOffset]
  		}
  	}
  end:
  	// A literal may not run straight into an identifier or another digit
  	// (for example "3in" or a legacy octal followed by 8 or 9).
  	if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
  		return token.ILLEGAL, self.str[offset:self.chrOffset]
  	}
  	return tkn, self.str[offset:self.chrOffset]
  }