lexer.go 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175
  1. package parser
  2. import (
  3. "errors"
  4. "fmt"
  5. "math/big"
  6. "strconv"
  7. "strings"
  8. "unicode"
  9. "unicode/utf16"
  10. "unicode/utf8"
  11. "golang.org/x/text/unicode/rangetable"
  12. "github.com/dop251/goja/file"
  13. "github.com/dop251/goja/token"
  14. "github.com/dop251/goja/unistring"
  15. )
// Unicode range tables used to classify non-ASCII identifier characters.
var (
	// unicodeRangeIdNeg lists code points that are rejected by
	// isIdStartUnicode and isIdPartUnicode even when they appear in one of
	// the positive tables below.
	unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space)

	// unicodeRangeIdStartPos lists code points accepted as the first
	// character of an identifier (letters, letter numbers, Other_ID_Start).
	unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start)

	// unicodeRangeIdContPos extends the start table with the marks, decimal
	// digits, connector punctuation and Other_ID_Continue code points that
	// are accepted after the first character.
	unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue)
)
  21. func isDecimalDigit(chr rune) bool {
  22. return '0' <= chr && chr <= '9'
  23. }
  24. func IsIdentifier(s string) bool {
  25. if s == "" {
  26. return false
  27. }
  28. r, size := utf8.DecodeRuneInString(s)
  29. if !isIdentifierStart(r) {
  30. return false
  31. }
  32. for _, r := range s[size:] {
  33. if !isIdentifierPart(r) {
  34. return false
  35. }
  36. }
  37. return true
  38. }
  39. func digitValue(chr rune) int {
  40. switch {
  41. case '0' <= chr && chr <= '9':
  42. return int(chr - '0')
  43. case 'a' <= chr && chr <= 'f':
  44. return int(chr - 'a' + 10)
  45. case 'A' <= chr && chr <= 'F':
  46. return int(chr - 'A' + 10)
  47. }
  48. return 16 // Larger than any legal digit value
  49. }
  50. func isDigit(chr rune, base int) bool {
  51. return digitValue(chr) < base
  52. }
  53. func isIdStartUnicode(r rune) bool {
  54. return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r)
  55. }
  56. func isIdPartUnicode(r rune) bool {
  57. return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D'
  58. }
  59. func isIdentifierStart(chr rune) bool {
  60. return chr == '$' || chr == '_' || chr == '\\' ||
  61. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  62. chr >= utf8.RuneSelf && isIdStartUnicode(chr)
  63. }
  64. func isIdentifierPart(chr rune) bool {
  65. return chr == '$' || chr == '_' || chr == '\\' ||
  66. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  67. '0' <= chr && chr <= '9' ||
  68. chr >= utf8.RuneSelf && isIdPartUnicode(chr)
  69. }
  70. func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) {
  71. offset := self.chrOffset
  72. hasEscape := false
  73. isUnicode := false
  74. length := 0
  75. for isIdentifierPart(self.chr) {
  76. r := self.chr
  77. length++
  78. if r == '\\' {
  79. hasEscape = true
  80. distance := self.chrOffset - offset
  81. self.read()
  82. if self.chr != 'u' {
  83. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  84. }
  85. var value rune
  86. if self._peek() == '{' {
  87. self.read()
  88. value = -1
  89. for value <= utf8.MaxRune {
  90. self.read()
  91. if self.chr == '}' {
  92. break
  93. }
  94. decimal, ok := hex2decimal(byte(self.chr))
  95. if !ok {
  96. return "", "", false, "Invalid Unicode escape sequence"
  97. }
  98. if value == -1 {
  99. value = decimal
  100. } else {
  101. value = value<<4 | decimal
  102. }
  103. }
  104. if value == -1 {
  105. return "", "", false, "Invalid Unicode escape sequence"
  106. }
  107. } else {
  108. for j := 0; j < 4; j++ {
  109. self.read()
  110. decimal, ok := hex2decimal(byte(self.chr))
  111. if !ok {
  112. return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  113. }
  114. value = value<<4 | decimal
  115. }
  116. }
  117. if value == '\\' {
  118. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  119. } else if distance == 0 {
  120. if !isIdentifierStart(value) {
  121. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  122. }
  123. } else if distance > 0 {
  124. if !isIdentifierPart(value) {
  125. return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
  126. }
  127. }
  128. r = value
  129. }
  130. if r >= utf8.RuneSelf {
  131. isUnicode = true
  132. if r > 0xFFFF {
  133. length++
  134. }
  135. }
  136. self.read()
  137. }
  138. literal := self.str[offset:self.chrOffset]
  139. var parsed unistring.String
  140. if hasEscape || isUnicode {
  141. var err string
  142. // TODO strict
  143. parsed, err = parseStringLiteral(literal, length, isUnicode, false)
  144. if err != "" {
  145. return "", "", false, err
  146. }
  147. } else {
  148. parsed = unistring.String(literal)
  149. }
  150. return literal, parsed, hasEscape, ""
  151. }
  152. // 7.2
  153. func isLineWhiteSpace(chr rune) bool {
  154. switch chr {
  155. case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
  156. return true
  157. case '\u000a', '\u000d', '\u2028', '\u2029':
  158. return false
  159. case '\u0085':
  160. return false
  161. }
  162. return unicode.IsSpace(chr)
  163. }
  164. // 7.3
  165. func isLineTerminator(chr rune) bool {
  166. switch chr {
  167. case '\u000a', '\u000d', '\u2028', '\u2029':
  168. return true
  169. }
  170. return false
  171. }
  172. func isId(tkn token.Token) bool {
  173. switch tkn {
  174. case token.KEYWORD,
  175. token.BOOLEAN,
  176. token.NULL,
  177. token.THIS,
  178. token.IF,
  179. token.IN,
  180. token.OF,
  181. token.DO,
  182. token.VAR,
  183. token.LET,
  184. token.FOR,
  185. token.NEW,
  186. token.TRY,
  187. token.ELSE,
  188. token.CASE,
  189. token.VOID,
  190. token.WITH,
  191. token.CONST,
  192. token.WHILE,
  193. token.BREAK,
  194. token.CATCH,
  195. token.THROW,
  196. token.RETURN,
  197. token.TYPEOF,
  198. token.DELETE,
  199. token.SWITCH,
  200. token.DEFAULT,
  201. token.FINALLY,
  202. token.FUNCTION,
  203. token.CONTINUE,
  204. token.DEBUGGER,
  205. token.INSTANCEOF:
  206. return true
  207. }
  208. return false
  209. }
  210. type parserState struct {
  211. tok token.Token
  212. literal string
  213. parsedLiteral unistring.String
  214. implicitSemicolon, insertSemicolon bool
  215. chr rune
  216. chrOffset, offset int
  217. errorCount int
  218. }
  219. func (self *_parser) mark(state *parserState) *parserState {
  220. if state == nil {
  221. state = &parserState{}
  222. }
  223. state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset =
  224. self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  225. state.errorCount = len(self.errors)
  226. return state
  227. }
  228. func (self *_parser) restore(state *parserState) {
  229. self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset =
  230. state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset
  231. self.errors = self.errors[:state.errorCount]
  232. }
  233. func (self *_parser) peek() token.Token {
  234. implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
  235. tok, _, _, _ := self.scan()
  236. self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset
  237. return tok
  238. }
// scan reads the next token from the input, skipping white space and
// comments.
//
// It returns the token kind, the raw source text for identifiers, numbers
// and strings, the decoded value for identifiers and strings, and the index
// of the token's first character. It also maintains self.insertSemicolon and
// self.implicitSemicolon: implicitSemicolon is cleared on entry and set when
// a line terminator or end of input is reached while insertSemicolon is in
// effect.
func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {
	self.implicitSemicolon = false
	for {
		self.skipWhiteSpace()
		idx = self.idxOf(self.chrOffset)
		// Value assigned to self.insertSemicolon at the bottom of the loop;
		// branches that return early set the field directly instead.
		insertSemicolon := false
		switch chr := self.chr; {
		case isIdentifierStart(chr):
			var err string
			var hasEscape bool
			literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
			if err != "" {
				tkn = token.ILLEGAL
				// Leaves the switch; the token is returned via the common tail.
				break
			}
			if len(parsedLiteral) > 1 {
				// Keywords are longer than 1 character, avoid lookup otherwise
				var strict bool
				tkn, strict = token.IsKeyword(string(parsedLiteral))
				if hasEscape {
					// A word written with escapes is never returned as its
					// specific keyword token: it becomes the generic KEYWORD
					// (for keywords other than let, and for true/false/null)
					// or a plain IDENTIFIER.
					self.insertSemicolon = true
					if tkn != 0 && tkn != token.LET || parsedLiteral == "true" || parsedLiteral == "false" || parsedLiteral == "null" {
						tkn = token.KEYWORD
					} else {
						tkn = token.IDENTIFIER
					}
					return
				}
				switch tkn {
				case 0: // Not a keyword
					if parsedLiteral == "true" || parsedLiteral == "false" {
						self.insertSemicolon = true
						tkn = token.BOOLEAN
						return
					} else if parsedLiteral == "null" {
						self.insertSemicolon = true
						tkn = token.NULL
						return
					}
				case token.KEYWORD:
					if strict {
						// TODO If strict and in strict mode, then this is not a break
						// Leaves the inner switch so the word is returned as
						// an IDENTIFIER below.
						break
					}
					return
				case
					token.THIS,
					token.BREAK,
					token.THROW, // A newline after a throw is not allowed, but we need to detect it
					token.RETURN,
					token.CONTINUE,
					token.DEBUGGER:
					self.insertSemicolon = true
					return
				default:
					return
				}
			}
			self.insertSemicolon = true
			tkn = token.IDENTIFIER
			return
		case '0' <= chr && chr <= '9':
			self.insertSemicolon = true
			tkn, literal = self.scanNumericLiteral(false)
			return
		default:
			// Punctuators and everything else: consume the character first so
			// that self.chr is the lookahead while chr is the one being
			// classified.
			self.read()
			switch chr {
			case -1:
				// End of input.
				if self.insertSemicolon {
					self.insertSemicolon = false
					self.implicitSemicolon = true
				}
				tkn = token.EOF
			case '\r', '\n', '\u2028', '\u2029':
				// Line terminator: record an implicit semicolon and keep
				// scanning for the next token.
				self.insertSemicolon = false
				self.implicitSemicolon = true
				continue
			case ':':
				tkn = token.COLON
			case '.':
				if digitValue(self.chr) < 10 {
					// A dot followed by a decimal digit starts a number.
					insertSemicolon = true
					tkn, literal = self.scanNumericLiteral(true)
				} else {
					if self.chr == '.' {
						self.read()
						if self.chr == '.' {
							self.read()
							tkn = token.ELLIPSIS
						} else {
							// Exactly two dots.
							tkn = token.ILLEGAL
						}
					} else {
						tkn = token.PERIOD
					}
				}
			case ',':
				tkn = token.COMMA
			case ';':
				tkn = token.SEMICOLON
			case '(':
				tkn = token.LEFT_PARENTHESIS
			case ')':
				tkn = token.RIGHT_PARENTHESIS
				insertSemicolon = true
			case '[':
				tkn = token.LEFT_BRACKET
			case ']':
				tkn = token.RIGHT_BRACKET
				insertSemicolon = true
			case '{':
				tkn = token.LEFT_BRACE
			case '}':
				tkn = token.RIGHT_BRACE
				insertSemicolon = true
			case '+':
				tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
				if tkn == token.INCREMENT {
					insertSemicolon = true
				}
			case '-':
				tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
				if tkn == token.DECREMENT {
					insertSemicolon = true
				}
			case '*':
				tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
			case '/':
				if self.chr == '/' {
					self.skipSingleLineComment()
					continue
				} else if self.chr == '*' {
					self.skipMultiLineComment()
					continue
				} else {
					// Could be division, could be RegExp literal
					tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
					insertSemicolon = true
				}
			case '%':
				tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
			case '^':
				tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
			case '<':
				tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
			case '>':
				tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
			case '=':
				if self.chr == '>' {
					self.read()
					// implicitSemicolon can only be set at this point if a
					// line break was turned into an implicit semicolon earlier
					// in this call, in which case "=>" is rejected.
					if self.implicitSemicolon {
						tkn = token.ILLEGAL
					} else {
						tkn = token.ARROW
					}
				} else {
					tkn = self.switch2(token.ASSIGN, token.EQUAL)
					if tkn == token.EQUAL && self.chr == '=' {
						self.read()
						tkn = token.STRICT_EQUAL
					}
				}
			case '!':
				tkn = self.switch2(token.NOT, token.NOT_EQUAL)
				if tkn == token.NOT_EQUAL && self.chr == '=' {
					self.read()
					tkn = token.STRICT_NOT_EQUAL
				}
			case '&':
				tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
			case '|':
				tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
			case '~':
				tkn = token.BITWISE_NOT
			case '?':
				tkn = token.QUESTION_MARK
			case '"', '\'':
				insertSemicolon = true
				tkn = token.STRING
				var err string
				// The opening quote was already consumed by read above, so it
				// sits one byte before the current character.
				literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
				if err != "" {
					tkn = token.ILLEGAL
				}
			case '`':
				tkn = token.BACKTICK
			default:
				self.errorUnexpected(idx, chr)
				tkn = token.ILLEGAL
			}
		}
		self.insertSemicolon = insertSemicolon
		return
	}
}
  435. func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
  436. if self.chr == '=' {
  437. self.read()
  438. return tkn1
  439. }
  440. return tkn0
  441. }
  442. func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  443. if self.chr == '=' {
  444. self.read()
  445. return tkn1
  446. }
  447. if self.chr == chr2 {
  448. self.read()
  449. return tkn2
  450. }
  451. return tkn0
  452. }
  453. func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  454. if self.chr == '=' {
  455. self.read()
  456. return tkn1
  457. }
  458. if self.chr == chr2 {
  459. self.read()
  460. if self.chr == '=' {
  461. self.read()
  462. return tkn3
  463. }
  464. return tkn2
  465. }
  466. return tkn0
  467. }
  468. func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  469. if self.chr == '=' {
  470. self.read()
  471. return tkn1
  472. }
  473. if self.chr == chr2 {
  474. self.read()
  475. if self.chr == '=' {
  476. self.read()
  477. return tkn3
  478. }
  479. if self.chr == chr3 {
  480. self.read()
  481. if self.chr == '=' {
  482. self.read()
  483. return tkn5
  484. }
  485. return tkn4
  486. }
  487. return tkn2
  488. }
  489. return tkn0
  490. }
  491. func (self *_parser) _peek() rune {
  492. if self.offset < self.length {
  493. return rune(self.str[self.offset])
  494. }
  495. return -1
  496. }
  497. func (self *_parser) read() {
  498. if self.offset < self.length {
  499. self.chrOffset = self.offset
  500. chr, width := rune(self.str[self.offset]), 1
  501. if chr >= utf8.RuneSelf { // !ASCII
  502. chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
  503. if chr == utf8.RuneError && width == 1 {
  504. self.error(self.chrOffset, "Invalid UTF-8 character")
  505. }
  506. }
  507. self.offset += width
  508. self.chr = chr
  509. } else {
  510. self.chrOffset = self.length
  511. self.chr = -1 // EOF
  512. }
  513. }
  514. func (self *_parser) skipSingleLineComment() {
  515. for self.chr != -1 {
  516. self.read()
  517. if isLineTerminator(self.chr) {
  518. return
  519. }
  520. }
  521. }
  522. func (self *_parser) skipMultiLineComment() {
  523. self.read()
  524. for self.chr >= 0 {
  525. chr := self.chr
  526. self.read()
  527. if chr == '*' && self.chr == '/' {
  528. self.read()
  529. return
  530. }
  531. }
  532. self.errorUnexpected(0, self.chr)
  533. }
  534. func (self *_parser) skipWhiteSpace() {
  535. for {
  536. switch self.chr {
  537. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  538. self.read()
  539. continue
  540. case '\r':
  541. if self._peek() == '\n' {
  542. self.read()
  543. }
  544. fallthrough
  545. case '\u2028', '\u2029', '\n':
  546. if self.insertSemicolon {
  547. return
  548. }
  549. self.read()
  550. continue
  551. }
  552. if self.chr >= utf8.RuneSelf {
  553. if unicode.IsSpace(self.chr) {
  554. self.read()
  555. continue
  556. }
  557. }
  558. break
  559. }
  560. }
  561. func (self *_parser) skipLineWhiteSpace() {
  562. for isLineWhiteSpace(self.chr) {
  563. self.read()
  564. }
  565. }
  566. func (self *_parser) scanMantissa(base int) {
  567. for digitValue(self.chr) < base {
  568. self.read()
  569. }
  570. }
  571. func (self *_parser) scanEscape(quote rune) (int, bool) {
  572. var length, base uint32
  573. chr := self.chr
  574. switch chr {
  575. case '0', '1', '2', '3', '4', '5', '6', '7':
  576. // Octal:
  577. length, base = 3, 8
  578. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
  579. self.read()
  580. return 1, false
  581. case '\r':
  582. self.read()
  583. if self.chr == '\n' {
  584. self.read()
  585. return 2, false
  586. }
  587. return 1, false
  588. case '\n':
  589. self.read()
  590. return 1, false
  591. case '\u2028', '\u2029':
  592. self.read()
  593. return 1, true
  594. case 'x':
  595. self.read()
  596. length, base = 2, 16
  597. case 'u':
  598. self.read()
  599. if self.chr == '{' {
  600. self.read()
  601. length, base = 0, 16
  602. } else {
  603. length, base = 4, 16
  604. }
  605. default:
  606. self.read() // Always make progress
  607. }
  608. if base > 0 {
  609. var value uint32
  610. if length > 0 {
  611. for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
  612. digit := uint32(digitValue(self.chr))
  613. if digit >= base {
  614. break
  615. }
  616. value = value*base + digit
  617. self.read()
  618. }
  619. } else {
  620. for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
  621. if self.chr == '}' {
  622. self.read()
  623. break
  624. }
  625. digit := uint32(digitValue(self.chr))
  626. if digit >= base {
  627. break
  628. }
  629. value = value*base + digit
  630. self.read()
  631. }
  632. }
  633. chr = rune(value)
  634. }
  635. if chr >= utf8.RuneSelf {
  636. if chr > 0xFFFF {
  637. return 2, true
  638. }
  639. return 1, true
  640. }
  641. return 1, false
  642. }
  643. func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) {
  644. // " ' /
  645. quote := rune(self.str[offset])
  646. length := 0
  647. isUnicode := false
  648. for self.chr != quote {
  649. chr := self.chr
  650. if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
  651. goto newline
  652. }
  653. self.read()
  654. if chr == '\\' {
  655. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  656. if quote == '/' {
  657. goto newline
  658. }
  659. self.scanNewline()
  660. } else {
  661. l, u := self.scanEscape(quote)
  662. length += l
  663. if u {
  664. isUnicode = true
  665. }
  666. }
  667. continue
  668. } else if chr == '[' && quote == '/' {
  669. // Allow a slash (/) in a bracket character class ([...])
  670. // TODO Fix this, this is hacky...
  671. quote = -1
  672. } else if chr == ']' && quote == -1 {
  673. quote = '/'
  674. }
  675. if chr >= utf8.RuneSelf {
  676. isUnicode = true
  677. if chr > 0xFFFF {
  678. length++
  679. }
  680. }
  681. length++
  682. }
  683. // " ' /
  684. self.read()
  685. literal = self.str[offset:self.chrOffset]
  686. if parse {
  687. // TODO strict
  688. parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
  689. }
  690. return
  691. newline:
  692. self.scanNewline()
  693. errStr := "String not terminated"
  694. if quote == '/' {
  695. errStr = "Invalid regular expression: missing /"
  696. self.error(self.idxOf(offset), errStr)
  697. }
  698. return "", "", errStr
  699. }
  700. func (self *_parser) scanNewline() {
  701. if self.chr == '\r' {
  702. self.read()
  703. if self.chr != '\n' {
  704. return
  705. }
  706. }
  707. self.read()
  708. }
  709. func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) {
  710. offset := self.chrOffset
  711. var end int
  712. length := 0
  713. isUnicode := false
  714. hasCR := false
  715. for {
  716. chr := self.chr
  717. if chr < 0 {
  718. goto unterminated
  719. }
  720. self.read()
  721. if chr == '`' {
  722. finished = true
  723. end = self.chrOffset - 1
  724. break
  725. }
  726. if chr == '\\' {
  727. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  728. if self.chr == '\r' {
  729. hasCR = true
  730. }
  731. self.scanNewline()
  732. } else {
  733. if self.chr == '8' || self.chr == '9' {
  734. if parseErr == "" {
  735. parseErr = "\\8 and \\9 are not allowed in template strings."
  736. }
  737. }
  738. l, u := self.scanEscape('`')
  739. length += l
  740. if u {
  741. isUnicode = true
  742. }
  743. }
  744. continue
  745. }
  746. if chr == '$' && self.chr == '{' {
  747. self.read()
  748. end = self.chrOffset - 2
  749. break
  750. }
  751. if chr >= utf8.RuneSelf {
  752. isUnicode = true
  753. if chr > 0xFFFF {
  754. length++
  755. }
  756. } else if chr == '\r' {
  757. hasCR = true
  758. if self.chr == '\n' {
  759. length--
  760. }
  761. }
  762. length++
  763. }
  764. literal = self.str[offset:end]
  765. if hasCR {
  766. literal = normaliseCRLF(literal)
  767. }
  768. if parseErr == "" {
  769. parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
  770. }
  771. self.insertSemicolon = true
  772. return
  773. unterminated:
  774. err = err_UnexpectedEndOfInput
  775. return
  776. }
  777. func normaliseCRLF(s string) string {
  778. var buf strings.Builder
  779. buf.Grow(len(s))
  780. for i := 0; i < len(s); i++ {
  781. if s[i] == '\r' {
  782. buf.WriteByte('\n')
  783. if i < len(s)-1 && s[i+1] == '\n' {
  784. i++
  785. }
  786. } else {
  787. buf.WriteByte(s[i])
  788. }
  789. }
  790. return buf.String()
  791. }
  792. func hex2decimal(chr byte) (value rune, ok bool) {
  793. {
  794. chr := rune(chr)
  795. switch {
  796. case '0' <= chr && chr <= '9':
  797. return chr - '0', true
  798. case 'a' <= chr && chr <= 'f':
  799. return chr - 'a' + 10, true
  800. case 'A' <= chr && chr <= 'F':
  801. return chr - 'A' + 10, true
  802. }
  803. return
  804. }
  805. }
  806. func parseNumberLiteral(literal string) (value interface{}, err error) {
  807. // TODO Is Uint okay? What about -MAX_UINT
  808. value, err = strconv.ParseInt(literal, 0, 64)
  809. if err == nil {
  810. return
  811. }
  812. parseIntErr := err // Save this first error, just in case
  813. value, err = strconv.ParseFloat(literal, 64)
  814. if err == nil {
  815. return
  816. } else if err.(*strconv.NumError).Err == strconv.ErrRange {
  817. // Infinity, etc.
  818. return value, nil
  819. }
  820. err = parseIntErr
  821. if err.(*strconv.NumError).Err == strconv.ErrRange {
  822. if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
  823. // Could just be a very large number (e.g. 0x8000000000000000)
  824. var value float64
  825. literal = literal[2:]
  826. for _, chr := range literal {
  827. digit := digitValue(chr)
  828. if digit >= 16 {
  829. goto error
  830. }
  831. value = value*16 + float64(digit)
  832. }
  833. return value, nil
  834. }
  835. }
  836. error:
  837. return nil, errors.New("Illegal numeric literal")
  838. }
  839. func parseBigIntLiteral(literal string) (value interface{}, err error) {
  840. if literal[len(literal)-1] != 'n' {
  841. return nil, fmt.Errorf("expected suffix n")
  842. }
  843. literal = literal[:len(literal)-1]
  844. b := &big.Int{}
  845. b, ok := b.SetString(literal, 0)
  846. if ok {
  847. value = b
  848. return
  849. } else {
  850. err = errors.New("Illegal bigint literal")
  851. }
  852. return
  853. }
// parseStringLiteral decodes the escape sequences in literal (the text
// between the delimiters) and returns the resulting string together with an
// error message that is empty on success.
//
// length is the size the caller expects the decoded result to have: UTF-16
// code units when unicode is true, bytes otherwise. unicode selects a UTF-16
// backed result and must be set whenever the decoded text contains non-ASCII
// characters. strict makes octal escapes an error.
//
// The function panics if literal ends in a lone backslash or if the decoded
// size does not match length.
func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) {
	var sb strings.Builder
	var chars []uint16
	if unicode {
		// The UTF-16 form starts with a BOM marker, hence length+1.
		chars = make([]uint16, 1, length+1)
		chars[0] = unistring.BOM
	} else {
		sb.Grow(length)
	}
	str := literal
	for len(str) > 0 {
		switch chr := str[0]; {
		// We do not explicitly handle the case of the quote
		// value, which can be: " ' /
		// This assumes we're already passed a partially well-formed literal
		case chr >= utf8.RuneSelf:
			// Literal non-ASCII character: copy it as one or two UTF-16 units.
			chr, size := utf8.DecodeRuneInString(str)
			if chr <= 0xFFFF {
				chars = append(chars, uint16(chr))
			} else {
				first, second := utf16.EncodeRune(chr)
				chars = append(chars, uint16(first), uint16(second))
			}
			str = str[size:]
			continue
		case chr != '\\':
			// Plain ASCII character.
			if unicode {
				chars = append(chars, uint16(chr))
			} else {
				sb.WriteByte(chr)
			}
			str = str[1:]
			continue
		}
		// From here on str starts with a backslash.
		if len(str) <= 1 {
			panic("len(str) <= 1")
		}
		chr := str[1]
		var value rune
		if chr >= utf8.RuneSelf {
			// Backslash followed by a non-ASCII character: the character
			// stands for itself, except that an escaped U+2028/U+2029 is a
			// line continuation and produces nothing.
			str = str[1:]
			var size int
			value, size = utf8.DecodeRuneInString(str)
			str = str[size:] // \ + <character>
			if value == '\u2028' || value == '\u2029' {
				continue
			}
		} else {
			str = str[2:] // \<character>
			switch chr {
			case 'b':
				value = '\b'
			case 'f':
				value = '\f'
			case 'n':
				value = '\n'
			case 'r':
				value = '\r'
			case 't':
				value = '\t'
			case 'v':
				value = '\v'
			case 'x', 'u':
				// size is the number of hex digits of the fixed-width forms
				// (\xHH, \uHHHH); it stays 0 for the braced form \u{...}.
				size := 0
				switch chr {
				case 'x':
					size = 2
				case 'u':
					if str == "" || str[0] != '{' {
						size = 4
					}
				}
				if size > 0 {
					if len(str) < size {
						return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
					}
					for j := 0; j < size; j++ {
						decimal, ok := hex2decimal(str[j])
						if !ok {
							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size])
						}
						value = value<<4 | decimal
					}
				} else {
					// Braced form: skip the '{' and read hex digits up to the
					// closing '}'. size ends up as the number of bytes to
					// skip, including the '}'.
					str = str[1:]
					var val rune
					value = -1
					for ; size < len(str); size++ {
						if str[size] == '}' {
							if size == 0 {
								return "", fmt.Sprintf("invalid escape: \\%s", string(chr))
							}
							size++
							value = val
							break
						}
						decimal, ok := hex2decimal(str[size])
						if !ok {
							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1])
						}
						val = val<<4 | decimal
						if val > utf8.MaxRune {
							return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1])
						}
					}
					// value is still -1 if no closing brace was found.
					if value == -1 {
						return "", fmt.Sprintf("unterminated \\u{: %q", str)
					}
				}
				str = str[size:]
				if chr == 'x' {
					break
				}
				if value > utf8.MaxRune {
					panic("value > utf8.MaxRune")
				}
			case '0':
				// \0 not followed by an octal digit is the NUL character;
				// otherwise it is the start of an octal escape.
				if len(str) == 0 || '0' > str[0] || str[0] > '7' {
					value = 0
					break
				}
				fallthrough
			case '1', '2', '3', '4', '5', '6', '7':
				if strict {
					return "", "Octal escape sequences are not allowed in this context"
				}
				// Up to two more octal digits follow the first one.
				value = rune(chr) - '0'
				j := 0
				for ; j < 2; j++ {
					if len(str) < j+1 {
						break
					}
					chr := str[j]
					if '0' > chr || chr > '7' {
						break
					}
					decimal := rune(str[j]) - '0'
					value = (value << 3) | decimal
				}
				str = str[j:]
			case '\\':
				value = '\\'
			case '\'', '"':
				value = rune(chr)
			case '\r':
				// Line continuation: an escaped CR, optionally followed by
				// LF, produces nothing.
				if len(str) > 0 {
					if str[0] == '\n' {
						str = str[1:]
					}
				}
				fallthrough
			case '\n':
				continue
			default:
				// Any other escaped character stands for itself.
				value = rune(chr)
			}
		}
		if unicode {
			if value <= 0xFFFF {
				chars = append(chars, uint16(value))
			} else {
				first, second := utf16.EncodeRune(value)
				chars = append(chars, uint16(first), uint16(second))
			}
		} else {
			if value >= utf8.RuneSelf {
				return "", "Unexpected unicode character"
			}
			sb.WriteByte(byte(value))
		}
	}
	// The decoded size must match what the caller computed while scanning.
	if unicode {
		if len(chars) != length+1 {
			panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
		}
		return unistring.FromUtf16(chars), ""
	}
	if sb.Len() != length {
		panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
	}
	return unistring.String(sb.String()), ""
}
  1036. func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
  1037. offset := self.chrOffset
  1038. tkn := token.NUMBER
  1039. if decimalPoint {
  1040. offset--
  1041. self.scanMantissa(10)
  1042. } else {
  1043. if self.chr == '0' {
  1044. self.read()
  1045. base := 0
  1046. switch self.chr {
  1047. case 'x', 'X':
  1048. base = 16
  1049. case 'o', 'O':
  1050. base = 8
  1051. case 'b', 'B':
  1052. base = 2
  1053. case '.', 'e', 'E', 'n':
  1054. // no-op
  1055. default:
  1056. // legacy octal
  1057. self.scanMantissa(8)
  1058. goto end
  1059. }
  1060. if base > 0 {
  1061. self.read()
  1062. if !isDigit(self.chr, base) {
  1063. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1064. }
  1065. self.scanMantissa(base)
  1066. goto end
  1067. }
  1068. } else {
  1069. self.scanMantissa(10)
  1070. }
  1071. if self.chr == '.' {
  1072. self.read()
  1073. self.scanMantissa(10)
  1074. }
  1075. }
  1076. if self.chr == 'e' || self.chr == 'E' {
  1077. self.read()
  1078. if self.chr == '-' || self.chr == '+' {
  1079. self.read()
  1080. }
  1081. if isDecimalDigit(self.chr) {
  1082. self.read()
  1083. self.scanMantissa(10)
  1084. } else {
  1085. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1086. }
  1087. }
  1088. end:
  1089. if (isIdentifierStart(self.chr) || isDecimalDigit(self.chr)) && self.chr != 'n' {
  1090. return token.ILLEGAL, self.str[offset:self.chrOffset]
  1091. } else if self.chr == 'n' {
  1092. self.read()
  1093. return tkn, self.str[offset:self.chrOffset]
  1094. }
  1095. return tkn, self.str[offset:self.chrOffset]
  1096. }