parse.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. package rfc5321
  2. // Parse RFC5321 productions, no regex
  3. import (
  4. "bytes"
  5. "errors"
  6. "net"
  7. "strconv"
  8. "strings"
  9. )
  const (
  	// LimitPath is the maximum total length of a reverse-path or forward-path (256).
  	LimitPath = 256
  	// LimitLocalPart is the maximum total length of a user name or other local-part.
  	// The specified maximum is 64; it is doubled here, since a few major services
  	// don't respect that and go over.
  	LimitLocalPart = 64 * 2
  	// LimitDomain is the maximum total length of a domain name or number (255).
  	LimitDomain = 255
  	// LimitRecipients is the minimum total number of recipients that must be buffered (100).
  	LimitRecipients = 100
  )
  // atExpected is returned by mailbox when the local-part is not followed by "@".
  // forwardPath compares against it so that a bare postmaster address can be accepted.
  var atExpected = errors.New("@ expected as part of mailbox")
  // Parser parses email addresses and SMTP command arguments according to
  // https://tools.ietf.org/html/rfc5321
  //
  // The exported fields hold the result of the most recent parse; they are
  // cleared by Reset once the parser has been used.
  type Parser struct {
  	// accept is a scratch buffer that collects the bytes of the token being parsed
  	accept bytes.Buffer
  	// buf is the input currently being parsed
  	buf []byte
  	// PathParams holds the parameters found after the path by MailFrom / RcptTo;
  	// each entry is a [keyword, value] pair as returned by param
  	PathParams [][]string
  	// ADL holds the at-domains of the source route, each with its leading "@"
  	ADL []string
  	// LocalPart is the local part of the mailbox (quotes and escapes removed)
  	LocalPart string
  	LocalPartQuotes bool // does the local part need quotes?
  	// NOTE(review): mailbox copies the accept buffer into Domain, and the visible
  	// address-literal code never writes "[" or "]" to that buffer - confirm
  	// whether the brackets mentioned below are really present.
  	Domain string // can be an ip-address, enclosed in square brackets if it is
  	// IP is set when an IPv4 or IPv6 address literal was parsed
  	IP net.IP
  	// pos is the index of the current byte in buf; -1 before the first call to next
  	pos int
  	// NullPath is set by reversePath when the path is "<>"
  	NullPath bool
  	// ch is the byte most recently read by next
  	ch byte
  }
  36. func NewParser(buf []byte) *Parser {
  37. s := new(Parser)
  38. s.buf = buf
  39. s.pos = -1
  40. return s
  41. }
  42. func (s *Parser) Reset() {
  43. s.buf = s.buf[:0]
  44. if s.pos != -1 {
  45. s.pos = -1
  46. s.ADL = nil
  47. s.PathParams = nil
  48. s.NullPath = false
  49. s.LocalPart = ""
  50. s.Domain = ""
  51. s.accept.Reset()
  52. s.LocalPartQuotes = false
  53. s.IP = nil
  54. }
  55. }
  // set resets the parser and then makes input the buffer to parse.
  // Reset runs first because it truncates s.buf.
  func (s *Parser) set(input []byte) {
  	s.Reset()
  	s.buf = input
  }
  60. func (s *Parser) next() byte {
  61. s.pos++
  62. if s.pos < len(s.buf) {
  63. s.ch = s.buf[s.pos]
  64. return s.ch
  65. }
  66. return 0
  67. }
  68. func (s *Parser) peek() byte {
  69. if s.pos+1 < len(s.buf) {
  70. return s.buf[s.pos+1]
  71. }
  72. return 0
  73. }
  74. func (s *Parser) reversePath() (err error) {
  75. if s.peek() == ' ' {
  76. s.next() // tolerate a space at the front
  77. }
  78. if i := bytes.Index(s.buf[s.pos+1:], []byte{'<', '>'}); i == 0 {
  79. s.NullPath = true
  80. return nil
  81. }
  82. if err = s.path(); err != nil {
  83. return err
  84. }
  85. return nil
  86. }
  87. func (s *Parser) forwardPath() (err error) {
  88. if s.peek() == ' ' {
  89. s.next() // tolerate a space at the front
  90. }
  91. if err = s.path(); err != nil && err != atExpected {
  92. return err
  93. }
  94. // special case for forwardPath only - can just be addressed to postmaster
  95. if i := strings.Index(strings.ToLower(s.LocalPart), postmasterLocalPart); i == 0 {
  96. s.LocalPart = postmasterLocalPart
  97. return nil // atExpected will be ignored, postmaster doesn't need @
  98. }
  99. return err // it may return atExpected
  100. }
  101. //MailFrom accepts the following syntax: Reverse-path [SP Mail-parameters] CRLF
  102. func (s *Parser) MailFrom(input []byte) (err error) {
  103. s.set(input)
  104. if err := s.reversePath(); err != nil {
  105. return err
  106. }
  107. s.next()
  108. if p := s.next(); p == ' ' {
  109. // parse Rcpt-parameters
  110. // The optional <mail-parameters> are associated with negotiated SMTP
  111. // service extensions
  112. if tup, err := s.parameters(); err != nil {
  113. return errors.New("param parse error")
  114. } else if len(tup) > 0 {
  115. s.PathParams = tup
  116. }
  117. }
  118. return nil
  119. }
  // postmasterLocalPart is the lower-case local part that forwardPath accepts
  // without a domain.
  const postmasterLocalPart = "postmaster"
  121. //RcptTo accepts the following syntax: ( "<Postmaster@" Domain ">" / "<Postmaster>" /
  122. // Forward-path ) [SP Rcpt-parameters] CRLF
  123. func (s *Parser) RcptTo(input []byte) (err error) {
  124. s.set(input)
  125. if err := s.forwardPath(); err != nil {
  126. return err
  127. }
  128. s.next()
  129. if p := s.next(); p == ' ' {
  130. // parse Rcpt-parameters
  131. if tup, err := s.parameters(); err != nil {
  132. return errors.New("param parse error")
  133. } else if len(tup) > 0 {
  134. s.PathParams = tup
  135. }
  136. }
  137. return nil
  138. }
  139. // esmtp-param *(SP esmtp-param)
  140. func (s *Parser) parameters() ([][]string, error) {
  141. params := make([][]string, 0)
  142. for {
  143. if result, err := s.param(); err != nil {
  144. return params, err
  145. } else {
  146. params = append(params, result)
  147. }
  148. if p := s.next(); p != ' ' {
  149. return params, nil
  150. }
  151. }
  152. }
  // isESMTPValue reports whether c may appear in an esmtp-value:
  // any byte from "!" through "~" except "=" (%d33-60 / %d62-126).
  func isESMTPValue(c byte) bool {
  	return c >= '!' && c <= '~' && c != '='
  }
  160. // esmtp-param = esmtp-keyword ["=" esmtp-value]
  161. // esmtp-keyword = (ALPHA / DIGIT) *(ALPHA / DIGIT / "-")
  162. // esmtp-value = 1*(%d33-60 / %d62-126)
  163. func (s *Parser) param() (result []string, err error) {
  164. state := 0
  165. var key, value string
  166. defer func() {
  167. result = append(result, key, value)
  168. s.accept.Reset()
  169. }()
  170. for c := s.next(); ; c = s.next() {
  171. switch state {
  172. case 0:
  173. // first char must be let-dig
  174. if !isLetDig(c) {
  175. return result, errors.New("parse error")
  176. }
  177. // accept
  178. s.accept.WriteByte(c)
  179. state = 1
  180. case 1:
  181. // *(ALPHA / DIGIT / "-")
  182. if !isLetDig(c) {
  183. if c == '=' {
  184. key = s.accept.String()
  185. s.accept.Reset()
  186. state = 2
  187. continue
  188. } else if c == '-' {
  189. // cannot have - at the end of a keyword
  190. if p := s.peek(); !isLetDig(p) && p != '-' {
  191. return result, errors.New("parse error")
  192. }
  193. s.accept.WriteByte(c)
  194. continue
  195. }
  196. key = s.accept.String()
  197. return result, nil
  198. }
  199. s.accept.WriteByte(c)
  200. case 2:
  201. // start of value, must match at least 1
  202. if !isESMTPValue(c) {
  203. return result, errors.New("parse error")
  204. }
  205. s.accept.WriteByte(c)
  206. if !isESMTPValue(s.peek()) {
  207. value = s.accept.String()
  208. return result, nil
  209. }
  210. state = 3
  211. case 3:
  212. // 1*(%d33-60 / %d62-126)
  213. s.accept.WriteByte(c)
  214. if !isESMTPValue(s.peek()) {
  215. value = s.accept.String()
  216. return result, nil
  217. }
  218. }
  219. }
  220. }
  221. // "<" [ A-d-l ":" ] Mailbox ">"
  222. func (s *Parser) path() (err error) {
  223. if s.next() == '<' && s.peek() == '@' {
  224. if err = s.adl(); err == nil {
  225. s.next()
  226. if s.ch != ':' {
  227. return errors.New("syntax error")
  228. }
  229. }
  230. }
  231. if err = s.mailbox(); err != nil {
  232. return err
  233. }
  234. if p := s.peek(); p != '>' {
  235. return errors.New("missing closing >")
  236. }
  237. return nil
  238. }
  239. // At-domain *( "," At-domain )
  240. func (s *Parser) adl() error {
  241. for {
  242. if err := s.atDomain(); err != nil {
  243. return err
  244. }
  245. s.ADL = append(s.ADL, s.accept.String())
  246. s.accept.Reset()
  247. if s.peek() != ',' {
  248. break
  249. }
  250. s.next()
  251. }
  252. return nil
  253. }
  254. // At-domain = "@" Domain
  255. func (s *Parser) atDomain() error {
  256. if s.next() == '@' {
  257. s.accept.WriteByte('@')
  258. return s.domain()
  259. }
  260. return errors.New("syntax error")
  261. }
  262. // sub-domain *("." sub-domain)
  263. func (s *Parser) domain() error {
  264. for {
  265. if err := s.subdomain(); err != nil {
  266. return err
  267. }
  268. if p := s.peek(); p != '.' {
  269. if p != ':' && p != ',' && p != '>' && p != 0 {
  270. return errors.New("domain parse error")
  271. }
  272. break
  273. }
  274. s.accept.WriteByte(s.next())
  275. }
  276. return nil
  277. }
  278. // Let-dig [Ldh-str]
  279. func (s *Parser) subdomain() error {
  280. state := 0
  281. for c := s.next(); ; c = s.next() {
  282. switch state {
  283. case 0:
  284. p := s.peek()
  285. if isLetDig(c) {
  286. s.accept.WriteByte(c)
  287. if !isLetDig(p) && p != '-' {
  288. return nil
  289. }
  290. state = 1
  291. continue
  292. }
  293. return errors.New("subdomain parse err")
  294. case 1:
  295. p := s.peek()
  296. if isLetDig(c) || c == '-' {
  297. s.accept.WriteByte(c)
  298. }
  299. if !isLetDig(p) && p != '-' {
  300. if c == '-' {
  301. return errors.New("subdomain parse err")
  302. }
  303. return nil
  304. }
  305. }
  306. }
  307. }
  308. // Local-part "@" ( Domain / address-literal )
  309. func (s *Parser) mailbox() error {
  310. defer func() {
  311. if s.accept.Len() > 0 {
  312. s.Domain = s.accept.String()
  313. s.accept.Reset()
  314. }
  315. }()
  316. err := s.localPart()
  317. if err != nil {
  318. return err
  319. }
  320. if s.ch != '@' {
  321. return atExpected
  322. }
  323. if p := s.peek(); p == '[' {
  324. return s.addressLiteral()
  325. } else {
  326. return s.domain()
  327. }
  328. }
  329. // "[" ( IPv4-address-literal /
  330. // IPv6-address-literal /
  331. // General-address-literal ) "]"
  332. func (s *Parser) addressLiteral() error {
  333. ch := s.next()
  334. if ch == '[' {
  335. p := s.peek()
  336. var err error
  337. if p == 'I' || p == 'i' {
  338. for i := 0; i < 5; i++ {
  339. s.next() // IPv6:
  340. }
  341. err = s.ipv6AddressLiteral()
  342. } else if p >= 48 && p <= 57 {
  343. err = s.ipv4AddressLiteral()
  344. }
  345. if err != nil {
  346. return err
  347. }
  348. if s.ch != ']' {
  349. return errors.New("] expected for address literal")
  350. }
  351. return nil
  352. }
  353. return nil
  354. }
  355. // Snum 3("." Snum)
  356. func (s *Parser) ipv4AddressLiteral() error {
  357. for i := 0; i < 4; i++ {
  358. if err := s.snum(); err != nil {
  359. return err
  360. }
  361. if s.ch != '.' {
  362. break
  363. }
  364. s.accept.WriteByte(s.ch)
  365. }
  366. ip := net.ParseIP(s.accept.String())
  367. if ip == nil {
  368. return errors.New("invalid ip")
  369. }
  370. s.IP = ip
  371. return nil
  372. }
  373. // 1*3DIGIT
  374. // representing a decimal integer
  375. // value accept the range 0 through 255
  376. func (s *Parser) snum() error {
  377. state := 0
  378. var num bytes.Buffer
  379. for i := 4; i > 0; i-- {
  380. c := s.next()
  381. if state == 0 {
  382. if !(c >= 48 && c <= 57) {
  383. return errors.New("snum parse error")
  384. } else {
  385. num.WriteByte(s.ch)
  386. s.accept.WriteByte(s.ch)
  387. state = 1
  388. continue
  389. }
  390. }
  391. if state == 1 {
  392. if !(c >= 48 && c <= 57) {
  393. if v, err := strconv.Atoi(num.String()); err != nil {
  394. return err
  395. } else if v >= 0 && v <= 255 {
  396. return nil
  397. } else {
  398. return errors.New("invalid ipv4")
  399. }
  400. } else {
  401. num.WriteByte(s.ch)
  402. s.accept.WriteByte(s.ch)
  403. }
  404. }
  405. }
  406. return errors.New("too many digits")
  407. }
  408. //IPv6:" IPv6-addr
  409. func (s *Parser) ipv6AddressLiteral() error {
  410. var ip bytes.Buffer
  411. for c := s.next(); ; c = s.next() {
  412. if !(c >= 48 && c <= 57) &&
  413. !(c >= 65 && c <= 70) &&
  414. !(c >= 97 && c <= 102) &&
  415. c != ':' && c != '.' {
  416. ipstr := ip.String()
  417. if v := net.ParseIP(ipstr); v != nil {
  418. s.accept.WriteString(v.String())
  419. s.IP = v
  420. return nil
  421. }
  422. return errors.New("invalid ipv6")
  423. } else {
  424. ip.WriteByte(c)
  425. }
  426. }
  427. }
  428. // Dot-string / Quoted-string
  429. func (s *Parser) localPart() error {
  430. defer func() {
  431. if s.accept.Len() > 0 {
  432. s.LocalPart = s.accept.String()
  433. s.accept.Reset()
  434. }
  435. }()
  436. p := s.peek()
  437. if p == '"' {
  438. return s.quotedString()
  439. } else {
  440. return s.dotString()
  441. }
  442. }
  443. // DQUOTE *QcontentSMTP DQUOTE
  444. func (s *Parser) quotedString() error {
  445. if s.next() == '"' {
  446. if err := s.QcontentSMTP(); err != nil {
  447. return err
  448. }
  449. if s.ch != '"' {
  450. return errors.New("quoted string not closed")
  451. } else {
  452. // accept the "
  453. s.next()
  454. }
  455. }
  456. return nil
  457. }
  458. // qtextSMTP / quoted-pairSMTP
  459. // quoted-pairSMTP = %d92 %d32-126
  460. // qtextSMTP = %d32-33 / %d35-91 / %d93-126
  461. func (s *Parser) QcontentSMTP() error {
  462. state := 0
  463. for {
  464. ch := s.next()
  465. switch state {
  466. case 0:
  467. if ch == '\\' {
  468. state = 1
  469. // s.accept.WriteByte(ch)
  470. continue
  471. } else if ch == 32 || ch == 33 ||
  472. (ch >= 35 && ch <= 91) ||
  473. (ch >= 93 && ch <= 126) {
  474. if s.LocalPartQuotes == false && !s.isAtext(ch) {
  475. s.LocalPartQuotes = true
  476. }
  477. s.accept.WriteByte(ch)
  478. continue
  479. }
  480. return nil
  481. case 1:
  482. // escaped character state
  483. if ch >= 32 && ch <= 126 {
  484. if s.LocalPartQuotes == false && !s.isAtext(ch) {
  485. s.LocalPartQuotes = true
  486. }
  487. s.accept.WriteByte(ch)
  488. state = 0
  489. continue
  490. } else {
  491. return errors.New("non-printable character found")
  492. }
  493. }
  494. }
  495. }
  496. //Dot-string = Atom *("." Atom)
  497. func (s *Parser) dotString() error {
  498. for {
  499. if err := s.atom(); err != nil {
  500. return err
  501. }
  502. if s.ch != '.' {
  503. break
  504. }
  505. s.accept.WriteByte(s.ch)
  506. }
  507. return nil
  508. }
  509. // 1*atext
  510. func (s *Parser) atom() error {
  511. state := 0
  512. for {
  513. if state == 0 {
  514. if !s.isAtext(s.next()) {
  515. return errors.New("atom parse error")
  516. } else {
  517. s.accept.WriteByte(s.ch)
  518. state = 1
  519. continue
  520. }
  521. }
  522. if state == 1 {
  523. if !s.isAtext(s.next()) {
  524. return nil
  525. } else {
  526. s.accept.WriteByte(s.ch)
  527. }
  528. }
  529. }
  530. }
  531. /*
  532. Dot-string = Atom *("." Atom)
  533. Atom = 1*atext
  534. atext = ALPHA / DIGIT / ; Any character except controls,
  535. "!" / "#" / ; SP, and specials.
  536. "$" / "%" / ; Used for atoms
  537. "&" / "'" /
  538. "*" / "+" /
  539. "-" / "/" /
  540. "=" / "?" /
  541. "^" / "_" /
  542. "`" / "{" /
  543. "|" / "}" /
  544. "~"
  545. */
  546. func (s *Parser) isAtext(c byte) bool {
  547. if ('0' <= c && c <= '9') ||
  548. ('a' <= c && c <= 'z') ||
  549. ('A' <= c && c <= 'Z') ||
  550. c == '!' || c == '#' ||
  551. c == '$' || c == '%' ||
  552. c == '&' || c == '\'' ||
  553. c == '*' || c == '+' ||
  554. c == '-' || c == '/' ||
  555. c == '=' || c == '?' ||
  556. c == '^' || c == '_' ||
  557. c == '`' || c == '{' ||
  558. c == '|' || c == '}' ||
  559. c == '~' {
  560. return true
  561. }
  562. return false
  563. }
  // isLetDig reports whether c is an ASCII letter or digit (Let-dig).
  func isLetDig(c byte) bool {
  	switch {
  	case c >= '0' && c <= '9':
  		return true
  	case c >= 'A' && c <= 'Z':
  		return true
  	case c >= 'a' && c <= 'z':
  		return true
  	default:
  		return false
  	}
  }
  572. //ehlo = "EHLO" SP ( Domain / address-literal ) CRLF
  573. // Note: "HELO" is ignored here
  574. func (s *Parser) Ehlo(input []byte) (domain string, ip net.IP, err error) {
  575. s.set(input)
  576. s.next()
  577. if s.ch == ' ' {
  578. if p := s.peek(); p == '[' {
  579. err = s.addressLiteral()
  580. if err == nil {
  581. domain = s.accept.String()
  582. ip = net.ParseIP(domain)
  583. if ip == nil {
  584. err = errors.New("invalid ip")
  585. }
  586. return
  587. }
  588. } else {
  589. err = s.domain()
  590. if err == nil {
  591. domain = s.accept.String()
  592. }
  593. return
  594. }
  595. } else {
  596. err = errors.New("ehlo parse error")
  597. }
  598. return domain, ip, err
  599. }
  600. // helo = "HELO" SP Domain CRLF
  601. // Note: "HELO" is ignored here, so is the CRLF at the end
  602. func (s *Parser) Helo(input []byte) (domain string, err error) {
  603. s.set(input)
  604. s.next()
  605. if s.ch == ' ' {
  606. err = s.domain()
  607. if err == nil {
  608. domain = s.accept.String()
  609. }
  610. return
  611. } else {
  612. err = errors.New("helo parse error")
  613. }
  614. return
  615. }