parse.go 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721
  1. package smtp
  2. // Parse RFC5321 productions, no regex
  3. import (
  4. "bytes"
  5. "errors"
  6. "net"
  7. "strconv"
  8. "strings"
  9. )
  // Size limits applied to RFC 5321 productions.
  const (
  	// LimitPath is the maximum total length of a reverse-path or forward-path.
  	LimitPath = 256
  	// LimitLocalPart is the maximum total length of a user name or other
  	// local-part. The nominal limit is 64; it is doubled here since a few
  	// major services don't respect that and go over.
  	LimitLocalPart = 2 * 64
  	// LimitDomain is the maximum total length of a domain name or number.
  	LimitDomain = 255
  	// LimitRecipients is the minimum total number of recipients that must be
  	// buffered.
  	LimitRecipients = 100
  )
  // PathParam holds one esmtp-param as a keyword followed by its value; the
  // value is the empty string when the parameter carried none.
  type PathParam []string

  // A TransportType specifies the message transport according to https://tools.ietf.org/html/rfc6152
  type TransportType int

  // The known transport types, numbered consecutively from zero.
  const (
  	TransportType7bit        TransportType = 0
  	TransportType8bit        TransportType = 1
  	TransportTypeUnspecified TransportType = 2
  	TransportTypeInvalid     TransportType = 3
  )
  30. func (t TransportType) String() string {
  31. switch t {
  32. case TransportType7bit:
  33. return "7bit"
  34. case TransportType8bit:
  35. return "8bit"
  36. case TransportTypeUnspecified:
  37. return "unknown"
  38. case TransportTypeInvalid:
  39. return "invalid"
  40. }
  41. return "invalid"
  42. }
  43. func ParseTransportType(str string) TransportType {
  44. switch {
  45. case str == "7bit":
  46. return TransportType7bit
  47. case str == "8bit":
  48. return TransportType8bit
  49. case str == "unknown":
  50. return TransportTypeUnspecified
  51. case str == "invalid":
  52. return TransportTypeInvalid
  53. }
  54. return TransportTypeInvalid
  55. }
  56. // is8BitMime checks for the BODY parameter as
  57. func (p PathParam) Transport() TransportType {
  58. if len(p) != 2 {
  59. return TransportTypeUnspecified
  60. }
  61. if strings.ToUpper(p[0]) != "BODY" {
  62. // this is not a 'BODY' param
  63. return TransportTypeUnspecified
  64. }
  65. if strings.ToUpper(p[1]) == "8BITMIME" {
  66. return TransportType8bit
  67. } else if strings.ToUpper(p[1]) == "7BIT" {
  68. return TransportType7bit
  69. }
  70. return TransportTypeInvalid
  71. }
  72. var atExpected = errors.New("@ expected as part of mailbox")
  73. // Parse Email Addresses according to https://tools.ietf.org/html/rfc5321
  74. type Parser struct {
  75. NullPath bool
  76. LocalPart string
  77. LocalPartQuotes bool // does the local part need quotes?
  78. Domain string
  79. IP net.IP
  80. ADL []string
  81. PathParams []PathParam
  82. pos int
  83. ch byte
  84. buf []byte
  85. accept bytes.Buffer
  86. }
  87. func NewParser(buf []byte) *Parser {
  88. s := new(Parser)
  89. s.buf = buf
  90. s.pos = -1
  91. return s
  92. }
  93. func (s *Parser) Reset() {
  94. s.buf = s.buf[:0]
  95. if s.pos != -1 {
  96. s.pos = -1
  97. s.ADL = nil
  98. s.PathParams = nil
  99. s.NullPath = false
  100. s.LocalPart = ""
  101. s.Domain = ""
  102. s.accept.Reset()
  103. s.LocalPartQuotes = false
  104. s.IP = nil
  105. }
  106. }
  107. func (s *Parser) set(input []byte) {
  108. s.Reset()
  109. s.buf = input
  110. }
  111. func (s *Parser) next() byte {
  112. s.pos++
  113. if s.pos < len(s.buf) {
  114. s.ch = s.buf[s.pos]
  115. return s.ch
  116. }
  117. return 0
  118. }
  119. func (s *Parser) peek() byte {
  120. if s.pos+1 < len(s.buf) {
  121. return s.buf[s.pos+1]
  122. }
  123. return 0
  124. }
  125. func (s *Parser) reversePath() (err error) {
  126. if s.peek() == ' ' {
  127. s.next() // tolerate a space at the front
  128. }
  129. if i := bytes.Index(s.buf[s.pos+1:], []byte{'<', '>'}); i == 0 {
  130. s.NullPath = true
  131. return nil
  132. }
  133. if err = s.path(); err != nil {
  134. return err
  135. }
  136. return nil
  137. }
  138. func (s *Parser) forwardPath() (err error) {
  139. if s.peek() == ' ' {
  140. s.next() // tolerate a space at the front
  141. }
  142. if err = s.path(); err != nil && err != atExpected {
  143. return err
  144. }
  145. // special case for forwardPath only - can just be addressed to postmaster
  146. if i := strings.Index(strings.ToLower(s.LocalPart), postmasterLocalPart); i == 0 {
  147. s.LocalPart = postmasterLocalPart
  148. return nil // atExpected will be ignored, postmaster doesn't need @
  149. }
  150. return err // it may return atExpected
  151. }
  152. //MailFrom accepts the following syntax: Reverse-path [SP Mail-parameters] CRLF
  153. func (s *Parser) MailFrom(input []byte) (err error) {
  154. s.set(input)
  155. if err := s.reversePath(); err != nil {
  156. return err
  157. }
  158. s.next()
  159. if p := s.next(); p == ' ' {
  160. // parse Rcpt-parameters
  161. // The optional <mail-parameters> are associated with negotiated SMTP
  162. // service extensions
  163. if tup, err := s.parameters(); err != nil {
  164. return errors.New("param parse error")
  165. } else if len(tup) > 0 {
  166. s.PathParams = tup
  167. }
  168. }
  169. return nil
  170. }
  171. const postmasterLocalPart = "postmaster"
  172. //RcptTo accepts the following syntax: ( "<Postmaster@" Domain ">" / "<Postmaster>" /
  173. // Forward-path ) [SP Rcpt-parameters] CRLF
  174. func (s *Parser) RcptTo(input []byte) (err error) {
  175. s.set(input)
  176. if err := s.forwardPath(); err != nil {
  177. return err
  178. }
  179. s.next()
  180. if p := s.next(); p == ' ' {
  181. // parse Rcpt-parameters
  182. if tup, err := s.parameters(); err != nil {
  183. return errors.New("param parse error")
  184. } else if len(tup) > 0 {
  185. s.PathParams = tup
  186. }
  187. }
  188. return nil
  189. }
  190. // esmtp-param *(SP esmtp-param)
  191. func (s *Parser) parameters() ([]PathParam, error) {
  192. params := make([]PathParam, 0)
  193. for {
  194. if result, err := s.param(); err != nil {
  195. return params, err
  196. } else {
  197. params = append(params, result)
  198. }
  199. if p := s.next(); p != ' ' {
  200. return params, nil
  201. }
  202. }
  203. }
  // isESMTPValue reports whether c may appear in an esmtp-value, i.e. it is a
  // printable ASCII character from '!' to '~' other than '='.
  func isESMTPValue(c byte) bool {
  	return c >= '!' && c <= '~' && c != '='
  }
  211. // esmtp-param = esmtp-keyword ["=" esmtp-value]
  212. // esmtp-keyword = (ALPHA / DIGIT) *(ALPHA / DIGIT / "-")
  213. // esmtp-value = 1*(%d33-60 / %d62-126)
  214. func (s *Parser) param() (result []string, err error) {
  215. state := 0
  216. var key, value string
  217. defer func() {
  218. result = append(result, key, value)
  219. s.accept.Reset()
  220. }()
  221. for c := s.next(); ; c = s.next() {
  222. switch state {
  223. case 0:
  224. // first char must be let-dig
  225. if !isLetDig(c) {
  226. return result, errors.New("parse error")
  227. }
  228. // accept
  229. s.accept.WriteByte(c)
  230. state = 1
  231. case 1:
  232. // *(ALPHA / DIGIT / "-")
  233. if !isLetDig(c) {
  234. if c == '=' {
  235. key = s.accept.String()
  236. s.accept.Reset()
  237. state = 2
  238. continue
  239. } else if c == '-' {
  240. // cannot have - at the end of a keyword
  241. if p := s.peek(); !isLetDig(p) && p != '-' {
  242. return result, errors.New("parse error")
  243. }
  244. s.accept.WriteByte(c)
  245. continue
  246. }
  247. key = s.accept.String()
  248. return result, nil
  249. }
  250. s.accept.WriteByte(c)
  251. case 2:
  252. // start of value, must match at least 1
  253. if !isESMTPValue(c) {
  254. return result, errors.New("parse error")
  255. }
  256. s.accept.WriteByte(c)
  257. if !isESMTPValue(s.peek()) {
  258. value = s.accept.String()
  259. return result, nil
  260. }
  261. state = 3
  262. case 3:
  263. // 1*(%d33-60 / %d62-126)
  264. s.accept.WriteByte(c)
  265. if !isESMTPValue(s.peek()) {
  266. value = s.accept.String()
  267. return result, nil
  268. }
  269. }
  270. }
  271. }
  272. // "<" [ A-d-l ":" ] Mailbox ">"
  273. func (s *Parser) path() (err error) {
  274. if s.next() == '<' && s.peek() == '@' {
  275. if err = s.adl(); err == nil {
  276. s.next()
  277. if s.ch != ':' {
  278. return errors.New("syntax error")
  279. }
  280. }
  281. }
  282. if err = s.mailbox(); err != nil {
  283. return err
  284. }
  285. if p := s.peek(); p != '>' {
  286. return errors.New("missing closing >")
  287. }
  288. return nil
  289. }
  290. // At-domain *( "," At-domain )
  291. func (s *Parser) adl() error {
  292. for {
  293. if err := s.atDomain(); err != nil {
  294. return err
  295. }
  296. s.ADL = append(s.ADL, s.accept.String())
  297. s.accept.Reset()
  298. if s.peek() != ',' {
  299. break
  300. }
  301. s.next()
  302. }
  303. return nil
  304. }
  305. // At-domain = "@" Domain
  306. func (s *Parser) atDomain() error {
  307. if s.next() == '@' {
  308. s.accept.WriteByte('@')
  309. return s.domain()
  310. }
  311. return errors.New("syntax error")
  312. }
  313. // sub-domain *("." sub-domain)
  314. func (s *Parser) domain() error {
  315. for {
  316. if err := s.subdomain(); err != nil {
  317. return err
  318. }
  319. if p := s.peek(); p != '.' {
  320. if p != ':' && p != ',' && p != '>' && p != 0 {
  321. return errors.New("domain parse error")
  322. }
  323. break
  324. }
  325. s.accept.WriteByte(s.next())
  326. }
  327. return nil
  328. }
  329. // Let-dig [Ldh-str]
  330. func (s *Parser) subdomain() error {
  331. state := 0
  332. for c := s.next(); ; c = s.next() {
  333. switch state {
  334. case 0:
  335. p := s.peek()
  336. if isLetDig(c) {
  337. s.accept.WriteByte(c)
  338. if !isLetDig(p) && p != '-' {
  339. return nil
  340. }
  341. state = 1
  342. continue
  343. }
  344. return errors.New("subdomain parse err")
  345. case 1:
  346. p := s.peek()
  347. if isLetDig(c) || c == '-' {
  348. s.accept.WriteByte(c)
  349. }
  350. if !isLetDig(p) && p != '-' {
  351. if c == '-' {
  352. return errors.New("subdomain parse err")
  353. }
  354. return nil
  355. }
  356. }
  357. }
  358. }
  359. // Local-part "@" ( Domain / address-literal )
  360. func (s *Parser) mailbox() error {
  361. defer func() {
  362. if s.accept.Len() > 0 {
  363. s.Domain = s.accept.String()
  364. s.accept.Reset()
  365. }
  366. }()
  367. err := s.localPart()
  368. if err != nil {
  369. return err
  370. }
  371. if s.ch != '@' {
  372. return atExpected
  373. }
  374. if p := s.peek(); p == '[' {
  375. return s.addressLiteral()
  376. } else {
  377. return s.domain()
  378. }
  379. }
  380. // "[" ( IPv4-address-literal /
  381. // IPv6-address-literal /
  382. // General-address-literal ) "]"
  383. func (s *Parser) addressLiteral() error {
  384. ch := s.next()
  385. if ch == '[' {
  386. p := s.peek()
  387. var err error
  388. if p == 'I' || p == 'i' {
  389. for i := 0; i < 5; i++ {
  390. s.next() // IPv6:
  391. }
  392. err = s.ipv6AddressLiteral()
  393. } else if p >= 48 && p <= 57 {
  394. err = s.ipv4AddressLiteral()
  395. }
  396. if err != nil {
  397. return err
  398. }
  399. if s.ch != ']' {
  400. return errors.New("] expected for address literal")
  401. }
  402. return nil
  403. }
  404. return nil
  405. }
  406. // Snum 3("." Snum)
  407. func (s *Parser) ipv4AddressLiteral() error {
  408. for i := 0; i < 4; i++ {
  409. if err := s.snum(); err != nil {
  410. return err
  411. }
  412. if s.ch != '.' {
  413. break
  414. }
  415. s.accept.WriteByte(s.ch)
  416. }
  417. ip := net.ParseIP(s.accept.String())
  418. if ip == nil {
  419. return errors.New("invalid ip")
  420. }
  421. s.IP = ip
  422. return nil
  423. }
  424. // 1*3DIGIT
  425. // representing a decimal integer
  426. // value accept the range 0 through 255
  427. func (s *Parser) snum() error {
  428. state := 0
  429. var num bytes.Buffer
  430. for i := 4; i > 0; i-- {
  431. c := s.next()
  432. if state == 0 {
  433. if !(c >= 48 && c <= 57) {
  434. return errors.New("snum parse error")
  435. } else {
  436. num.WriteByte(s.ch)
  437. s.accept.WriteByte(s.ch)
  438. state = 1
  439. continue
  440. }
  441. }
  442. if state == 1 {
  443. if !(c >= 48 && c <= 57) {
  444. if v, err := strconv.Atoi(num.String()); err != nil {
  445. return err
  446. } else if v >= 0 && v <= 255 {
  447. return nil
  448. } else {
  449. return errors.New("invalid ipv4")
  450. }
  451. } else {
  452. num.WriteByte(s.ch)
  453. s.accept.WriteByte(s.ch)
  454. }
  455. }
  456. }
  457. return errors.New("too many digits")
  458. }
  459. //IPv6:" IPv6-addr
  460. func (s *Parser) ipv6AddressLiteral() error {
  461. var ip bytes.Buffer
  462. for c := s.next(); ; c = s.next() {
  463. if !(c >= 48 && c <= 57) &&
  464. !(c >= 65 && c <= 70) &&
  465. !(c >= 97 && c <= 102) &&
  466. c != ':' && c != '.' {
  467. ipstr := ip.String()
  468. if v := net.ParseIP(ipstr); v != nil {
  469. s.accept.WriteString(v.String())
  470. s.IP = v
  471. return nil
  472. }
  473. return errors.New("invalid ipv6")
  474. } else {
  475. ip.WriteByte(c)
  476. }
  477. }
  478. }
  479. // Dot-string / Quoted-string
  480. func (s *Parser) localPart() error {
  481. defer func() {
  482. if s.accept.Len() > 0 {
  483. s.LocalPart = s.accept.String()
  484. s.accept.Reset()
  485. }
  486. }()
  487. p := s.peek()
  488. if p == '"' {
  489. return s.quotedString()
  490. } else {
  491. return s.dotString()
  492. }
  493. }
  494. // DQUOTE *QcontentSMTP DQUOTE
  495. func (s *Parser) quotedString() error {
  496. if s.next() == '"' {
  497. if err := s.QcontentSMTP(); err != nil {
  498. return err
  499. }
  500. if s.ch != '"' {
  501. return errors.New("quoted string not closed")
  502. } else {
  503. // accept the "
  504. s.next()
  505. }
  506. }
  507. return nil
  508. }
  509. // qtextSMTP / quoted-pairSMTP
  510. // quoted-pairSMTP = %d92 %d32-126
  511. // qtextSMTP = %d32-33 / %d35-91 / %d93-126
  512. func (s *Parser) QcontentSMTP() error {
  513. state := 0
  514. for {
  515. ch := s.next()
  516. switch state {
  517. case 0:
  518. if ch == '\\' {
  519. state = 1
  520. // s.accept.WriteByte(ch)
  521. continue
  522. } else if ch == 32 || ch == 33 ||
  523. (ch >= 35 && ch <= 91) ||
  524. (ch >= 93 && ch <= 126) {
  525. if s.LocalPartQuotes == false && !s.isAtext(ch) {
  526. s.LocalPartQuotes = true
  527. }
  528. s.accept.WriteByte(ch)
  529. continue
  530. }
  531. return nil
  532. case 1:
  533. // escaped character state
  534. if ch >= 32 && ch <= 126 {
  535. if s.LocalPartQuotes == false && !s.isAtext(ch) {
  536. s.LocalPartQuotes = true
  537. }
  538. s.accept.WriteByte(ch)
  539. state = 0
  540. continue
  541. } else {
  542. return errors.New("non-printable character found")
  543. }
  544. }
  545. }
  546. }
  547. //Dot-string = Atom *("." Atom)
  548. func (s *Parser) dotString() error {
  549. for {
  550. if err := s.atom(); err != nil {
  551. return err
  552. }
  553. if s.ch != '.' {
  554. break
  555. }
  556. s.accept.WriteByte(s.ch)
  557. }
  558. return nil
  559. }
  560. // 1*atext
  561. func (s *Parser) atom() error {
  562. state := 0
  563. for {
  564. if state == 0 {
  565. if !s.isAtext(s.next()) {
  566. return errors.New("atom parse error")
  567. } else {
  568. s.accept.WriteByte(s.ch)
  569. state = 1
  570. continue
  571. }
  572. }
  573. if state == 1 {
  574. if !s.isAtext(s.next()) {
  575. return nil
  576. } else {
  577. s.accept.WriteByte(s.ch)
  578. }
  579. }
  580. }
  581. }
  582. /*
  583. Dot-string = Atom *("." Atom)
  584. Atom = 1*atext
  585. atext = ALPHA / DIGIT / ; Any character except controls,
  586. "!" / "#" / ; SP, and specials.
  587. "$" / "%" / ; Used for atoms
  588. "&" / "'" /
  589. "*" / "+" /
  590. "-" / "/" /
  591. "=" / "?" /
  592. "^" / "_" /
  593. "`" / "{" /
  594. "|" / "}" /
  595. "~"
  596. */
  597. func (s *Parser) isAtext(c byte) bool {
  598. if ('0' <= c && c <= '9') ||
  599. ('a' <= c && c <= 'z') ||
  600. ('A' <= c && c <= 'Z') ||
  601. c == '!' || c == '#' ||
  602. c == '$' || c == '%' ||
  603. c == '&' || c == '\'' ||
  604. c == '*' || c == '+' ||
  605. c == '-' || c == '/' ||
  606. c == '=' || c == '?' ||
  607. c == '^' || c == '_' ||
  608. c == '`' || c == '{' ||
  609. c == '|' || c == '}' ||
  610. c == '~' {
  611. return true
  612. }
  613. return false
  614. }
  // isLetDig reports whether c is an ASCII letter or digit (Let-dig).
  func isLetDig(c byte) bool {
  	switch {
  	case c >= '0' && c <= '9':
  		return true
  	case c >= 'A' && c <= 'Z':
  		return true
  	case c >= 'a' && c <= 'z':
  		return true
  	}
  	return false
  }
  623. //ehlo = "EHLO" SP ( Domain / address-literal ) CRLF
  624. // Note: "HELO" is ignored here
  625. func (s *Parser) Ehlo(input []byte) (domain string, ip net.IP, err error) {
  626. s.set(input)
  627. s.next()
  628. if s.ch == ' ' {
  629. if p := s.peek(); p == '[' {
  630. err = s.addressLiteral()
  631. if err == nil {
  632. domain = s.accept.String()
  633. ip = net.ParseIP(domain)
  634. if ip == nil {
  635. err = errors.New("invalid ip")
  636. }
  637. return
  638. }
  639. } else {
  640. err = s.domain()
  641. if err == nil {
  642. domain = s.accept.String()
  643. }
  644. return
  645. }
  646. } else {
  647. err = errors.New("ehlo parse error")
  648. }
  649. return domain, ip, err
  650. }
  651. // helo = "HELO" SP Domain CRLF
  652. // Note: "HELO" is ignored here, so is the CRLF at the end
  653. func (s *Parser) Helo(input []byte) (domain string, err error) {
  654. s.set(input)
  655. s.next()
  656. if s.ch == ' ' {
  657. err = s.domain()
  658. if err == nil {
  659. domain = s.accept.String()
  660. }
  661. return
  662. } else {
  663. err = errors.New("helo parse error")
  664. }
  665. return
  666. }