parse.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. package rfc5321
  2. // Parse RFC5321 productions, no regex
  3. import (
  4. "bytes"
  5. "errors"
  6. "net"
  7. "strconv"
  8. )
  // Size limits and minimums from RFC 5321, section 4.5.3.1.
  const (
  	// LimitPath is the maximum total length of a reverse-path or forward-path.
  	LimitPath = 256
  	// LimitLocalPart is the maximum total length of a user name or other
  	// local-part. The RFC says 64; it is doubled here because a few major
  	// services do not respect that limit and go over.
  	LimitLocalPart = 2 * 64
  	// LimitDomain is the maximum total length of a domain name or number.
  	LimitDomain = 255
  	// LimitRecipients is the minimum total number of recipients that must be
  	// buffered.
  	LimitRecipients = 100
  )
  // Parser parses email addresses (SMTP paths) according to
  // https://tools.ietf.org/html/rfc5321 and exposes the pieces it found.
  type Parser struct {
  	// NullPath is true when the reverse-path was the empty path "<>".
  	NullPath bool
  	// LocalPart is the part of the mailbox before the "@".
  	LocalPart string
  	// Domain is the domain or address literal that follows the "@".
  	Domain string
  	// ADL holds the source-route entries ("@domain"), in order of appearance.
  	ADL []string
  	// PathParams holds the parameters that follow the path; each entry is a
  	// two-element slice of keyword and value.
  	PathParams [][]string

  	// pos is the index in buf of the last byte consumed, -1 before the first.
  	pos int
  	// ch is the last byte that was successfully consumed.
  	ch byte
  	// buf is the input being parsed.
  	buf []byte
  	// accept collects the bytes of the token currently being recognized.
  	accept bytes.Buffer
  }
  32. func NewParser(buf []byte) *Parser {
  33. s := new(Parser)
  34. s.buf = buf
  35. s.pos = -1
  36. return s
  37. }
  38. func (s *Parser) Reset() {
  39. s.buf = s.buf[:0]
  40. if s.pos != -1 {
  41. s.pos = -1
  42. s.ADL = nil
  43. s.PathParams = nil
  44. s.NullPath = false
  45. s.LocalPart = ""
  46. s.Domain = ""
  47. s.accept.Reset()
  48. }
  49. }
  50. func (s *Parser) set(input []byte) {
  51. s.Reset()
  52. s.buf = input
  53. }
  54. func (s *Parser) next() byte {
  55. s.pos++
  56. if s.pos < len(s.buf) {
  57. s.ch = s.buf[s.pos]
  58. return s.ch
  59. }
  60. return 0
  61. }
  62. func (s *Parser) peek() byte {
  63. if s.pos+1 < len(s.buf) {
  64. return s.buf[s.pos+1]
  65. }
  66. return 0
  67. }
  68. func (s *Parser) reversePath() (err error) {
  69. if s.peek() == ' ' {
  70. s.next() // tolerate a space at the front
  71. }
  72. if i := bytes.Index(s.buf[s.pos+1:], []byte{'<', '>'}); i == 0 {
  73. s.NullPath = true
  74. return nil
  75. }
  76. if err = s.path(); err != nil {
  77. return err
  78. }
  79. return nil
  80. }
  81. func (s *Parser) forwardPath() (err error) {
  82. if s.peek() == ' ' {
  83. s.next() // tolerate a space at the front
  84. }
  85. if i := bytes.Index(bytes.ToLower(s.buf[s.pos+1:]), []byte(postmasterPath)); i == 0 {
  86. s.LocalPart = postmasterLocalPart
  87. return nil
  88. }
  89. if err = s.path(); err != nil {
  90. return err
  91. }
  92. return nil
  93. }
  94. //MailFrom accepts the following syntax: Reverse-path [SP Mail-parameters] CRLF
  95. func (s *Parser) MailFrom(input []byte) (err error) {
  96. s.set(input)
  97. if err := s.reversePath(); err != nil {
  98. return err
  99. }
  100. s.next()
  101. if p := s.next(); p == ' ' {
  102. // parse Rcpt-parameters
  103. // The optional <mail-parameters> are associated with negotiated SMTP
  104. // service extensions
  105. if tup, err := s.parameters(); err != nil {
  106. return errors.New("param parse error")
  107. } else if len(tup) > 0 {
  108. s.PathParams = tup
  109. }
  110. }
  111. return nil
  112. }
  const (
  	// postmasterPath is the domain-less forward-path reserved for the
  	// postmaster, written in lower case for case-insensitive matching.
  	postmasterPath = "<postmaster>"
  	// postmasterLocalPart is the local-part reported when postmasterPath
  	// matched.
  	postmasterLocalPart = "Postmaster"
  )
  115. //RcptTo accepts the following syntax: ( "<Postmaster@" Domain ">" / "<Postmaster>" /
  116. // Forward-path ) [SP Rcpt-parameters] CRLF
  117. func (s *Parser) RcptTo(input []byte) (err error) {
  118. s.set(input)
  119. if err := s.forwardPath(); err != nil {
  120. return err
  121. }
  122. s.next()
  123. if p := s.next(); p == ' ' {
  124. // parse Rcpt-parameters
  125. if tup, err := s.parameters(); err != nil {
  126. return errors.New("param parse error")
  127. } else if len(tup) > 0 {
  128. s.PathParams = tup
  129. }
  130. }
  131. return nil
  132. }
  133. // esmtp-param *(SP esmtp-param)
  134. func (s *Parser) parameters() ([][]string, error) {
  135. params := make([][]string, 0)
  136. for {
  137. if result, err := s.param(); err != nil {
  138. return params, err
  139. } else {
  140. params = append(params, result)
  141. }
  142. if p := s.next(); p != ' ' {
  143. return params, nil
  144. }
  145. }
  146. }
  // isESMTPValue reports whether c may appear in an esmtp-value: any printable
  // ASCII byte from '!' to '~' other than '='.
  func isESMTPValue(c byte) bool {
  	return c >= '!' && c <= '~' && c != '='
  }
  154. // esmtp-param = esmtp-keyword ["=" esmtp-value]
  155. // esmtp-keyword = (ALPHA / DIGIT) *(ALPHA / DIGIT / "-")
  156. // esmtp-value = 1*(%d33-60 / %d62-126)
  157. func (s *Parser) param() (result []string, err error) {
  158. state := 0
  159. var key, value string
  160. defer func() {
  161. result = append(result, key, value)
  162. s.accept.Reset()
  163. }()
  164. for c := s.next(); ; c = s.next() {
  165. switch state {
  166. case 0:
  167. // first char must be let-dig
  168. if !isLetDig(c) {
  169. return result, errors.New("parse error")
  170. }
  171. // accept
  172. s.accept.WriteByte(c)
  173. state = 1
  174. case 1:
  175. // *(ALPHA / DIGIT / "-")
  176. if !isLetDig(c) {
  177. if c == '=' {
  178. key = s.accept.String()
  179. s.accept.Reset()
  180. state = 2
  181. continue
  182. } else if c == '-' {
  183. // cannot have - at the end of a keyword
  184. if p := s.peek(); !isLetDig(p) && p != '-' {
  185. return result, errors.New("parse error")
  186. }
  187. s.accept.WriteByte(c)
  188. continue
  189. }
  190. key = s.accept.String()
  191. return result, nil
  192. }
  193. s.accept.WriteByte(c)
  194. case 2:
  195. // start of value, must match at least 1
  196. if !isESMTPValue(c) {
  197. return result, errors.New("parse error")
  198. }
  199. s.accept.WriteByte(c)
  200. if !isESMTPValue(s.peek()) {
  201. value = s.accept.String()
  202. return result, nil
  203. }
  204. state = 3
  205. case 3:
  206. // 1*(%d33-60 / %d62-126)
  207. s.accept.WriteByte(c)
  208. if !isESMTPValue(s.peek()) {
  209. value = s.accept.String()
  210. return result, nil
  211. }
  212. }
  213. }
  214. }
  215. // "<" [ A-d-l ":" ] Mailbox ">"
  216. func (s *Parser) path() (err error) {
  217. if s.next() == '<' && s.peek() == '@' {
  218. if err = s.adl(); err == nil {
  219. s.next()
  220. if s.ch != ':' {
  221. return errors.New("syntax error")
  222. }
  223. }
  224. }
  225. if err = s.mailbox(); err != nil {
  226. return err
  227. }
  228. if p := s.peek(); p != '>' {
  229. return errors.New("missing closing >")
  230. }
  231. return nil
  232. }
  233. // At-domain *( "," At-domain )
  234. func (s *Parser) adl() error {
  235. for {
  236. if err := s.atDomain(); err != nil {
  237. return err
  238. }
  239. s.ADL = append(s.ADL, s.accept.String())
  240. s.accept.Reset()
  241. if s.peek() != ',' {
  242. break
  243. }
  244. s.next()
  245. }
  246. return nil
  247. }
  248. // At-domain = "@" Domain
  249. func (s *Parser) atDomain() error {
  250. if s.next() == '@' {
  251. s.accept.WriteByte('@')
  252. return s.domain()
  253. }
  254. return errors.New("syntax error")
  255. }
  256. // sub-domain *("." sub-domain)
  257. func (s *Parser) domain() error {
  258. for {
  259. if err := s.subdomain(); err != nil {
  260. return err
  261. }
  262. if p := s.peek(); p != '.' {
  263. if p != ':' && p != ',' && p != '>' && p != 0 {
  264. return errors.New("domain parse error")
  265. }
  266. break
  267. }
  268. s.accept.WriteByte(s.next())
  269. }
  270. return nil
  271. }
  272. // Let-dig [Ldh-str]
  273. func (s *Parser) subdomain() error {
  274. state := 0
  275. for c := s.next(); ; c = s.next() {
  276. switch state {
  277. case 0:
  278. p := s.peek()
  279. if isLetDig(c) {
  280. s.accept.WriteByte(c)
  281. if !isLetDig(p) && p != '-' {
  282. return nil
  283. }
  284. state = 1
  285. continue
  286. }
  287. return errors.New("parse err")
  288. case 1:
  289. p := s.peek()
  290. if isLetDig(c) || c == '-' {
  291. s.accept.WriteByte(c)
  292. }
  293. if !isLetDig(p) && p != '-' {
  294. if c == '-' {
  295. return errors.New("parse err")
  296. }
  297. return nil
  298. }
  299. }
  300. }
  301. }
  302. // Local-part "@" ( Domain / address-literal )
  303. func (s *Parser) mailbox() error {
  304. defer func() {
  305. if s.accept.Len() > 0 {
  306. s.Domain = s.accept.String()
  307. s.accept.Reset()
  308. }
  309. }()
  310. err := s.localPart()
  311. if err != nil {
  312. return err
  313. }
  314. if s.ch != '@' {
  315. return errors.New("@ expected as part of mailbox")
  316. }
  317. if p := s.peek(); p == '[' {
  318. return s.addressLiteral()
  319. } else {
  320. return s.domain()
  321. }
  322. }
  323. // "[" ( IPv4-address-literal /
  324. // IPv6-address-literal /
  325. // General-address-literal ) "]"
  326. func (s *Parser) addressLiteral() error {
  327. ch := s.next()
  328. if ch == '[' {
  329. p := s.peek()
  330. var err error
  331. if p == 'I' || p == 'i' {
  332. for i := 0; i < 5; i++ {
  333. s.next() // IPv6:
  334. }
  335. err = s.ipv6AddressLiteral()
  336. } else if p >= 48 && p <= 57 {
  337. err = s.ipv4AddressLiteral()
  338. }
  339. if err != nil {
  340. return err
  341. }
  342. if s.ch != ']' {
  343. return errors.New("] expected for address literal")
  344. }
  345. return nil
  346. }
  347. return nil
  348. }
  349. // Snum 3("." Snum)
  350. func (s *Parser) ipv4AddressLiteral() error {
  351. for i := 0; i < 4; i++ {
  352. if err := s.snum(); err != nil {
  353. return err
  354. }
  355. if s.ch != '.' {
  356. break
  357. }
  358. s.accept.WriteByte(s.ch)
  359. }
  360. return nil
  361. }
  362. // 1*3DIGIT
  363. // representing a decimal integer
  364. // value accept the range 0 through 255
  365. func (s *Parser) snum() error {
  366. state := 0
  367. var num bytes.Buffer
  368. for i := 4; i > 0; i-- {
  369. c := s.next()
  370. if state == 0 {
  371. if !(c >= 48 && c <= 57) {
  372. return errors.New("parse error")
  373. } else {
  374. num.WriteByte(s.ch)
  375. s.accept.WriteByte(s.ch)
  376. state = 1
  377. continue
  378. }
  379. }
  380. if state == 1 {
  381. if !(c >= 48 && c <= 57) {
  382. if v, err := strconv.Atoi(num.String()); err != nil {
  383. return err
  384. } else if v >= 0 && v <= 255 {
  385. return nil
  386. } else {
  387. return errors.New("invalid ipv4")
  388. }
  389. } else {
  390. num.WriteByte(s.ch)
  391. s.accept.WriteByte(s.ch)
  392. }
  393. }
  394. }
  395. return errors.New("too many digits")
  396. }
  397. //IPv6:" IPv6-addr
  398. func (s *Parser) ipv6AddressLiteral() error {
  399. var ip bytes.Buffer
  400. for c := s.next(); ; c = s.next() {
  401. if !(c >= 48 && c <= 57) &&
  402. !(c >= 65 && c <= 70) &&
  403. !(c >= 97 && c <= 102) &&
  404. c != ':' && c != '.' {
  405. ipstr := ip.String()
  406. if v := net.ParseIP(ipstr); v != nil {
  407. s.accept.WriteString(ipstr)
  408. return nil
  409. }
  410. return errors.New("invalid ipv6")
  411. } else {
  412. ip.WriteByte(c)
  413. }
  414. }
  415. }
  416. // Dot-string / Quoted-string
  417. func (s *Parser) localPart() error {
  418. defer func() {
  419. if s.accept.Len() > 0 {
  420. s.LocalPart = s.accept.String()
  421. s.accept.Reset()
  422. }
  423. }()
  424. p := s.peek()
  425. if p == '"' {
  426. return s.quotedString()
  427. } else {
  428. return s.dotString()
  429. }
  430. }
  431. // DQUOTE *QcontentSMTP DQUOTE
  432. func (s *Parser) quotedString() error {
  433. if s.next() == '"' {
  434. if err := s.QcontentSMTP(); err != nil {
  435. return err
  436. }
  437. if s.ch != '"' {
  438. return errors.New("quoted string not closed")
  439. } else {
  440. // accept the "
  441. s.next()
  442. }
  443. }
  444. return nil
  445. }
  446. // qtextSMTP / quoted-pairSMTP
  447. // quoted-pairSMTP = %d92 %d32-126
  448. // qtextSMTP = %d32-33 / %d35-91 / %d93-126
  449. func (s *Parser) QcontentSMTP() error {
  450. state := 0
  451. for {
  452. ch := s.next()
  453. switch state {
  454. case 0:
  455. if ch == '\\' {
  456. state = 1
  457. s.accept.WriteByte(ch)
  458. continue
  459. } else if ch == 32 || ch == 33 ||
  460. (ch >= 35 && ch <= 91) ||
  461. (ch >= 93 && ch <= 126) {
  462. s.accept.WriteByte(ch)
  463. continue
  464. }
  465. return nil
  466. case 1:
  467. // escaped character state
  468. if ch >= 32 && ch <= 126 {
  469. s.accept.WriteByte(ch)
  470. state = 0
  471. continue
  472. } else {
  473. return errors.New("non-printable character found")
  474. }
  475. }
  476. }
  477. }
  478. //Dot-string = Atom *("." Atom)
  479. func (s *Parser) dotString() error {
  480. for {
  481. if err := s.atom(); err != nil {
  482. return err
  483. }
  484. if s.ch != '.' {
  485. break
  486. }
  487. s.accept.WriteByte(s.ch)
  488. }
  489. return nil
  490. }
  491. // 1*atext
  492. func (s *Parser) atom() error {
  493. state := 0
  494. for {
  495. if state == 0 {
  496. if !s.isAtext(s.next()) {
  497. return errors.New("parse error")
  498. } else {
  499. s.accept.WriteByte(s.ch)
  500. state = 1
  501. continue
  502. }
  503. }
  504. if state == 1 {
  505. if !s.isAtext(s.next()) {
  506. return nil
  507. } else {
  508. s.accept.WriteByte(s.ch)
  509. }
  510. }
  511. }
  512. }
  513. /*
  514. Dot-string = Atom *("." Atom)
  515. Atom = 1*atext
  516. atext = ALPHA / DIGIT / ; Any character except controls,
  517. "!" / "#" / ; SP, and specials.
  518. "$" / "%" / ; Used for atoms
  519. "&" / "'" /
  520. "*" / "+" /
  521. "-" / "/" /
  522. "=" / "?" /
  523. "^" / "_" /
  524. "`" / "{" /
  525. "|" / "}" /
  526. "~"
  527. */
  528. func (s *Parser) isAtext(c byte) bool {
  529. if ('0' <= c && c <= '9') ||
  530. ('A' <= c && c <= 'z') ||
  531. c == '!' || c == '#' ||
  532. c == '$' || c == '%' ||
  533. c == '&' || c == '\'' ||
  534. c == '*' || c == '+' ||
  535. c == '-' || c == '/' ||
  536. c == '=' || c == '?' ||
  537. c == '^' || c == '_' ||
  538. c == '`' || c == '{' ||
  539. c == '|' || c == '}' ||
  540. c == '~' {
  541. return true
  542. }
  543. return false
  544. }
  // isLetDig reports whether c is an ASCII letter or digit (Let-dig = ALPHA /
  // DIGIT). Upper and lower case are tested as two ranges because the bytes
  // between 'Z' and 'a' ('[', '\', ']', '^', '_' and '`') are not letters.
  func isLetDig(c byte) bool {
  	return ('0' <= c && c <= '9') ||
  		('A' <= c && c <= 'Z') ||
  		('a' <= c && c <= 'z')
  }
  551. }