envelope.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. package mail
  2. import (
  3. "bufio"
  4. "bytes"
  5. "crypto/md5"
  6. "encoding/base64"
  7. "errors"
  8. "fmt"
  9. "gopkg.in/iconv.v1"
  10. "io"
  11. "io/ioutil"
  12. "mime/quotedprintable"
  13. "net/mail"
  14. "net/textproto"
  15. "regexp"
  16. "strings"
  17. "sync"
  18. "time"
  19. )
  // maxHeaderChunk is the number of leading bytes of a message that
  // ParseHeaders scans for the end of the header: 3 KiB plus one byte.
  const maxHeaderChunk = 3*1024 + 1
  // Address holds the two halves of an email address of the form `user@host`.
  type Address struct {
  	// User is the local part: everything before the final "@".
  	User string
  	// Host is the domain part: everything after the final "@".
  	Host string
  }

  // String renders the address as "user@host".
  func (ep *Address) String() string {
  	return ep.User + "@" + ep.Host
  }

  // IsEmpty reports whether both User and Host are empty.
  func (ep *Address) IsEmpty() bool {
  	return ep.User == "" && ep.Host == ""
  }

  // ap is the parser shared by all NewAddress calls.
  var ap = mail.AddressParser{}

  // NewAddress parses an RFC 5322 address such as
  // "Gogh Fir <gf@example.com>" or "foo@example.com" and splits its
  // addr-spec into user and host. Any display name is discarded.
  //
  // The split is made at the last "@". net/mail hands back a quoted local
  // part in unquoted form, so `"a@b"@example.com` arrives here as
  // `a@b@example.com`; only the final "@" separates the domain.
  //
  // It returns the parser's error when str cannot be parsed, and
  // "invalid address" when the parsed address has no non-empty user part.
  func NewAddress(str string) (Address, error) {
  	a, err := ap.Parse(str)
  	if err != nil {
  		return Address{}, err
  	}
  	pos := strings.LastIndex(a.Address, "@")
  	if pos <= 0 {
  		return Address{}, errors.New("invalid address")
  	}
  	return Address{User: a.Address[:pos], Host: a.Address[pos+1:]}, nil
  }
  // Envelope represents a single SMTP message together with the details of
  // the connection it arrived on.
  type Envelope struct {
  	// RemoteIP is the remote address of the client (set by NewEnvelope and Reseed).
  	RemoteIP string
  	// Helo is the message sent in the EHLO command.
  	Helo string
  	// MailFrom is the sender.
  	MailFrom Address
  	// RcptTo lists the recipients.
  	RcptTo []Address
  	// Data stores the header and message body.
  	Data bytes.Buffer
  	// Subject stores the subject of the email, extracted and decoded after calling ParseHeaders().
  	Subject string
  	// TLS is true if the email was received using a TLS connection.
  	TLS bool
  	// Header stores the results from ParseHeaders(); it is nil until then.
  	Header textproto.MIMEHeader
  	// Values holds the values generated when processing the envelope by the backend.
  	Values map[string]interface{}
  	// Hashes holds hashes of each email on the rcpt.
  	Hashes []string
  	// DeliveryHeader is an additional delivery header that may be added;
  	// NewReader and String place it in front of Data.
  	DeliveryHeader string
  	// QueuedId is the id the email(s) will be queued with.
  	QueuedId string
  	// When locked, it means that the envelope is being processed by the backend.
  	sync.Mutex
  }
  79. func NewEnvelope(remoteAddr string, clientID uint64) *Envelope {
  80. return &Envelope{
  81. RemoteIP: remoteAddr,
  82. Values: make(map[string]interface{}),
  83. QueuedId: queuedID(clientID),
  84. }
  85. }
  // queuedID builds a queue id for a client: the hex-encoded MD5 digest (32
  // characters) of the current Unix time in seconds and clientID.
  //
  // Both numbers are formatted in decimal. Converting them with string(n)
  // would treat them as Unicode code points: the timestamp (and any client id
  // above U+10FFFF) collapses to U+FFFD, so unrelated ids hash identically.
  func queuedID(clientID uint64) string {
  	seed := fmt.Sprintf("%d-%d", time.Now().Unix(), clientID)
  	return fmt.Sprintf("%x", md5.Sum([]byte(seed)))
  }
  89. // ParseHeaders parses the headers into Header field of the Envelope struct.
  90. // Data buffer must be full before calling.
  91. // It assumes that at most 30kb of email data can be a header
  92. // Decoding of encoding to UTF is only done on the Subject, where the result is assigned to the Subject field
  93. func (e *Envelope) ParseHeaders() error {
  94. var err error
  95. if e.Header != nil {
  96. return errors.New("Headers already parsed")
  97. }
  98. buf := bytes.NewBuffer(e.Data.Bytes())
  99. // find where the header ends, assuming that over 30 kb would be max
  100. max := maxHeaderChunk
  101. if buf.Len() < max {
  102. max = buf.Len()
  103. }
  104. // read in the chunk which we'll scan for the header
  105. chunk := make([]byte, max)
  106. buf.Read(chunk)
  107. headerEnd := strings.Index(string(chunk), "\n\n") // the first two new-lines chars are the End Of Header
  108. if headerEnd > -1 {
  109. header := chunk[0:headerEnd]
  110. headerReader := textproto.NewReader(bufio.NewReader(bytes.NewBuffer(header)))
  111. e.Header, err = headerReader.ReadMIMEHeader()
  112. if err != nil {
  113. // decode the subject
  114. if subject, ok := e.Header["Subject"]; ok {
  115. e.Subject = MimeHeaderDecode(subject[0])
  116. }
  117. }
  118. } else {
  119. err = errors.New("header not found")
  120. }
  121. return err
  122. }
  123. // Len returns the number of bytes that would be in the reader returned by NewReader()
  124. func (e *Envelope) Len() int {
  125. return len(e.DeliveryHeader) + e.Data.Len()
  126. }
  127. // Returns a new reader for reading the email contents, including the delivery headers
  128. func (e *Envelope) NewReader() io.Reader {
  129. return io.MultiReader(
  130. strings.NewReader(e.DeliveryHeader),
  131. bytes.NewReader(e.Data.Bytes()),
  132. )
  133. }
  134. // String converts the email to string.
  135. // Typically, you would want to use the compressor guerrilla.Processor for more efficiency, or use NewReader
  136. func (e *Envelope) String() string {
  137. return e.DeliveryHeader + e.Data.String()
  138. }
  139. // ResetTransaction is called when the transaction is reset (keeping the connection open)
  140. func (e *Envelope) ResetTransaction() {
  141. e.MailFrom = Address{}
  142. e.RcptTo = []Address{}
  143. // reset the data buffer, keep it allocated
  144. e.Data.Reset()
  145. // todo: these are probably good candidates for buffers / use sync.Pool (after profiling)
  146. e.Subject = ""
  147. e.Header = nil
  148. e.Hashes = make([]string, 0)
  149. e.DeliveryHeader = ""
  150. e.Values = make(map[string]interface{})
  151. }
  152. // Seed is called when used with a new connection, once it's accepted
  153. func (e *Envelope) Reseed(RemoteIP string, clientID uint64) {
  154. e.RemoteIP = RemoteIP
  155. e.QueuedId = queuedID(clientID)
  156. e.Helo = ""
  157. e.TLS = false
  158. }
  159. // PushRcpt adds a recipient email address to the envelope
  160. func (e *Envelope) PushRcpt(addr Address) {
  161. e.RcptTo = append(e.RcptTo, addr)
  162. }
  163. // Pop removes the last email address that was pushed to the envelope
  164. func (e *Envelope) PopRcpt() Address {
  165. ret := e.RcptTo[len(e.RcptTo)-1]
  166. e.RcptTo = e.RcptTo[:len(e.RcptTo)-1]
  167. return ret
  168. }
  169. var mimeRegex, _ = regexp.Compile(`=\?(.+?)\?([QBqp])\?(.+?)\?=`)
  170. // Decode strings in Mime header format
  171. // eg. =?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=
  172. // This function uses GNU iconv under the hood, for more charset support than in Go's library
  173. func MimeHeaderDecode(str string) string {
  174. matched := mimeRegex.FindAllStringSubmatch(str, -1)
  175. var charset, encoding, payload string
  176. if matched != nil {
  177. for i := 0; i < len(matched); i++ {
  178. if len(matched[i]) > 2 {
  179. charset = matched[i][1]
  180. encoding = strings.ToUpper(matched[i][2])
  181. payload = matched[i][3]
  182. switch encoding {
  183. case "B":
  184. str = strings.Replace(
  185. str,
  186. matched[i][0],
  187. MailTransportDecode(payload, "base64", charset),
  188. 1)
  189. case "Q":
  190. str = strings.Replace(
  191. str,
  192. matched[i][0],
  193. MailTransportDecode(payload, "quoted-printable", charset),
  194. 1)
  195. }
  196. }
  197. }
  198. }
  199. return str
  200. }
  201. // decode from 7bit to 8bit UTF-8
  202. // encodingType can be "base64" or "quoted-printable"
  203. func MailTransportDecode(str string, encodingType string, charset string) string {
  204. if charset == "" {
  205. charset = "UTF-8"
  206. } else {
  207. charset = strings.ToUpper(charset)
  208. }
  209. if encodingType == "base64" {
  210. str = fromBase64(str)
  211. } else if encodingType == "quoted-printable" {
  212. str = fromQuotedP(str)
  213. }
  214. if charset != "UTF-8" {
  215. charset = fixCharset(charset)
  216. // iconv is pretty good at what it does
  217. if cd, err := iconv.Open("UTF-8", charset); err == nil {
  218. defer func() {
  219. cd.Close()
  220. if r := recover(); r != nil {
  221. //logln(1, fmt.Sprintf("Recovered in %v", r))
  222. }
  223. }()
  224. // eg. charset can be "ISO-2022-JP"
  225. return cd.ConvString(str)
  226. }
  227. }
  228. return str
  229. }
  // fromBase64 decodes standard base64 text. Decoding errors are ignored:
  // whatever was decoded before the error is returned.
  func fromBase64(data string) string {
  	src := bytes.NewReader([]byte(data))
  	decoded, _ := ioutil.ReadAll(base64.NewDecoder(base64.StdEncoding, src))
  	return string(decoded)
  }
  // fromQuotedP decodes quoted-printable text. Decoding errors are ignored:
  // whatever was decoded before the error is returned.
  func fromQuotedP(data string) string {
  	reader := quotedprintable.NewReader(strings.NewReader(data))
  	decoded, _ := ioutil.ReadAll(reader)
  	return string(decoded)
  }
  // charsetRegex matches the separator characters that are normalised to "-"
  // in charset names.
  var charsetRegex = regexp.MustCompile(`[_:.\/\\]`)

  // fixCharset maps charset names used by some mail clients onto names that
  // iconv is expected to recognise.
  //
  // The name is lower-cased first so the aliases apply whatever case the
  // caller passes (MailTransportDecode passes an upper-cased name), then
  // separators are normalised to "-", then the aliases are applied in order.
  // The result is therefore always lower-case.
  func fixCharset(charset string) string {
  	fixed := charsetRegex.ReplaceAllString(strings.ToLower(charset), "-")
  	// Alias list borrowed from
  	// http://squirrelmail.svn.sourceforge.net/viewvc/squirrelmail/trunk/squirrelmail/include/languages.php?revision=13765&view=markup
  	// Order matters: "x-windows-" must be handled before "windows-".
  	aliases := [][2]string{
  		{"ks-c-5601-1987", "cp949"},     // OE ks_c_5601_1987 > cp949
  		{"x-euc", "euc"},                // Moz x-euc-tw > euc-tw
  		{"x-windows-", "cp"},            // Moz x-windows-949 > cp949
  		{"windows-", "cp"},              // windows-125x and cp125x charsets
  		{"ibm", "cp"},                   // ibm > cp
  		{"iso-8859-8-i", "iso-8859-8"},  // iso-8859-8-i -> iso-8859-8
  	}
  	for _, alias := range aliases {
  		fixed = strings.Replace(fixed, alias[0], alias[1], -1)
  	}
  	return fixed
  }
  // Pool is a pool of envelopes: it hands out Envelope values and takes them
  // back for reuse.
  type Pool struct {
  	// pool holds the envelopes that are ready to be borrowed.
  	pool chan *Envelope
  	// sem is a semaphore that controls the maximum number of borrowed
  	// envelopes: Borrow adds a value to it and Return takes one off.
  	sem chan bool
  }
  269. func NewPool(poolSize int) *Pool {
  270. return &Pool{
  271. pool: make(chan *Envelope, poolSize),
  272. sem: make(chan bool, poolSize),
  273. }
  274. }
  275. func (p *Pool) Borrow(remoteAddr string, clientID uint64) *Envelope {
  276. var e *Envelope
  277. p.sem <- true // block the envelope until more room
  278. select {
  279. case e = <-p.pool:
  280. e.Reseed(remoteAddr, clientID)
  281. default:
  282. e = NewEnvelope(remoteAddr, clientID)
  283. }
  284. return e
  285. }
  286. // Return returns an envelope back to the envelope pool
  287. // Note that an envelope will not be recycled while it still is
  288. // processing
  289. func (p *Pool) Return(e *Envelope) {
  290. // we down't want to recycle an envelope that may still be processing
  291. isUnlocked := func() <-chan bool {
  292. signal := make(chan bool)
  293. // make sure envelope finished processing
  294. go func() {
  295. // lock will block if still processing
  296. e.Lock()
  297. // got the lock, it means processing finished
  298. e.Unlock()
  299. // generate a signal
  300. signal <- true
  301. }()
  302. return signal
  303. }()
  304. select {
  305. case <-time.After(time.Second * 30):
  306. // envelope still processing, we can't recycle it.
  307. case <-isUnlocked:
  308. // The envelope was _unlocked_, it finished processing
  309. // put back in the pool or destroy
  310. select {
  311. case p.pool <- e:
  312. //placed envelope back in pool
  313. default:
  314. // pool is full, don't return
  315. }
  316. }
  317. // take a value off the semaphore to make room for more envelopes
  318. <-p.sem
  319. }