Browse Source

Make cgo iconv package optional #103 (#104)

Issue #103
* Iconv is now a soft dependency
* Adds a new mail/iconv package and sets up an init hook to use iconv whenever it's imported
* Adds a new mail/encoding package using the same pattern as above (decoder from golang.org/x/text package)
* update envelope.go to use the default charset converter in golang
* update serve.go so that it uses mail/encoding
* update tests

* update glide lock
Flashmob 7 years ago
parent
commit
6f87aaa724

+ 2 - 0
cmd/guerrillad/serve.go

@@ -12,6 +12,8 @@ import (
 
 
 	"github.com/flashmob/go-guerrilla"
 	"github.com/flashmob/go-guerrilla"
 	"github.com/flashmob/go-guerrilla/log"
 	"github.com/flashmob/go-guerrilla/log"
+	//_ "github.com/flashmob/go-guerrilla/mail/iconv"
+	_ "github.com/flashmob/go-guerrilla/mail/encoding"
 	"github.com/spf13/cobra"
 	"github.com/spf13/cobra"
 
 
 	_ "github.com/go-sql-driver/mysql"
 	_ "github.com/go-sql-driver/mysql"

+ 41 - 8
glide.lock

@@ -1,8 +1,8 @@
-hash: 6330c21df1693590f5b681e06578d476231b1c30b652b1068b3bef59a7ddb594
-updated: 2017-10-09T14:49:12.394397577+01:00
+hash: ab5586e1ee56f15336e425d99f774acd4f6bc0f042ab597248366592d8c0b1bf
+updated: 2018-03-11T11:39:28.566276841+11:00
 imports:
 imports:
 - name: github.com/asaskevich/EventBus
 - name: github.com/asaskevich/EventBus
-  version: 52a0dcfcbd8299da13aad44b96a6642dd79cbb08
+  version: 68a521d7cbbb7a859c2608b06342f384b3bd5f5a
 - name: github.com/garyburd/redigo
 - name: github.com/garyburd/redigo
   version: 8873b2f1995f59d4bcdd2b0dc9858e2cb9bf0c13
   version: 8873b2f1995f59d4bcdd2b0dc9858e2cb9bf0c13
   subpackages:
   subpackages:
@@ -12,17 +12,50 @@ imports:
   version: a0583e0143b1624142adab07e0e97fe106d99561
   version: a0583e0143b1624142adab07e0e97fe106d99561
 - name: github.com/inconshreveable/mousetrap
 - name: github.com/inconshreveable/mousetrap
   version: 76626ae9c91c4f2a10f34cad8ce83ea42c93bb75
   version: 76626ae9c91c4f2a10f34cad8ce83ea42c93bb75
+- name: github.com/rakyll/statik
+  version: 274df120e9065bdd08eb1120e0375e3dc1ae8465
+  subpackages:
+  - fs
 - name: github.com/sirupsen/logrus
 - name: github.com/sirupsen/logrus
-  version: ba1b36c82c5e05c4f912a88eab0dcd91a171688f
-  vcs: git
+  version: d682213848ed68c0a260ca37d6dd5ace8423f5ba
 - name: github.com/spf13/cobra
 - name: github.com/spf13/cobra
-  version: 50204810fdb5010baae72e4f41b303689cbdcc9f
+  version: b62566898a99f2db9c68ed0026aa0a052e59678d
 - name: github.com/spf13/pflag
 - name: github.com/spf13/pflag
-  version: a9789e855c7696159b7db0db7f440b449edf2b31
+  version: 25f8b5b07aece3207895bf19f7ab517eb3b22a40
+- name: golang.org/x/crypto
+  version: c7dcf104e3a7a1417abc0230cb0d5240d764159d
+  subpackages:
+  - ssh/terminal
+- name: golang.org/x/net
+  version: d0aafc73d5cdc42264b0af071c261abac580695e
+  subpackages:
+  - html
+  - html/atom
+  - html/charset
 - name: golang.org/x/sys
 - name: golang.org/x/sys
-  version: ebfc5b4631820b793c9010c87fd8fef0f39eb082
+  version: 7dca6fe1f43775aa6d1334576870ff63f978f539
   subpackages:
   subpackages:
   - unix
   - unix
+  - windows
+- name: golang.org/x/text
+  version: b7ef84aaf62aa3e70962625c80a571ae7c17cb40
+  subpackages:
+  - encoding
+  - encoding/charmap
+  - encoding/htmlindex
+  - encoding/internal
+  - encoding/internal/identifier
+  - encoding/japanese
+  - encoding/korean
+  - encoding/simplifiedchinese
+  - encoding/traditionalchinese
+  - encoding/unicode
+  - internal/language
+  - internal/tag
+  - internal/utf8internal
+  - language
+  - runes
+  - transform
 - name: gopkg.in/iconv.v1
 - name: gopkg.in/iconv.v1
   version: 16a760eb7e186ae0e3aedda00d4a1daa4d0701d8
   version: 16a760eb7e186ae0e3aedda00d4a1daa4d0701d8
 testImports: []
 testImports: []

+ 7 - 3
glide.yaml

@@ -1,8 +1,7 @@
 package: github.com/flashmob/go-guerrilla
 package: github.com/flashmob/go-guerrilla
 import:
 import:
 - package: github.com/sirupsen/logrus
 - package: github.com/sirupsen/logrus
-  version: ~0.11.0
-  vcs: git
+  version: ~1.0.4
 - package: github.com/garyburd/redigo
 - package: github.com/garyburd/redigo
   version: ~1.0.0
   version: ~1.0.0
   subpackages:
   subpackages:
@@ -11,6 +10,11 @@ import:
 - package: gopkg.in/iconv.v1
 - package: gopkg.in/iconv.v1
   version: ~1.1.1
   version: ~1.1.1
 - package: github.com/asaskevich/EventBus
 - package: github.com/asaskevich/EventBus
-  version: 52a0dcfcbd8299da13aad44b96a6642dd79cbb08
+  version: 68a521d7cbbb7a859c2608b06342f384b3bd5f5a
 - package: github.com/go-sql-driver/mysql
 - package: github.com/go-sql-driver/mysql
   version: ^1.3.0
   version: ^1.3.0
+- package: golang.org/x/sys
+  version: 7dca6fe1f43775aa6d1334576870ff63f978f539
+- package: golang.org/x/net
+  subpackages:
+  - html/charset

+ 20 - 0
mail/encoding/encoding.go

@@ -0,0 +1,20 @@
+// encoding enables using golang.org/x/net/html/charset for converting 7bit to UTF-8.
+// golang.org/x/net/html/charset supports a larger range of encodings.
+// when importing, place an underscore _ in front to import for side-effects
+
+package encoding
+
+import (
+	"io"
+
+	"github.com/flashmob/go-guerrilla/mail"
+	cs "golang.org/x/net/html/charset"
+)
+
+func init() {
+
+	mail.Dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
+		return cs.NewReaderLabel(charset, input)
+	}
+
+}

+ 19 - 0
mail/encoding/encoding_test.go

@@ -0,0 +1,19 @@
+package encoding
+
+import (
+	"github.com/flashmob/go-guerrilla/mail"
+	"strings"
+	"testing"
+)
+
+// This will use the golang.org/x/net/html/charset encoder
+func TestEncodingMimeHeaderDecode(t *testing.T) {
+	str := mail.MimeHeaderDecode("=?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=")
+	if i := strings.Index(str, "【女子高生チャ"); i != 0 {
+		t.Error("expecting 【女子高生チャ, got:", str)
+	}
+	str = mail.MimeHeaderDecode("=?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>")
+	if strings.Index(str, "André Pirard") != 0 {
+		t.Error("expecting André Pirard, got:", str)
+	}
+}

+ 54 - 98
mail/envelope.go

@@ -4,21 +4,29 @@ import (
 	"bufio"
 	"bufio"
 	"bytes"
 	"bytes"
 	"crypto/md5"
 	"crypto/md5"
-	"encoding/base64"
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
-	"gopkg.in/iconv.v1"
 	"io"
 	"io"
-	"io/ioutil"
-	"mime/quotedprintable"
+	"mime"
 	"net/mail"
 	"net/mail"
 	"net/textproto"
 	"net/textproto"
-	"regexp"
 	"strings"
 	"strings"
 	"sync"
 	"sync"
 	"time"
 	"time"
 )
 )
 
 
+// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
+// Used by the MimeHeaderDecode function.
+// It's exported so that an alternative decoder can be set, e.g. GNU iconv
+// by importing the mail/iconv package.
+// Another alternative would be to use https://godoc.org/golang.org/x/text/encoding
+var Dec mime.WordDecoder
+
+func init() {
+	// use the default decoder, without GNU iconv. Import the mail/iconv package to use iconv.
+	Dec = mime.WordDecoder{}
+}
+
 const maxHeaderChunk = 1 + (3 << 10) // 3KB
 const maxHeaderChunk = 1 + (3 << 10) // 3KB
 
 
 // Address encodes an email address of the form `<user@host>`
 // Address encodes an email address of the form `<user@host>`
@@ -192,107 +200,55 @@ func (e *Envelope) PopRcpt() Address {
 	return ret
 	return ret
 }
 }
 
 
-var mimeRegex, _ = regexp.Compile(`=\?(.+?)\?([QBqp])\?(.+?)\?=`)
-
-// Decode strings in Mime header format
-// eg. =?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=
-// This function uses GNU iconv under the hood, for more charset support than in Go's library
+// Converts 7 bit encoded mime header strings to UTF-8
 func MimeHeaderDecode(str string) string {
 func MimeHeaderDecode(str string) string {
+	state := 0
+	var buf bytes.Buffer
+	var out []byte
+	for i := 0; i < len(str); i++ {
+		switch state {
+		case 0:
+			if str[i] == '=' {
+				buf.WriteByte(str[i])
+				state = 1
+			} else {
+				out = append(out, str[i])
+			}
+		case 1:
+			if str[i] == '?' {
+				buf.WriteByte(str[i])
+				state = 2
+			} else {
+				out = append(out, str[i])
+				buf.Reset()
+				state = 0
+			}
 
 
-	matched := mimeRegex.FindAllStringSubmatch(str, -1)
-	var charset, encoding, payload string
-	if matched != nil {
-		for i := 0; i < len(matched); i++ {
-			if len(matched[i]) > 2 {
-				charset = matched[i][1]
-				encoding = strings.ToUpper(matched[i][2])
-				payload = matched[i][3]
-				switch encoding {
-				case "B":
-					str = strings.Replace(
-						str,
-						matched[i][0],
-						MailTransportDecode(payload, "base64", charset),
-						1)
-				case "Q":
-					str = strings.Replace(
-						str,
-						matched[i][0],
-						MailTransportDecode(payload, "quoted-printable", charset),
-						1)
+		case 2:
+			if str[i] == ' ' {
+				d, err := Dec.Decode(buf.String())
+				if err == nil {
+					out = append(out, []byte(d)...)
+				} else {
+					out = append(out, buf.Bytes()...)
 				}
 				}
+				out = append(out, ' ')
+				buf.Reset()
+				state = 0
+			} else {
+				buf.WriteByte(str[i])
 			}
 			}
 		}
 		}
 	}
 	}
-	return str
-}
-
-// decode from 7bit to 8bit UTF-8
-// encodingType can be "base64" or "quoted-printable"
-func MailTransportDecode(str string, encodingType string, charset string) string {
-	if charset == "" {
-		charset = "UTF-8"
-	} else {
-		charset = strings.ToUpper(charset)
-	}
-	if encodingType == "base64" {
-		str = fromBase64(str)
-	} else if encodingType == "quoted-printable" {
-		str = fromQuotedP(str)
-	}
-
-	if charset != "UTF-8" {
-		charset = fixCharset(charset)
-		// iconv is pretty good at what it does
-		if cd, err := iconv.Open("UTF-8", charset); err == nil {
-			defer func() {
-				cd.Close()
-				if r := recover(); r != nil {
-					//logln(1, fmt.Sprintf("Recovered in %v", r))
-				}
-			}()
-			// eg. charset can be "ISO-2022-JP"
-			return cd.ConvString(str)
+	if buf.Len() > 0 {
+		d, err := Dec.Decode(buf.String())
+		if err == nil {
+			out = append(out, []byte(d)...)
+		} else {
+			out = append(out, buf.Bytes()...)
 		}
 		}
-
-	}
-	return str
-}
-
-func fromBase64(data string) string {
-	buf := bytes.NewBufferString(data)
-	decoder := base64.NewDecoder(base64.StdEncoding, buf)
-	res, _ := ioutil.ReadAll(decoder)
-	return string(res)
-}
-
-func fromQuotedP(data string) string {
-	res, _ := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(data)))
-	return string(res)
-}
-
-var charsetRegex, _ = regexp.Compile(`[_:.\/\\]`)
-
-func fixCharset(charset string) string {
-	fixed_charset := charsetRegex.ReplaceAllString(charset, "-")
-	// Fix charset
-	// borrowed from http://squirrelmail.svn.sourceforge.net/viewvc/squirrelmail/trunk/squirrelmail/include/languages.php?revision=13765&view=markup
-	// OE ks_c_5601_1987 > cp949
-	fixed_charset = strings.Replace(fixed_charset, "ks-c-5601-1987", "cp949", -1)
-	// Moz x-euc-tw > euc-tw
-	fixed_charset = strings.Replace(fixed_charset, "x-euc", "euc", -1)
-	// Moz x-windows-949 > cp949
-	fixed_charset = strings.Replace(fixed_charset, "x-windows_", "cp", -1)
-	// windows-125x and cp125x charsets
-	fixed_charset = strings.Replace(fixed_charset, "windows-", "cp", -1)
-	// ibm > cp
-	fixed_charset = strings.Replace(fixed_charset, "ibm", "cp", -1)
-	// iso-8859-8-i -> iso-8859-8
-	fixed_charset = strings.Replace(fixed_charset, "iso-8859-8-i", "iso-8859-8", -1)
-	if charset != fixed_charset {
-		return fixed_charset
 	}
 	}
-	return charset
+	return string(out)
 }
 }
 
 
 // Envelopes have their own pool
 // Envelopes have their own pool

+ 13 - 3
mail/envelope_test.go

@@ -6,10 +6,20 @@ import (
 	"testing"
 	"testing"
 )
 )
 
 
+// Test MimeHeader decoding, not using iconv
 func TestMimeHeaderDecode(t *testing.T) {
 func TestMimeHeaderDecode(t *testing.T) {
-	str := MimeHeaderDecode("=?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=")
-	if i := strings.Index(str, "【女子高生チャ"); i != 0 {
-		t.Error("expecting 【女子高生チャ, got:", str)
+
+	/*
+		Normally this would fail if not using iconv
+		str := MimeHeaderDecode("=?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=")
+		if i := strings.Index(str, "【女子高生チャ"); i != 0 {
+			t.Error("expecting 【女子高生チャ, got:", str)
+		}
+	*/
+
+	str := MimeHeaderDecode("=?utf-8?B?55So5oi34oCcRXBpZGVtaW9sb2d5IGluIG51cnNpbmcgYW5kIGg=?=  =?utf-8?B?ZWFsdGggY2FyZSBlQm9vayByZWFkL2F1ZGlvIGlkOm8=?=  =?utf-8?B?cTNqZWVr4oCd5Zyo572R56uZ4oCcU1BZ5Lit5paH5a6Y5pa5572R56uZ4oCd?=  =?utf-8?B?55qE5biQ5Y+36K+m5oOF?=")
+	if i := strings.Index(str, "用户“Epidemiology in nursing and h  ealth care eBook read/audio id:o  q3jeek”在网站“SPY中文官方网站”  的帐号详情"); i != 0 {
+		t.Error("expecting 用户“Epidemiology in nursing and h  ealth care eBook read/audio id:o  q3jeek”在网站“SPY中文官方网站”  的帐号详情, got:", str)
 	}
 	}
 	str = MimeHeaderDecode("=?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>")
 	str = MimeHeaderDecode("=?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>")
 	if strings.Index(str, "André Pirard") != 0 {
 	if strings.Index(str, "André Pirard") != 0 {

+ 24 - 0
mail/iconv/iconv.go

@@ -0,0 +1,24 @@
+// iconv enables using GNU iconv for converting 7bit to UTF-8.
+// iconv supports a larger range of encodings.
+// It's a cgo package, so the build system needs to have the GNU library headers available.
+// when importing, place an underscore _ in front to import for side-effects
+package iconv
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/flashmob/go-guerrilla/mail"
+	ico "gopkg.in/iconv.v1"
+)
+
+func init() {
+	mail.Dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
+		if cd, err := ico.Open("UTF-8", charset); err == nil {
+			r := ico.NewReader(cd, input, 32)
+			return r, nil
+		}
+		return nil, fmt.Errorf("unhandled charset %q", charset)
+	}
+
+}

+ 19 - 0
mail/iconv/iconv_test.go

@@ -0,0 +1,19 @@
+package iconv
+
+import (
+	"github.com/flashmob/go-guerrilla/mail"
+	"strings"
+	"testing"
+)
+
+// This will use the iconv encoder
+func TestIconvMimeHeaderDecode(t *testing.T) {
+	str := mail.MimeHeaderDecode("=?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=")
+	if i := strings.Index(str, "【女子高生チャ"); i != 0 {
+		t.Error("expecting 【女子高生チャ, got:", str)
+	}
+	str = mail.MimeHeaderDecode("=?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>")
+	if strings.Index(str, "André Pirard") != 0 {
+		t.Error("expecting André Pirard, got:", str)
+	}
+}