Browse Source

- parse emails with \r\n line endings (the \r is ignored)
- if an email header line has a parse error, ignore that line and continue parsing

flashmob 6 năm trước
mục cha
commit
b541fd4ec3
3 tập tin đã thay đổi với 251 bổ sung và 47 xóa
  1. 125 0
      api_test.go
  2. 87 42
      mail/mime/mime.go
  3. 39 5
      mail/mime/mime_test.go

+ 125 - 0
api_test.go

@@ -897,6 +897,131 @@ Content-Disposition: attachment;
 ------_=_NextPart_001_01CBE273.65A0E7AA--
 `
 
+var mime3 = `From [email protected] Mon Feb 19 22:24:21 2001
+Received: from [137.154.210.66] by hotmail.com (3.2) with ESMTP id MHotMailBC5B58230039400431D5899AD24289FA0; Mon Feb 19 22:22:29 2001
+Received: from lancelot.cit.nepean.uws.edu.au (lancelot.cit.uws.edu.au [137.154.148.30])
+        by day.uws.edu.au (8.11.1/8.11.1) with ESMTP id f1K6MN404936;
+        Tue, 20 Feb 2001 17:22:24 +1100 (EST)
+Received: from hotmail.com (law2-f35.hotmail.com [216.32.181.35])
+        by lancelot.cit.nepean.uws.edu.au (8.10.0.Beta10/8.10.0.Beta10) with ESMTP id f1K6MJb13619;
+        Tue, 20 Feb 2001 17:22:19 +1100 (EST)
+Received: from mail pickup service by hotmail.com with Microsoft SMTPSVC;
+         Mon, 19 Feb 2001 22:21:44 -0800
+Received: from 203.54.221.89 by lw2fd.hotmail.msn.com with HTTP;        Tue, 20 Feb 2001 06:21:44 GMT
+X-Originating-IP: [203.54.221.89]
+From: "lara devine" <[email protected]>
+To: [email protected], [email protected],
+   [email protected], [email protected],
+   [email protected], [email protected],
+   [email protected], [email protected],
+   [email protected]
+Subject: Fwd: Goldfish
+Date: Tue, 20 Feb 2001 06:21:44
+Mime-Version: 1.0
+Content-Type: text/plain; format=flowed
+Message-ID: <[email protected]>
+X-OriginalArrivalTime: 20 Feb 2001 06:21:44.0718 (UTC) FILETIME=[658BDAE0:01C09B05]
+
+
+
+
+>> >Two builders (Chris and James) are seated either side of a table in a
+> > >rough
+> > >pub when a well-dressed man enters, orders beer and sits on a stool at
+> > >the bar.
+> > >The two builders start to speculate about the occupation of the suit.
+> > >
+> > >Chris: - I reckon he's an accountant.
+> > >
+> > >James: - No way - he's a stockbroker.
+> > >
+> > >Chris: - He ain't no stockbroker! A stockbroker wouldn't come in here!
+> > >
+> > >The argument repeats itself for some time until the volume of beer gets
+> > >the better of Chris and he makes for the toilet. On entering the toilet
+> > >he
+> > >sees that the suit is standing at a urinal. Curiosity and the several
+> > >beers
+> > >get the better of the builder...
+> > >
+> > >Chris: - 'scuse me.... no offence meant, but me and me mate were
+> > wondering
+> > >
+> > >  what you do for a living?
+> > >
+> > >Suit: - No offence taken! I'm a Logical Scientist by profession!
+> > >
+> > >Chris: - Oh! What's that then?
+> > >
+> > >Suit:- I'll try to explain by example... Do you have a goldfish at
+>home?
+> > >
+> > >Chris:- Er...mmm... well yeah, I do as it happens!
+> > >
+> > >Suit: - Well, it's logical to follow that you keep it in a bowl or in a
+> > >pond. Which is it?
+> > >
+> > >Chris: - It's in a pond!
+> > >
+> > >Suit: - Well then it's reasonable to suppose that you have a large
+> > >garden
+> > >then?
+> > >
+> > >Chris: - As it happens, yes I have got a big garden!
+> > >
+> > >Suit: - Well then it's logical to assume that in this town that if you
+> > >have a large garden that you have a large house?
+> > >
+> > >Chris: - As it happens I've got a five bedroom house... built it
+>myself!
+> > >
+> > >Suit: - Well given that you've built a five-bedroom house it is logical
+> > >to asume that you haven't built it just for yourself and that you are
+> > >quite
+> > >probably married?
+> > >
+> > >Chris: - Yes I am married, I live with my wife and three children!
+> > >
+> > >Suit: - Well then it is logical to assume that you are sexually active
+> > >with your wife on a regular basis?
+> > >
+> > >Chris:- Yep! Four nights a week!
+> > >
+> > >Suit: - Well then it is logical to suggest that you do not masturbate
+> > >very often?
+> > >
+> > >Chris: - Me? Never.
+> > >
+> > >Suit: - Well there you are! That's logical science at work!
+> > >
+> > >Chris:- How's that then?
+> > >
+> > >Suit: - Well from finding out that you had a goldfish, I've told you
+> > >about the size of garden you have, size of house, your family and your
+> > >sex
+> > >life!
+> > >
+> > >Chris: - I see! That's pretty impressive... thanks mate!
+> > >
+> > >Both leave the toilet and Chris returns to his mate.
+> > >
+> > >James: - I see the suit was in there. Did you ask him what he does?
+> > >
+> > >Chris: - Yep! He's a logical scientist!
+> > >
+> > >James: What's a logical Scientist?
+> > >
+> > >Chris: - I'll try and explain. Do you have a goldfish?
+> > >
+> > >James: - Nope.
+> > >
+> > >Chris: - Well then, you're a wanker.
+>
+
+_________________________________________________________________________
+Get Your Private, Free E-mail from MSN Hotmail at http://www.hotmail.com.
+`
+
 /*
 1  0  166  1514
 1.1  186  260  259

+ 87 - 42
mail/mime/mime.go

@@ -46,9 +46,10 @@ type mimepart struct {
 }
 
 const (
-	maxBoundaryLen = 70 + 10
-	doubleDash     = "--"
-	startPos       = -1
+	maxBoundaryLen       = 70 + 10
+	doubleDash           = "--"
+	startPos             = -1
+	headerErrorThreshold = 4 // header() tolerates malformed lines while its error count is below this
 )
 
 var NotMime = errors.New("not Mime")
@@ -59,6 +60,7 @@ type Parser struct {
 
 	buf                   []byte
 	pos                   int
+	peekOffset            int
 	ch                    byte
 	gotNewSlice, consumed chan bool
 	accept                bytes.Buffer
@@ -172,43 +174,52 @@ func (p *Parser) more() bool {
 // returns 0 if no more input can be read
 // blocks if at the end of the buffer
 func (p *Parser) next() byte {
-	// wait for a new new slice if reached the end
-	if p.pos+1 >= len(p.buf) {
-		if !p.more() {
-			p.ch = 0
-			return 0
+	for {
+		// wait for more bytes if we have reached the end of the buffer
+		if p.pos+1 >= len(p.buf) {
+			if !p.more() {
+				p.ch = 0
+				return 0
+			}
 		}
+		if p.pos > -1 || p.msgPos != 0 {
+			// don't increment msgPos on the very first call to next()
+			p.msgPos++
+		}
+		p.pos++
+		if p.buf[p.pos] == '\r' {
+			// skip \r without storing it in p.ch, so CRLF reads like LF
+			continue
+		}
+		p.ch = p.buf[p.pos]
+		if p.ch == '\n' {
+			p.msgLine++
+		}
+		return p.ch
 	}
-	if p.pos > -1 || p.msgPos != 0 {
-		// dont incr on first call to next()
-		p.msgPos++
-	}
-	p.pos++
-	p.ch = p.buf[p.pos]
-
-	if p.ch == '\n' {
-		p.msgLine++
-	}
-	return p.ch
 }
 
 // peek does not advance the pointer, but will block if there's no more
 // input in the buffer
 func (p *Parser) peek() byte {
-
-	// reached the end?
-	if p.pos+1 >= len(p.buf) {
-		if !p.more() {
-			p.ch = 0
-			return 0
+	p.peekOffset = 1
+	for {
+		// reached the end of the buffer? wait for more bytes; if none, clear p.ch and return 0
+		if p.pos+p.peekOffset >= len(p.buf) {
+			if !p.more() {
+				p.ch = 0
+				return 0
+			}
 		}
+		// look at the byte peekOffset positions ahead, without advancing p.pos
+		ret := p.buf[p.pos+p.peekOffset]
+		if ret == '\r' {
+			// ignore \r: look one byte further ahead instead
+			p.peekOffset++
+			continue
+		}
+		return ret
 	}
-
-	// peek the next byte
-	if p.pos+1 < len(p.buf) {
-		return p.buf[p.pos+1]
-	}
-	return 0
 }
 
 // inject is used for testing, to simulate a byte stream
@@ -245,10 +256,18 @@ func (p *Parser) set(input []byte) {
 // skip advances the pointer n bytes. It will block if not enough bytes left in
 // the buffer, i.e. if bBytes > len(p.buf) - p.pos
 func (p *Parser) skip(nBytes int) {
-
-	for i := 0; i < nBytes; i++ {
+	for {
+		if p.pos+nBytes < len(p.buf) {
+			p.pos += nBytes - 1
+			p.msgPos = p.msgPos + uint(nBytes) - 1
+			p.next()
+			return
+		}
+		remainder := len(p.buf) - p.pos
+		nBytes -= remainder
+		p.pos += remainder - 1
 		p.next()
-		if p.ch == 0 {
+		if nBytes < 1 {
 			return
 		}
 	}
@@ -272,10 +291,6 @@ func (p *Parser) boundary(contentBoundary string) (end bool, err error) {
 	p.boundaryMatched = 0
 	for {
 		if i := bytes.Index(p.buf[p.pos:], []byte(boundary)); i > -1 {
-			// advance the pointer to 1 char before the end of the boundary
-			// then let next() to advance the last char.
-			// in case the boundary is the tail part of buffer, calling next()
-			// will wait until we get a new buffer
 
 			p.skip(i)
 			p.lastBoundaryPos = p.msgPos // -1 - uint(len(boundary))
@@ -376,8 +391,11 @@ func (p *Parser) transportPadding() (err error) {
 }
 
 func (p *Parser) header(mh *Part) (err error) {
-	var state int
-	var name string
+	var (
+		state      int
+		name       string
+		errorCount int
+	)
 
 	defer func() {
 		p.accept.Reset()
@@ -403,6 +421,10 @@ func (p *Parser) header(mh *Part) (err error) {
 				p.next()
 				state = 1
 			} else {
+				if errorCount < headerErrorThreshold {
+					state = 2 // tolerate this error
+					continue
+				}
 				pc := p.peek()
 				err = errors.New("unexpected char:[" + string(p.ch) + "], peek:" +
 					string(pc) + ", pos:" + strconv.Itoa(int(p.msgPos)))
@@ -458,11 +480,27 @@ func (p *Parser) header(mh *Part) (err error) {
 						state = 0
 					}
 				} else {
-					err = errors.New("parse error")
+					err = errors.New("header parse error, pos:" + strconv.Itoa(p.pos))
 					return
 				}
 			}
-
+		case 2:
+			errorCount++
+			// error recovery for header lines with parse errors -
+			// ignore the line, discard anything that was scanned, scan until the end-of-line
+			// then start a new line again (back to state 0)
+			p.accept.Reset()
+			for {
+				if p.ch != '\n' {
+					p.next()
+				}
+				if p.ch == 0 {
+					return io.EOF
+				} else if p.ch == '\n' {
+					state = 0
+					break
+				}
+			}
 		}
 		if p.ch == '\n' && p.peek() == '\n' {
 			return nil
@@ -895,6 +933,13 @@ func (p *Parser) Parse(buf []byte) error {
 	}
 }
 
+// ParseError reports whether err is non-nil and is neither io.EOF nor
+// NotMime; it returns false for nil and for those two sentinel values.
+// The receiver is not consulted.
+func (p *Parser) ParseError(err error) bool {
+	return err != nil && err != io.EOF && err != NotMime
+}
+
 func NewMimeParser() *Parser {
 	p := new(Parser)
 	p.consumed = make(chan bool)

+ 39 - 5
mail/mime/mime_test.go

@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"io"
 	"strconv"
-	"strings"
 	"testing"
 	"time"
 )
@@ -76,14 +75,15 @@ func TestMimeContentType(t *testing.T) {
 		t.Error("contentType.subType expecting 'text', got:", contentType.superType)
 	}
 
-	if ct := contentType.String(); strings.Compare(contentType.String(), subject) != 0 {
+	if ct := contentType.String(); contentType.String() != subject {
 		t.Error("\n[" + ct + "]\ndoes not equal\n[" + subject + "]")
 	}
 }
 
 func TestEmailHeader(t *testing.T) {
 	p = NewMimeParser()
-	in := `From: Al Gore <[email protected]>
+	in := `Wong ignore me
+From: Al Gore <[email protected]>
 To: White House Transportation Coordinator <[email protected]>
 Subject: [Fwd: Map of Argentina with Description]
 MIME-Version: 1.0
@@ -193,6 +193,36 @@ func TestBoundarySplit(t *testing.T) {
 
 }
 
+// TestSkip checks the buffer position reported after skip, both inside the
+// first injected buffer and after crossing into the second injected buffer.
+func TestSkip(t *testing.T) {
+	p = NewMimeParser()
+	p.inject(
+		[]byte("you cant touch this"),
+		[]byte("stop, hammer time"))
+
+	p.skip(3)
+	if p.pos != 3 {
+		t.Error("position should be 3 after skipping 3 bytes, it is:", p.pos)
+	}
+
+	// rewind, consume one byte with next(), then skip three more
+	p.pos = 0
+	p.next()
+	p.skip(3)
+	if p.pos != 4 {
+		t.Error("position should be 4 after skipping 3 bytes, it is:", p.pos)
+	}
+
+	// skipping all 19 bytes of the first buffer must land on the first byte of
+	// the second one; either condition failing is an error, hence || (not &&)
+	p.pos = 0
+	p.skip(19)
+	if p.pos != 0 || p.buf[0] != 's' {
+		t.Error("position should be 0 and p.buf[0] should be 's'")
+	}
+}
+
 func TestMimeContentQuotedParams(t *testing.T) {
 	p = NewMimeParser()
 	// quoted
@@ -441,6 +471,7 @@ TmV4dFBhcnRfMDAwX0FFNkJfNzI1RTA5QUYuODhCN0Y5MzQtLQ0K
 func TestNestedEmail(t *testing.T) {
 	p = NewMimeParser()
 	email = email
+	//email = strings.Replace(string(email), "\n", "\r\n", -1)
 	p.inject([]byte(email))
 
 	go func() {
@@ -464,7 +495,7 @@ func TestNestedEmail(t *testing.T) {
 	}
 	fmt.Print(email)
 	//fmt.Println(strings.Index(email, "--D7F------------D7FD5A0B8AB9C65CCDBFA872--"))
-	i := 0
+	i := 1
 	fmt.Println("[" + email[p.Parts[i].StartingPosBody:p.Parts[i].EndingPosBody] + "]")
 	//i := 2
 	//fmt.Println("**********{" + email[p.parts[i].startingPosBody:p.parts[i].endingPosBody] + "}**********")
@@ -490,7 +521,10 @@ func TestNonMineEmail(t *testing.T) {
 			fmt.Println(p.Parts[part].Part + "  " + strconv.Itoa(int(p.Parts[part].StartingPos)) + "  " + strconv.Itoa(int(p.Parts[part].StartingPosBody)) + "  " + strconv.Itoa(int(p.Parts[part].EndingPosBody)))
 		}
 	}
-	p.Close()
+	err := p.Close()
+	if err != nil {
+		t.Error(err)
+	}
 
 	// what if we pass an empty string?
 	p.inject([]byte{' '})