Browse Source

hooray, it works

flashmob cách đây 6 năm
mục cha
commit
9ed261d19e
3 tập tin đã thay đổi với 36 bổ sung và 271 xóa
  1. 1 1
      api_test.go
  2. 31 266
      mail/mime/mime.go
  3. 4 4
      mail/mime/mime_test.go

+ 1 - 1
api_test.go

@@ -1058,7 +1058,7 @@ func TestStreamMimeProcessor(t *testing.T) {
 	}()
 
 	// change \n to \r\n
-	mime = strings.Replace(mime, "\n", "\r\n", -1)
+	mime = strings.Replace(mime2, "\n", "\r\n", -1)
 	// lets have a talk with the server
 	if err := talkToServer("127.0.0.1:2525", mime); err != nil {
 		t.Error(err)

+ 31 - 266
mail/mime/mime.go

@@ -4,12 +4,10 @@ package mime
 
 Mime is a simple MIME scanner for email-message byte streams.
 It builds a data-structure that represents a tree of all the mime parts,
-recording their headers, starting and ending positions, while processinging
+recording their headers, starting and ending positions, while processioning
 the message efficiently, slice by slice. It avoids the use of regular expressions,
 doesn't back-track or multi-scan.
 
-This package used the PECL Mailparse library as a refrence/benchmark for testing
-
 */
 import (
 	"bytes"
@@ -745,69 +743,20 @@ func (p *Parser) parameter() (attribute, value string, err error) {
 	}
 }
 
-// isBranch determines if we should branch this part, when building
-// the mime tree
-func (p *Parser) isBranch(part *Part, parent *Part) bool {
-	ct := part.ContentType
-	if ct == nil {
-		return false
-	}
-	if part.ContentBoundary == "" {
-		return false
-	}
-
-	// tolerate some incorrect messages that re-use the identical content-boundary
-	if parent != nil && ct.superType != "message" {
-		if parent.ContentBoundary == part.ContentBoundary {
-			return false
-		}
-	}
-	if ct.superType == "message" && ct.subType == "delivery-status" {
-		return false
-	}
-	if ct.superType == "message" && ct.subType == "disposition-notification" {
-		return false
-	}
-
-	// branch on these superTypes
-	if ct.superType == "multipart" ||
-		ct.superType == "message" {
-		return true
-	}
-	return false
-}
-
-// multi finds the boundary and call back to mime() itself
-func (p *Parser) multi(part *Part, depth string) (err error) {
-	if part.ContentType != nil {
-		// scan until the start of the boundary
-		if part.ContentType.superType == "multipart" {
-			if end, bErr := p.boundary(part.ContentBoundary); bErr != nil {
-				return bErr
-			} else if end {
-				part.EndingPosBody = p.lastBoundaryPos
-				return
-			}
-		}
-		// call back to mime() to start working on a new branch
-		err = p.mime2(part, depth)
-		if err != nil {
-			return err
-		}
-	}
-	return
-}
-
-func (p *Parser) mime(depth string, count int, part *Part) (err error) {
+// mime scans the mime content and builds the mime-part tree in
+// p.Parts on-the-fly, as more bytes get fed in.
+func (p *Parser) mime(part *Part, cb string) (err error) {
 
-	if count == 0 {
-		count = 1
-	}
-	count = 1
-	first := part == nil
-	if first {
+	count := 1
+	root := part == nil
+	if root {
 		part = newPart()
 		p.addPart(part, "1")
+		defer func() {
+			if part != nil {
+				part.EndingPosBody = p.lastBoundaryPos
+			}
+		}()
 	}
 
 	// read the header
@@ -816,57 +765,65 @@ func (p *Parser) mime(depth string, count int, part *Part) (err error) {
 		if err != nil {
 			return err
 		}
-	} else if first {
+	} else if root {
 		return errors.New("parse error, no header")
 	}
 	if p.ch == '\n' && p.peek() == '\n' {
 		p.next()
 		p.next()
 	}
+	part.StartingPosBody = p.msgPos
 	ct := part.ContentType
 	if ct != nil && ct.superType == "message" && ct.subType == "rfc822" {
-
 		var subPart *Part
 		subPart = newPart()
 		subPartId := part.Node + "." + strconv.Itoa(count)
 		subPart.StartingPos = p.msgPos
 		count++
 		p.addPart(subPart, subPartId)
-		err = p.mime(subPartId, count, subPart)
+		err = p.mime(subPart, part.ContentBoundary)
+		subPart.EndingPosBody = p.msgPos
+		part.EndingPosBody = p.msgPos
 		return
 	}
-	if ct != nil && ct.superType == "multipart" && part.ContentBoundary != "" {
+	if ct != nil && ct.superType == "multipart" &&
+		part.ContentBoundary != "" &&
+		part.ContentBoundary != cb /* content-boundary must be different to previous */ {
 		var subPart *Part
 		subPart = newPart()
 		for {
 			subPartId := part.Node + "." + strconv.Itoa(count)
 			if end, bErr := p.boundary(part.ContentBoundary); bErr != nil {
+				// there was an error with parsing the boundary
 				err = bErr
 				if subPart.StartingPos == 0 {
 					subPart.StartingPos = p.msgPos
 				} else {
-					//fmt.Println("["+string(p.buf[subPart.StartingPos:p.msgPos])+"]")
+					subPart.EndingPos = p.msgPos
+					subPart.EndingPosBody = p.lastBoundaryPos
 					subPart, count = p.split(subPart, count)
 				}
 				return
 			} else if end {
-				return
+				// reached the terminating boundary (ends with double dash --)
+				subPart.EndingPosBody = p.lastBoundaryPos
+				break
 			} else {
+				// process the part boundary
 				if subPart.StartingPos == 0 {
 					subPart.StartingPos = p.msgPos
 					count++
 					p.addPart(subPart, subPartId)
-					err = p.mime(subPartId, count, subPart)
+					err = p.mime(subPart, part.ContentBoundary)
 					if err != nil {
 						return
 					}
 					subPartId = part.Node + "." + strconv.Itoa(count)
 				} else {
-					//fmt.Println("["+string(p.buf[subPart.StartingPos:p.msgPos])+"]")
+					subPart.EndingPosBody = p.lastBoundaryPos
 					subPart, count = p.split(subPart, count)
-					//subPart.Node = subPartId
 					p.addPart(subPart, subPartId)
-					err = p.mime(subPartId, count, subPart)
+					err = p.mime(subPart, part.ContentBoundary)
 					if err != nil {
 						return
 					}
@@ -874,13 +831,11 @@ func (p *Parser) mime(depth string, count int, part *Part) (err error) {
 			}
 		}
 	}
-	part.EndingPosBody = p.lastBoundaryPos
 	return
 
 }
 
 func (p *Parser) split(subPart *Part, count int) (*Part, int) {
-	subPart.EndingPos = p.msgPos
 	subPart = nil
 	count++
 	subPart = newPart()
@@ -888,195 +843,6 @@ func (p *Parser) split(subPart *Part, count int) (*Part, int) {
 	return subPart, count
 }
 
-func (p *Parser) mime_new(depth string, count int, cb string) (err error) {
-
-	defer func() {
-		fmt.Println("i quit")
-	}()
-	if count == 0 {
-		count = 1
-	}
-	part := newPart()
-
-	partID := strconv.Itoa(count)
-	if depth != "" {
-		partID = depth + "." + strconv.Itoa(count)
-	}
-	p.addPart(part, partID)
-	// record the start of the part
-	part.StartingPos = p.msgPos
-
-	// read the header
-	if p.ch >= 33 && p.ch <= 126 {
-		err = p.header(part)
-		if err != nil {
-			return err
-		}
-	} else if depth == "" {
-		return errors.New("parse error, no header")
-	}
-	if p.ch == '\n' && p.peek() == '\n' {
-		p.next()
-		p.next()
-	}
-	part.StartingPosBody = p.msgPos
-	skip := false
-	if part.ContentBoundary != "" {
-		if cb == part.ContentBoundary {
-			// tolerate some messages that have identical multipart content-boundary
-			skip = true
-		}
-		cb = part.ContentBoundary
-	}
-	ct := part.ContentType
-	if part.ContentType != nil && ct.superType == "message" &&
-		ct.subType == "rfc822" {
-
-		err = p.mime_new(partID, 1, cb)
-		part.EndingPosBody = p.msgPos
-		if err != nil {
-			return
-		}
-	}
-
-	for {
-		if cb != "" {
-			if end, bErr := p.boundary(cb); bErr != nil || end == true {
-				part.EndingPosBody = p.lastBoundaryPos
-				if end {
-					bErr = boundaryEnd{cb}
-
-					return bErr
-				}
-				return bErr
-			}
-			part.EndingPosBody = p.msgPos
-		} else {
-			for p.ch != 0 {
-				// keep scanning until the end
-				p.next()
-			}
-			part.EndingPosBody = p.msgPos
-			err = NotMime
-			return
-		}
-
-		if !skip && ct != nil &&
-			(ct.superType == "multipart" || (ct.superType == "message" && ct.subType == "rfc822")) {
-			// start a new branch (count is 1)
-			err = p.mime_new(partID, count, cb)
-
-			part.EndingPosBody = p.msgPos // good?
-			if err != nil {
-				if v, ok := err.(boundaryEnd); ok && v.Error() != cb {
-					// we are back to the upper level, stop propagating the content-boundary 'end' error
-					count++
-					continue
-				}
-				if depth == "" {
-					part.EndingPosBody = p.lastBoundaryPos
-				}
-
-				return
-			}
-
-		} else {
-			// new sibling for this node
-			count++
-			err = p.mime_new(depth, count, cb)
-
-			if err == nil {
-				return
-			}
-			if v, ok := err.(boundaryEnd); ok && v.Error() != cb {
-				// we are back to the upper level, stop propagating the content-boundary 'end' error
-				continue
-			}
-			return
-		}
-	}
-}
-
-// mime scans the mime content and builds the mime-part tree in
-// p.Parts on-the-fly, as more bytes get fed in.
-func (p *Parser) mime2(parent *Part, depth string) (err error) {
-
-	count := 1
-	for {
-		part := newPart()
-		partID := strconv.Itoa(count)
-		if depth != "" {
-			partID = depth + "." + strconv.Itoa(count)
-		}
-		p.addPart(part, partID)
-		// record the start of the part
-		part.StartingPos = p.msgPos
-		// parse the headers
-		if p.ch >= 33 && p.ch <= 126 {
-			err = p.header(part)
-			if err != nil {
-				return err
-			}
-		} else if len(p.Parts) == 0 {
-			// return an error if the first part is not a valid header
-			// (subsequent parts could have no headers)
-			return errors.New("parse error, no header")
-		}
-		if p.ch == '\n' && p.peek() == '\n' {
-			p.next()
-			p.next()
-		}
-
-		// inherit the content boundary from parent if not present
-		if part.ContentBoundary == "" && parent != nil {
-			part.ContentBoundary = parent.ContentBoundary
-		}
-
-		// record the start of the message body
-		part.StartingPosBody = p.msgPos
-
-		// build the mime tree recursively
-		if p.isBranch(part, parent) {
-			err = p.multi(part, partID)
-			part.EndingPosBody = p.lastBoundaryPos
-			if err != nil {
-				break
-			}
-		}
-
-		// if we didn't branch & we're still at the root (not a mime email)
-		if parent == nil {
-			for p.ch != 0 {
-				// keep scanning until the end
-				p.next()
-
-			}
-			if len(p.Parts) == 1 {
-				part.EndingPosBody = p.msgPos
-				err = NotMime
-			} else {
-				err = io.EOF
-			}
-			return
-		}
-		// after we return from the lower branches (if there were any)
-		// we walk each of the siblings of the parent
-
-		if end, bErr := p.boundary(parent.ContentBoundary); bErr != nil {
-			part.EndingPosBody = p.lastBoundaryPos
-			return bErr
-		} else if end {
-			// the last sibling
-			part.EndingPosBody = p.lastBoundaryPos
-			return
-		}
-		part.EndingPosBody = p.lastBoundaryPos
-		count++
-	}
-
-	return
-}
-
 func (p *Parser) reset() {
 	p.lastBoundaryPos = 0
 	p.pos = startPos
@@ -1146,8 +912,7 @@ func (p *Parser) Parse(buf []byte) error {
 		// initial step - start the mime parser
 		go func() {
 			p.next()
-			err := p.mime("", 1, nil)
-			//err := p.mime2(nil, "")
+			err := p.mime(nil, "")
 			if _, ok := err.(boundaryEnd); ok {
 				err = nil
 			}

+ 4 - 4
mail/mime/mime_test.go

@@ -524,7 +524,7 @@ email 2
 */
 func TestNestedEmail(t *testing.T) {
 	p = NewMimeParser()
-	email = email
+	email = email2
 	//email = strings.Replace(string(email), "\n", "\r\n", -1)
 	p.inject([]byte(email))
 
@@ -536,7 +536,7 @@ func TestNestedEmail(t *testing.T) {
 		//os.Exit(1)
 	}()
 
-	if err := p.mime("", 1, nil); err != nil && err != io.EOF {
+	if err := p.mime(nil, ""); err != nil && err != io.EOF {
 		t.Error(err)
 	}
 	output := email
@@ -575,7 +575,7 @@ This is not a an MIME email
 func TestNonMineEmail(t *testing.T) {
 	p = NewMimeParser()
 	p.inject([]byte(email4))
-	if err := p.mime("", 1, nil); err != nil && err != NotMime && err != io.EOF {
+	if err := p.mime(nil, ""); err != nil && err != NotMime && err != io.EOF {
 		t.Error(err)
 	} else {
 		for part := range p.Parts {
@@ -589,7 +589,7 @@ func TestNonMineEmail(t *testing.T) {
 
 	// what if we pass an empty string?
 	p.inject([]byte{' '})
-	if err := p.mime("", 1, nil); err == nil || err == NotMime || err == io.EOF {
+	if err := p.mime(nil, ""); err == nil || err == NotMime || err == io.EOF {
 		t.Error("unexpected error", err)
 	}