Browse Source

hooray, it works

flashmob 6 years ago
parent
commit
9ed261d19e
3 changed files with 36 additions and 271 deletions
  1. 1 1
      api_test.go
  2. 31 266
      mail/mime/mime.go
  3. 4 4
      mail/mime/mime_test.go

+ 1 - 1
api_test.go

@@ -1058,7 +1058,7 @@ func TestStreamMimeProcessor(t *testing.T) {
 	}()
 	}()
 
 
 	// change \n to \r\n
 	// change \n to \r\n
-	mime = strings.Replace(mime, "\n", "\r\n", -1)
+	mime = strings.Replace(mime2, "\n", "\r\n", -1)
 	// lets have a talk with the server
 	// lets have a talk with the server
 	if err := talkToServer("127.0.0.1:2525", mime); err != nil {
 	if err := talkToServer("127.0.0.1:2525", mime); err != nil {
 		t.Error(err)
 		t.Error(err)

+ 31 - 266
mail/mime/mime.go

@@ -4,12 +4,10 @@ package mime
 
 
 Mime is a simple MIME scanner for email-message byte streams.
 Mime is a simple MIME scanner for email-message byte streams.
 It builds a data-structure that represents a tree of all the mime parts,
 It builds a data-structure that represents a tree of all the mime parts,
-recording their headers, starting and ending positions, while processinging
+recording their headers, starting and ending positions, while processing
 the message efficiently, slice by slice. It avoids the use of regular expressions,
 the message efficiently, slice by slice. It avoids the use of regular expressions,
 doesn't back-track or multi-scan.
 doesn't back-track or multi-scan.
 
 
-This package used the PECL Mailparse library as a refrence/benchmark for testing
-
 */
 */
 import (
 import (
 	"bytes"
 	"bytes"
@@ -745,69 +743,20 @@ func (p *Parser) parameter() (attribute, value string, err error) {
 	}
 	}
 }
 }
 
 
-// isBranch determines if we should branch this part, when building
-// the mime tree
-func (p *Parser) isBranch(part *Part, parent *Part) bool {
-	ct := part.ContentType
-	if ct == nil {
-		return false
-	}
-	if part.ContentBoundary == "" {
-		return false
-	}
-
-	// tolerate some incorrect messages that re-use the identical content-boundary
-	if parent != nil && ct.superType != "message" {
-		if parent.ContentBoundary == part.ContentBoundary {
-			return false
-		}
-	}
-	if ct.superType == "message" && ct.subType == "delivery-status" {
-		return false
-	}
-	if ct.superType == "message" && ct.subType == "disposition-notification" {
-		return false
-	}
-
-	// branch on these superTypes
-	if ct.superType == "multipart" ||
-		ct.superType == "message" {
-		return true
-	}
-	return false
-}
-
-// multi finds the boundary and call back to mime() itself
-func (p *Parser) multi(part *Part, depth string) (err error) {
-	if part.ContentType != nil {
-		// scan until the start of the boundary
-		if part.ContentType.superType == "multipart" {
-			if end, bErr := p.boundary(part.ContentBoundary); bErr != nil {
-				return bErr
-			} else if end {
-				part.EndingPosBody = p.lastBoundaryPos
-				return
-			}
-		}
-		// call back to mime() to start working on a new branch
-		err = p.mime2(part, depth)
-		if err != nil {
-			return err
-		}
-	}
-	return
-}
-
-func (p *Parser) mime(depth string, count int, part *Part) (err error) {
+// mime scans the mime content and builds the mime-part tree in
+// p.Parts on-the-fly, as more bytes get fed in.
+func (p *Parser) mime(part *Part, cb string) (err error) {
 
 
-	if count == 0 {
-		count = 1
-	}
-	count = 1
-	first := part == nil
-	if first {
+	count := 1
+	root := part == nil
+	if root {
 		part = newPart()
 		part = newPart()
 		p.addPart(part, "1")
 		p.addPart(part, "1")
+		defer func() {
+			if part != nil {
+				part.EndingPosBody = p.lastBoundaryPos
+			}
+		}()
 	}
 	}
 
 
 	// read the header
 	// read the header
@@ -816,57 +765,65 @@ func (p *Parser) mime(depth string, count int, part *Part) (err error) {
 		if err != nil {
 		if err != nil {
 			return err
 			return err
 		}
 		}
-	} else if first {
+	} else if root {
 		return errors.New("parse error, no header")
 		return errors.New("parse error, no header")
 	}
 	}
 	if p.ch == '\n' && p.peek() == '\n' {
 	if p.ch == '\n' && p.peek() == '\n' {
 		p.next()
 		p.next()
 		p.next()
 		p.next()
 	}
 	}
+	part.StartingPosBody = p.msgPos
 	ct := part.ContentType
 	ct := part.ContentType
 	if ct != nil && ct.superType == "message" && ct.subType == "rfc822" {
 	if ct != nil && ct.superType == "message" && ct.subType == "rfc822" {
-
 		var subPart *Part
 		var subPart *Part
 		subPart = newPart()
 		subPart = newPart()
 		subPartId := part.Node + "." + strconv.Itoa(count)
 		subPartId := part.Node + "." + strconv.Itoa(count)
 		subPart.StartingPos = p.msgPos
 		subPart.StartingPos = p.msgPos
 		count++
 		count++
 		p.addPart(subPart, subPartId)
 		p.addPart(subPart, subPartId)
-		err = p.mime(subPartId, count, subPart)
+		err = p.mime(subPart, part.ContentBoundary)
+		subPart.EndingPosBody = p.msgPos
+		part.EndingPosBody = p.msgPos
 		return
 		return
 	}
 	}
-	if ct != nil && ct.superType == "multipart" && part.ContentBoundary != "" {
+	if ct != nil && ct.superType == "multipart" &&
+		part.ContentBoundary != "" &&
+		part.ContentBoundary != cb /* content-boundary must be different to previous */ {
 		var subPart *Part
 		var subPart *Part
 		subPart = newPart()
 		subPart = newPart()
 		for {
 		for {
 			subPartId := part.Node + "." + strconv.Itoa(count)
 			subPartId := part.Node + "." + strconv.Itoa(count)
 			if end, bErr := p.boundary(part.ContentBoundary); bErr != nil {
 			if end, bErr := p.boundary(part.ContentBoundary); bErr != nil {
+				// there was an error with parsing the boundary
 				err = bErr
 				err = bErr
 				if subPart.StartingPos == 0 {
 				if subPart.StartingPos == 0 {
 					subPart.StartingPos = p.msgPos
 					subPart.StartingPos = p.msgPos
 				} else {
 				} else {
-					//fmt.Println("["+string(p.buf[subPart.StartingPos:p.msgPos])+"]")
+					subPart.EndingPos = p.msgPos
+					subPart.EndingPosBody = p.lastBoundaryPos
 					subPart, count = p.split(subPart, count)
 					subPart, count = p.split(subPart, count)
 				}
 				}
 				return
 				return
 			} else if end {
 			} else if end {
-				return
+				// reached the terminating boundary (ends with double dash --)
+				subPart.EndingPosBody = p.lastBoundaryPos
+				break
 			} else {
 			} else {
+				// process the part boundary
 				if subPart.StartingPos == 0 {
 				if subPart.StartingPos == 0 {
 					subPart.StartingPos = p.msgPos
 					subPart.StartingPos = p.msgPos
 					count++
 					count++
 					p.addPart(subPart, subPartId)
 					p.addPart(subPart, subPartId)
-					err = p.mime(subPartId, count, subPart)
+					err = p.mime(subPart, part.ContentBoundary)
 					if err != nil {
 					if err != nil {
 						return
 						return
 					}
 					}
 					subPartId = part.Node + "." + strconv.Itoa(count)
 					subPartId = part.Node + "." + strconv.Itoa(count)
 				} else {
 				} else {
-					//fmt.Println("["+string(p.buf[subPart.StartingPos:p.msgPos])+"]")
+					subPart.EndingPosBody = p.lastBoundaryPos
 					subPart, count = p.split(subPart, count)
 					subPart, count = p.split(subPart, count)
-					//subPart.Node = subPartId
 					p.addPart(subPart, subPartId)
 					p.addPart(subPart, subPartId)
-					err = p.mime(subPartId, count, subPart)
+					err = p.mime(subPart, part.ContentBoundary)
 					if err != nil {
 					if err != nil {
 						return
 						return
 					}
 					}
@@ -874,13 +831,11 @@ func (p *Parser) mime(depth string, count int, part *Part) (err error) {
 			}
 			}
 		}
 		}
 	}
 	}
-	part.EndingPosBody = p.lastBoundaryPos
 	return
 	return
 
 
 }
 }
 
 
 func (p *Parser) split(subPart *Part, count int) (*Part, int) {
 func (p *Parser) split(subPart *Part, count int) (*Part, int) {
-	subPart.EndingPos = p.msgPos
 	subPart = nil
 	subPart = nil
 	count++
 	count++
 	subPart = newPart()
 	subPart = newPart()
@@ -888,195 +843,6 @@ func (p *Parser) split(subPart *Part, count int) (*Part, int) {
 	return subPart, count
 	return subPart, count
 }
 }
 
 
-func (p *Parser) mime_new(depth string, count int, cb string) (err error) {
-
-	defer func() {
-		fmt.Println("i quit")
-	}()
-	if count == 0 {
-		count = 1
-	}
-	part := newPart()
-
-	partID := strconv.Itoa(count)
-	if depth != "" {
-		partID = depth + "." + strconv.Itoa(count)
-	}
-	p.addPart(part, partID)
-	// record the start of the part
-	part.StartingPos = p.msgPos
-
-	// read the header
-	if p.ch >= 33 && p.ch <= 126 {
-		err = p.header(part)
-		if err != nil {
-			return err
-		}
-	} else if depth == "" {
-		return errors.New("parse error, no header")
-	}
-	if p.ch == '\n' && p.peek() == '\n' {
-		p.next()
-		p.next()
-	}
-	part.StartingPosBody = p.msgPos
-	skip := false
-	if part.ContentBoundary != "" {
-		if cb == part.ContentBoundary {
-			// tolerate some messages that have identical multipart content-boundary
-			skip = true
-		}
-		cb = part.ContentBoundary
-	}
-	ct := part.ContentType
-	if part.ContentType != nil && ct.superType == "message" &&
-		ct.subType == "rfc822" {
-
-		err = p.mime_new(partID, 1, cb)
-		part.EndingPosBody = p.msgPos
-		if err != nil {
-			return
-		}
-	}
-
-	for {
-		if cb != "" {
-			if end, bErr := p.boundary(cb); bErr != nil || end == true {
-				part.EndingPosBody = p.lastBoundaryPos
-				if end {
-					bErr = boundaryEnd{cb}
-
-					return bErr
-				}
-				return bErr
-			}
-			part.EndingPosBody = p.msgPos
-		} else {
-			for p.ch != 0 {
-				// keep scanning until the end
-				p.next()
-			}
-			part.EndingPosBody = p.msgPos
-			err = NotMime
-			return
-		}
-
-		if !skip && ct != nil &&
-			(ct.superType == "multipart" || (ct.superType == "message" && ct.subType == "rfc822")) {
-			// start a new branch (count is 1)
-			err = p.mime_new(partID, count, cb)
-
-			part.EndingPosBody = p.msgPos // good?
-			if err != nil {
-				if v, ok := err.(boundaryEnd); ok && v.Error() != cb {
-					// we are back to the upper level, stop propagating the content-boundary 'end' error
-					count++
-					continue
-				}
-				if depth == "" {
-					part.EndingPosBody = p.lastBoundaryPos
-				}
-
-				return
-			}
-
-		} else {
-			// new sibling for this node
-			count++
-			err = p.mime_new(depth, count, cb)
-
-			if err == nil {
-				return
-			}
-			if v, ok := err.(boundaryEnd); ok && v.Error() != cb {
-				// we are back to the upper level, stop propagating the content-boundary 'end' error
-				continue
-			}
-			return
-		}
-	}
-}
-
-// mime scans the mime content and builds the mime-part tree in
-// p.Parts on-the-fly, as more bytes get fed in.
-func (p *Parser) mime2(parent *Part, depth string) (err error) {
-
-	count := 1
-	for {
-		part := newPart()
-		partID := strconv.Itoa(count)
-		if depth != "" {
-			partID = depth + "." + strconv.Itoa(count)
-		}
-		p.addPart(part, partID)
-		// record the start of the part
-		part.StartingPos = p.msgPos
-		// parse the headers
-		if p.ch >= 33 && p.ch <= 126 {
-			err = p.header(part)
-			if err != nil {
-				return err
-			}
-		} else if len(p.Parts) == 0 {
-			// return an error if the first part is not a valid header
-			// (subsequent parts could have no headers)
-			return errors.New("parse error, no header")
-		}
-		if p.ch == '\n' && p.peek() == '\n' {
-			p.next()
-			p.next()
-		}
-
-		// inherit the content boundary from parent if not present
-		if part.ContentBoundary == "" && parent != nil {
-			part.ContentBoundary = parent.ContentBoundary
-		}
-
-		// record the start of the message body
-		part.StartingPosBody = p.msgPos
-
-		// build the mime tree recursively
-		if p.isBranch(part, parent) {
-			err = p.multi(part, partID)
-			part.EndingPosBody = p.lastBoundaryPos
-			if err != nil {
-				break
-			}
-		}
-
-		// if we didn't branch & we're still at the root (not a mime email)
-		if parent == nil {
-			for p.ch != 0 {
-				// keep scanning until the end
-				p.next()
-
-			}
-			if len(p.Parts) == 1 {
-				part.EndingPosBody = p.msgPos
-				err = NotMime
-			} else {
-				err = io.EOF
-			}
-			return
-		}
-		// after we return from the lower branches (if there were any)
-		// we walk each of the siblings of the parent
-
-		if end, bErr := p.boundary(parent.ContentBoundary); bErr != nil {
-			part.EndingPosBody = p.lastBoundaryPos
-			return bErr
-		} else if end {
-			// the last sibling
-			part.EndingPosBody = p.lastBoundaryPos
-			return
-		}
-		part.EndingPosBody = p.lastBoundaryPos
-		count++
-	}
-
-	return
-}
-
 func (p *Parser) reset() {
 func (p *Parser) reset() {
 	p.lastBoundaryPos = 0
 	p.lastBoundaryPos = 0
 	p.pos = startPos
 	p.pos = startPos
@@ -1146,8 +912,7 @@ func (p *Parser) Parse(buf []byte) error {
 		// initial step - start the mime parser
 		// initial step - start the mime parser
 		go func() {
 		go func() {
 			p.next()
 			p.next()
-			err := p.mime("", 1, nil)
-			//err := p.mime2(nil, "")
+			err := p.mime(nil, "")
 			if _, ok := err.(boundaryEnd); ok {
 			if _, ok := err.(boundaryEnd); ok {
 				err = nil
 				err = nil
 			}
 			}

+ 4 - 4
mail/mime/mime_test.go

@@ -524,7 +524,7 @@ email 2
 */
 */
 func TestNestedEmail(t *testing.T) {
 func TestNestedEmail(t *testing.T) {
 	p = NewMimeParser()
 	p = NewMimeParser()
-	email = email
+	email = email2
 	//email = strings.Replace(string(email), "\n", "\r\n", -1)
 	//email = strings.Replace(string(email), "\n", "\r\n", -1)
 	p.inject([]byte(email))
 	p.inject([]byte(email))
 
 
@@ -536,7 +536,7 @@ func TestNestedEmail(t *testing.T) {
 		//os.Exit(1)
 		//os.Exit(1)
 	}()
 	}()
 
 
-	if err := p.mime("", 1, nil); err != nil && err != io.EOF {
+	if err := p.mime(nil, ""); err != nil && err != io.EOF {
 		t.Error(err)
 		t.Error(err)
 	}
 	}
 	output := email
 	output := email
@@ -575,7 +575,7 @@ This is not a an MIME email
 func TestNonMineEmail(t *testing.T) {
 func TestNonMineEmail(t *testing.T) {
 	p = NewMimeParser()
 	p = NewMimeParser()
 	p.inject([]byte(email4))
 	p.inject([]byte(email4))
-	if err := p.mime("", 1, nil); err != nil && err != NotMime && err != io.EOF {
+	if err := p.mime(nil, ""); err != nil && err != NotMime && err != io.EOF {
 		t.Error(err)
 		t.Error(err)
 	} else {
 	} else {
 		for part := range p.Parts {
 		for part := range p.Parts {
@@ -589,7 +589,7 @@ func TestNonMineEmail(t *testing.T) {
 
 
 	// what if we pass an empty string?
 	// what if we pass an empty string?
 	p.inject([]byte{' '})
 	p.inject([]byte{' '})
-	if err := p.mime("", 1, nil); err == nil || err == NotMime || err == io.EOF {
+	if err := p.mime(nil, ""); err == nil || err == NotMime || err == io.EOF {
 		t.Error("unexpected error", err)
 		t.Error("unexpected error", err)
 	}
 	}