Browse Source

alternative mime parser

flashmob 6 years ago
parent
commit
62f30a7a08
2 changed files with 110 additions and 16 deletions
  1. 84 3
      mail/mime/mime.go
  2. 26 13
      mail/mime/mime_test.go

+ 84 - 3
mail/mime/mime.go

@@ -28,6 +28,14 @@ const (
 	headerErrorThreshold = 4
 	headerErrorThreshold = 4
 )
 )
 
 
+type boundaryEnd struct { // boundaryEnd is an error value; mime returns it when p.boundary reports the end for a content-boundary
+	cb string // the content-boundary string this value was created with
+}
+// Error implements the error interface by returning the stored content-boundary string.
+func (e boundaryEnd) Error() string {
+	return e.cb
+}
+
 var NotMime = errors.New("not Mime")
 var NotMime = errors.New("not Mime")
 
 
 type captureBuffer struct {
 type captureBuffer struct {
@@ -783,7 +791,7 @@ func (p *Parser) multi(part *Part, depth string) (err error) {
 			}
 			}
 		}
 		}
 		// call back to mime() to start working on a new branch
 		// call back to mime() to start working on a new branch
-		err = p.mime(part, depth)
+		err = p.mime2(part, depth)
 		if err != nil {
 		if err != nil {
 			return err
 			return err
 		}
 		}
@@ -791,9 +799,82 @@ func (p *Parser) multi(part *Part, depth string) (err error) {
 	return
 	return
 }
 }
 
 
+func (p *Parser) mime(depth string, count int, cb string) (err error) {
+	// mime adds one part (node ID built from depth and count), reads its header, then recurses for child parts or the next sibling; cb is the content-boundary passed down by the caller ("" for none).
+	if count == 0 { // a zero count is treated as the first part
+		count = 1
+	}
+	part := newPart()
+	// node ID is count alone when depth is empty, otherwise depth + "." + count
+	partID := strconv.Itoa(count)
+	if depth != "" {
+		partID = depth + "." + strconv.Itoa(count)
+	}
+	p.addPart(part, partID)
+	// record the start of the part
+	part.StartingPos = p.msgPos
+
+	// read the header; only attempted when the current byte is in 33..126 (printable, non-space ASCII)
+	if p.ch >= 33 && p.ch <= 126 {
+		err = p.header(part)
+		if err != nil {
+			return err
+		}
+	}
+	if p.ch == '\n' && p.peek() == '\n' { // advance past two consecutive newlines before marking the body start
+		p.next()
+		p.next()
+	}
+	part.StartingPosBody = p.msgPos
+	if part.ContentBoundary != "" { // a boundary set on this part replaces the one passed in
+		cb = part.ContentBoundary
+	}
+	// a part whose super type is "message" gets a first child (partID, count 1) parsed immediately
+	if part.ContentType != nil && part.ContentType.superType == "message" {
+		err = p.mime(partID, 1, cb)
+		part.EndingPosBody = p.msgPos
+		if err != nil {
+			return
+		}
+	}
+	// each iteration bumps count, consults p.boundary when a boundary is in effect, then descends into a child or hands over to a sibling
+	for {
+		count++
+		if cb != "" {
+			if end, bErr := p.boundary(cb); bErr != nil {
+				part.EndingPosBody = p.lastBoundaryPos
+				return bErr
+			} else if end { // p.boundary reported the end for cb: tell callers via a boundaryEnd carrying cb
+				bErr = boundaryEnd{cb}
+				part.EndingPosBody = p.lastBoundaryPos
+				return bErr
+			}
+			part.EndingPosBody = p.lastBoundaryPos
+		}
+
+		ct := part.ContentType
+		if ct != nil && (ct.superType == "multipart" || ct.superType == "message") {
+			// start a new branch (count is 1); NOTE(review): a "message" part also recursed once before the loop — confirm the second descent is intended
+			err = p.mime(partID, 1, cb)
+			part.EndingPosBody = p.msgPos // good?
+			if err != nil {
+				if v, ok := err.(boundaryEnd); ok && v.Error() != cb {
+					// we are back to the upper level, stop propagating the content-boundary 'end' error (the ended boundary differs from the cb in effect here)
+					continue
+				}
+				return // naked return: yields the named result err
+			}
+		} else {
+			// new sibling for this node (count has incremented); NOTE(review): with cb == "" this recursion has no local stop condition and appears to rely on a callee returning an error — confirm
+			err = p.mime(depth, count, cb)
+			return
+		}
+	}
+}
+
 // mime scans the mime content and builds the mime-part tree in
 // mime scans the mime content and builds the mime-part tree in
 // p.Parts on-the-fly, as more bytes get fed in.
 // p.Parts on-the-fly, as more bytes get fed in.
-func (p *Parser) mime(parent *Part, depth string) (err error) {
+func (p *Parser) mime2(parent *Part, depth string) (err error) {
 
 
 	count := 1
 	count := 1
 	for {
 	for {
@@ -925,7 +1006,7 @@ func (p *Parser) Parse(buf []byte) error {
 		// initial step - start the mime parser
 		// initial step - start the mime parser
 		go func() {
 		go func() {
 			p.next()
 			p.next()
-			err := p.mime(nil, "")
+			err := p.mime("", 1, "")
 			fmt.Println("mine() ret", err)
 			fmt.Println("mine() ret", err)
 			p.result <- parserMsg{err}
 			p.result <- parserMsg{err}
 		}()
 		}()

+ 26 - 13
mail/mime/mime_test.go

@@ -490,6 +490,18 @@ TmV4dFBhcnRfMDAwX0FFNkJfNzI1RTA5QUYuODhCN0Y5MzQtLQ0K
 --XXXXboundary text--
 --XXXXboundary text--
 `
 `
 
 
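+/*
+Columns (same order as the fmt.Println in TestNestedEmail): Node  StartingPos  StartingPosBody  EndingPosBody
+NOTE(review): presumably the offsets recorded for email2 — confirm.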
+1  0  121  1763
+1.1  207  302  628
+1.1.1  343  428  445
+1.1.2  485  569  586
+1.2  668  730  1763
+1.2.1  730  959  1763
+1.2.1.1  1045  1140  1501
+1.2.1.1.1  1181  1281  1303
+1.2.1.1.2  1343  1442  1459
+1.2.1.2  1541  1703  1721
+*/
 func TestNestedEmail(t *testing.T) {
 func TestNestedEmail(t *testing.T) {
 	p = NewMimeParser()
 	p = NewMimeParser()
 	email = email2
 	email = email2
@@ -504,29 +516,30 @@ func TestNestedEmail(t *testing.T) {
 		//os.Exit(1)
 		//os.Exit(1)
 	}()
 	}()
 
 
-	if err := p.mime(nil, ""); err != nil && err != io.EOF {
+	if err := p.mime("", 1, ""); err != nil && err != io.EOF {
 		t.Error(err)
 		t.Error(err)
 	}
 	}
 	output := email
 	output := email
 	for part := range p.Parts {
 	for part := range p.Parts {
-		output = replaceAtIndex(output, '#', p.Parts[part].StartingPos)
-		output = replaceAtIndex(output, '&', p.Parts[part].StartingPosBody)
-		output = replaceAtIndex(output, '*', p.Parts[part].EndingPosBody)
+		//output = replaceAtIndex(output, '#', p.Parts[part].StartingPos)
+		//output = replaceAtIndex(output, '&', p.Parts[part].StartingPosBody)
+		//output = replaceAtIndex(output, '*', p.Parts[part].EndingPosBody)
 		fmt.Println(p.Parts[part].Node + "  " + strconv.Itoa(int(p.Parts[part].StartingPos)) + "  " + strconv.Itoa(int(p.Parts[part].StartingPosBody)) + "  " + strconv.Itoa(int(p.Parts[part].EndingPosBody)))
 		fmt.Println(p.Parts[part].Node + "  " + strconv.Itoa(int(p.Parts[part].StartingPos)) + "  " + strconv.Itoa(int(p.Parts[part].StartingPosBody)) + "  " + strconv.Itoa(int(p.Parts[part].EndingPosBody)))
 	}
 	}
 	fmt.Print(output)
 	fmt.Print(output)
 	//fmt.Println(strings.Index(output, "--D7F------------D7FD5A0B8AB9C65CCDBFA872--"))
 	//fmt.Println(strings.Index(output, "--D7F------------D7FD5A0B8AB9C65CCDBFA872--"))
 	i := 1
 	i := 1
-	fmt.Println("[" + output[p.Parts[i].StartingPosBody:p.Parts[i].EndingPosBody] + "]")
+	_ = i
+	//fmt.Println("[" + output[p.Parts[i].StartingPosBody:p.Parts[i].EndingPosBody] + "]")
 	//i := 2
 	//i := 2
 	//fmt.Println("**********{" + output[p.parts[i].startingPosBody:p.parts[i].endingPosBody] + "}**********")
 	//fmt.Println("**********{" + output[p.parts[i].startingPosBody:p.parts[i].endingPosBody] + "}**********")
 
 
-	p.Close()
-	p.inject([]byte(email))
-	if err := p.mime(nil, ""); err != nil && err != io.EOF {
-		t.Error(err)
-	}
-	p.Close()
+	//p.Close()
+	//p.inject([]byte(email))
+	//if err := p.mime("", 1, ""); err != nil && err != io.EOF {
+	//	t.Error(err)
+	//}
+	//p.Close()
 }
 }
 
 
 func replaceAtIndex(str string, replacement rune, index uint) string {
 func replaceAtIndex(str string, replacement rune, index uint) string {
@@ -542,7 +555,7 @@ This is not a an MIME email
 func TestNonMineEmail(t *testing.T) {
 func TestNonMineEmail(t *testing.T) {
 	p = NewMimeParser()
 	p = NewMimeParser()
 	p.inject([]byte(email4))
 	p.inject([]byte(email4))
-	if err := p.mime(nil, ""); err != nil && err != NotMime && err != io.EOF {
+	if err := p.mime("", 1, ""); err != nil && err != NotMime && err != io.EOF {
 		t.Error(err)
 		t.Error(err)
 	} else {
 	} else {
 		for part := range p.Parts {
 		for part := range p.Parts {
@@ -556,7 +569,7 @@ func TestNonMineEmail(t *testing.T) {
 
 
 	// what if we pass an empty string?
 	// what if we pass an empty string?
 	p.inject([]byte{' '})
 	p.inject([]byte{' '})
-	if err := p.mime(nil, ""); err == nil || err == NotMime || err == io.EOF {
+	if err := p.mime("", 1, ""); err == nil || err == NotMime || err == io.EOF {
 		t.Error("unexpected error", err)
 		t.Error("unexpected error", err)
 	}
 	}