Browse Source

debug decoding

flashmob 6 years ago
parent
commit
c87be0eebb
5 changed files with 125 additions and 43 deletions
  1. 25 20
      backends/s_transformer.go
  2. 75 21
      chunk/chunk_test.go
  3. 5 2
      chunk/reader.go
  4. 16 0
      chunk/transfer/decoder.go
  5. 4 0
      mail/mime/mime.go

+ 25 - 20
backends/s_transformer.go

@@ -77,11 +77,13 @@ var regexpCharset = regexp.MustCompile("(?i)charset=\"?(.+)\"?") // (?i) is a fl
 
 
 // todo: we may optimize this by looking at t.partsCachedOriginal, implement a Reader for it, re-write the header as we read from it
 // todo: we may optimize this by looking at t.partsCachedOriginal, implement a Reader for it, re-write the header as we read from it
 
 
-func (t *Transform) ReWrite(b []byte) (count int, err error) {
+func (t *Transform) ReWrite(b []byte, last bool) (count int, err error) {
+	defer func() {
+		count = len(b)
+	}()
 	if !t.isBody {
 	if !t.isBody {
 		// we place the partial header's bytes on a buffer from which we can read one line at a time
 		// we place the partial header's bytes on a buffer from which we can read one line at a time
 		// then we match and replace the lines we want
 		// then we match and replace the lines we want
-		count = len(b)
 		if i, err := io.Copy(&t.buf, bytes.NewReader(b)); err != nil {
 		if i, err := io.Copy(&t.buf, bytes.NewReader(b)); err != nil {
 			return int(i), err
 			return int(i), err
 		}
 		}
@@ -89,7 +91,6 @@ func (t *Transform) ReWrite(b []byte) (count int, err error) {
 		charsetFrom := ""
 		charsetFrom := ""
 		for {
 		for {
 			line, rErr := t.buf.ReadBytes('\n')
 			line, rErr := t.buf.ReadBytes('\n')
-
 			if rErr == nil {
 			if rErr == nil {
 				if !charsetProcessed {
 				if !charsetProcessed {
 					// is charsetFrom supported?
 					// is charsetFrom supported?
@@ -105,15 +106,12 @@ func (t *Transform) ReWrite(b []byte) (count int, err error) {
 
 
 				if bytes.Contains(line, []byte("Content-Transfer-Encoding: base64")) {
 				if bytes.Contains(line, []byte("Content-Transfer-Encoding: base64")) {
 					line = bytes.Replace(line, []byte("base64"), []byte("8bit"), 1)
 					line = bytes.Replace(line, []byte("base64"), []byte("8bit"), 1)
-					t.current.TransferEncoding = "8bit"
-
 				} else if bytes.Contains(line, []byte("charset")) {
 				} else if bytes.Contains(line, []byte("charset")) {
 					if match := regexpCharset.FindSubmatch(line); match != nil && len(match) > 0 {
 					if match := regexpCharset.FindSubmatch(line); match != nil && len(match) > 0 {
 						// test if the encoding is supported
 						// test if the encoding is supported
 						if charsetFrom != "" {
 						if charsetFrom != "" {
 							// it's supported, we can change it to utf8
 							// it's supported, we can change it to utf8
 							line = regexpCharset.ReplaceAll(line, []byte("charset=utf8"))
 							line = regexpCharset.ReplaceAll(line, []byte("charset=utf8"))
-							t.current.Charset = "utf8"
 						}
 						}
 					}
 					}
 				}
 				}
@@ -126,13 +124,15 @@ func (t *Transform) ReWrite(b []byte) (count int, err error) {
 					break
 					break
 				}
 				}
 			} else {
 			} else {
-				// returned data does not end in delim
-				panic("returned data does not end in delim")
-				//break
+				return
 			}
 			}
 		}
 		}
 	} else {
 	} else {
 
 
+		if ct := t.current.ContentType.Supertype(); ct == "multipart" || ct == "message" {
+			return
+		}
+
 		// do body decode here
 		// do body decode here
 		t.pr, t.pw = io.Pipe()
 		t.pr, t.pw = io.Pipe()
 		if t.decoder == nil {
 		if t.decoder == nil {
@@ -142,13 +142,15 @@ func (t *Transform) ReWrite(b []byte) (count int, err error) {
 			if charsetFrom == "" {
 			if charsetFrom == "" {
 				charsetFrom = mail.MostCommonCharset
 				charsetFrom = mail.MostCommonCharset
 			}
 			}
+
 			if mail.SupportsCharset(charsetFrom) {
 			if mail.SupportsCharset(charsetFrom) {
-				t.decoder, err = transfer.NewBodyDecoder(t.pr, transfer.Base64, charsetFrom)
-			}
-			if err != nil {
-				return
+				t.decoder, err = transfer.NewBodyDecoder(t.pr, transfer.ParseEncoding(t.current.TransferEncoding), charsetFrom)
+				if err != nil {
+					return
+				}
+				t.current.Charset = "utf8"
+				t.current.TransferEncoding = "8bit"
 			}
 			}
-
 		}
 		}
 
 
 		wg := sync.WaitGroup{}
 		wg := sync.WaitGroup{}
@@ -171,14 +173,16 @@ func (t *Transform) ReWrite(b []byte) (count int, err error) {
 		_ = i
 		_ = i
 		wg.Wait()
 		wg.Wait()
 		_ = t.pr.Close()
 		_ = t.pr.Close()
-		count = len(b)
+
+		if last {
+			t.decoder = nil
+		}
 	}
 	}
 	return count, err
 	return count, err
 }
 }
 
 
 func (t *Transform) Reset() {
 func (t *Transform) Reset() {
 	t.decoder = nil
 	t.decoder = nil
-
 }
 }
 
 
 func Transformer() *StreamDecorator {
 func Transformer() *StreamDecorator {
@@ -236,8 +240,8 @@ func Transformer() *StreamDecorator {
 
 
 						// break chunk on new part
 						// break chunk on new part
 						if part.StartingPos > 0 && part.StartingPos > msgPos {
 						if part.StartingPos > 0 && part.StartingPos > msgPos {
-							reWriter.isBody = false
-							count, err = reWriter.ReWrite(p[pos : part.StartingPos-offset])
+							cbLen := len(part.ContentBoundary) + 3
+							count, err = reWriter.ReWrite(p[pos:part.StartingPos-offset-uint(cbLen)], true)
 
 
 							total += count
 							total += count
 							if err != nil {
 							if err != nil {
@@ -246,10 +250,11 @@ func Transformer() *StreamDecorator {
 							reWriter.current = part
 							reWriter.current = part
 							pos += count
 							pos += count
 							msgPos = part.StartingPos
 							msgPos = part.StartingPos
+							reWriter.isBody = false
 						}
 						}
 						// break chunk on header (found the body)
 						// break chunk on header (found the body)
 						if part.StartingPosBody > 0 && part.StartingPosBody >= msgPos {
 						if part.StartingPosBody > 0 && part.StartingPosBody >= msgPos {
-							count, err = reWriter.ReWrite(p[pos : part.StartingPosBody-offset])
+							count, err = reWriter.ReWrite(p[pos:part.StartingPosBody-offset], true)
 							total += count
 							total += count
 							if err != nil {
 							if err != nil {
 								break
 								break
@@ -262,7 +267,7 @@ func Transformer() *StreamDecorator {
 						}
 						}
 						// if on the latest (last) part, and yet there is still data to be written out
 						// if on the latest (last) part, and yet there is still data to be written out
 						if len(*parts)-1 == i && len(p)-1 > pos {
 						if len(*parts)-1 == i && len(p)-1 > pos {
-							count, err = reWriter.ReWrite(p[pos:])
+							count, err = reWriter.ReWrite(p[pos:], false)
 							total += count
 							total += count
 							if err != nil {
 							if err != nil {
 								break
 								break

+ 75 - 21
chunk/chunk_test.go

@@ -134,12 +134,20 @@ Content-Type: image/gif; name="map_of_Argentina.gif"
 Content-Transfer-Encoding: base64
 Content-Transfer-Encoding: base64
 Content-Disposition: attachment; filename="map_of_Argentina.gif"
 Content-Disposition: attachment; filename="map_of_Argentina.gif"
 
 
-R01GOD1hJQA1AKIAAP/////78P/omn19fQAAAAAAAAAAAAAAACwAAAAAJQA1AAAD7Qi63P5w
-wEmjBCLrnQnhYCgM1wh+pkgqqeC9XrutmBm7hAK3tP31gFcAiFKVQrGFR6kscnonTe7FAAad
-GugmRu3CmiBt57fsVq3Y0VFKnpYdxPC6M7Ze4crnnHum4oN6LFJ1bn5NXTN7OF5fQkN5WYow
-BEN2dkGQGWJtSzqGTICJgnQuTJN/WJsojad9qXMuhIWdjXKjY4tenjo6tjVssk2gaWq3uGNX
-U6ZGxseyk8SasGw3J9GRzdTQky1iHNvcPNNI4TLeKdfMvy0vMqLrItvuxfDW8ubjueDtJufz
-7itICBxISKDBgwgTKjyYAAA7
+iVBORw0KGgoAAAANSUhEUgAAAG4AAAAyCAIAAAAydXkgAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAA
+B3RJTUUH1gYEExgGfYkXIAAAAAd0RVh0QXV0aG9yAKmuzEgAAAAMdEVYdERlc2NyaXB0aW9uABMJ
+ISMAAAAKdEVYdENvcHlyaWdodACsD8w6AAAADnRFWHRDcmVhdGlvbiB0aW1lADX3DwkAAAAJdEVY
+dFNvZnR3YXJlAF1w/zoAAAALdEVYdERpc2NsYWltZXIAt8C0jwAAAAh0RVh0V2FybmluZwDAG+aH
+AAAAB3RFWHRTb3VyY2UA9f+D6wAAAAh0RVh0Q29tbWVudAD2zJa/AAAABnRFWHRUaXRsZQCo7tIn
+AAABAElEQVR4nO2ZUY6DIBCG66YH88FGvQLHEI+hHsMriPFw7AMJIYAwoO269v+eSDPDmKn5HOXx
+AAAAAAAAAPxblmWRZJZlSU3RCCE451Z1IUQ00c1ScM7p15zHT1J0URSpwUkpmrquh2HY60uA1+vl
+/b2qKkp63tUCcA8otrK8k+dKr7+I1V0tEEUppRRCZDcnzZUZHLdP6g6uFomiBACYeHUTTnF9ZwV4
+3dp1HaW0V5dRUR6ZJU3e7kqLaK+9ZpymKamKOV3uTZrhigCAU1wZhV7aRE2IlKn2tq60WNeVHtz3
+vV7Xdc05b5pmL0ADVwLg5QOu3BNZhhxVwH1cmYoluwDqX2zbj2bPFgAAAMdJREFUNnUruBIALxmu
+dF1mBXhlSimtPzW6O5hfIQOJB7mcK72NSzrk2bYt+ku0IvhL8PCKwxhTi3meT9s06aBGOSjjpduF
+Ut1UnlnUUmG4kDtj6j5aa5c3noOfhX4ND1eXhvJMOYZFGYYxNs8zY6wsS73O3u2rUY1jjOkOBlp5
+uSf4NTn/fsw4Bz/oSnMMCm9laU4FuzMj5ZpN6K58JrVSfnAEW9d127ZxHInVLZM2TSOlpL/C72He
+j2c+wQEAAAAAAAAAfB2/3ihTGANzPd8AAAAASUVORK5CYII=
 --DC8------------DC8638F443D87A7F0726DEF7--
 --DC8------------DC8638F443D87A7F0726DEF7--
 
 
 --D7F------------D7FD5A0B8AB9C65CCDBFA872--
 --D7F------------D7FD5A0B8AB9C65CCDBFA872--
@@ -342,8 +350,40 @@ func TestHashBytes(t *testing.T) {
 	}
 	}
 }
 }
 
 
+// TestTransformer streams email3 through the pipeline built by
+// initTestStream(true) (mimeanalyzer -> transformer -> chunksaver), then
+// reads the stored email back through a chunked reader. It fails if the
+// number of bytes read differs from email.size or if the output does not
+// contain the closing "</html>" tag.
+func TestTransformer(t *testing.T) {
+	store, chunksaver, mimeanalyzer, stream := initTestStream(true)
+	buf := make([]byte, 64)
+	if _, err := io.CopyBuffer(stream, bytes.NewBuffer([]byte(email3)), buf); err != nil {
+		t.Error(err)
+		return
+	}
+	// Close results are not part of what this test asserts.
+	_ = mimeanalyzer.Close()
+	_ = chunksaver.Close()
+
+	email, err := store.GetEmail(1)
+	if err != nil {
+		t.Error("email not found")
+		return
+	}
+
+	// part 0 asks the reader for the entire message, all parts in sequence
+	r, err := NewChunkedReader(store, email, 0)
+	if err != nil {
+		// the constructor returns a nil reader together with the error;
+		// stop here instead of handing that nil reader to io.CopyBuffer
+		t.Error(err)
+		return
+	}
+
+	var result bytes.Buffer
+	buf2 := make([]byte, 64)
+	if w, err := io.CopyBuffer(&result, r, buf2); err != nil {
+		t.Error(err)
+	} else if w != email.size {
+		t.Error("email.size != number of bytes copied from reader", w, email.size)
+	}
+
+	if !strings.Contains(result.String(), "</html>") {
+		t.Error("Looks like it didn;t read the entire email, was expecting </html>")
+	}
+}
+
 func TestChunkSaverReader(t *testing.T) {
 func TestChunkSaverReader(t *testing.T) {
-	store, chunksaver, mimeanalyzer, stream := initTestStream()
+	store, chunksaver, mimeanalyzer, stream := initTestStream(false)
 	buf := make([]byte, 64)
 	buf := make([]byte, 64)
 	var result bytes.Buffer
 	var result bytes.Buffer
 	if _, err := io.CopyBuffer(stream, bytes.NewBuffer([]byte(email3)), buf); err != nil {
 	if _, err := io.CopyBuffer(stream, bytes.NewBuffer([]byte(email3)), buf); err != nil {
@@ -432,8 +472,8 @@ func TestChunkSaverReader(t *testing.T) {
 
 
 func TestChunkSaverWrite(t *testing.T) {
 func TestChunkSaverWrite(t *testing.T) {
 
 
-	store, chunksaver, mimeanalyzer, stream := initTestStream()
-
+	store, chunksaver, mimeanalyzer, stream := initTestStream(true)
+	var out bytes.Buffer
 	buf := make([]byte, 128)
 	buf := make([]byte, 128)
 	if written, err := io.CopyBuffer(stream, bytes.NewBuffer([]byte(email3)), buf); err != nil {
 	if written, err := io.CopyBuffer(stream, bytes.NewBuffer([]byte(email3)), buf); err != nil {
 		t.Error(err)
 		t.Error(err)
@@ -445,7 +485,6 @@ func TestChunkSaverWrite(t *testing.T) {
 		for _, chunk := range store.chunks {
 		for _, chunk := range store.chunks {
 			total += len(chunk.data)
 			total += len(chunk.data)
 		}
 		}
-		// 8A9m4qGsTU4wQB1wAgBEVw==
 		fmt.Println("compressed", total, "saved:", written-int64(total))
 		fmt.Println("compressed", total, "saved:", written-int64(total))
 		email, err := store.GetEmail(1)
 		email, err := store.GetEmail(1)
 		if err != nil {
 		if err != nil {
@@ -455,11 +494,14 @@ func TestChunkSaverWrite(t *testing.T) {
 
 
 		// this should read all parts
 		// this should read all parts
 		r, err := NewChunkedReader(store, email, 0)
 		r, err := NewChunkedReader(store, email, 0)
-		if w, err := io.Copy(os.Stdout, r); err != nil {
+		if w, err := io.Copy(&out, r); err != nil {
 			t.Error(err)
 			t.Error(err)
 		} else if w != email.size {
 		} else if w != email.size {
 			t.Error("email.size != number of bytes copied from reader", w, email.size)
 			t.Error("email.size != number of bytes copied from reader", w, email.size)
+		} else if !strings.Contains(out.String(), "</html>") {
+			t.Error("The email didn't decode properly, expecting </html>")
 		}
 		}
+		out.Reset()
 
 
 		// test the seek feature
 		// test the seek feature
 		r, err = NewChunkedReader(store, email, 0)
 		r, err = NewChunkedReader(store, email, 0)
@@ -474,13 +516,14 @@ func TestChunkSaverWrite(t *testing.T) {
 			if err != nil {
 			if err != nil {
 				t.Error(err)
 				t.Error(err)
 			}
 			}
-			w, err := io.Copy(os.Stdout, r)
+			w, err := io.Copy(&out, r)
 			if err != nil {
 			if err != nil {
 				t.Error(err)
 				t.Error(err)
 			}
 			}
-			if w != int64(email.partsInfo.Parts[i].Size) {
-				t.Error("incorrect size, expecting", email.partsInfo.Parts[i].Size, "but read:", w)
+			if w != int64(email.partsInfo.Parts[i-1].Size) {
+				t.Error(i, "incorrect size, expecting", email.partsInfo.Parts[i-1].Size, "but read:", w)
 			}
 			}
+			out.Reset()
 		}
 		}
 
 
 		r, err = NewChunkedReader(store, email, 0)
 		r, err = NewChunkedReader(store, email, 0)
@@ -505,7 +548,7 @@ func TestChunkSaverWrite(t *testing.T) {
 	}
 	}
 }
 }
 
 
-func initTestStream() (*StoreMemory, *backends.StreamDecorator, *backends.StreamDecorator, backends.StreamProcessor) {
+func initTestStream(transform bool) (*StoreMemory, *backends.StreamDecorator, *backends.StreamDecorator, backends.StreamProcessor) {
 	// place the parse result in an envelope
 	// place the parse result in an envelope
 	e := mail.NewEnvelope("127.0.0.1", 1)
 	e := mail.NewEnvelope("127.0.0.1", 1)
 	to, _ := mail.NewAddress("[email protected]")
 	to, _ := mail.NewAddress("[email protected]")
@@ -518,17 +561,25 @@ func initTestStream() (*StoreMemory, *backends.StreamDecorator, *backends.Stream
 	chunksaver := backends.Streamers["chunksaver"]()
 	chunksaver := backends.Streamers["chunksaver"]()
 	mimeanalyzer := backends.Streamers["mimeanalyzer"]()
 	mimeanalyzer := backends.Streamers["mimeanalyzer"]()
 	transformer := backends.Streamers["transformer"]()
 	transformer := backends.Streamers["transformer"]()
-	debug := backends.Streamers["debug"]()
+	//debug := backends.Streamers["debug"]()
 	// add the default processor as the underlying processor for chunksaver
 	// add the default processor as the underlying processor for chunksaver
 	// and chain it with mimeanalyzer.
 	// and chain it with mimeanalyzer.
 	// Call order: mimeanalyzer -> chunksaver -> default (terminator)
 	// Call order: mimeanalyzer -> chunksaver -> default (terminator)
 	// This will also set our Open, Close and Initialize functions
 	// This will also set our Open, Close and Initialize functions
 	// we also inject a Storage and a ChunkingBufferMime
 	// we also inject a Storage and a ChunkingBufferMime
-	stream := mimeanalyzer.Decorate(
-		transformer.Decorate(
-			debug.Decorate(
+	var stream backends.StreamProcessor
+	if transform {
+		stream = mimeanalyzer.Decorate(
+			transformer.Decorate(
+				//debug.Decorate(
 				chunksaver.Decorate(
 				chunksaver.Decorate(
-					backends.DefaultStreamProcessor{}, store, chunkBuffer))))
+					backends.DefaultStreamProcessor{}, store, chunkBuffer))) //)
+	} else {
+		stream = mimeanalyzer.Decorate(
+			//debug.Decorate(
+			chunksaver.Decorate(
+				backends.DefaultStreamProcessor{}, store, chunkBuffer)) //)
+	}
 
 
 	// configure the buffer cap
 	// configure the buffer cap
 	bc := backends.BackendConfig{}
 	bc := backends.BackendConfig{}
@@ -540,6 +591,9 @@ func initTestStream() (*StoreMemory, *backends.StreamDecorator, *backends.Stream
 	// give it the envelope with the parse results
 	// give it the envelope with the parse results
 	_ = chunksaver.Open(e)
 	_ = chunksaver.Open(e)
 	_ = mimeanalyzer.Open(e)
 	_ = mimeanalyzer.Open(e)
-	_ = transformer.Open(e)
+	if transform {
+		_ = transformer.Open(e)
+	}
+
 	return store, chunksaver, mimeanalyzer, stream
 	return store, chunksaver, mimeanalyzer, stream
 }
 }

+ 5 - 2
chunk/reader.go

@@ -11,6 +11,8 @@ type chunkedReader struct {
 	email *Email
 	email *Email
 	// part requests a part. If 0, all the parts are read sequentially
 	// part requests a part. If 0, all the parts are read sequentially
 	part int
 	part int
+
+	// i is which part it's currently reading, j is which chunk of a part
 	i, j int
 	i, j int
 
 
 	cache cachedChunks
 	cache cachedChunks
@@ -20,6 +22,7 @@ type chunkedReader struct {
 // if part is 0, Read will read in the entire message. 1 selects the first part, 2 2nd, and so on..
 // if part is 0, Read will read in the entire message. 1 selects the first part, 2 2nd, and so on..
 func NewChunkedReader(db Storage, email *Email, part int) (*chunkedReader, error) {
 func NewChunkedReader(db Storage, email *Email, part int) (*chunkedReader, error) {
 	r := new(chunkedReader)
 	r := new(chunkedReader)
+	fmt.Println("new reader")
 	r.db = db
 	r.db = db
 	if email == nil {
 	if email == nil {
 		return nil, errors.New("nil email")
 		return nil, errors.New("nil email")
@@ -145,7 +148,7 @@ func (c *cachedChunks) empty() {
 	for i := range c.chunks {
 	for i := range c.chunks {
 		c.chunks[i] = nil
 		c.chunks[i] = nil
 	}
 	}
-	c.chunks = c.chunks[:] // set len to 0
+	c.chunks = c.chunks[:0] // set len to 0
 	for key := range c.hashIndex {
 	for key := range c.hashIndex {
 		delete(c.hashIndex, key)
 		delete(c.hashIndex, key)
 	}
 	}
@@ -180,10 +183,10 @@ func (r *chunkedReader) Read(p []byte) (n int, err error) {
 				if r.j == length { // last chunk in a part?
 				if r.j == length { // last chunk in a part?
 					r.j = 0 // reset chunk index
 					r.j = 0 // reset chunk index
 					r.i++   // advance to the next part
 					r.i++   // advance to the next part
+					r.cache.empty()
 					if r.i == len(r.email.partsInfo.Parts) || r.part > 0 {
 					if r.i == len(r.email.partsInfo.Parts) || r.part > 0 {
 						// there are no more parts to return
 						// there are no more parts to return
 						err = io.EOF
 						err = io.EOF
-						r.cache.empty()
 					}
 					}
 				}
 				}
 			}
 			}

+ 16 - 0
chunk/transfer/decoder.go

@@ -18,9 +18,25 @@ const (
 	QuotedPrintable
 	QuotedPrintable
 	SevenBit // default, 1-127, 13 & 10 at line endings
 	SevenBit // default, 1-127, 13 & 10 at line endings
 	EightBit // 998 octets per line,  13 & 10 at line endings
 	EightBit // 998 octets per line,  13 & 10 at line endings
+	Binary   // 8 bit with no line restrictions
 
 
 )
 )
 
 
+// ParseEncoding maps a transfer-encoding name to its Encoding constant.
+// The names recognised are "base64", "quoted-printable", "7bit", "8bit"
+// and "binary"; any other input, including the empty string, yields
+// SevenBit.
+//
+// NOTE(review): the match is exact and case-sensitive, so a value such as
+// "BASE64" falls through to SevenBit - confirm that callers normalise the
+// header value before calling.
+func ParseEncoding(str string) Encoding {
+	switch str {
+	case "base64":
+		return Base64
+	case "quoted-printable":
+		return QuotedPrintable
+	case "8bit":
+		return EightBit
+	case "binary":
+		return Binary
+	default:
+		// covers "7bit" as well as every unrecognised name
+		return SevenBit
+	}
+}
+
 // decoder decodes base64 and q-printable, then converting charset to UTF-8
 // decoder decodes base64 and q-printable, then converting charset to UTF-8
 type decoder struct {
 type decoder struct {
 	state     int
 	state     int

+ 4 - 0
mail/mime/mime.go

@@ -194,6 +194,10 @@ func (c *contentType) Charset() (ret string) {
 	return ""
 	return ""
 }
 }
 
 
+// Supertype returns the value stored in the content type's superType field.
+func (c *contentType) Supertype() (ret string) {
+	ret = c.superType
+	return ret
+}
+
 func newPart() *Part {
 func newPart() *Part {
 	mh := new(Part)
 	mh := new(Part)
 	mh.Headers = make(textproto.MIMEHeader, 1)
 	mh.Headers = make(textproto.MIMEHeader, 1)