mime.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211
  1. package mimeparse
  2. /*
  3. Mime is a simple MIME scanner for email-message byte streams.
  4. It builds a data-structure that represents a tree of all the mime parts,
  5. recording their headers, starting and ending positions, while processing
  6. the message efficiently, slice by slice. It avoids the use of regular expressions,
  7. doesn't back-track or multi-scan.
  8. */
  9. import (
  10. "bytes"
  11. "fmt"
  12. "io"
  13. "net/textproto"
  14. "strconv"
  15. "strings"
  16. "sync"
  17. )
  18. var (
  19. MaxNodesErr *Error
  20. NotMineErr *Error
  21. )
  22. func init() {
  23. NotMineErr = &Error{
  24. err: ErrorNotMime,
  25. }
  26. MaxNodesErr = &Error{
  27. err: ErrorMaxNodes,
  28. }
  29. }
  30. const (
  31. // maxBoundaryLen limits the length of the content-boundary.
  32. // Technically the limit is 79, but here we are more liberal
  33. maxBoundaryLen = 70 + 10
  34. // doubleDash is the prefix for a content-boundary string. It is also added
  35. // as a postfix to a content-boundary string to signal the end of content parts.
  36. doubleDash = "--"
  37. // startPos assigns the pos property when the buffer is set.
  38. // The reason why -1 is because peek() implementation becomes simpler
  39. startPos = -1
  40. // headerErrorThreshold how many errors in the header
  41. headerErrorThreshold = 4
  42. multipart = "multipart"
  43. contentTypeHeader = "Content-Type"
  44. dot = "."
  45. first = "1"
  46. // MaxNodes limits the number of items in the Parts array. Effectively limiting
  47. // the number of nested calls the parser may make.
  48. MaxNodes = 512
  49. )
  50. type MimeError int
  51. const (
  52. ErrorNotMime MimeError = iota
  53. ErrorMaxNodes
  54. ErrorBoundaryTooShort
  55. ErrorBoundaryLineExpected
  56. ErrorUnexpectedChar
  57. ErrorHeaderFieldTooShort
  58. ErrorBoundaryExceededLength
  59. ErrorHeaderParseError
  60. ErrorMissingSubtype
  61. ErrorUnexpectedTok
  62. ErrorUnexpectedCommentToken
  63. ErrorInvalidToken
  64. ErrorUnexpectedQuotedStrToken
  65. ErrorParameterExpectingEquals
  66. ErrorNoHeader
  67. )
  68. func (e MimeError) Error() string {
  69. switch e {
  70. case ErrorNotMime:
  71. return "not Mime"
  72. case ErrorMaxNodes:
  73. return "too many mime part nodes"
  74. case ErrorBoundaryTooShort:
  75. return "content boundary too short"
  76. case ErrorBoundaryLineExpected:
  77. return "boundary new line expected"
  78. case ErrorUnexpectedChar:
  79. return "unexpected char"
  80. case ErrorHeaderFieldTooShort:
  81. return "header field too short"
  82. case ErrorBoundaryExceededLength:
  83. return "boundary exceeded max length"
  84. case ErrorHeaderParseError:
  85. return "header parse error"
  86. case ErrorMissingSubtype:
  87. return "missing subtype"
  88. case ErrorUnexpectedTok:
  89. return "unexpected tok"
  90. case ErrorUnexpectedCommentToken:
  91. return "unexpected comment token"
  92. case ErrorInvalidToken:
  93. return "invalid token"
  94. case ErrorUnexpectedQuotedStrToken:
  95. return "unexpected token"
  96. case ErrorParameterExpectingEquals:
  97. return "expecting ="
  98. case ErrorNoHeader:
  99. return "parse error, no header"
  100. }
  101. return "unknown mime error"
  102. }
  103. func (e *MimeError) UnmarshalJSON(b []byte) error {
  104. v, err := strconv.ParseInt(string(b), 10, 32)
  105. if err != nil {
  106. return err
  107. }
  108. *e = MimeError(v)
  109. return nil
  110. }
  111. // MarshalJSON implements json.Marshaler
  112. func (e MimeError) MarshalJSON() ([]byte, error) {
  113. return []byte(strconv.Itoa(int(e))), nil
  114. }
  115. // Error implements the error interface
  116. type Error struct {
  117. err error
  118. char byte
  119. peek byte
  120. pos uint // msgPos
  121. }
  122. func (e Error) Error() string {
  123. if e.char == 0 {
  124. return e.err.Error()
  125. }
  126. return e.err.Error() + " char:[" + string(e.char) + "], peek:[" +
  127. string(e.peek) + "], pos:" + strconv.Itoa(int(e.pos))
  128. }
  129. func (e Error) Unwrap() error {
  130. return e.err
  131. }
  132. func (e *Error) ParseError() bool {
  133. if e.err != io.EOF && error(e.err) != NotMineErr && error(e.err) != MaxNodesErr {
  134. return true
  135. }
  136. return false
  137. }
  138. func (p *Parser) newParseError(e MimeError) *Error {
  139. var peek byte
  140. offset := 1
  141. for {
  142. // reached the end? (don't wait for more bytes to consume)
  143. if p.pos+offset >= len(p.buf) {
  144. peek = 0
  145. break
  146. }
  147. // peek the next byte
  148. peek := p.buf[p.pos+offset]
  149. if peek == '\r' {
  150. // ignore \r
  151. offset++
  152. continue
  153. }
  154. break
  155. }
  156. return &Error{
  157. err: e,
  158. char: p.ch,
  159. peek: peek,
  160. pos: p.msgPos,
  161. }
  162. }
  163. type captureBuffer struct {
  164. bytes.Buffer
  165. upper bool // flag used by acceptHeaderName(), if true, the next accepted chr will be uppercase'd
  166. }
  167. type Parser struct {
  168. // related to the state of the parser
  169. buf []byte // input buffer
  170. pos int // position in the input buffer
  171. peekOffset int // peek() ignores \r so we must keep count of how many \r were ignored
  172. ch byte // value of byte at current pos in buf[]. At EOF, ch == 0
  173. gotNewSlice, consumed chan bool // flags that control the synchronisation of reads
  174. accept captureBuffer // input is captured to this buffer to build strings
  175. boundaryMatched int // an offset. Used in cases where the boundary string is split over multiple buffers
  176. count uint // counts how many times Parse() was called
  177. result chan parserMsg // used to pass the result back to the main goroutine
  178. mux sync.Mutex // ensure calls to Parse() and Close() are synchronized
  179. // Parts is the mime parts tree. The parser builds the parts as it consumes the input
  180. // In order to represent the tree in an array, we use Parts.Node to store the name of
  181. // each node. The name of the node is the *path* of the node. The root node is always
  182. // "1". The child would be "1.1", the next sibling would be "1.2", while the child of
  183. // "1.2" would be "1.2.1"
  184. Parts Parts
  185. msgPos uint // global position in the message
  186. lastBoundaryPos uint // the last msgPos where a boundary was detected
  187. maxNodes int // the desired number of maximum nodes the parser is limited to
  188. w io.Writer // underlying io.Writer
  189. temp string
  190. }
  191. type Parts []*Part
  192. type Part struct {
  193. // Headers contain the header names and values in a map data-structure
  194. Headers textproto.MIMEHeader
  195. // Node stores the name for the node that is a part of the resulting mime tree
  196. Node string
  197. // StartingPos is the starting position, including header (after boundary, 0 at the top)
  198. StartingPos uint
  199. // StartingPosBody is the starting position of the body, after header \n\n
  200. StartingPosBody uint
  201. // EndingPos is the ending position for the part, including the boundary line
  202. EndingPos uint
  203. // EndingPosBody is the ending position for the body, excluding boundary.
  204. // I.e EndingPos - len(Boundary Line)
  205. EndingPosBody uint
  206. // Charset holds the character-set the part is encoded in, eg. us-ascii
  207. Charset string
  208. // TransferEncoding holds the transfer encoding that was used to pack the message eg. base64
  209. TransferEncoding string
  210. // ContentBoundary holds the unique string that was used to delimit multi-parts, eg. --someboundary123
  211. ContentBoundary string
  212. // ContentType holds the mime content type, eg text/html
  213. ContentType *contentType
  214. // ContentBase is typically a url
  215. ContentBase string
  216. // DispositionFileName what file-nme to use for the part, eg. image.jpeg
  217. DispositionFileName string
  218. // ContentDisposition describes how to display the part, eg. attachment
  219. ContentDisposition string
  220. // ContentName as name implies
  221. ContentName string
  222. }
  223. type parameter struct {
  224. name string
  225. value string
  226. }
  227. type contentType struct {
  228. superType string
  229. subType string
  230. parameters []parameter
  231. b bytes.Buffer
  232. }
  233. type parserMsg struct {
  234. err error
  235. }
  236. var isTokenSpecial = [128]bool{
  237. '(': true,
  238. ')': true,
  239. '<': true,
  240. '>': true,
  241. '@': true,
  242. ',': true,
  243. ';': true,
  244. ':': true,
  245. '\\': true,
  246. '"': true,
  247. '/': true,
  248. '[': true,
  249. ']': true,
  250. '?': true,
  251. '=': true,
  252. }
  253. func (c *contentType) params() (ret string) {
  254. defer func() {
  255. c.b.Reset()
  256. }()
  257. for k := range c.parameters {
  258. if c.parameters[k].value == "" {
  259. c.b.WriteString("; " + c.parameters[k].name)
  260. continue
  261. }
  262. c.b.WriteString("; " + c.parameters[k].name + "=\"" + c.parameters[k].value + "\"")
  263. }
  264. return c.b.String()
  265. }
  266. // String returns the contentType type as a string
  267. func (c *contentType) String() (ret string) {
  268. ret = fmt.Sprintf("%s/%s%s", c.superType, c.subType,
  269. c.params())
  270. return
  271. }
  272. // Charset returns the charset value specified by the content type
  273. func (c *contentType) Charset() (ret string) {
  274. if c.superType == "" {
  275. return ""
  276. }
  277. for i := range c.parameters {
  278. if c.parameters[i].name == "charset" {
  279. return c.parameters[i].value
  280. }
  281. }
  282. return ""
  283. }
  284. func (c *contentType) Supertype() (ret string) {
  285. return c.superType
  286. }
  287. func newPart() *Part {
  288. mh := new(Part)
  289. mh.Headers = make(textproto.MIMEHeader, 1)
  290. return mh
  291. }
  292. func (p *Parser) addPart(mh *Part, id string) {
  293. mh.Node = id
  294. p.Parts = append(p.Parts, mh)
  295. }
  296. // more waits for more input, returns false if there is no more
  297. func (p *Parser) more() bool {
  298. p.consumed <- true // signal that we've reached the end of available input
  299. gotMore := <-p.gotNewSlice
  300. return gotMore
  301. }
  302. // next reads the next byte and advances the pointer
  303. // returns 0 if no more input can be read
  304. // blocks if at the end of the buffer
  305. func (p *Parser) next() byte {
  306. for {
  307. // wait for more bytes if reached the end
  308. if p.pos+1 >= len(p.buf) {
  309. if !p.more() {
  310. p.ch = 0
  311. return 0
  312. }
  313. }
  314. if p.pos > -1 || p.msgPos != 0 {
  315. // dont incr on first call to next()
  316. p.msgPos++
  317. }
  318. p.pos++
  319. if p.buf[p.pos] == '\r' {
  320. // ignore \r
  321. continue
  322. }
  323. p.ch = p.buf[p.pos]
  324. return p.ch
  325. }
  326. }
  327. // peek does not advance the pointer, but will block if there's no more
  328. // input in the buffer
  329. func (p *Parser) peek() byte {
  330. p.peekOffset = 1
  331. for {
  332. // reached the end? Wait for more bytes to consume
  333. if p.pos+p.peekOffset >= len(p.buf) {
  334. if !p.more() {
  335. return 0
  336. }
  337. }
  338. // peek the next byte
  339. ret := p.buf[p.pos+p.peekOffset]
  340. if ret == '\r' {
  341. // ignore \r
  342. p.peekOffset++
  343. continue
  344. }
  345. return ret
  346. }
  347. }
  348. // inject is used for testing, to simulate a byte stream
  349. func (p *Parser) inject(input ...[]byte) {
  350. p.msgPos = 0
  351. p.set(input[0])
  352. p.pos = 0
  353. p.ch = p.buf[0]
  354. go func() {
  355. for i := 1; i < len(input); i++ {
  356. <-p.consumed
  357. p.set(input[i])
  358. p.gotNewSlice <- true
  359. }
  360. <-p.consumed
  361. p.gotNewSlice <- false // no more data
  362. }()
  363. }
  364. // Set the buffer and reset p.pos to startPos, which is typically -1
  365. // The reason why -1 is because peek() implementation becomes more
  366. // simple, as it only needs to add 1 to p.pos for all cases.
  367. // We don't read the buffer when we set, only when next() is called.
  368. // This allows us to peek in to the next buffer while still being on
  369. // the last element from the previous buffer
  370. func (p *Parser) set(input []byte) {
  371. if p.pos != startPos {
  372. // rewind
  373. p.pos = startPos
  374. }
  375. p.buf = input
  376. }
  377. // skip advances the pointer n bytes. It will block if not enough bytes left in
  378. // the buffer, i.e. if bBytes > len(p.buf) - p.pos
  379. func (p *Parser) skip(nBytes int) {
  380. for {
  381. if p.pos+nBytes < len(p.buf) {
  382. p.pos += nBytes - 1
  383. p.msgPos = p.msgPos + uint(nBytes) - 1
  384. p.next()
  385. return
  386. }
  387. remainder := len(p.buf) - p.pos
  388. nBytes -= remainder
  389. p.pos += remainder - 1
  390. p.msgPos += uint(remainder - 1)
  391. p.next()
  392. if p.ch == 0 {
  393. return
  394. } else if nBytes < 1 {
  395. return
  396. }
  397. }
  398. }
  399. // boundary scans until next boundary string, returns error if not found
  400. // syntax specified https://tools.ietf.org/html/rfc2046 p21
  401. func (p *Parser) boundary(contentBoundary string) (end bool, err error) {
  402. defer func() {
  403. if err == nil {
  404. if p.ch == '\n' {
  405. p.next()
  406. }
  407. }
  408. }()
  409. if len(contentBoundary) < 1 {
  410. err = ErrorBoundaryTooShort
  411. }
  412. boundary := doubleDash + contentBoundary
  413. p.boundaryMatched = 0
  414. for {
  415. if i := bytes.Index(p.buf[p.pos:], []byte(boundary)); i > -1 {
  416. p.skip(i)
  417. p.lastBoundaryPos = p.msgPos
  418. p.skip(len(boundary))
  419. if end, err = p.boundaryEnd(); err != nil {
  420. return
  421. }
  422. if err = p.transportPadding(); err != nil {
  423. return
  424. }
  425. if p.ch != '\n' {
  426. err = ErrorBoundaryLineExpected
  427. }
  428. return
  429. } else {
  430. // search the tail for partial match
  431. // if one is found, load more data and continue the match
  432. // if matched, advance buffer in same way as above
  433. start := len(p.buf) - len(boundary) + 1
  434. if start < 0 {
  435. start = 0
  436. }
  437. subject := p.buf[start:]
  438. for i := 0; i < len(subject); i++ {
  439. if subject[i] == boundary[p.boundaryMatched] {
  440. p.boundaryMatched++
  441. } else {
  442. p.boundaryMatched = 0
  443. }
  444. }
  445. p.skip(len(p.buf) - p.pos) // discard the remaining data
  446. if p.ch == 0 {
  447. return false, io.EOF
  448. } else if p.boundaryMatched > 0 {
  449. // check for a match by joining the match from the end of the last buf
  450. // & the beginning of this buf
  451. if bytes.Compare(
  452. p.buf[0:len(boundary)-p.boundaryMatched],
  453. []byte(boundary[p.boundaryMatched:])) == 0 {
  454. // advance the pointer
  455. p.skip(len(boundary) - p.boundaryMatched)
  456. p.lastBoundaryPos = p.msgPos - uint(len(boundary))
  457. end, err = p.boundaryEnd()
  458. if err != nil {
  459. return
  460. }
  461. if err = p.transportPadding(); err != nil {
  462. return
  463. }
  464. if p.ch != '\n' {
  465. err = ErrorBoundaryLineExpected
  466. }
  467. return
  468. }
  469. p.boundaryMatched = 0
  470. }
  471. }
  472. }
  473. }
  474. // is it the end of a boundary?
  475. func (p *Parser) boundaryEnd() (result bool, err error) {
  476. if p.ch == '-' && p.peek() == '-' {
  477. p.next()
  478. p.next()
  479. result = true
  480. }
  481. if p.ch == 0 {
  482. err = io.EOF
  483. }
  484. return
  485. }
  486. // *LWSP-char
  487. // = *(WSP / CRLF WSP)
  488. func (p *Parser) transportPadding() (err error) {
  489. for {
  490. if p.ch == ' ' || p.ch == '\t' {
  491. p.next()
  492. } else if c := p.peek(); p.ch == '\n' && (c == ' ' || c == '\t') {
  493. p.next()
  494. p.next()
  495. } else {
  496. if c == 0 {
  497. err = io.EOF
  498. }
  499. return
  500. }
  501. }
  502. }
  503. // acceptHeaderName builds the header name in the buffer while ensuring that
  504. // that the case is normalized. Ie. Content-type is written as Content-Type
  505. func (p *Parser) acceptHeaderName() {
  506. if p.accept.upper && p.ch >= 'a' && p.ch <= 'z' {
  507. p.ch -= 32
  508. }
  509. if !p.accept.upper && p.ch >= 'A' && p.ch <= 'Z' {
  510. p.ch += 32
  511. }
  512. p.accept.upper = p.ch == '-'
  513. _ = p.accept.WriteByte(p.ch)
  514. }
  515. func (p *Parser) header(mh *Part) (err error) {
  516. var (
  517. state int
  518. name string
  519. errorCount int
  520. )
  521. defer func() {
  522. p.accept.Reset()
  523. if val := mh.Headers.Get("Content-Transfer-Encoding"); val != "" {
  524. mh.TransferEncoding = val
  525. }
  526. if val := mh.Headers.Get("Content-Disposition"); val != "" {
  527. mh.ContentDisposition = val
  528. }
  529. }()
  530. for {
  531. switch state {
  532. case 0: // header name
  533. if (p.ch >= 33 && p.ch <= 126) && p.ch != ':' {
  534. // capture
  535. p.acceptHeaderName()
  536. } else if p.ch == ':' {
  537. state = 1
  538. } else if p.ch == ' ' && p.peek() == ':' { // tolerate a SP before the :
  539. p.next()
  540. state = 1
  541. } else {
  542. if errorCount < headerErrorThreshold {
  543. state = 2 // tolerate this error
  544. continue
  545. }
  546. err = p.newParseError(ErrorUnexpectedChar)
  547. return
  548. }
  549. if state == 1 {
  550. if p.accept.Len() < 2 {
  551. err = p.newParseError(ErrorHeaderFieldTooShort)
  552. return
  553. }
  554. p.accept.upper = true
  555. name = p.accept.String()
  556. p.accept.Reset()
  557. if c := p.peek(); c == ' ' {
  558. // skip the space
  559. p.next()
  560. }
  561. p.next()
  562. continue
  563. }
  564. case 1: // header value
  565. if name == contentTypeHeader {
  566. var err error
  567. contentType, err := p.contentType()
  568. if err != nil {
  569. return err
  570. }
  571. mh.ContentType = &contentType
  572. for i := range contentType.parameters {
  573. switch {
  574. case contentType.parameters[i].name == "boundary":
  575. mh.ContentBoundary = contentType.parameters[i].value
  576. if len(mh.ContentBoundary) >= maxBoundaryLen {
  577. return p.newParseError(ErrorBoundaryExceededLength)
  578. }
  579. case contentType.parameters[i].name == "charset":
  580. mh.Charset = strings.ToUpper(contentType.parameters[i].value)
  581. case contentType.parameters[i].name == "name":
  582. mh.ContentName = contentType.parameters[i].value
  583. }
  584. }
  585. mh.Headers.Add(contentTypeHeader, contentType.String())
  586. state = 0
  587. } else {
  588. if p.ch != '\n' || p.isWSP(p.ch) {
  589. _ = p.accept.WriteByte(p.ch)
  590. } else if p.ch == '\n' {
  591. c := p.peek()
  592. if p.isWSP(c) {
  593. break // skip \n
  594. } else {
  595. mh.Headers.Add(name, p.accept.String())
  596. p.accept.Reset()
  597. state = 0
  598. }
  599. } else {
  600. err = p.newParseError(ErrorHeaderParseError)
  601. return
  602. }
  603. }
  604. case 2: // header error, discard line
  605. errorCount++
  606. // error recovery for header lines with parse errors -
  607. // ignore the line, discard anything that was scanned, scan until the end-of-line
  608. // then start a new line again (back to state 0)
  609. p.accept.Reset()
  610. for {
  611. if p.ch != '\n' {
  612. p.next()
  613. }
  614. if p.ch == 0 {
  615. return io.EOF
  616. } else if p.ch == '\n' {
  617. state = 0
  618. break
  619. }
  620. }
  621. }
  622. if p.ch == '\n' && p.peek() == '\n' {
  623. return nil
  624. }
  625. p.next()
  626. if p.ch == 0 {
  627. return io.EOF
  628. }
  629. }
  630. }
  631. func (p *Parser) isWSP(b byte) bool {
  632. return b == ' ' || b == '\t'
  633. }
  634. func (p *Parser) contentType() (result contentType, err error) {
  635. result = contentType{}
  636. if result.superType, err = p.mimeType(); err != nil {
  637. return
  638. }
  639. if p.ch != '/' {
  640. return result, p.newParseError(ErrorMissingSubtype)
  641. }
  642. p.next()
  643. if result.subType, err = p.mimeSubType(); err != nil {
  644. return
  645. }
  646. for {
  647. if p.ch == ';' {
  648. p.next()
  649. continue
  650. }
  651. if p.ch == '\n' {
  652. c := p.peek()
  653. if p.isWSP(c) {
  654. p.next() // skip \n (FWS)
  655. continue
  656. }
  657. if c == '\n' { // end of header
  658. return
  659. }
  660. }
  661. if p.isWSP(p.ch) { // skip WSP
  662. p.next()
  663. continue
  664. }
  665. if p.ch == '(' {
  666. if err = p.comment(); err != nil {
  667. return
  668. }
  669. continue
  670. }
  671. if p.ch > 32 && p.ch < 128 && !isTokenSpecial[p.ch] {
  672. if key, val, err := p.parameter(); err != nil {
  673. return result, err
  674. } else {
  675. if key == "charset" {
  676. val = strings.ToUpper(val)
  677. }
  678. // add the new parameter
  679. result.parameters = append(result.parameters, parameter{key, val})
  680. }
  681. } else {
  682. break
  683. }
  684. }
  685. return
  686. }
  687. func (p *Parser) mimeType() (str string, err error) {
  688. defer func() {
  689. if p.accept.Len() > 0 {
  690. str = p.accept.String()
  691. p.accept.Reset()
  692. }
  693. }()
  694. if p.ch < 128 && p.ch > 32 && !isTokenSpecial[p.ch] {
  695. for {
  696. if p.ch >= 'A' && p.ch <= 'Z' {
  697. p.ch += 32 // lowercase
  698. }
  699. _ = p.accept.WriteByte(p.ch)
  700. p.next()
  701. if !(p.ch < 128 && p.ch > 32 && !isTokenSpecial[p.ch]) {
  702. return
  703. }
  704. }
  705. } else {
  706. err = p.newParseError(ErrorUnexpectedTok)
  707. return
  708. }
  709. }
  710. func (p *Parser) mimeSubType() (str string, err error) {
  711. return p.mimeType()
  712. }
  713. // comment = "(" *(ctext / quoted-pair / comment) ")"
  714. //
  715. // ctext = <any CHAR excluding "(", ; => may be folded
  716. // ")", "\" & CR, & including
  717. // linear-white-space>
  718. //
  719. // quoted-pair = "\" CHAR ; may quote any char
  720. func (p *Parser) comment() (err error) {
  721. // all header fields except for Content-Disposition
  722. // can include RFC 822 comments
  723. if p.ch != '(' {
  724. err = p.newParseError(ErrorUnexpectedCommentToken)
  725. }
  726. for {
  727. p.next()
  728. if p.ch == ')' {
  729. p.next()
  730. return
  731. }
  732. }
  733. }
  734. func (p *Parser) token(lower bool) (str string, err error) {
  735. defer func() {
  736. if err == nil {
  737. str = p.accept.String()
  738. }
  739. if p.accept.Len() > 0 {
  740. p.accept.Reset()
  741. }
  742. }()
  743. var once bool // must match at least 1 good char
  744. for {
  745. if p.ch > 32 && p.ch < 128 && !isTokenSpecial[p.ch] {
  746. if lower && p.ch >= 'A' && p.ch <= 'Z' {
  747. p.ch += 32 // lowercase it
  748. }
  749. _ = p.accept.WriteByte(p.ch)
  750. once = true
  751. } else if !once {
  752. err = p.newParseError(ErrorInvalidToken)
  753. return
  754. } else {
  755. return
  756. }
  757. p.next()
  758. }
  759. }
  760. // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
  761. // quoted-pair = "\" CHAR
  762. // CHAR = <any US-ASCII character (octets 0 - 127)>
  763. // qdtext = <any TEXT except <">>
  764. // TEXT = <any OCTET except CTLs, but including LWS>
  765. func (p *Parser) quotedString() (str string, err error) {
  766. defer func() {
  767. if err == nil {
  768. str = p.accept.String()
  769. }
  770. if p.accept.Len() > 0 {
  771. p.accept.Reset()
  772. }
  773. }()
  774. if p.ch != '"' {
  775. err = p.newParseError(ErrorUnexpectedQuotedStrToken)
  776. return
  777. }
  778. p.next()
  779. state := 0
  780. for {
  781. switch state {
  782. case 0: // inside quotes
  783. if p.ch == '"' {
  784. p.next()
  785. return
  786. }
  787. if p.ch == '\\' {
  788. state = 1
  789. break
  790. }
  791. if (p.ch < 127 && p.ch > 32) || p.isWSP(p.ch) {
  792. _ = p.accept.WriteByte(p.ch)
  793. } else {
  794. err = p.newParseError(ErrorUnexpectedQuotedStrToken)
  795. return
  796. }
  797. case 1:
  798. // escaped (<any US-ASCII character (octets 0 - 127)>)
  799. if p.ch != 0 && p.ch <= 127 {
  800. _ = p.accept.WriteByte(p.ch)
  801. state = 0
  802. } else {
  803. err = p.newParseError(ErrorUnexpectedQuotedStrToken)
  804. return
  805. }
  806. }
  807. p.next()
  808. }
  809. }
  810. // parameter := attribute "=" value
  811. // attribute := token
  812. // token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
  813. // value := token / quoted-string
  814. // CTL := %x00-1F / %x7F
  815. // quoted-string : <"> <">
  816. func (p *Parser) parameter() (attribute, value string, err error) {
  817. defer func() {
  818. p.accept.Reset()
  819. }()
  820. if attribute, err = p.token(true); err != nil {
  821. return "", "", err
  822. }
  823. if p.ch != '=' {
  824. if len(attribute) > 0 {
  825. return
  826. }
  827. return "", "", p.newParseError(ErrorParameterExpectingEquals)
  828. }
  829. p.next()
  830. if p.ch == '"' {
  831. if value, err = p.quotedString(); err != nil {
  832. return
  833. }
  834. return
  835. } else {
  836. if value, err = p.token(false); err != nil {
  837. return
  838. }
  839. return
  840. }
  841. }
  842. // mime scans the mime content and builds the mime-part tree in
  843. // p.Parts on-the-fly, as more bytes get fed in.
  844. func (p *Parser) mime(part *Part, cb string) (err error) {
  845. if len(p.Parts) >= p.maxNodes {
  846. for {
  847. // skip until the end of the stream (we've stopped parsing due to max nodes)
  848. p.skip(len(p.buf) + 1)
  849. if p.ch == 0 {
  850. break
  851. }
  852. }
  853. if p.maxNodes == 1 {
  854. // in this case, only one header item, so assume the end of message is
  855. // the ending position of the header
  856. p.Parts[0].EndingPos = p.msgPos
  857. p.Parts[0].EndingPosBody = p.msgPos
  858. } else {
  859. err = MaxNodesErr
  860. }
  861. return
  862. }
  863. count := 1
  864. root := part == nil
  865. if root {
  866. part = newPart()
  867. p.addPart(part, first)
  868. defer func() {
  869. // err is io.EOF if nothing went with parsing
  870. if err == io.EOF {
  871. err = nil
  872. }
  873. if err != MaxNodesErr {
  874. part.EndingPosBody = p.lastBoundaryPos
  875. part.EndingPos = p.msgPos
  876. } else {
  877. // remove the unfinished node (edge case)
  878. var parts []*Part
  879. p.Parts = append(parts, p.Parts[:p.maxNodes]...)
  880. }
  881. // not a mime email (but is an rfc5322 message)
  882. if len(p.Parts) == 1 &&
  883. len(part.Headers) > 0 &&
  884. part.Headers.Get("MIME-Version") == "" &&
  885. err == nil {
  886. err = NotMineErr
  887. }
  888. }()
  889. }
  890. // read the header
  891. if p.ch >= 33 && p.ch <= 126 {
  892. err = p.header(part)
  893. if err != nil {
  894. return err
  895. }
  896. } else if root {
  897. return p.newParseError(ErrorNoHeader)
  898. }
  899. if p.ch == '\n' && p.peek() == '\n' {
  900. p.next()
  901. p.next()
  902. }
  903. part.StartingPosBody = p.msgPos
  904. ct := part.ContentType
  905. if ct != nil && ct.superType == "message" && ct.subType == "rfc822" {
  906. var subPart *Part
  907. subPart = newPart()
  908. subPartId := part.Node + dot + strconv.Itoa(count)
  909. subPart.StartingPos = p.msgPos
  910. count++
  911. p.addPart(subPart, subPartId)
  912. err = p.mime(subPart, part.ContentBoundary)
  913. subPart.EndingPosBody = p.lastBoundaryPos
  914. subPart.EndingPos = p.msgPos
  915. return
  916. }
  917. if ct != nil && ct.superType == multipart &&
  918. part.ContentBoundary != "" &&
  919. part.ContentBoundary != cb { /* content-boundary must be different to previous */
  920. var subPart *Part
  921. subPart = newPart()
  922. subPart.ContentBoundary = part.ContentBoundary
  923. for {
  924. subPartId := part.Node + dot + strconv.Itoa(count)
  925. if end, bErr := p.boundary(part.ContentBoundary); bErr != nil {
  926. // there was an error with parsing the boundary
  927. err = bErr
  928. if subPart.StartingPos == 0 {
  929. subPart.StartingPos = p.msgPos
  930. } else {
  931. subPart.EndingPos = p.msgPos
  932. subPart.EndingPosBody = p.lastBoundaryPos
  933. subPart, count = p.split(subPart, count)
  934. }
  935. return
  936. } else if end {
  937. // reached the terminating boundary (ends with double dash --)
  938. subPart.EndingPosBody = p.lastBoundaryPos
  939. subPart.EndingPos = p.msgPos
  940. break
  941. } else {
  942. // process the part boundary
  943. if subPart.StartingPos == 0 {
  944. subPart.StartingPos = p.msgPos
  945. count++
  946. p.addPart(subPart, subPartId)
  947. err = p.mime(subPart, part.ContentBoundary)
  948. if err != nil {
  949. return
  950. }
  951. subPartId = part.Node + dot + strconv.Itoa(count)
  952. } else {
  953. subPart.EndingPosBody = p.lastBoundaryPos
  954. subPart.EndingPos = p.msgPos
  955. subPart, count = p.split(subPart, count)
  956. p.addPart(subPart, subPartId)
  957. err = p.mime(subPart, part.ContentBoundary)
  958. if err != nil {
  959. return
  960. }
  961. }
  962. }
  963. }
  964. } else if part.ContentBoundary == "" {
  965. for {
  966. p.skip(len(p.buf))
  967. if p.ch == 0 {
  968. if part.StartingPosBody > 0 {
  969. part.EndingPosBody = p.msgPos
  970. part.EndingPos = p.msgPos
  971. }
  972. err = io.EOF
  973. return
  974. }
  975. }
  976. }
  977. return
  978. }
  979. func (p *Parser) split(subPart *Part, count int) (*Part, int) {
  980. cb := subPart.ContentBoundary
  981. subPart = nil
  982. count++
  983. subPart = newPart()
  984. subPart.StartingPos = p.msgPos
  985. subPart.ContentBoundary = cb
  986. return subPart, count
  987. }
  988. func (p *Parser) reset() {
  989. p.lastBoundaryPos = 0
  990. p.pos = startPos
  991. p.msgPos = 0
  992. p.count = 0
  993. p.ch = 0
  994. }
  995. // Open prepares the parser for accepting input
  996. func (p *Parser) Open() {
  997. p.Parts = make([]*Part, 0)
  998. }
  999. // Close tells the MIME Parser there's no more data & waits for it to return a result
  1000. // it will return an io.EOF error if no error with parsing MIME was detected
  1001. func (p *Parser) Close() error {
  1002. p.mux.Lock()
  1003. defer func() {
  1004. p.reset()
  1005. p.mux.Unlock()
  1006. }()
  1007. if p.count == 0 {
  1008. return nil
  1009. }
  1010. for {
  1011. select {
  1012. // we need to repeat sending a false signal because peek() / next() could be
  1013. // called a few times before a result is returned
  1014. case p.gotNewSlice <- false:
  1015. select {
  1016. case <-p.consumed: // more() was called, there's nothing to consume
  1017. case r := <-p.result:
  1018. return r.err
  1019. }
  1020. case r := <-p.result:
  1021. return r.err
  1022. }
  1023. }
  1024. }
  1025. func (p *Parser) Write(buf []byte) (int, error) {
  1026. p.temp = p.temp + string(buf)
  1027. if err := p.Parse(buf); err != nil {
  1028. return len(buf), err
  1029. }
  1030. if p.w != nil {
  1031. return p.w.Write(buf)
  1032. }
  1033. return len(buf), nil
  1034. }
  1035. // Parse takes a byte stream, and feeds it to the MIME Parser, then
  1036. // waits for the Parser to consume all input before returning.
  1037. // The parser will build a parse tree in p.Parts
  1038. // The parser doesn't decode any input. All it does
  1039. // is collect information about where the different MIME parts
  1040. // start and end, and other meta-data. This data can be used
  1041. // later down the stack to determine how to store/decode/display
  1042. // the messages
  1043. // returns error if there's a parse error, except io.EOF when no
  1044. // error occurred.
  1045. func (p *Parser) Parse(buf []byte) error {
  1046. defer func() {
  1047. p.mux.Unlock()
  1048. }()
  1049. p.mux.Lock()
  1050. // Feed the new slice. Assumes that the parser is blocked now, waiting
  1051. // for new data, or not started yet.
  1052. p.set(buf)
  1053. if p.count == 0 {
  1054. // initial step - start the mime parser
  1055. go func() {
  1056. p.next()
  1057. err := p.mime(nil, "")
  1058. p.result <- parserMsg{err}
  1059. }()
  1060. } else {
  1061. // tell the parser to resume consuming
  1062. p.gotNewSlice <- true
  1063. }
  1064. p.count++
  1065. select {
  1066. case <-p.consumed: // wait for prev buf to be consumed
  1067. return nil
  1068. case r := <-p.result:
  1069. // mime() has returned with a result (it finished consuming)
  1070. p.reset()
  1071. return r.err
  1072. }
  1073. }
  1074. // Error returns true if the type of error was a parse error
  1075. // Returns false if it was an io.EOF or the email was not mime, or exceeded maximum nodes
  1076. func (p *Parser) ParseError(err error) bool {
  1077. if err != nil && err != io.EOF && err != NotMineErr && err != MaxNodesErr {
  1078. return true
  1079. }
  1080. return false
  1081. }
  1082. // NewMimeParser returns a mime parser. See MaxNodes for how many nodes it's limited to
  1083. func NewMimeParser() *Parser {
  1084. p := new(Parser)
  1085. p.consumed = make(chan bool)
  1086. p.gotNewSlice = make(chan bool)
  1087. p.result = make(chan parserMsg, 1)
  1088. p.maxNodes = MaxNodes
  1089. return p
  1090. }
  1091. func NewMimeParserWriter(w io.Writer) *Parser {
  1092. p := NewMimeParser()
  1093. p.w = w
  1094. return p
  1095. }
  1096. // NewMimeParser returns a mime parser with a custom MaxNodes value
  1097. func NewMimeParserLimited(maxNodes int) *Parser {
  1098. p := NewMimeParser()
  1099. p.maxNodes = maxNodes
  1100. return p
  1101. }