Browse Source

* Patch from Bram Kuijvenhoven to fix stream, and add MIME mode

git-svn-id: trunk@6987 -
michael 18 years ago
parent
commit
28e7b354cc
1 changed files with 226 additions and 95 deletions
  1. 226 95
      packages/fcl-base/src/inc/base64.pp

+ 226 - 95
packages/fcl-base/src/inc/base64.pp

@@ -12,7 +12,12 @@
 
  **********************************************************************}
 
-// Encoding and decoding streams for base64 data as described in RFC2045
+// Encoding and decoding streams for base64 data as described in
+//   RFC2045 (Mode = bdmMIME) and
+//   RFC3548 (Mode = bdmStrict)
+
+// Addition of TBase64DecodingMode supporting both Strict and MIME mode is
+//   (C) 2007 Hexis BV, by Bram Kuijvenhoven ([email protected])
 
 {$MODE objfpc}
 {$H+}
@@ -21,7 +26,7 @@ unit base64;
 
 interface
 
-uses classes;
+uses classes, sysutils;
 
 type
 
@@ -40,51 +45,88 @@ type
     function Seek(Offset: Longint; Origin: Word): Longint; override;
   end;
 
+  (* The TBase64DecodingStream supports two modes:
+   * - 'strict mode':
+   *    - follows RFC3548
+   *    - rejects any characters outside of base64 alphabet,
+   *    - only accepts up to two '=' characters at the end and
+   *    - requires the input to have a Size being a multiple of 4; otherwise raises an EBase64DecodeException
+   * - 'MIME mode':
+   *    - follows RFC2045
+   *    - ignores any characters outside of base64 alphabet
+   *    - takes any '=' as end of string
+   *    - handles apparently truncated input streams gracefully
+   *)
+  TBase64DecodingMode = (bdmStrict, bdmMIME);
+
+  { TBase64DecodingStream }
 
   TBase64DecodingStream = class(TStream)
+  private
+    FMode: TBase64DecodingMode;
+    procedure SetMode(const AValue: TBase64DecodingMode);
+    function  GetSize: Int64; override;
+    function  GetPosition: Int64; override;
   protected
     InputStream: TStream;
-    CurPos, InputStreamSize: LongInt;
-    Buf: array[0..2] of Byte;
-    BufPos: Integer;    // Offset of byte which is to be read next
-    fEOF: Boolean;
+    CurPos,             // 0-based (decoded) position of this stream (nr. of decoded & Read bytes since last reset)
+    DecodedSize: Int64; // length of decoded stream ((expected) decoded bytes since last Reset until Mode-dependent end of stream)
+    ReadBase64ByteCount: Int64; // number of valid base64 bytes read from input stream since last Reset
+    Buf: array[0..2] of Byte; // last 3 decoded bytes
+    BufPos: Integer;          // offset in Buf of byte which is to be read next; if >2, next block must be read from InputStream & decoded
+    FEOF: Boolean;            // if true, all decoded bytes have been read
   public
     constructor Create(AInputStream: TStream);
+    constructor Create(AInputStream: TStream; AMode: TBase64DecodingMode);
     procedure Reset;
 
     function Read(var Buffer; Count: Longint): Longint; override;
     function Write(const Buffer; Count: Longint): Longint; override;
     function Seek(Offset: Longint; Origin: Word): Longint; override;
+    
     property EOF: Boolean read fEOF;
+    property Mode: TBase64DecodingMode read FMode write SetMode;
+  end;
+  
+  EBase64DecodingException = class(Exception)
   end;
-
-
 
 implementation
 
+uses
+  Math;
+
 const
+  SStrictNonBase64Char    = 'Non-valid Base64 Encoding character in input';
+  SStrictInputTruncated   = 'Input stream was truncated at non-4 byte boundary';
+  SStrictMisplacedPadChar = 'Unexpected padding character ''='' before end of input stream';
 
   EncodingTable: PChar =
     'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
 
-  DecTable: array[Byte] of Byte =
-    (99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,  // 0-15
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,  // 16-31
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 62, 99, 99, 99, 63,  // 32-47
-     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 99, 99, 99, 00, 99, 99,  // 48-63
-     99, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14,  // 64-79
-     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 99, 99, 99, 99, 99,  // 80-95
-     99, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  // 96-111
-     41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 99, 99, 99, 99, 99,  // 112-127
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99);
+const
+  NA =  85; // not in base64 alphabet at all; binary: 01010101
+  PC = 255; // padding character                      11111111
 
+  DecTable: array[Byte] of Byte =
+    (NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,  // 0-15
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,  // 16-31
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 62, NA, NA, NA, 63,  // 32-47
+     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NA, NA, NA, PC, NA, NA,  // 48-63
+     NA, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14,  // 64-79
+     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NA, NA, NA, NA, NA,  // 80-95
+     NA, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  // 96-111
+     41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NA, NA, NA, NA, NA,  // 112-127
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA);
+
+  Alphabet = ['a'..'z','A'..'Z','0'..'9','+','/','=']; // all 65 chars that are in the base64 encoding alphabet
 
 constructor TBase64EncodingStream.Create(AOutputStream: TStream);
 begin
@@ -164,96 +206,211 @@ begin
     raise EStreamError.Create('Invalid stream operation');
 end;
 
+procedure TBase64DecodingStream.SetMode(const AValue: TBase64DecodingMode);
+begin
+  if FMode = AValue then exit;
+  FMode := AValue;
+  DecodedSize := -1; // forget any calculations on this
+end;
 
+function TBase64DecodingStream.GetSize: Int64;
+var
+  endBytes: array[0..1] of Char;
+  ipos, isize: Int64;
+  scanBuf: array[0..1023] of Char;
+  count: LongInt;
+  i: Integer;
+  c: Char;
+begin
+  // Note: this method only works on Seekable InputStreams (for bdmStrict we also get the Size property)
+  if DecodedSize<>-1 then Exit(DecodedSize);
+  ipos := InputStream.Position; // save position in input stream
+  case Mode of
+    bdmMIME:  begin
+      // read until end of input stream or first occurence of a '='
+      Result := ReadBase64ByteCount; // keep number of valid base64 bytes since last Reset in Result
+      repeat
+        count := InputStream.Read(scanBuf, SizeOf(scanBuf));
+        for i := 0 to count-1 do begin
+          c := scanBuf[i];
+          if c in Alphabet-['='] then // base64 encoding characters except '='
+            Inc(Result)
+          else if c = '=' then // end marker '='
+            Break;
+        end;
+      until count = 0;
+      writeln(Result);
+      // we are now either at the end of the stream, or encountered our first '=', stored in c
+      if c = '=' then begin // '=' found
+        if Result mod 4 <= 1 then // badly placed '=', disregard last block
+          Result := (Result div 4) * 3
+        else // 4 byte block ended with '=' or '=='
+          Result := (Result div 4) * 3 + Result mod 4 - 1;
+      end else // end of stream
+        Result := (Result div 4) * 3; // number of valid 4 byte blocks times 3
+    end;
+    bdmStrict:begin
+      // seek to end of input stream, read last two bytes and determine size
+      //   from InputStream size and the number of leading '=' bytes
+      // NB we don't raise an exception here if the input does not contains an integer multiple of 4 bytes
+      ipos  := InputStream.Position;
+      isize := InputStream.Size;
+      Result := ((ReadBase64ByteCount + (isize - ipos) + 3) div 4) * 3;
+      InputStream.Seek(-2, soFromEnd);
+      InputStream.Read(endBytes, 2);
+      if endBytes[1] = '=' then begin // last byte
+        Dec(Result);
+      if endBytes[0] = '=' then       // second to last byte
+        Dec(Result);
+      end;
+    end;
+  end;
+  InputStream.Position := ipos; // restore position in input stream
+  // store calculated DecodedSize
+  DecodedSize := Result;
+end;
 
+function TBase64DecodingStream.GetPosition: Int64;
+begin
+  Result := CurPos;
+end;
 
 constructor TBase64DecodingStream.Create(AInputStream: TStream);
+begin
+  Create(AInputStream, bdmMIME); // MIME mode is default
+end;
+
+constructor TBase64DecodingStream.Create(AInputStream: TStream; AMode: TBase64DecodingMode);
 begin
   inherited Create;
   InputStream := AInputStream;
+  Mode := AMode;
   Reset;
 end;
 
 procedure TBase64DecodingStream.Reset;
 begin
-  InputStreamSize := -1;
-  BufPos := 3;
-  fEOF := False;
+  ReadBase64ByteCount := 0; // number of bytes Read form InputStream since last call to Reset
+  CurPos := 0; // position in decoded byte sequence since last Reset
+  DecodedSize := -1; // indicates unknown; will be set after first call to GetSize or when reaching end of stream
+  BufPos := 3; // signals we need to read & decode a new block of 4 bytes
+  FEOF := False;
 end;
 
 function TBase64DecodingStream.Read(var Buffer; Count: Longint): Longint;
 var
-  p: PChar;
-  b: Char;
-  ReadBuf: array[0..3] of Byte;
-  ToRead, OrgToRead, HaveRead, ReadOK, i, j: Integer;
+  p: PByte;
+  b: byte;
+  ReadBuf: array[0..3] of Byte; // buffer to store last read 4 input bytes
+  ToRead, OrgToRead, HaveRead, ReadOK, i: Integer;
+  
+  procedure DetectedEnd(ASize:Int64);
+  begin
+    DecodedSize := ASize;
+    // Correct Count if at end of base64 input
+    if CurPos + Count > DecodedSize then
+      Count := DecodedSize - CurPos;
+  end;
+  
 begin
-  if Count <= 0 then exit(0);
-  if InputStreamSize <> -1 then begin
-    if CurPos + Count > InputStreamSize then
-      Count := InputStreamSize - CurPos;
+  if Count <= 0 then exit(0); // nothing to read, quit
+  if DecodedSize <> -1 then begin // try using calculated size info if possible
+    if CurPos + Count > DecodedSize then
+      Count := DecodedSize - CurPos;
     if Count <= 0 then exit(0);
   end;
 
   Result := 0;
-  p := PChar(@Buffer);
-  while (Count > 0) and not fEOF do begin
+  p := @Buffer;
+  while true do begin
+    // get new 4-byte block if at end of Buf
     if BufPos > 2 then begin
       BufPos := 0;
       // Read the next 4 valid bytes
-      ToRead := 4;
-      ReadOK := 0;
+      ToRead := 4; // number of base64 bytes left to read into ReadBuf
+      ReadOK := 0; // number of base64 bytes already read into ReadBuf
       while ToRead > 0 do begin
         OrgToRead := ToRead;
         HaveRead := InputStream.Read(ReadBuf[ReadOK], ToRead);
         //WriteLn('ToRead = ', ToRead, ', HaveRead = ', HaveRead, ', ReadOK=', ReadOk);
-        if HaveRead > 0 then begin
-          i := ReadOk;
-          while i<HaveRead do begin
-            ReadBuf[i] := DecTable[ReadBuf[i]];
-            if ReadBuf[i] = 99 then
-              for j := i to 3 do
-                ReadBuf[i] := ReadBuf[i + 1]
-            else begin
-              Inc(i);
+        if HaveRead > 0 then begin // if any new bytes; in ReadBuf[ReadOK .. ReadOK + HaveRead-1]
+          for i := ReadOK to ReadOK + HaveRead - 1 do begin
+            b := DecTable[ReadBuf[i]];
+            if b <> NA then begin // valid base64 alphabet character ('=' inclusive)
+              ReadBuf[ReadOK] := b;
               Inc(ReadOK);
               Dec(ToRead);
+            end else if Mode=bdmStrict then begin // non-valid character
+              raise EBase64DecodingException.CreateFmt(SStrictNonBase64Char,[]);
             end;
           end;
         end;
-        if HaveRead <> OrgToRead then begin
-          //WriteLn('Ende? ReadOK=', ReadOK, ', count=', Count);
+        
+        if HaveRead <> OrgToRead then begin // less than 4 base64 bytes could be read; end of input stream
+          //WriteLn('End: ReadOK=', ReadOK, ', count=', Count);
           for i := ReadOK to 3 do
-            ReadBuf[i] := Ord('=');
-          fEOF := True;
-          if ReadOK < 2 then exit;    // Not enough data available in input stream
-          break;
+            ReadBuf[i] := 0; // pad buffer with zeros so decoding of 4-bytes will be correct
+          if (Mode = bdmStrict) and (ReadOK > 0) then
+            raise EBase64DecodingException.CreateFmt(SStrictInputTruncated,[]);
+          Break;
         end;
       end;
 
-      // Check for fill bytes
-      if (Count >= 2) and (ReadBuf[3] = Ord('=')) then begin
-        //WriteLn('Endemarkierung!');
-        fEOF := True;
-        if ReadBuf[2] = Ord('=') then
-          Count := 1
-        else
-          Count := 2;
+      Inc(ReadBase64ByteCount, ReadOK);
+      
+      // Check for pad characters
+      case Mode of
+        bdmStrict:begin
+          if ReadOK = 0 then // end of input stream was reached at 4-byte boundary
+            DetectedEnd(CurPos)
+          else if (ReadBuf[0] = PC) or (ReadBuf[1] = PC) then
+            raise EBase64DecodingException.CreateFmt(SStrictMisplacedPadChar,[])   // =BBB or B=BB
+          else if (ReadBuf[2] = PC) then begin
+            if (ReadBuf[3] <> PC) or (InputStream.Position < InputStream.Size) then
+              raise EBase64DecodingException.CreateFmt(SStrictMisplacedPadChar,[]); // BB=B or BB==, but not at end of input stream
+            DetectedEnd(CurPos + 1)  // only one byte left to read;  BB==, at end of input stream
+          end else if (ReadBuf[3] = PC) then begin
+            if (InputStream.Position < InputStream.Size) then
+              raise EBase64DecodingException.CreateFmt(SStrictMisplacedPadChar,[]); // BBB=, but not at end of input stream
+            DetectedEnd(CurPos + 2); // only two bytes left to read; BBB=, at end of input stream
+          end;
+        end;
+        bdmMIME:begin
+          if ReadOK = 0 then // end of input stream was reached at 4-byte boundary
+            DetectedEnd(CurPos)
+          else if (ReadBuf[0] = PC) or (ReadBuf[1] = PC) then
+            DetectedEnd(CurPos)      // =BBB or B=BB: end here
+          else if (ReadBuf[2] = PC) then begin
+            DetectedEnd(CurPos + 1)  // only one byte left to read;  BB=B or BB==
+          end else if (ReadBuf[3] = PC) then begin
+            DetectedEnd(CurPos + 2); // only two bytes left to read; BBB=
+          end;
+        end;
       end;
-
+      
       // Decode the 4 bytes in the buffer to 3 undecoded bytes
-      Buf[0] := ReadBuf[0] shl 2 or ReadBuf[1] shr 4;
+      Buf[0] :=  ReadBuf[0]         shl 2 or ReadBuf[1] shr 4;
       Buf[1] := (ReadBuf[1] and 15) shl 4 or ReadBuf[2] shr 2;
-      Buf[2] := (ReadBuf[2] and 3) shl 6 or ReadBuf[3];
+      Buf[2] := (ReadBuf[2] and  3) shl 6 or ReadBuf[3];
+    end;
+    
+    if Count <= 0 then begin
+      Break;
     end;
 
-    p[0] := Chr(Buf[BufPos]);
+    // write one byte to Count
+    p^ := Buf[BufPos];
     Inc(p);
     Inc(BufPos);
     Inc(CurPos);
     Dec(Count);
     Inc(Result);
   end;
+  
+  // check for EOF
+  if (DecodedSize <> -1) and (CurPos >= DecodedSize) then begin
+    FEOF := true;
+  end;
 end;
 
 function TBase64DecodingStream.Write(const Buffer; Count: Longint): Longint;
@@ -262,35 +419,9 @@ begin
 end;
 
 function TBase64DecodingStream.Seek(Offset: Longint; Origin: Word): Longint;
-var
-  ipos: LongInt;
-  endbytes: array[0..1] of Char;
 begin
-  {This will work only if the input stream supports seeking / Size. If not, the
-   input stream will raise an exception; we don't handle them here but pass them
-   to the caller.}
-  if InputStreamSize = -1 then begin
-    ipos := InputStream.Position;
-    InputStreamSize := ((InputStream.Size - ipos + 3) div 4) * 3;
-    InputStream.Seek(-2, soFromEnd);
-    InputStream.Read(endbytes, 2);
-    InputStream.Position := ipos;
-    if endbytes[1] = '=' then begin
-      Dec(InputStreamSize);
-    if endbytes[0] = '=' then
-      Dec(InputStreamSize);
-    end;
-  end;
-
-  // This stream only supports the Seek modes needed for determining its size
-  if (Origin = soFromCurrent) and (Offset = 0) then
-    Result := CurPos
-  else if (Origin = soFromEnd) and (Offset = 0) then
-    Result := InputStreamSize
-  else if (Origin = soFromBeginning) and (Offset = CurPos) then
-    Result := CurPos
-  else
-    raise EStreamError.Create('Invalid stream operation');
+  // TODO: implement Seeking in TBase64DecodingStream
+  raise EStreamError.Create('Invalid stream operation');
 end;