| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511 |
- {
- $Project$
- $Workfile$
- $Revision$
- $DateUTC$
- $Id$
- This file is part of the Indy (Internet Direct) project, and is offered
- under the dual-licensing agreement described on the Indy website.
- (http://www.indyproject.org/)
- Copyright:
- (c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
- }
- {
- $Log$
- }
- {
- Rev 1.7 10/6/2004 10:47:00 PM BGooijen
- changed array indexer from 64 to 32 bit, it gave errors in dotnet, and making
- >2GB arrays is not done anyways
- Rev 1.6 2004.05.20 1:39:28 PM czhower
- Last of the IdStream updates
- Rev 1.5 2004.05.20 11:37:24 AM czhower
- IdStreamVCL
- Rev 1.4 2004.05.19 3:06:56 PM czhower
- IdStream / .NET fix
- Rev 1.3 2004.02.03 5:45:50 PM czhower
- Name changes
- Rev 1.2 1/21/2004 1:19:58 PM JPMugaas
- InitComponent.
- Rev 1.1 16/01/2004 18:00:26 CCostelloe
- This is now working code.
- Rev 1.0 14/01/2004 00:46:14 CCostelloe
- An implementation of Apple's BinHex4 encoding. It is a "work-in-progress",
- it does not yet work properly, only checked in as a placeholder.
- }
- unit IdCoderBinHex4;
- {
- Written by Ciaran Costelloe, [email protected], December 2003.
- Based on TIdCoderMIME, derived from TIdCoder3to4, derived from TIdCoder.
- DESCRIPTION:
- This is an implementation of the BinHex 4.0 decoder used particularly by Apple.
- It is defined in RFC 1741. It is a variant of a 3-to-4 decoder, but it uses
- the byte $90 as a marker for sequences of repeating characters, allowing some compression,
- but thereby not allowing it to be mapped in as another 3-to-4 decoder.
- Per the RFC, it must be encapsulated in a MIME part (it cannot be directly coded
- inline in an email "body"), the part is strictly defined to have a header entry
- (with the appropriate "myfile.ext"):
- Content-Type: application/mac-binhex40; name="myfile.ext"
- After the header, the part MUST start with the text (NOT indented):
- (This file must be converted with BinHex 4.0)
- This allows the option AND the ambiguity of identifying it by either the
- Content-Type OR by the initial text line. However, it is also stated that any
- text before the specified text line must be ignored, implying the line does not
- have to be the first - an apparent contradiction.
- The encoded file then follows, split with CRLFs (to avoid lines that are too long
- for emails) that must be discarded.
- The file starts with a colon (:), a header, followed by the file contents, and
- ending in another colon.
- There is also an interesting article on the web, "BinHex 4.0 Definition by Peter
- N Lewis, Aug 1991", which has very useful information on what is implemented in
- practice, and seems to come with the good provenance of bitter experience.
- From RFC 1741:
- 1) 8 bit encoding of the file:
- Byte: Length of FileName (1->63)
- Bytes: FileName ("Length" bytes)
- Byte: Version
- Long: Type
- Long: Creator
- Word: Flags (And $F800)
- Long: Length of Data Fork
- Long: Length of Resource Fork
- Word: CRC
- Bytes: Data Fork ("Data Length" bytes)
- Word: CRC
- Bytes: Resource Fork ("Rsrc Length" bytes)
- Word: CRC
- 2) Compression of repetitive characters.
- ($90 is the marker, encoding is made for 3->255 characters)
- 00 11 22 33 44 55 66 77 -> 00 11 22 33 44 55 66 77
- 11 22 22 22 22 22 22 33 -> 11 22 90 06 33
- 11 22 90 33 44 -> 11 22 90 00 33 44
- The whole file is considered as a stream of bits. This stream will
- be divided in blocks of 6 bits and then converted to one of 64
- characters contained in a table. The characters in this table have
- been chosen for maximum noise protection. The format will start
- with a ":" (first character on a line) and end with a ":".
- There will be a maximum of 64 characters on a line. It must be
- preceded, by this comment, starting in column 1 (it does not start
- in column 1 in this document):
- (This file must be converted with BinHex 4.0)
- Any text before this comment is to be ignored.
- The characters used are:
- !"#$%&'()*+,- 012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr
- IMPLEMENTATION NOTES:
- There are older variants referred to in RFC 1741, but I have only come
- across encodings in current use as separate MIME parts, which this
- implementation is targeted at.
- When encoding into BinHex4, you do NOT have to implement the run-length
- encoding (the $90 marker byte for sequences of repeating characters), and
- this encoder does not do it. The CRC values generated in the header have
- NOT been tested (because this decoder ignores them).
- The decoder has to allow for the run-length encoding. The decoder works
- irrespective of whether it is preceded by the identification string
- or not (GBinHex4IdentificationString below). The string to be decoded must
- include the starting and ending colons. It can deal with embedded CR and LFs.
- Unlike base64 and quoted-printable, we cannot decode line-by-line cleanly,
- because the lines do not contain a clean number of 4-byte blocks due to the
- first line starting with a colon, leaving 63 bytes on that line, plus you have
- the problem of dealing with the run-length encoding and stripping the header.
- If the attachment only has a data fork, it is saved; if only a resource fork,
- it is saved; if both, only the data fork is saved. The decoder does NOT
- check that the CRC values are correct.
- Indy units use the content-type to decide if the part is BinHex4:
- Content-Type: application/mac-binhex40; name="myfile.ext"
- WARNING: This code only implements BinHex4.0 when used as a part in a
- MIME-encoded email. To have a part encoded, set the parts
- ContentTransfer := 'binhex40'.
- }
- interface
- {$i IdCompilerDefines.inc}
- uses
- Classes,
- IdException,
- IdCoder,
- IdCoder3to4,
- IdGlobal,
- IdStream,
- SysUtils;
- type
- // Encoder that wraps a stream in a BinHex 4.0 block (header, data fork, CRCs),
- // 3-to-4 encodes it with the BinHex alphabet and writes it out in lines of at
- // most 64 characters, preceded by GBinHex4IdentificationString.
- TIdEncoderBinHex4 = class(TIdEncoder3to4)
- protected
- // Name stored in the BinHex header; Encode raises EIdMissingFileName if empty.
- FFileName: String;
- // Returns the CRC of ASize bytes of ABlock starting at AOffset, built by
- // feeding each byte to AddByteCRC starting from a CRC of 0.
- function GetCRC(const ABlock: TIdBytes; const AOffset: Integer = 0; const ASize: Integer = -1): Word;
- // Shifts the 8 bits of AByte (most significant first) into the running CRC.
- procedure AddByteCRC(var ACRC: Word; AByte: Byte);
- // Installs the BinHex code table and fill character.
- procedure InitComponent; override;
- public
- {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
- constructor Create(AOwner: TComponent); reintroduce; overload;
- {$ENDIF}
- // Reads the source stream and writes the complete BinHex 4.0 text to ADestStream.
- procedure Encode(ASrcStream: TStream; ADestStream: TStream; const ABytes: Integer = -1); override;
- //We need to specify this value before calling Encode...
- property FileName: String read FFileName write FFileName;
- end;
- // Decoder for a complete BinHex 4.0 block (the text between and including the
- // two colons). Decode expands the $90 run-length encoding, strips the header
- // and writes one fork to FStream.
- TIdDecoderBinHex4 = class(TIdDecoder4to3)
- protected
- // Installs the BinHex decode table, code table and fill character.
- procedure InitComponent; override;
- public
- {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
- constructor Create(AOwner: TComponent); reintroduce; overload;
- {$ENDIF}
- // Raises EIdMissingColon when the starting or terminating colon is absent.
- procedure Decode(ASrcStream: TStream; const ABytes: Integer = -1); override;
- end;
- const
- // The 64 characters of the BinHex 4.0 alphabet, in value order (index 0..63).
- //Note the 7th character is a ' which is represented in a string as ''
- GBinHex4CodeTable: string = '!"#$%&''()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr'; {Do not Localize}
- // Line that Encode writes ahead of the encoded block; Decode does not require it.
- GBinHex4IdentificationString: string = '(This file must be converted with BinHex 4.0)'; {Do not Localize}
- type
- // Raised by TIdDecoderBinHex4.Decode when the starting or terminating ':' is missing.
- EIdMissingColon = class(EIdException);
- // Raised by TIdEncoderBinHex4.Encode when FileName has not been set.
- EIdMissingFileName = class(EIdException);
- var
- // Decode table built from GBinHex4CodeTable in this unit's initialization
- // section and assigned to FDecodeTable by TIdDecoderBinHex4.InitComponent.
- GBinHex4DecodeTable: TIdDecodeTable;
- implementation
- uses
- IdResourceStrings;
- { TIdDecoderBinHex4 }
- {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
- // Only compiled when WORKAROUND_INLINE_CONSTRUCTORS is defined; simply forwards
- // AOwner to the inherited constructor.
- constructor TIdDecoderBinHex4.Create(AOwner: TComponent);
- begin
- inherited Create(AOwner);
- end;
- {$ENDIF}
- // Configures the inherited 4-to-3 decoder for the BinHex 4.0 alphabet.
- procedure TIdDecoderBinHex4.InitComponent;
- begin
- inherited InitComponent;
- // Table prepared once in this unit's initialization section.
- FDecodeTable := GBinHex4DecodeTable;
- FCodingTable := ToBytes(GBinHex4CodeTable);
- // NOTE(review): '=' is not one of the characters in GBinHex4CodeTable; how the
- // base class uses FFillChar is not visible from this unit.
- FFillChar := '='; {Do not Localize}
- end;
- // Decodes one complete BinHex 4.0 block read from ASrcStream and writes the
- // payload to FStream (nothing is written when FStream is not assigned).
- //
- // ASrcStream - source of the encoded text; it must contain the starting and the
- //              terminating colon. Anything before the first colon, and any CR
- //              or LF inside the block, is discarded.
- // ABytes     - number of bytes to read, or -1 for what IndyLength reports.
- //
- // If the file has a data fork, the data fork is written; otherwise the resource
- // fork is written. CRC values are not verified.
- //
- // Raises EIdMissingColon when either colon is missing, and EIdException when the
- // decoded block is too short to hold a header or starts with a run-length marker.
- procedure TIdDecoderBinHex4.Decode(ASrcStream: TStream; const ABytes: Integer = -1);
- var
-   LCopyToPos: Integer;
-   LIn: TIdBytes;
-   LInSize: Integer;
-   LOut: TIdBytes;
-   LN: Integer;
-   LRepetition: Integer;
-   LPrevious: Byte;
-   LWanted: Int64;
-   LAvailable: Integer;
-   LForkLength: Integer;
-   // Reads the 4-byte big-endian value at LOut[AIndex]. Int64 is used so that a
-   // length with the top bit set cannot overflow or turn negative.
-   function ReadLongBE(const AIndex: Integer): Int64;
-   begin
-     Result := (Int64(LOut[AIndex]) shl 24) or (Int64(LOut[AIndex+1]) shl 16) or
-       (Int64(LOut[AIndex+2]) shl 8) or Int64(LOut[AIndex+3]);
-   end;
- begin
-   LInSize := IndyLength(ASrcStream, ABytes);
-   if LInSize <= 0 then begin
-     Exit;
-   end;
-   SetLength(LIn, LInSize);
-   TIdStreamHelper.ReadBytes(ASrcStream, LIn, LInSize);
-   //We don't need to check if the identification string is present, since the
-   //attachment is bounded by a : at the start and end, and the identification
-   //string may have been stripped off already.
-   //While we are at it, remove all the CRs and LFs...
-   //LCopyToPos: -1 = no colon seen yet, >= 0 = next write position inside the
-   //block, -2 = terminating colon found.
-   LCopyToPos := -1;
-   for LN := 0 to LInSize-1 do begin
-     if LIn[LN] = 58 then begin //Ascii 58 is a colon :
-       if LCopyToPos = -1 then begin
-         //This is the start of the file...
-         LCopyToPos := 0;
-       end else begin
-         //This is the second :, i.e. the end of the file...
-         SetLength(LIn, LCopyToPos);
-         LCopyToPos := -2; //Flag that we got an end marker
-         Break;
-       end;
-     end else begin
-       if (LCopyToPos > -1) and (not ByteIsInEOL(LIn, LN)) then begin
-         //Compact the payload towards the start of the buffer.
-         LIn[LCopyToPos] := LIn[LN];
-         Inc(LCopyToPos);
-       end;
-     end;
-   end;
-   //did we get the initial colon?
-   if LCopyToPos = -1 then begin
-     raise EIdMissingColon.Create('Block passed to TIdDecoderBinHex4.Decode is missing a starting colon :'); {Do not Localize}
-   end;
-   //did we get the terminating colon?
-   if LCopyToPos <> -2 then begin
-     raise EIdMissingColon.Create('Block passed to TIdDecoderBinHex4.Decode is missing a terminating colon :'); {Do not Localize}
-   end;
-   if Length(LIn) = 0 then begin
-     Exit;
-   end;
-   LOut := InternalDecode(LIn);
-   // Now expand the run-length encoding in place.
-   // $90 is the marker, the byte after it is the TOTAL run length (3->255):
-   // 00 11 22 33 44 55 66 77 -> 00 11 22 33 44 55 66 77
-   // 11 22 22 22 22 22 22 33 -> 11 22 90 06 33
-   // 11 22 90 33 44 -> 11 22 90 00 33 44
-   LN := 0;
-   while LN < Length(LOut) do begin
-     if LOut[LN] <> $90 then begin
-       Inc(LN);
-     end
-     else if LN + 1 >= Length(LOut) then begin
-       //A marker in the very last byte has no count byte to read; keep it as a
-       //literal instead of indexing past the end of the buffer.
-       Inc(LN);
-     end else begin
-       LRepetition := LOut[LN+1];
-       if LRepetition = 0 then begin
-         //90 is by itself, so just remove the 00
-         //22 90 00 -> 22 90
-         RemoveBytes(LOut, LN+1, 1);
-         Inc(LN); //Move past the $90
-       end
-       else if LRepetition = 1 then begin
-         //Not allowed: 22 90 01 -> 22
-         //Throw an exception or deal with it? Deal with it.
-         RemoveBytes(LOut, LN, 2);
-       end else begin
-         //Every remaining case repeats the byte before the marker, so one must exist.
-         if LN = 0 then begin
-           raise EIdException.Create('Block passed to TIdDecoderBinHex4.Decode starts with a run-length marker'); {Do not Localize}
-         end;
-         LPrevious := LOut[LN-1];
-         LOut[LN] := LPrevious;
-         if LRepetition = 2 then begin
-           //Not allowed: 22 90 02 -> 22 22
-           //Throw an exception or deal with it? Deal with it.
-           RemoveBytes(LOut, LN+1, 1);
-           Inc(LN);
-         end else begin
-           //22 90 03 -> 22 22 22
-           //22 90 04 -> 22 22 22 22
-           //The byte, the marker and the count already occupy 3 slots, so only
-           //LRepetition-3 extra bytes have to be inserted.
-           LOut[LN+1] := LPrevious;
-           if LRepetition > 3 then begin
-             ExpandBytes(LOut, LN+2, LRepetition-3, LPrevious);
-           end;
-           Inc(LN, LRepetition-1); //Move to the first byte after the run
-         end;
-       end;
-     end;
-   end;
-   //We are not finished yet. Strip off the header, by calculating the offset
-   //of the start of the attachment and its length.
-   if Length(LOut) = 0 then begin
-     raise EIdException.Create('Block passed to TIdDecoderBinHex4.Decode has a truncated header'); {Do not Localize}
-   end;
-   LN := 1 + LOut[0]; //Length byte + length of filename
-   Inc(LN, 1 + 4 + 4 + 2); //Version, type, creator, flags
-   //Both fork lengths and the header CRC must be present before we index them.
-   if LN + 4 + 4 + 2 > Length(LOut) then begin
-     raise EIdException.Create('Block passed to TIdDecoderBinHex4.Decode has a truncated header'); {Do not Localize}
-   end;
-   LWanted := ReadLongBE(LN);
-   Inc(LN, 4); //Go past the data fork length
-   if LWanted = 0 then begin
-     //No data fork present, save the resource fork instead...
-     LWanted := ReadLongBE(LN);
-     Inc(LN, 4); //Go past the resource fork length
-     Inc(LN, 2); //Header CRC
-     //The (empty) data fork is still followed by its own CRC word, which sits
-     //in front of the resource fork.
-     Inc(LN, 2);
-   end else begin
-     Inc(LN, 4); //Go past the resource fork length
-     Inc(LN, 2); //Header CRC
-   end;
-   //At this point, LOut[LN] points to the actual data (the data fork, if there
-   //is one, or else the resource fork if there is no data fork).
-   //Never write more than the buffer actually holds.
-   LAvailable := Length(LOut) - LN;
-   if LAvailable < 0 then begin
-     LAvailable := 0;
-   end;
-   if LWanted > LAvailable then begin
-     LForkLength := LAvailable;
-   end else begin
-     LForkLength := Integer(LWanted);
-   end;
-   if Assigned(FStream) and (LForkLength > 0) then begin
-     TIdStreamHelper.Write(FStream, LOut, LForkLength, LN);
-   end;
- end;
- { TIdEncoderBinHex4 }
- {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
- // Only compiled when WORKAROUND_INLINE_CONSTRUCTORS is defined; simply forwards
- // AOwner to the inherited constructor.
- constructor TIdEncoderBinHex4.Create(AOwner: TComponent);
- begin
- inherited Create(AOwner);
- end;
- {$ENDIF}
- // Configures the inherited 3-to-4 encoder for the BinHex 4.0 alphabet.
- procedure TIdEncoderBinHex4.InitComponent;
- begin
- inherited InitComponent;
- FCodingTable := ToBytes(GBinHex4CodeTable);
- // NOTE(review): '=' is not one of the characters in GBinHex4CodeTable; Encode
- // pads its input to a multiple of 3 bytes before calling InternalEncode.
- FFillChar := '='; {Do not Localize}
- end;
- // Computes the running CRC over part of a byte buffer.
- //
- // ABlock  - buffer holding the bytes.
- // AOffset - index of the first byte to include.
- // ASize   - number of bytes to include; the effective count is whatever
- //           IndyLength(ABlock, ASize, AOffset) returns.
- //
- // Returns the value obtained by starting from 0 and passing every selected byte
- // through AddByteCRC, or 0 when no bytes are selected.
- function TIdEncoderBinHex4.GetCRC(const ABlock: TIdBytes; const AOffset: Integer = 0;
-   const ASize: Integer = -1): Word;
- var
-   LIndex: Integer;
-   LLast: Integer;
- begin
-   Result := 0;
-   //When the effective count is zero or less, LLast is below AOffset and the
-   //loop body never runs.
-   LLast := AOffset + IndyLength(ABlock, ASize, AOffset) - 1;
-   for LIndex := AOffset to LLast do begin
-     AddByteCRC(Result, ABlock[LIndex]);
-   end;
- end;
- // Shifts one byte into the running BinHex 4.0 CRC, most significant bit first.
- //
- // ACRC  - running 16-bit CRC, updated in place (start from 0).
- // AByte - the byte to add.
- //
- // BinHex 4.0 uses a 16-bit CRC with the polynomial $1021. For every bit the CRC
- // is shifted left, the data bit enters at the right, and the polynomial is
- // XORed in when a 1 was shifted out of the top.
- procedure TIdEncoderBinHex4.AddByteCRC(var ACRC: Word; AByte: Byte);
- var
-   LWillShiftedOutBitBeA1: Boolean;
-   LN: Integer;
- begin
-   for LN := 1 to 8 do begin
-     LWillShiftedOutBitBeA1 := (ACRC and $8000) <> 0;
-     //Shift the CRC left, and add the next bit from our byte. The top bit is
-     //masked off first so the result always fits in a Word; otherwise the
-     //assignment fails when range checking is enabled.
-     ACRC := Word(((ACRC and $7FFF) shl 1) or (AByte shr 7));
-     if LWillShiftedOutBitBeA1 then begin
-       ACRC := ACRC xor $1021;
-     end;
-     //Bring the next data bit into the top position.
-     AByte := Byte((AByte and $7F) shl 1);
-   end;
- end;
- // Encodes a stream as a BinHex 4.0 block and writes the text to ADestStream.
- //
- // ASrcStream  - the attachment; it becomes the data fork (no resource fork is written).
- // ADestStream - receives GBinHex4IdentificationString followed by the encoded
- //               block in lines of at most 64 characters, each ended with EOL.
- // ABytes      - number of bytes to read, or -1 for what IndyLength reports.
- //
- // FileName must be set first; EIdMissingFileName is raised when it is empty.
- // Multi-byte header fields (fork length, CRCs) are written most significant
- // byte first, matching the way TIdDecoderBinHex4.Decode reads the fork length.
- // Runs of repeated bytes are not compressed, but every literal $90 is escaped
- // as $90 $00 so that a decoder does not mistake it for a run-length marker.
- procedure TIdEncoderBinHex4.Encode(ASrcStream: TStream; ADestStream: TStream; const ABytes: Integer = -1);
- var
-   LN: Integer;
-   LOffset: Integer;
-   LBlocks: Integer;
-   LRaw: TIdBytes;
-   LOut: TIdBytes;
-   LSSize: Integer;
-   LMarkers: Integer;
-   LPos: Integer;
-   LEscapedSize: Integer;
-   LFileName: {$IFDEF HAS_AnsiString}AnsiString{$ELSE}TIdBytes{$ENDIF};
-   LCRC: Word;
-   LRemainder: Integer;
- begin
-   if FFileName = '' then begin
-     raise EIdMissingFileName.Create('Data passed to TIdEncoderBinHex4.Encode is missing a filename'); {Do not Localize}
-   end;
-   //Read in the attachment first...
-   LSSize := IndyLength(ASrcStream, ABytes);
-   //BinHex4.0 allows filenames to be only 255 bytes long (because the length
-   //is stored in a byte), so truncate the filename to 255 bytes...
-   {$IFNDEF HAS_AnsiString}
-   LFileName := IndyTextEncoding_OSDefault.GetBytes(FFileName);
-   {$ELSE}
-   {$IFDEF STRING_IS_UNICODE}
-   LFileName := AnsiString(FFileName); // explicit convert to Ansi
-   {$ELSE}
-   LFileName := FFileName;
-   {$ENDIF}
-   {$ENDIF}
-   //Test the converted name: its byte count can differ from the character
-   //count of FFileName, and it is the byte count that is stored in the header.
-   if Length(LFileName) > 255 then begin
-     SetLength(LFileName, 255);
-   end;
-   //Construct the header...
-   SetLength(LRaw, 1+Length(LFileName)+1+4+4+2+4+4+2+LSSize+2);
-   LRaw[0] := Length(LFileName); //Length of filename in 1st byte
-   for LN := 1 to Length(LFileName) do begin
-     LRaw[LN] := {$IFNDEF HAS_AnsiString}LFileName[LN-1]{$ELSE}Byte(LFileName[LN]){$ENDIF};
-   end;
-   LOffset := 1+Length(LFileName); //Points to byte after filename
-   LRaw[LOffset] := 0; //Version
-   Inc(LOffset);
-   for LN := 0 to 7 do begin
-     LRaw[LOffset+LN] := 32; //Use spaces for Type & Creator
-   end;
-   Inc(LOffset, 8);
-   LRaw[LOffset] := 0; //Flags
-   LRaw[LOffset+1] := 0; //Flags
-   Inc(LOffset, 2);
-   //Length of data fork, most significant byte first
-   LRaw[LOffset] := (LSSize shr 24) and $FF;
-   LRaw[LOffset+1] := (LSSize shr 16) and $FF;
-   LRaw[LOffset+2] := (LSSize shr 8) and $FF;
-   LRaw[LOffset+3] := LSSize and $FF;
-   Inc(LOffset, 4);
-   LRaw[LOffset] := 0; //Length of resource fork
-   LRaw[LOffset+1] := 0; //Length of resource fork
-   LRaw[LOffset+2] := 0; //Length of resource fork
-   LRaw[LOffset+3] := 0; //Length of resource fork
-   Inc(LOffset, 4);
-   //Next comes the CRC for the header. AddByteCRC is a plain shift register, so
-   //two zero bytes (standing in for the CRC field itself) are fed in to finish.
-   LCRC := GetCRC(LRaw, 0, LOffset);
-   AddByteCRC(LCRC, 0);
-   AddByteCRC(LCRC, 0);
-   LRaw[LOffset] := LCRC shr 8; //CRC of header, high byte
-   LRaw[LOffset+1] := LCRC and $FF; //CRC of header, low byte
-   Inc(LOffset, 2);
-   //Next comes the data fork (we will not be using the resource fork)...
-   //Copy in the attachment...
-   TIdStreamHelper.ReadBytes(ASrcStream, LRaw, LSSize, LOffset);
-   LCRC := GetCRC(LRaw, LOffset, LSSize);
-   AddByteCRC(LCRC, 0);
-   AddByteCRC(LCRC, 0);
-   Inc(LOffset, LSSize);
-   LRaw[LOffset] := LCRC shr 8; //CRC of data fork, high byte
-   LRaw[LOffset+1] := LCRC and $FF; //CRC of data fork, low byte
-   Inc(LOffset, 2);
-   //Expand any bare $90 to $90 $00. This has to happen on the binary data,
-   //before the 3to4 encoding: the encoded text never contains a $90 byte.
-   LMarkers := 0;
-   for LN := 0 to LOffset-1 do begin
-     if LRaw[LN] = $90 then begin
-       Inc(LMarkers);
-     end;
-   end;
-   //To prepare for the 3to4 encoder, make sure our block is a multiple of 3...
-   LEscapedSize := LOffset + LMarkers;
-   if (LEscapedSize mod 3) > 0 then begin
-     Inc(LEscapedSize, 3 - (LEscapedSize mod 3));
-   end;
-   SetLength(LOut, LEscapedSize);
-   LPos := 0;
-   for LN := 0 to LOffset-1 do begin
-     LOut[LPos] := LRaw[LN];
-     Inc(LPos);
-     if LRaw[LN] = $90 then begin
-       LOut[LPos] := 0;
-       Inc(LPos);
-     end;
-   end;
-   //Zero the padding bytes explicitly.
-   while LPos < LEscapedSize do begin
-     LOut[LPos] := 0;
-     Inc(LPos);
-   end;
-   //We now need to 3to4 encode LOut...
-   //TODO: compress repetitive bytes to "<byte> $90 <run length>"
-   LOut := InternalEncode(LOut);
-   //Need to add a colon at the start & end of the block...
-   InsertByte(LOut, 58, 0);
-   AppendByte(LOut, 58);
-   WriteStringToStream(ADestStream, GBinHex4IdentificationString + EOL);
-   //Put back in our CRLFs. A max of 64 chars are allowed per line.
-   LBlocks := Length(LOut) div 64;
-   for LN := 0 to LBlocks-1 do begin
-     TIdStreamHelper.Write(ADestStream, LOut, 64, LN*64);
-     WriteStringToStream(ADestStream, EOL);
-   end;
-   LRemainder := Length(LOut) mod 64;
-   if LRemainder > 0 then begin
-     TIdStreamHelper.Write(ADestStream, LOut, LRemainder, LBlocks*64);
-     WriteStringToStream(ADestStream, EOL);
-   end;
- end;
- initialization
- // Build the decode table for the BinHex alphabet once, at unit start-up;
- // TIdDecoderBinHex4.InitComponent assigns it to each decoder instance.
- TIdDecoder4to3.ConstructDecodeTable(GBinHex4CodeTable, GBinHex4DecodeTable);
- end.
|