IdCoderBinHex4.pas 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. {
  2. $Project$
  3. $Workfile$
  4. $Revision$
  5. $DateUTC$
  6. $Id$
  7. This file is part of the Indy (Internet Direct) project, and is offered
  8. under the dual-licensing agreement described on the Indy website.
  9. (http://www.indyproject.org/)
  10. Copyright:
  11. (c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
  12. }
  13. {
  14. $Log$
  15. }
  16. {
  17. Rev 1.7 10/6/2004 10:47:00 PM BGooijen
  18. changed array indexer from 64 to 32 bit, it gave errors in dotnet, and making
  19. >2GB arrays is not done anyways
  20. Rev 1.6 2004.05.20 1:39:28 PM czhower
  21. Last of the IdStream updates
  22. Rev 1.5 2004.05.20 11:37:24 AM czhower
  23. IdStreamVCL
  24. Rev 1.4 2004.05.19 3:06:56 PM czhower
  25. IdStream / .NET fix
  26. Rev 1.3 2004.02.03 5:45:50 PM czhower
  27. Name changes
  28. Rev 1.2 1/21/2004 1:19:58 PM JPMugaas
  29. InitComponent.
  30. Rev 1.1 16/01/2004 18:00:26 CCostelloe
  31. This is now working code.
  32. Rev 1.0 14/01/2004 00:46:14 CCostelloe
  33. An implementation of Apple's BinHex4 encoding. It is a "work-in-progress",
  34. it does not yet work properly, only checked in as a placeholder.
  35. }
  36. unit IdCoderBinHex4;
  37. {
  38. Written by Ciaran Costelloe, [email protected], December 2003.
  39. Based on TIdCoderMIME, derived from TIdCoder3to4, derived from TIdCoder.
  40. DESCRIPTION:
  41. This is an implementation of the BinHex 4.0 decoder used particularly by Apple.
  42. It is defined in RFC 1741. It is a variant of a 3-to-4 decoder, but it uses
  43. character 90 for sequences of repeating characters, allowing some compression,
  44. but thereby not allowing it to be mapped in as another 3-to-4 decoder.
  45. Per the RFC, it must be encapsulated in a MIME part (it cannot be directly coded
  46. inline in an email "body"), the part is strictly defined to have a header entry
  47. (with the appropriate "myfile.ext"):
  48. Content-Type: application/mac-binhex40; name="myfile.ext"
  49. After the header, the part MUST start with the text (NOT indented):
  50. (This file must be converted with BinHex 4.0)
  51. This allows the option AND the ambiguity of identifying it by either the
  52. Content-Type OR by the initial text line. However, it is also stated that any
  53. text before the specified text line must be ignored, implying the line does not
  54. have to be the first - an apparent contradiction.
  55. The encoded file then follows, split with CRLFs (to avoid lines that are too long
  56. for emails) that must be discarded.
  57. The file starts with a colon (:), a header, followed by the file contents, and
  58. ending in another colon.
  59. There is also an interesting article on the web, "BinHex 4.0 Definition by Peter
  60. N Lewis, Aug 1991", which has very useful information on what is implemeted in
  61. practice, and seems to come with the good provenance of bitter experience.
  62. From RFC 1741:
  63. 1) 8 bit encoding of the file:
  64. Byte: Length of FileName (1->63)
  65. Bytes: FileName ("Length" bytes)
  66. Byte: Version
  67. Long: Type
  68. Long: Creator
  69. Word: Flags (And $F800)
  70. Long: Length of Data Fork
  71. Long: Length of Resource Fork
  72. Word: CRC
  73. Bytes: Data Fork ("Data Length" bytes)
  74. Word: CRC
  75. Bytes: Resource Fork ("Rsrc Length" bytes)
  76. Word: CRC
  77. 2) Compression of repetitive characters.
  78. ($90 is the marker, encoding is made for 3->255 characters)
  79. 00 11 22 33 44 55 66 77 -> 00 11 22 33 44 55 66 77
  80. 11 22 22 22 22 22 22 33 -> 11 22 90 06 33
  81. 11 22 90 33 44 -> 11 22 90 00 33 44
  82. The whole file is considered as a stream of bits. This stream will
  83. be divided in blocks of 6 bits and then converted to one of 64
  84. characters contained in a table. The characters in this table have
  85. been chosen for maximum noise protection. The format will start
  86. with a ":" (first character on a line) and end with a ":".
  87. There will be a maximum of 64 characters on a line. It must be
  88. preceded, by this comment, starting in column 1 (it does not start
  89. in column 1 in this document):
  90. (This file must be converted with BinHex 4.0)
  91. Any text before this comment is to be ignored.
  92. The characters used are:
  93. !"#$%&'()*+,- 012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr
  94. IMPLEMENTATION NOTES:
  95. There are older variants referred to in RFC 1741, but I have only come
  96. across encodings in current use as separate MIME parts, which this
  97. implementation is targetted at.
  98. When encoding into BinHex4, you do NOT have to implement the run-length
  99. encoding (the character 90 for sequences of repeating characters), and
  100. this encoder does not do it. The CRC values generated in the header have
  101. NOT been tested (because this decoder ignores them).
  102. The decoder has to allow for the run-length encoding. The decoder works
  103. irrespective of whether it is preceded by the identification string
  104. or not (GBinHex4IdentificationString below). The string to be decoded must
  105. include the starting and ending colons. It can deal with embedded CR and LFs.
  106. Unlike base64 and quoted-printable, we cannot decode line-by-line cleanly,
  107. because the lines do not contain a clean number of 4-byte blocks due to the
  108. first line starting with a colon, leaving 63 bytes on that line, plus you have
  109. the problem of dealing with the run-length encoding and stripping the header.
  110. If the attachment only has a data fork, it is saved; if only a resource fork,
  111. it is saved; if both, only the data fork is saved. The decoder does NOT
  112. check that the CRC values are correct.
  113. Indy units use the content-type to decide if the part is BinHex4:
  114. Content-Type: application/mac-binhex40; name="myfile.ext"
  115. WARNING: This code only implements BinHex4.0 when used as a part in a
  116. MIME-encoded email. To have a part encoded, set the parts
  117. ContentTransfer := 'binhex40'.
  118. }
  119. interface
  120. {$i IdCompilerDefines.inc}
  121. uses
  122. Classes,
  123. IdException,
  124. IdCoder,
  125. IdCoder3to4,
  126. IdGlobal,
  127. IdStream,
  128. SysUtils;
  129. type
  130. TIdEncoderBinHex4 = class(TIdEncoder3to4)
  131. protected
  132. FFileName: String;
  133. function GetCRC(const ABlock: TIdBytes; const AOffset: Integer = 0; const ASize: Integer = -1): Word;
  134. procedure AddByteCRC(var ACRC: Word; AByte: Byte);
  135. procedure InitComponent; override;
  136. public
  137. {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
  138. constructor Create(AOwner: TComponent); reintroduce; overload;
  139. {$ENDIF}
  140. procedure Encode(ASrcStream: TStream; ADestStream: TStream; const ABytes: Integer = -1); override;
  141. //We need to specify this value before calling Encode...
  142. property FileName: String read FFileName write FFileName;
  143. end;
  144. TIdDecoderBinHex4 = class(TIdDecoder4to3)
  145. protected
  146. procedure InitComponent; override;
  147. public
  148. {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
  149. constructor Create(AOwner: TComponent); reintroduce; overload;
  150. {$ENDIF}
  151. procedure Decode(ASrcStream: TStream; const ABytes: Integer = -1); override;
  152. end;
  153. const
  154. //Note the 7th characeter is a ' which is represented in a string as ''
  155. GBinHex4CodeTable: string = '!"#$%&''()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr'; {Do not Localize}
  156. GBinHex4IdentificationString: string = '(This file must be converted with BinHex 4.0)'; {Do not Localize}
  157. type
  158. EIdMissingColon = class(EIdException);
  159. EIdMissingFileName = class(EIdException);
  160. var
  161. GBinHex4DecodeTable: TIdDecodeTable;
  162. implementation
  163. uses
  164. IdResourceStrings;
  165. { TIdDecoderBinHex4 }
  166. {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
  167. constructor TIdDecoderBinHex4.Create(AOwner: TComponent);
  168. begin
  169. inherited Create(AOwner);
  170. end;
  171. {$ENDIF}
  172. procedure TIdDecoderBinHex4.InitComponent;
  173. begin
  174. inherited InitComponent;
  175. FDecodeTable := GBinHex4DecodeTable;
  176. FCodingTable := ToBytes(GBinHex4CodeTable);
  177. FFillChar := '='; {Do not Localize}
  178. end;
  179. procedure TIdDecoderBinHex4.Decode(ASrcStream: TStream; const ABytes: Integer = -1);
  180. var
  181. LCopyToPos: integer;
  182. LIn : TIdBytes;
  183. LInSize: Integer;
  184. LOut: TIdBytes;
  185. LN: Integer;
  186. LRepetition: Integer;
  187. LForkLength: Integer;
  188. begin
  189. LInSize := IndyLength(ASrcStream, ABytes);
  190. if LInSize <= 0 then begin
  191. Exit;
  192. end;
  193. SetLength(LIn, LInSize);
  194. TIdStreamHelper.ReadBytes(ASrcStream, LIn, LInSize);
  195. //We don't need to check if the identification string is present, since the
  196. //attachment is bounded by a : at the start and end, and the identification
  197. //string may have been stripped off already.
  198. //While we are at it, remove all the CRs and LFs...
  199. LCopyToPos := -1;
  200. for LN := 0 to LInSize-1 do begin
  201. if LIn[LN] = 58 then begin //Ascii 58 is a colon :
  202. if LCopyToPos = -1 then begin
  203. //This is the start of the file...
  204. LCopyToPos := 0;
  205. end else begin
  206. //This is the second :, i.e. the end of the file...
  207. SetLength(LIn, LCopyToPos);
  208. LCopyToPos := -2; //Flag that we got an end marker
  209. Break;
  210. end;
  211. end else begin
  212. if (LCopyToPos > -1) and (not ByteIsInEOL(LIn, LN)) then begin
  213. LIn[LCopyToPos] := LIn[LN];
  214. Inc(LCopyToPos);
  215. end;
  216. end;
  217. end;
  218. //did we get the initial colon?
  219. if LCopyToPos = -1 then begin
  220. raise EIdMissingColon.Create('Block passed to TIdDecoderBinHex4.Decode is missing a starting colon :'); {Do not Localize}
  221. end;
  222. //did we get the terminating colon?
  223. if LCopyToPos <> -2 then begin
  224. raise EIdMissingColon.Create('Block passed to TIdDecoderBinHex4.Decode is missing a terminating colon :'); {Do not Localize}
  225. end;
  226. if Length(LIn) = 0 then begin
  227. Exit;
  228. end;
  229. LOut := InternalDecode(LIn);
  230. // Now expand the run-length encoding.
  231. // $90 is the marker, encoding is made for 3->255 characters
  232. // 00 11 22 33 44 55 66 77 -> 00 11 22 33 44 55 66 77
  233. // 11 22 22 22 22 22 22 33 -> 11 22 90 06 33
  234. // 11 22 90 33 44 -> 11 22 90 00 33 44
  235. LN := 0;
  236. while LN < Length(LOut) do begin
  237. if LOut[LN] = $90 then begin
  238. LRepetition := LOut[LN+1];
  239. if LRepetition = 0 then begin
  240. //90 is by itself, so just remove the 00
  241. //22 90 00 -> 22 90
  242. RemoveBytes(LOut, LN+1, 1);
  243. Inc(LN); //Move past the $90
  244. end
  245. else if LRepetition = 1 then begin
  246. //Not allowed: 22 90 01 -> 22
  247. //Throw an exception or deal with it? Deal with it.
  248. RemoveBytes(LOut, LN, 2);
  249. end
  250. else if LRepetition = 2 then begin
  251. //Not allowed: 22 90 02 -> 22 22
  252. //Throw an exception or deal with it? Deal with it.
  253. LOut[LN] := LOut[LN-1];
  254. RemoveBytes(LOut, LN+1, 1);
  255. Inc(LN);
  256. end
  257. else if LRepetition = 3 then begin
  258. //22 90 03 -> 22 22 22
  259. LOut[LN] := LOut[LN-1];
  260. LOut[LN+1] := LOut[LN-1];
  261. Inc(LN, 2);
  262. end
  263. else begin
  264. //Repetition is 4 to 255: expand the sequence.
  265. //22 90 04 -> 22 22 22 22
  266. LOut[LN] := LOut[LN-1];
  267. LOut[LN+1] := LOut[LN-1];
  268. ExpandBytes(LOut, LN+2, LRepetition-2, LOut[LN-1]);
  269. Inc(LN, LRepetition-1);
  270. end;
  271. end else begin
  272. Inc(LN);
  273. end;
  274. end;
  275. //We are not finished yet. Strip off the header, by calculating the offset
  276. //of the start of the attachment and it's length.
  277. LN := 1 + LOut[0]; //Length byte + length of filename
  278. Inc(LN, 1 + 4 + 4 + 2); //Version, type, creator, flags
  279. // TODO: use one of the BytesTo...() functions here instead?
  280. LForkLength := (((((LOut[LN]*256)+LOut[LN+1])*256)+LOut[LN+2])*256)+LOut[LN+3];
  281. Inc(LN, 4); //Go past the data fork length
  282. if LForkLength = 0 then begin
  283. //No data fork present, save the resource fork instead...
  284. // TODO: use one of the BytesTo...() functions here instead?
  285. LForkLength := (((((LOut[LN]*256)+LOut[LN+1])*256)+LOut[LN+2])*256)+LOut[LN+3];
  286. end;
  287. Inc(LN, 4); //Go past the resource fork length
  288. Inc(LN, 2); //CRC
  289. //At this point, LOut[LN] points to the actual data (the data fork, if there
  290. //is one, or else the resource fork if there is no data fork).
  291. if Assigned(FStream) then begin
  292. TIdStreamHelper.Write(FStream, LOut, LForkLength, LN);
  293. end;
  294. end;
  295. { TIdEncoderBinHex4 }
  296. {$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
  297. constructor TIdEncoderBinHex4.Create(AOwner: TComponent);
  298. begin
  299. inherited Create(AOwner);
  300. end;
  301. {$ENDIF}
  302. procedure TIdEncoderBinHex4.InitComponent;
  303. begin
  304. inherited InitComponent;
  305. FCodingTable := ToBytes(GBinHex4CodeTable);
  306. FFillChar := '='; {Do not Localize}
  307. end;
  308. function TIdEncoderBinHex4.GetCRC(const ABlock: TIdBytes; const AOffset: Integer = 0;
  309. const ASize: Integer = -1): Word;
  310. var
  311. LN: Integer;
  312. LActual: Integer;
  313. begin
  314. Result := 0;
  315. LActual := IndyLength(ABlock, ASize, AOffset);
  316. if LActual > 0 then
  317. begin
  318. for LN := 0 to LActual-1 do begin
  319. AddByteCRC(Result, ABlock[AOffset+LN]);
  320. end;
  321. end;
  322. end;
  323. procedure TIdEncoderBinHex4.AddByteCRC(var ACRC: Word; AByte: Byte);
  324. //BinHex 4.0 uses a 16-bit CRC with an 0x1021 seed.
  325. var
  326. LWillShiftedOutBitBeA1: boolean;
  327. LN: integer;
  328. begin
  329. for LN := 1 to 8 do begin
  330. LWillShiftedOutBitBeA1 := (ACRC and $8000) <> 0;
  331. //Shift the CRC left, and add the next bit from our byte...
  332. ACRC := (ACRC shl 1) or (AByte shr 7);
  333. if LWillShiftedOutBitBeA1 then begin
  334. ACRC := ACRC xor $1021;
  335. end;
  336. AByte := (AByte shl 1) and $FF;
  337. end;
  338. end;
  339. procedure TIdEncoderBinHex4.Encode(ASrcStream: TStream; ADestStream: TStream; const ABytes: Integer = -1);
  340. var
  341. LN: Integer;
  342. LOffset: Integer;
  343. LBlocks: Integer;
  344. LOut: TIdBytes;
  345. LSSize, LTemp: Integer;
  346. LFileName: {$IFDEF HAS_AnsiString}AnsiString{$ELSE}TIdBytes{$ENDIF};
  347. LCRC: word;
  348. LRemainder: integer;
  349. begin
  350. if FFileName = '' then begin
  351. raise EIdMissingFileName.Create('Data passed to TIdEncoderBinHex4.Encode is missing a filename'); {Do not Localize}
  352. end;
  353. //Read in the attachment first...
  354. LSSize := IndyLength(ASrcStream, ABytes);
  355. //BinHex4.0 allows filenames to be only 255 bytes long (because the length
  356. //is stored in a byte), so truncate the filename to 255 bytes...
  357. {$IFNDEF HAS_AnsiString}
  358. LFileName := IndyTextEncoding_OSDefault.GetBytes(FFileName);
  359. {$ELSE}
  360. {$IFDEF STRING_IS_UNICODE}
  361. LFileName := AnsiString(FFileName); // explicit convert to Ansi
  362. {$ELSE}
  363. LFileName := FFileName;
  364. {$ENDIF}
  365. {$ENDIF}
  366. if Length(FFileName) > 255 then begin
  367. SetLength(LFileName, 255);
  368. end;
  369. //Construct the header...
  370. SetLength(LOut, 1+Length(LFileName)+1+4+4+2+4+4+2+LSSize+2);
  371. LOut[0] := Length(LFileName); //Length of filename in 1st byte
  372. for LN := 1 to Length(LFileName) do begin
  373. LOut[LN] := {$IFNDEF HAS_AnsiString}LFileName[LN-1]{$ELSE}Byte(LFileName[LN]){$ENDIF};
  374. end;
  375. LOffset := 1+Length(LFileName); //Points to byte after filename
  376. LOut[LOffset] := 0; //Version
  377. Inc(LOffset);
  378. for LN := 0 to 7 do begin
  379. LOut[LOffset+LN] := 32; //Use spaces for Type & Creator
  380. end;
  381. Inc(LOffset, 8);
  382. LOut[LOffset] := 0; //Flags
  383. LOut[LOffset+1] := 0; //Flags
  384. Inc(LOffset, 2);
  385. LTemp := LSSize;
  386. LOut[LOffset] := LTemp mod 256; //Length of data fork
  387. LTemp := LTemp div 256;
  388. LOut[LOffset+1] := LTemp mod 256; //Length of data fork
  389. LTemp := LTemp div 256;
  390. LOut[LOffset+2] := LTemp mod 256; //Length of data fork
  391. LTemp := LTemp div 256;
  392. LOut[LOffset+3] := LTemp; //Length of data fork
  393. Inc(LOffset, 4);
  394. LOut[LOffset] := 0; //Length of resource fork
  395. LOut[LOffset+1] := 0; //Length of resource fork
  396. LOut[LOffset+2] := 0; //Length of resource fork
  397. LOut[LOffset+3] := 0; //Length of resource fork
  398. Inc(LOffset, 4);
  399. //Next comes the CRC for the header...
  400. LCRC := GetCRC(LOut, 0, LOffset);
  401. LOut[LOffset] := LCRC mod 256; //CRC of data fork
  402. LCRC := LCRC div 256;
  403. LOut[LOffset+1] := LCRC; //CRC of data fork
  404. Inc(LOffset, 2);
  405. //Next comes the data fork (we will not be using the resource fork)...
  406. //Copy in the attachment...
  407. TIdStreamHelper.ReadBytes(ASrcStream, LOut, LSSize, LOffset);
  408. LCRC := GetCRC(LOut, LOffset, LSSize);
  409. Inc(LOffset, LSSize);
  410. LOut[LOffset] := LCRC mod 256; //CRC of data fork
  411. LCRC := LCRC div 256;
  412. LOut[LOffset+1] := LCRC; //CRC of data fork
  413. Inc(LOffset, 2);
  414. //To prepare for the 3to4 encoder, make sure our block is a multiple of 3...
  415. LSSize := LOffset mod 3;
  416. if LSSize > 0 then begin
  417. ExpandBytes(LOut, LOffset, 3-LSSize);
  418. end;
  419. //We now need to 3to4 encode LOut...
  420. //TODO: compress repetitive bytes to "<byte> $90 <run length>"
  421. LOut := InternalEncode(LOut);
  422. //Need to add a colon at the start & end of the block...
  423. InsertByte(LOut, 58, 0);
  424. AppendByte(LOut, 58);
  425. //Expand any bare $90 to $90 $00
  426. LN := 0;
  427. while LN < Length(LOut) do begin
  428. if LOut[LN] = $90 then begin
  429. InsertByte(LOut, 0, LN+1);
  430. Inc(LN);
  431. end;
  432. Inc(LN);
  433. end;
  434. WriteStringToStream(ADestStream, GBinHex4IdentificationString + EOL);
  435. //Put back in our CRLFs. A max of 64 chars are allowed per line.
  436. LBlocks := Length(LOut) div 64;
  437. for LN := 0 to LBlocks-1 do begin
  438. TIdStreamHelper.Write(ADestStream, LOut, 64, LN*64);
  439. WriteStringToStream(ADestStream, EOL);
  440. end;
  441. LRemainder := Length(LOut) mod 64;
  442. if LRemainder > 0 then begin
  443. TIdStreamHelper.Write(ADestStream, LOut, LRemainder, LBlocks*64);
  444. WriteStringToStream(ADestStream, EOL);
  445. end;
  446. end;
  447. initialization
  448. TIdDecoder4to3.ConstructDecodeTable(GBinHex4CodeTable, GBinHex4DecodeTable);
  449. end.