GZipInputStream.cs 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. // GzipInputStream.cs
  2. // Copyright (C) 2001 Mike Krueger
  3. //
  4. // This file was translated from java, it was part of the GNU Classpath
  5. // Copyright (C) 2001 Free Software Foundation, Inc.
  6. //
  7. // This program is free software; you can redistribute it and/or
  8. // modify it under the terms of the GNU General Public License
  9. // as published by the Free Software Foundation; either version 2
  10. // of the License, or (at your option) any later version.
  11. //
  12. // This program is distributed in the hope that it will be useful,
  13. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. // GNU General Public License for more details.
  16. //
  17. // You should have received a copy of the GNU General Public License
  18. // along with this program; if not, write to the Free Software
  19. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  20. //
  21. // Linking this library statically or dynamically with other modules is
  22. // making a combined work based on this library. Thus, the terms and
  23. // conditions of the GNU General Public License cover the whole
  24. // combination.
  25. //
  26. // As a special exception, the copyright holders of this library give you
  27. // permission to link this library with independent modules to produce an
  28. // executable, regardless of the license terms of these independent
  29. // modules, and to copy and distribute the resulting executable under
  30. // terms of your choice, provided that you also meet, for each linked
  31. // independent module, the terms and conditions of the license of that
  32. // module. An independent module is a module which is not derived from
  33. // or based on this library. If you modify this library, you may extend
  34. // this exception to your version of the library, but you are not
  35. // obligated to do so. If you do not wish to do so, delete this
  36. // exception statement from your version.
  37. using System;
  38. using System.IO;
  39. using ICSharpCode.SharpZipLib.Checksums;
  40. using ICSharpCode.SharpZipLib.Zip.Compression;
  41. using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
  42. namespace ICSharpCode.SharpZipLib.GZip
  43. {
  /// <summary>
  /// This filter stream is used to decompress a "GZIP" format stream.
  /// The "GZIP" format is described in RFC 1952.
  ///
  /// author of the original java version : John Leuner
  /// </summary>
  /// <example> This sample shows how to unzip a gzipped file
  /// <code>
  /// using System;
  /// using System.IO;
  ///
  /// using ICSharpCode.SharpZipLib.GZip;
  ///
  /// class MainClass
  /// {
  ///     public static void Main(string[] args)
  ///     {
  ///         Stream s = new GZipInputStream(File.OpenRead(args[0]));
  ///         FileStream fs = File.Create(Path.GetFileNameWithoutExtension(args[0]));
  ///         byte[] writeData = new byte[2048];
  ///         while (true) {
  ///             int size = s.Read(writeData, 0, writeData.Length);
  ///             if (size > 0) {
  ///                 fs.Write(writeData, 0, size);
  ///             } else {
  ///                 break;
  ///             }
  ///         }
  ///         fs.Close();
  ///         s.Close();
  ///     }
  /// }
  /// </code>
  /// </example>
  public class GZipInputStream : InflaterInputStream
  {
      /// <summary>
      /// CRC-32 value for uncompressed data
      /// </summary>
      protected Crc32 crc = new Crc32();

      /// <summary>
      /// Indicates end of stream. Set when the base stream is empty or
      /// once the GZIP footer has been read and verified.
      /// </summary>
      protected bool eos;

      // Have we read the GZIP header yet?
      bool readGZIPHeader;

      /// <summary>
      /// Creates a GZipInputStream with the default buffer size (4096 bytes)
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed data from (in GZIP format)
      /// </param>
      public GZipInputStream(Stream baseInputStream) : this(baseInputStream, 4096)
      {
      }

      /// <summary>
      /// Creates a GZipInputStream with the specified buffer size
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed data from (in GZIP format)
      /// </param>
      /// <param name="size">
      /// Size of the buffer to use
      /// </param>
      public GZipInputStream(Stream baseInputStream, int size) : base(baseInputStream, new Inflater(true), size)
      {
      }

      /// <summary>
      /// Reads uncompressed data into an array of bytes
      /// </summary>
      /// <param name="buf">
      /// the buffer to read uncompressed data into
      /// </param>
      /// <param name="offset">
      /// the offset indicating where the data should be placed
      /// </param>
      /// <param name="len">
      /// the number of uncompressed bytes to be read
      /// </param>
      /// <returns>
      /// The value returned by the base class read, or 0 once the end of
      /// the stream has been reached.
      /// </returns>
      /// <exception cref="IOException">
      /// The GZIP header or footer is malformed, or a checksum or length
      /// check failed. Premature end of input is reported as an
      /// <see cref="EndOfStreamException"/>.
      /// </exception>
      public override int Read(byte[] buf, int offset, int len)
      {
          // We first have to slurp in the GZIP header, then we feed all the
          // rest of the data to the superclass.
          //
          // As we do that we continually update the CRC32. Once the data is
          // finished, we check the CRC32.
          //
          // This means we don't need our own buffer, as everything is done
          // in the superclass.
          if (eos) {
              return 0;
          }
          if (!readGZIPHeader) {
              ReadHeader();
              // ReadHeader flags an empty base stream through eos.
              if (eos) {
                  return 0;
              }
          }

          int numRead = base.Read(buf, offset, len);
          if (numRead > 0) {
              crc.Update(buf, offset, numRead);
          }
          if (inf.IsFinished) {
              ReadFooter();
          }
          return numRead;
      }

      /// <summary>
      /// Reads one byte of the GZIP header from the base stream and folds
      /// it into the running header CRC.
      /// </summary>
      /// <param name="headCRC">Checksum accumulating every header byte</param>
      /// <param name="part">Name of the header part, used in the error message</param>
      /// <returns>The byte read (0..255)</returns>
      /// <exception cref="EndOfStreamException">The base stream ended</exception>
      private int ReadHeaderByte(Crc32 headCRC, string part)
      {
          int value = baseInputStream.ReadByte();
          if (value < 0) {
              throw new EndOfStreamException("Early EOF in GZIP " + part);
          }
          headCRC.Update(value);
          return value;
      }

      /// <summary>
      /// Skips a zero-terminated header field (file name or comment),
      /// including its terminator, while updating the header CRC.
      /// </summary>
      private void SkipZeroTerminatedField(Crc32 headCRC, string part)
      {
          while (ReadHeaderByte(headCRC, part) != 0) {
              // nothing to do, the content is not needed
          }
      }

      /// <summary>
      /// Assembles a 32 bit value stored least-significant byte first.
      /// </summary>
      private static int ReadLeInt32(byte[] data, int offset)
      {
          return (data[offset] & 0xff)
              | ((data[offset + 1] & 0xff) << 8)
              | ((data[offset + 2] & 0xff) << 16)
              | (data[offset + 3] << 24);
      }

      /// <summary>
      /// Parses and validates the GZIP member header (RFC 1952, section 2.3).
      /// Sets <see cref="eos"/> instead of throwing when the base stream is
      /// completely empty.
      /// </summary>
      private void ReadHeader()
      {
          Crc32 headCRC = new Crc32();

          /* 1. Check the two magic bytes */
          int magic = baseInputStream.ReadByte();
          if (magic < 0) {
              // Nothing to read at all: treat as an empty stream, not an error.
              eos = true;
              return;
          }
          headCRC.Update(magic);
          if (magic != (GZipConstants.GZIP_MAGIC >> 8)) {
              throw new IOException("Error in GZIP header, first byte doesn't match");
          }

          magic = ReadHeaderByte(headCRC, "header");
          if (magic != (GZipConstants.GZIP_MAGIC & 0xFF)) {
              throw new IOException("Error in GZIP header, second byte doesn't match");
          }

          /* 2. Check the compression type (must be 8) */
          int CM = ReadHeaderByte(headCRC, "header");
          if (CM != 8) {
              throw new IOException("Error in GZIP header, data not in deflate format");
          }

          /* 3. Check the flags */
          int flags = ReadHeaderByte(headCRC, "header");

          /* This flag byte is divided into individual bits as follows:
             bit 0 FTEXT
             bit 1 FHCRC
             bit 2 FEXTRA
             bit 3 FNAME
             bit 4 FCOMMENT
             bit 5 reserved
             bit 6 reserved
             bit 7 reserved
          */

          /* 3.1 Check the reserved bits (5, 6 and 7) are zero */
          if ((flags & 0xE0) != 0) {
              throw new IOException("Reserved flag bits in GZIP header != 0");
          }

          /* 4.-6. Skip the modification time, extra flags, and OS type */
          for (int i = 0; i < 6; i++) {
              ReadHeaderByte(headCRC, "header");
          }

          /* 7. Read extra field: a two byte length (XLEN, least significant
           *    byte first) followed by XLEN bytes of subfields, all skipped.
           */
          if ((flags & GZipConstants.FEXTRA) != 0) {
              int lenLow = ReadHeaderByte(headCRC, "header");
              int lenHigh = ReadHeaderByte(headCRC, "header");
              int extraLen = (lenHigh << 8) | lenLow;
              for (int i = 0; i < extraLen; i++) {
                  ReadHeaderByte(headCRC, "header");
              }
          }

          /* 8. Read file name */
          if ((flags & GZipConstants.FNAME) != 0) {
              SkipZeroTerminatedField(headCRC, "file name");
          }

          /* 9. Read comment */
          if ((flags & GZipConstants.FCOMMENT) != 0) {
              SkipZeroTerminatedField(headCRC, "comment");
          }

          /* 10. Read header CRC: the low 16 bits of the CRC-32 of all
           *     preceding header bytes, stored least significant byte first.
           *     These two bytes are not part of the checksum themselves.
           */
          if ((flags & GZipConstants.FHCRC) != 0) {
              int crcLow = baseInputStream.ReadByte();
              if (crcLow < 0) {
                  throw new EndOfStreamException("Early EOF in GZIP header");
              }
              int crcHigh = baseInputStream.ReadByte();
              if (crcHigh < 0) {
                  throw new EndOfStreamException("Early EOF in GZIP header");
              }
              int crcval = (crcHigh << 8) | crcLow;
              // Mask before narrowing so the cast can never overflow.
              if (crcval != (int) (headCRC.Value & 0xffff)) {
                  throw new IOException("Header CRC value mismatch");
              }
          }

          readGZIPHeader = true;
      }

      /// <summary>
      /// Reads the 8 byte GZIP footer (CRC-32 and uncompressed size, both
      /// least significant byte first), verifies it against the data that
      /// was decompressed and marks the stream as finished.
      /// </summary>
      private void ReadFooter()
      {
          byte[] footer = new byte[8];

          // Part of the footer may already sit in the inflater's input
          // buffer as unconsumed input; take those bytes from there first.
          int remaining = inf.RemainingInput;
          int avail = remaining > 8 ? 8 : remaining;
          System.Array.Copy(buf, len - remaining, footer, 0, avail);

          // Fetch whatever is still missing straight from the base stream.
          int needed = 8 - avail;
          while (needed > 0) {
              int count = baseInputStream.Read(footer, 8 - needed, needed);
              if (count <= 0) {
                  throw new EndOfStreamException("Early EOF in GZIP footer");
              }
              needed -= count;
          }

          int crcval = ReadLeInt32(footer, 0);
          // Reinterpret the checksum's low 32 bits as a signed int, even
          // when the assembly is compiled with overflow checking enabled.
          int ourCrc = unchecked((int) crc.Value);
          if (crcval != ourCrc) {
              throw new IOException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + ourCrc + "\"");
          }

          int total = ReadLeInt32(footer, 4);
          if (total != inf.TotalOut) {
              throw new IOException("Number of bytes mismatch");
          }

          /* XXX Should we support multiple members.
           * Difficult, since there may be some bytes still in buf
           */
          eos = true;
      }
  }
  304. }