snappy_unittest.cc 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445
  1. // Copyright 2005 and onwards Google Inc.
  2. //
  3. // Redistribution and use in source and binary forms, with or without
  4. // modification, are permitted provided that the following conditions are
  5. // met:
  6. //
  7. // * Redistributions of source code must retain the above copyright
  8. // notice, this list of conditions and the following disclaimer.
  9. // * Redistributions in binary form must reproduce the above
  10. // copyright notice, this list of conditions and the following disclaimer
  11. // in the documentation and/or other materials provided with the
  12. // distribution.
  13. // * Neither the name of Google Inc. nor the names of its
  14. // contributors may be used to endorse or promote products derived from
  15. // this software without specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28. #include <math.h>
  29. #include <stdlib.h>
  30. #include <algorithm>
  31. #include <string>
  32. #include <utility>
  33. #include <vector>
  34. #include "snappy.h"
  35. #include "snappy-internal.h"
  36. #include "snappy-test.h"
  37. #include "snappy-sinksource.h"
  38. DEFINE_int32(start_len, -1,
  39. "Starting prefix size for testing (-1: just full file contents)");
  40. DEFINE_int32(end_len, -1,
  41. "Starting prefix size for testing (-1: just full file contents)");
  42. DEFINE_int32(bytes, 10485760,
  43. "How many bytes to compress/uncompress per file for timing");
  44. DEFINE_bool(zlib, false,
  45. "Run zlib compression (http://www.zlib.net)");
  46. DEFINE_bool(lzo, false,
  47. "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
  48. DEFINE_bool(fastlz, false,
  49. "Run FastLZ compression (http://www.fastlz.org/");
  50. DEFINE_bool(snappy, true, "Run snappy compression");
  51. DEFINE_bool(write_compressed, false,
  52. "Write compressed versions of each file to <file>.comp");
  53. DEFINE_bool(write_uncompressed, false,
  54. "Write uncompressed versions of each file to <file>.uncomp");
  55. DEFINE_bool(snappy_dump_decompression_table, false,
  56. "If true, we print the decompression table during tests.");
  57. namespace snappy {
#ifdef HAVE_FUNC_MMAP

// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
// is made unreadable via mprotect. Note that we need to allocate the
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
// with mmap(), and some malloc/posix_memalign implementations expect to
// be able to read previously allocated memory while doing heap allocations.
class DataEndingAtUnreadablePage {
 public:
  explicit DataEndingAtUnreadablePage(const string& s) {
    const size_t page_size = getpagesize();
    const size_t size = s.size();
    // Round up space for string to a multiple of page_size.
    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    // One extra page at the end serves as the unreadable guard page.
    alloc_size_ = space_for_string + page_size;
    mem_ = mmap(NULL, alloc_size_,
                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    CHECK_NE(MAP_FAILED, mem_);
    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    // Place the copy flush against the guard page so that reading even one
    // byte past the end of the data faults immediately.
    char* dst = protected_page_ - size;
    memcpy(dst, s.data(), size);
    data_ = dst;
    size_ = size;
    // Make guard page unreadable.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
  }

  ~DataEndingAtUnreadablePage() {
    // Undo the mprotect so the region can be unmapped cleanly.
    CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
    CHECK_EQ(0, munmap(mem_, alloc_size_));
  }

  const char* data() const { return data_; }
  size_t size() const { return size_; }

 private:
  size_t alloc_size_;     // Total bytes passed to mmap()/munmap().
  void* mem_;             // Start of the mapped region.
  char* protected_page_;  // First byte of the PROT_NONE guard page.
  const char* data_;      // Start of the copied string; ends at the guard page.
  size_t size_;           // Length of the copied string.
};

#else  // HAVE_FUNC_MMAP

// Fallback for systems without mmap: a plain string offers the same
// data()/size() interface, just without guard-page overread detection.
typedef string DataEndingAtUnreadablePage;

#endif
// Compression libraries the tests/benchmarks can exercise.
enum CompressorType {
  ZLIB, LZO, FASTLZ, SNAPPY
};

// Human-readable names, indexed by CompressorType.
const char* names[] = {
  "ZLIB", "LZO", "FASTLZ", "SNAPPY"
};
// Returns the worst-case output-buffer size library `comp` may need to
// compress `input_size` bytes.  Cases for libraries that were not compiled
// in are absent, so those values hit the FATAL default.
static size_t MinimumRequiredOutputSpace(size_t input_size,
                                         CompressorType comp) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB:
      return ZLib::MinCompressbufSize(input_size);
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO:
      // Worst-case expansion bound for lzo1x.
      return input_size + input_size/64 + 16 + 3;
#endif  // LZO_VERSION

#ifdef FASTLZ_VERSION
    case FASTLZ:
      // FastLZ needs 5% headroom, with a 66-byte minimum.
      return max(static_cast<int>(ceil(input_size * 1.05)), 66);
#endif  // FASTLZ_VERSION

    case SNAPPY:
      return snappy::MaxCompressedLength(input_size);

    default:
      LOG(FATAL) << "Unknown compression type number " << comp;
      return 0;
  }
}
// Returns true if we successfully compressed, false otherwise.
//
// If compressed_is_preallocated is set, do not resize the compressed buffer.
// This is typically what you want for a benchmark, in order to not spend
// time in the memory allocator. If you do set this flag, however,
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
// number of bytes, and may contain junk bytes at the end after return.
static bool Compress(const char* input, size_t input_size, CompressorType comp,
                     string* compressed, bool compressed_is_preallocated) {
  if (!compressed_is_preallocated) {
    compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
  }

  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB: {
      ZLib zlib;
      uLongf destlen = compressed->size();
      int ret = zlib.Compress(
          reinterpret_cast<Bytef*>(string_as_array(compressed)),
          &destlen,
          reinterpret_cast<const Bytef*>(input),
          input_size);
      CHECK_EQ(Z_OK, ret);
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      return true;
    }
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO: {
      // LZO needs separate scratch working memory in addition to the
      // output buffer.
      unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
      lzo_uint destlen;
      int ret = lzo1x_1_15_compress(
          reinterpret_cast<const uint8*>(input),
          input_size,
          reinterpret_cast<uint8*>(string_as_array(compressed)),
          &destlen,
          mem);
      CHECK_EQ(LZO_E_OK, ret);
      delete[] mem;
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // LZO_VERSION

#ifdef FASTLZ_VERSION
    case FASTLZ: {
      // Use level 1 compression since we mostly care about speed.
      int destlen = fastlz_compress_level(
          1,
          input,
          input_size,
          string_as_array(compressed));
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      // fastlz reports failure via a zero-length result.
      CHECK_NE(destlen, 0);
      break;
    }
#endif  // FASTLZ_VERSION

    case SNAPPY: {
      size_t destlen;
      snappy::RawCompress(input, input_size,
                          string_as_array(compressed),
                          &destlen);
      CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }

    default: {
      return false;  // the asked-for library wasn't compiled in
    }
  }
  return true;
}
// Decompresses `compressed` (produced by library `comp`) into *output,
// checking that exactly `size` bytes come out.  Returns false if the
// requested library was not compiled in.
// NOTE(review): the SNAPPY case writes into the existing buffer without
// resizing it first -- callers (see Measure) resize *output beforehand.
static bool Uncompress(const string& compressed, CompressorType comp,
                       int size, string* output) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB: {
      output->resize(size);
      ZLib zlib;
      uLongf destlen = output->size();
      int ret = zlib.Uncompress(
          reinterpret_cast<Bytef*>(string_as_array(output)),
          &destlen,
          reinterpret_cast<const Bytef*>(compressed.data()),
          compressed.size());
      CHECK_EQ(Z_OK, ret);
      CHECK_EQ(static_cast<uLongf>(size), destlen);
      break;
    }
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO: {
      output->resize(size);
      lzo_uint destlen;
      int ret = lzo1x_decompress(
          reinterpret_cast<const uint8*>(compressed.data()),
          compressed.size(),
          reinterpret_cast<uint8*>(string_as_array(output)),
          &destlen,
          NULL);
      CHECK_EQ(LZO_E_OK, ret);
      CHECK_EQ(static_cast<lzo_uint>(size), destlen);
      break;
    }
#endif  // LZO_VERSION

#ifdef FASTLZ_VERSION
    case FASTLZ: {
      output->resize(size);
      int destlen = fastlz_decompress(compressed.data(),
                                      compressed.length(),
                                      string_as_array(output),
                                      size);
      CHECK_EQ(destlen, size);
      break;
    }
#endif  // FASTLZ_VERSION

    case SNAPPY: {
      snappy::RawUncompress(compressed.data(), compressed.size(),
                            string_as_array(output));
      break;
    }

    default: {
      return false;  // the asked-for library wasn't compiled in
    }
  }
  return true;
}
// Benchmarks compression and decompression of `data` (length bytes) with
// library `comp`: the input is chopped into `block_size` chunks, each
// round-tripped `repeats` times, and a one-line summary with median
// throughput in MB/s is printed.
static void Measure(const char* data,
                    size_t length,
                    CompressorType comp,
                    int repeats,
                    int block_size) {
  // Run tests a few time and pick median running times
  static const int kRuns = 5;
  double ctime[kRuns];
  double utime[kRuns];
  int compressed_size = 0;

  {
    // Chop the input into blocks
    int num_blocks = (length + block_size - 1) / block_size;
    std::vector<const char*> input(num_blocks);
    std::vector<size_t> input_length(num_blocks);
    std::vector<string> compressed(num_blocks);
    std::vector<string> output(num_blocks);
    for (int b = 0; b < num_blocks; b++) {
      int input_start = b * block_size;
      int input_limit = std::min<int>((b+1)*block_size, length);
      input[b] = data+input_start;
      input_length[b] = input_limit-input_start;

      // Pre-grow the output buffer so we don't measure string append time.
      compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
    }

    // First, try one trial compression to make sure the code is compiled in
    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
      LOG(WARNING) << "Skipping " << names[comp] << ": "
                   << "library not compiled in";
      return;
    }

    for (int run = 0; run < kRuns; run++) {
      CycleTimer ctimer, utimer;

      for (int b = 0; b < num_blocks; b++) {
        // Pre-grow the output buffer so we don't measure string append time.
        compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
      }

      // Timed compression pass (preallocated output, no allocator time).
      ctimer.Start();
      for (int b = 0; b < num_blocks; b++)
        for (int i = 0; i < repeats; i++)
          Compress(input[b], input_length[b], comp, &compressed[b], true);
      ctimer.Stop();

      // Compress once more, with resizing, so we don't leave junk
      // at the end that will confuse the decompressor.
      for (int b = 0; b < num_blocks; b++) {
        Compress(input[b], input_length[b], comp, &compressed[b], false);
      }

      // Size decompression outputs outside the timed region.
      for (int b = 0; b < num_blocks; b++) {
        output[b].resize(input_length[b]);
      }

      // Timed decompression pass.
      utimer.Start();
      for (int i = 0; i < repeats; i++)
        for (int b = 0; b < num_blocks; b++)
          Uncompress(compressed[b], comp, input_length[b], &output[b]);
      utimer.Stop();

      ctime[run] = ctimer.Get();
      utime[run] = utimer.Get();
    }

    compressed_size = 0;
    for (size_t i = 0; i < compressed.size(); i++) {
      compressed_size += compressed[i].size();
    }
  }

  // Report the median of the timed runs.
  std::sort(ctime, ctime + kRuns);
  std::sort(utime, utime + kRuns);
  const int med = kRuns/2;

  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
  string x = names[comp];
  x += ":";
  string urate = (uncomp_rate >= 0)
                 ? StringPrintf("%.1f", uncomp_rate)
                 : string("?");
  printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
         "comp %5.1f MB/s uncomp %5s MB/s\n",
         x.c_str(),
         block_size/(1<<20),
         static_cast<int>(length), static_cast<uint32>(compressed_size),
         (compressed_size * 100.0) / std::max<int>(1, length),
         comp_rate,
         urate.c_str());
}
  346. static int VerifyString(const string& input) {
  347. string compressed;
  348. DataEndingAtUnreadablePage i(input);
  349. const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
  350. CHECK_EQ(written, compressed.size());
  351. CHECK_LE(compressed.size(),
  352. snappy::MaxCompressedLength(input.size()));
  353. CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  354. string uncompressed;
  355. DataEndingAtUnreadablePage c(compressed);
  356. CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
  357. CHECK_EQ(uncompressed, input);
  358. return uncompressed.size();
  359. }
  360. static void VerifyStringSink(const string& input) {
  361. string compressed;
  362. DataEndingAtUnreadablePage i(input);
  363. const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
  364. CHECK_EQ(written, compressed.size());
  365. CHECK_LE(compressed.size(),
  366. snappy::MaxCompressedLength(input.size()));
  367. CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  368. string uncompressed;
  369. uncompressed.resize(input.size());
  370. snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
  371. DataEndingAtUnreadablePage c(compressed);
  372. snappy::ByteArraySource source(c.data(), c.size());
  373. CHECK(snappy::Uncompress(&source, &sink));
  374. CHECK_EQ(uncompressed, input);
  375. }
// Compresses `input`, then decompresses it into an iovec array with a
// random number of entries (1 to 10) of random lengths, and checks that
// the reassembled bytes match the input.
static void VerifyIOVec(const string& input) {
  string compressed;
  DataEndingAtUnreadablePage i(input);
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
  CHECK_EQ(written, compressed.size());
  CHECK_LE(compressed.size(),
           snappy::MaxCompressedLength(input.size()));
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  // Try uncompressing into an iovec containing a random number of entries
  // ranging from 1 to 10.
  char* buf = new char[input.size()];
  ACMRandom rnd(input.size());
  size_t num = rnd.Next() % 10 + 1;
  if (input.size() < num) {
    num = input.size();
  }
  struct iovec* iov = new iovec[num];
  int used_so_far = 0;
  // Each base is buf plus the sum of the preceding lengths, so the iovec
  // entries tile `buf` contiguously: the decompressed bytes land
  // back-to-back in buf no matter how the random lengths partition it.
  for (size_t i = 0; i < num; ++i) {
    iov[i].iov_base = buf + used_so_far;
    if (i == num - 1) {
      // Last entry takes whatever length remains.
      iov[i].iov_len = input.size() - used_so_far;
    } else {
      // Randomly choose to insert a 0 byte entry.
      if (rnd.OneIn(5)) {
        iov[i].iov_len = 0;
      } else {
        iov[i].iov_len = rnd.Uniform(input.size());
      }
    }
    used_so_far += iov[i].iov_len;
  }
  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, num));
  CHECK(!memcmp(buf, input.data(), input.size()));
  delete[] iov;
  delete[] buf;
}
// Test that data compressed by a compressor that does not
// obey block sizes is uncompressed properly.
static void VerifyNonBlockedCompression(const string& input) {
  if (input.length() > snappy::kBlockSize) {
    // We cannot test larger blocks than the maximum block size, obviously.
    return;
  }

  // The stream header: a varint holding the uncompressed length.
  string prefix;
  Varint::Append32(&prefix, input.size());

  // Setup compression table
  snappy::internal::WorkingMemory wmem;
  int table_size;
  uint16* table = wmem.GetHashTable(input.size(), &table_size);

  // Compress entire input in one shot
  string compressed;
  compressed += prefix;
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
  char* dest = string_as_array(&compressed) + prefix.size();
  char* end = snappy::internal::CompressFragment(input.data(), input.size(),
                                                dest, table, table_size);
  compressed.resize(end - compressed.data());

  // Uncompress into string
  string uncomp_str;
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
  CHECK_EQ(uncomp_str, input);

  // Uncompress using source/sink
  string uncomp_str2;
  uncomp_str2.resize(input.size());
  snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
  snappy::ByteArraySource source(compressed.data(), compressed.size());
  CHECK(snappy::Uncompress(&source, &sink));
  CHECK_EQ(uncomp_str2, input);

  // Uncompress into iovec
  {
    static const int kNumBlocks = 10;
    struct iovec vec[kNumBlocks];
    // Over-allocate (+1 per block) so the ten fixed-size segments always
    // cover the whole input.
    const int block_size = 1 + input.size() / kNumBlocks;
    string iovec_data(block_size * kNumBlocks, 'x');
    for (int i = 0; i < kNumBlocks; i++) {
      vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
      vec[i].iov_len = block_size;
    }
    CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
                                       vec, kNumBlocks));
    CHECK_EQ(string(iovec_data.data(), input.size()), input);
  }
}
  461. // Expand the input so that it is at least K times as big as block size
  462. static string Expand(const string& input) {
  463. static const int K = 3;
  464. string data = input;
  465. while (data.size() < K * snappy::kBlockSize) {
  466. data += input;
  467. }
  468. return data;
  469. }
  470. static int Verify(const string& input) {
  471. VLOG(1) << "Verifying input of size " << input.size();
  472. // Compress using string based routines
  473. const int result = VerifyString(input);
  474. // Verify using sink based routines
  475. VerifyStringSink(input);
  476. VerifyNonBlockedCompression(input);
  477. VerifyIOVec(input);
  478. if (!input.empty()) {
  479. const string expanded = Expand(input);
  480. VerifyNonBlockedCompression(expanded);
  481. VerifyIOVec(input);
  482. }
  483. return result;
  484. }
// Convenience wrapper: validity check over an entire string buffer.
static bool IsValidCompressedBuffer(const string& c) {
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
}
// Convenience wrapper: uncompress an entire string buffer into *u.
static bool Uncompress(const string& c, string* u) {
  return snappy::Uncompress(c.data(), c.size(), u);
}
// This test checks to ensure that snappy doesn't coredump if it gets
// corrupted data.
TEST(CorruptedTest, VerifyCorrupted) {
  string source = "making sure we don't crash with corrupted input";
  VLOG(1) << source;
  string dest;
  string uncmp;
  snappy::Compress(source.data(), source.size(), &dest);

  // Mess around with the data. It's hard to simulate all possible
  // corruptions; this is just one example ...
  CHECK_GT(dest.size(), 3);
  dest[1]--;
  dest[3]++;
  // this really ought to fail.
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // This is testing for a security bug - a buffer that decompresses to 100k
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
  source.resize(100000);
  for (size_t i = 0; i < source.length(); ++i) {
    source[i] = 'A';
  }
  snappy::Compress(source.data(), source.size(), &dest);
  // Zero the varint length prefix so the header claims 0 uncompressed bytes.
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  if (sizeof(void *) == 4) {
    // Another security check; check a crazy big length can't DoS us with an
    // over-allocation.
    // Currently this is done only for 32-bit builds. On 64-bit builds,
    // where 3 GB might be an acceptable allocation size, Uncompress()
    // attempts to decompress, and sometimes causes the test to run out of
    // memory.
    dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
    // This decodes to a really large size, i.e., about 3 GB.
    dest[4] = 'k';
    CHECK(!IsValidCompressedBuffer(dest));
    CHECK(!Uncompress(dest, &uncmp));
  } else {
    LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
  }

  // This decodes to about 2 MB; much smaller, but should still fail.
  dest[0] = dest[1] = dest[2] = '\xff';
  dest[3] = 0x00;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // try reading stuff in from a bad file.
  for (int i = 1; i <= 3; ++i) {
    string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
                                   0);
    string uncmp;
    // check that we don't return a crazy length
    size_t ulen;
    CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
          || (ulen < (1<<20)));
    uint32 ulen2;
    snappy::ByteArraySource source(data.data(), data.size());
    CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
          (ulen2 < (1<<20)));
    CHECK(!IsValidCompressedBuffer(data));
    CHECK(!Uncompress(data, &uncmp));
  }
}
  554. // Helper routines to construct arbitrary compressed strings.
  555. // These mirror the compression code in snappy.cc, but are copied
  556. // here so that we can bypass some limitations in the how snappy.cc
  557. // invokes these routines.
  558. static void AppendLiteral(string* dst, const string& literal) {
  559. if (literal.empty()) return;
  560. int n = literal.size() - 1;
  561. if (n < 60) {
  562. // Fit length in tag byte
  563. dst->push_back(0 | (n << 2));
  564. } else {
  565. // Encode in upcoming bytes
  566. char number[4];
  567. int count = 0;
  568. while (n > 0) {
  569. number[count++] = n & 0xff;
  570. n >>= 8;
  571. }
  572. dst->push_back(0 | ((59+count) << 2));
  573. *dst += string(number, count);
  574. }
  575. *dst += literal;
  576. }
  577. static void AppendCopy(string* dst, int offset, int length) {
  578. while (length > 0) {
  579. // Figure out how much to copy in one shot
  580. int to_copy;
  581. if (length >= 68) {
  582. to_copy = 64;
  583. } else if (length > 64) {
  584. to_copy = 60;
  585. } else {
  586. to_copy = length;
  587. }
  588. length -= to_copy;
  589. if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) {
  590. assert(to_copy-4 < 8); // Must fit in 3 bits
  591. dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
  592. dst->push_back(offset & 0xff);
  593. } else if (offset < 65536) {
  594. dst->push_back(2 | ((to_copy-1) << 2));
  595. dst->push_back(offset & 0xff);
  596. dst->push_back(offset >> 8);
  597. } else {
  598. dst->push_back(3 | ((to_copy-1) << 2));
  599. dst->push_back(offset & 0xff);
  600. dst->push_back((offset >> 8) & 0xff);
  601. dst->push_back((offset >> 16) & 0xff);
  602. dst->push_back((offset >> 24) & 0xff);
  603. }
  604. }
  605. }
  606. TEST(Snappy, SimpleTests) {
  607. Verify("");
  608. Verify("a");
  609. Verify("ab");
  610. Verify("abc");
  611. Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
  612. Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
  613. Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
  614. Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
  615. Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
  616. }
  617. // Verify max blowup (lots of four-byte copies)
  618. TEST(Snappy, MaxBlowup) {
  619. string input;
  620. for (int i = 0; i < 20000; i++) {
  621. ACMRandom rnd(i);
  622. uint32 bytes = static_cast<uint32>(rnd.Next());
  623. input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
  624. }
  625. for (int i = 19999; i >= 0; i--) {
  626. ACMRandom rnd(i);
  627. uint32 bytes = static_cast<uint32>(rnd.Next());
  628. input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
  629. }
  630. Verify(input);
  631. }
// Round-trips many randomly generated inputs through Verify().
TEST(Snappy, RandomData) {
  ACMRandom rnd(FLAGS_test_random_seed);

  const int num_ops = 20000;
  for (int i = 0; i < num_ops; i++) {
    if ((i % 1000) == 0) {
      VLOG(0) << "Random op " << i << " of " << num_ops;
    }

    string x;
    size_t len = rnd.Uniform(4096);
    // The first 100 iterations use large inputs (64-128 KB) built from
    // uniformly random bytes; later iterations use short inputs with
    // bytes drawn from rnd.Skewed(3), a narrower distribution.
    if (i < 100) {
      len = 65536 + rnd.Uniform(65536);
    }
    while (x.size() < len) {
      int run_len = 1;
      // Occasionally emit a run of a single byte value.
      if (rnd.OneIn(10)) {
        run_len = rnd.Skewed(8);
      }
      char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
      while (run_len-- > 0 && x.size() < len) {
        x += c;
      }
    }

    Verify(x);
  }
}
TEST(Snappy, FourByteOffset) {
  // The new compressor cannot generate four-byte offsets since
  // it chops up the input into 32KB pieces. So we hand-emit the
  // copy manually.

  // The two fragments that make up the input string.
  string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
  string fragment2 = "some other string";

  // How many times each fragment is emitted.
  const int n1 = 2;
  const int n2 = 100000 / fragment2.size();
  // Total length is > 65536, so the copy back to fragment1 below needs an
  // offset that does not fit in two bytes.
  const int length = n1 * fragment1.size() + n2 * fragment2.size();

  // Build the compressed stream by hand: header, literals, then a copy
  // reaching all the way back to the first fragment.
  string compressed;
  Varint::Append32(&compressed, length);

  AppendLiteral(&compressed, fragment1);
  string src = fragment1;
  for (int i = 0; i < n2; i++) {
    AppendLiteral(&compressed, fragment2);
    src += fragment2;
  }
  AppendCopy(&compressed, src.size(), fragment1.size());
  src += fragment1;
  CHECK_EQ(length, src.size());

  string uncompressed;
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
                           &uncompressed));
  CHECK_EQ(uncompressed, src);
}
TEST(Snappy, IOVecEdgeCases) {
  // Test some tricky edge cases in the iovec output that are not necessarily
  // exercised by random tests.

  // Our output blocks look like this initially (the last iovec is bigger
  // than depicted):
  // [  ] [ ] [    ] [        ] [        ]
  static const int kLengths[] = { 2, 1, 4, 8, 128 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    iov[i].iov_base = new char[kLengths[i]];
    iov[i].iov_len = kLengths[i];
  }

  // Hand-build a compressed stream whose decoded length is 22 bytes.
  string compressed;
  Varint::Append32(&compressed, 22);

  // A literal whose output crosses three blocks.
  // [ab] [c] [123 ] [        ] [        ]
  AppendLiteral(&compressed, "abc123");

  // A copy whose output crosses two blocks (source and destination
  // segments marked).
  // [ab] [c] [1231] [23      ] [        ]
  //           ^--^   --
  AppendCopy(&compressed, 3, 3);

  // A copy where the input is, at first, in the block before the output:
  //
  // [ab] [c] [1231] [231231  ] [        ]
  //                  ^---     ^---
  // Then during the copy, the pointers move such that the input and
  // output pointers are in the same block:
  //
  // [ab] [c] [1231] [23123123] [        ]
  //                  ^-    ^-
  // And then they move again, so that the output pointer is no longer
  // in the same block as the input pointer:
  // [ab] [c] [1231] [23123123] [123     ]
  //                  ^--        ^--
  AppendCopy(&compressed, 6, 9);

  // Finally, a copy where the input is from several blocks back,
  // and it also crosses three blocks:
  //
  // [ab] [c] [1231] [23123123] [123b    ]
  //   ^                         ^
  // [ab] [c] [1231] [23123123] [123bc   ]
  //       ^                      ^
  // [ab] [c] [1231] [23123123] [123bc12 ]
  //           ^-                  ^-
  AppendCopy(&compressed, 17, 4);

  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  // Check each output block against the expected bytes drawn above.
  CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
  CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
  CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
  CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
  CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));

  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    delete[] reinterpret_cast<char *>(iov[i].iov_base);
  }
}
  742. TEST(Snappy, IOVecLiteralOverflow) {
  743. static const int kLengths[] = { 3, 4 };
  744. struct iovec iov[ARRAYSIZE(kLengths)];
  745. for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
  746. iov[i].iov_base = new char[kLengths[i]];
  747. iov[i].iov_len = kLengths[i];
  748. }
  749. string compressed;
  750. Varint::Append32(&compressed, 8);
  751. AppendLiteral(&compressed, "12345678");
  752. CHECK(!snappy::RawUncompressToIOVec(
  753. compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  754. for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
  755. delete[] reinterpret_cast<char *>(iov[i].iov_base);
  756. }
  757. }
  758. TEST(Snappy, IOVecCopyOverflow) {
  759. static const int kLengths[] = { 3, 4 };
  760. struct iovec iov[ARRAYSIZE(kLengths)];
  761. for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
  762. iov[i].iov_base = new char[kLengths[i]];
  763. iov[i].iov_len = kLengths[i];
  764. }
  765. string compressed;
  766. Varint::Append32(&compressed, 8);
  767. AppendLiteral(&compressed, "123");
  768. AppendCopy(&compressed, 3, 5);
  769. CHECK(!snappy::RawUncompressToIOVec(
  770. compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  771. for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
  772. delete[] reinterpret_cast<char *>(iov[i].iov_base);
  773. }
  774. }
  775. static bool CheckUncompressedLength(const string& compressed,
  776. size_t* ulength) {
  777. const bool result1 = snappy::GetUncompressedLength(compressed.data(),
  778. compressed.size(),
  779. ulength);
  780. snappy::ByteArraySource source(compressed.data(), compressed.size());
  781. uint32 length;
  782. const bool result2 = snappy::GetUncompressedLength(&source, &length);
  783. CHECK_EQ(result1, result2);
  784. return result1;
  785. }
// A single byte with its continuation bit set promises more varint bytes
// than the buffer holds; every entry point must reject it rather than
// read past the end of the input.
TEST(SnappyCorruption, TruncatedVarint) {
  string compressed, uncompressed;
  size_t ulength;
  compressed.push_back('\xf0');  // continuation bit set, nothing follows
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
                            &uncompressed));
}
  795. TEST(SnappyCorruption, UnterminatedVarint) {
  796. string compressed, uncompressed;
  797. size_t ulength;
  798. compressed.push_back('\x80');
  799. compressed.push_back('\x80');
  800. compressed.push_back('\x80');
  801. compressed.push_back('\x80');
  802. compressed.push_back('\x80');
  803. compressed.push_back(10);
  804. CHECK(!CheckUncompressedLength(compressed, &ulength));
  805. CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  806. CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
  807. &uncompressed));
  808. }
// A well-terminated 5-byte varint that encodes a value too large for
// 32 bits; the length header must be rejected rather than wrapped.
TEST(SnappyCorruption, OverflowingVarint) {
  string compressed, uncompressed;
  size_t ulength;
  compressed.push_back('\xfb');
  compressed.push_back('\xff');
  compressed.push_back('\xff');
  compressed.push_back('\xff');
  compressed.push_back('\x7f');  // terminator; total value exceeds uint32
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
                            &uncompressed));
}
TEST(Snappy, ReadPastEndOfBuffer) {
  // Check that we do not read past end of input

  // Make a compressed string that ends with a single-byte literal
  string compressed;
  Varint::Append32(&compressed, 1);
  AppendLiteral(&compressed, "x");

  string uncompressed;
  // DataEndingAtUnreadablePage places the bytes immediately before an
  // unreadable page, so any overread faults instead of passing silently.
  DataEndingAtUnreadablePage c(compressed);
  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
  CHECK_EQ(uncompressed, string("x"));
}
// Check for an infinite loop caused by a copy with offset==0
TEST(Snappy, ZeroOffsetCopy) {
  const char* compressed = "\x40\x12\x00\x00";
  //  \x40              Length (must be > kMaxIncrementCopyOverflow)
  //  \x12\x00\x00     Copy with offset==0, length==5
  char uncompressed[100];
  // Must fail cleanly; a naive decoder would spin copying from itself.
  EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
}
// Same offset==0 corruption as above, but exercised through the
// validation-only path instead of actual decompression.
TEST(Snappy, ZeroOffsetCopyValidation) {
  const char* compressed = "\x05\x12\x00\x00";
  //  \x05              Length
  //  \x12\x00\x00     Copy with offset==0, length==5
  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
}
namespace {

// Wrapper around snappy::internal::FindMatchLength that also verifies the
// returned pair's invariant: the bool component must be true exactly when
// the match length is under 8.  Returns the match length.
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
  std::pair<size_t, bool> p =
      snappy::internal::FindMatchLength(s1, s2, s2 + length);
  CHECK_EQ(p.first < 8, p.second);
  return p.first;
}

}  // namespace
// Deterministic table of FindMatchLength cases chosen to exercise every
// code path: the word-at-a-time loops (64-bit and 32-bit variants), the
// single-character tail loop, and mismatches at each byte position within
// a word.
TEST(Snappy, FindMatchLength) {
  // Exercise all different code paths through the function.
  // 64-bit version:

  // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));

  // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));

  // Same, but edge cases.
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));

  // Find non-match at once in first loop.
  EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
  EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
  EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
  EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));

  // Find non-match in first loop after one block.
  EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh?1234567xxxxxxxx", 24));
  EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh0?234567xxxxxxxx", 24));
  EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh01237654xxxxxxxx", 24));
  EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh0123456?xxxxxxxx", 24));

  // 32-bit version:

  // Short matches.
  EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
  EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
  EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
  EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
  EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
  EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
  EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
  EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));

  // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));

  // Same, but edge cases.
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));

  // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));

  // Find non-match at once in first loop.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx?123xxxxxxxx", 18));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0?23xxxxxxxx", 18));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0132xxxxxxxx", 18));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx012?xxxxxxxx", 18));

  // Same, but edge cases.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));

  // Find non-match in first loop after one block.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd?123xx", 16));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0?23xx", 16));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0132xx", 16));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd012?xx", 16));

  // Same, but edge cases.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
}
  930. TEST(Snappy, FindMatchLengthRandom) {
  931. const int kNumTrials = 10000;
  932. const int kTypicalLength = 10;
  933. ACMRandom rnd(FLAGS_test_random_seed);
  934. for (int i = 0; i < kNumTrials; i++) {
  935. string s, t;
  936. char a = rnd.Rand8();
  937. char b = rnd.Rand8();
  938. while (!rnd.OneIn(kTypicalLength)) {
  939. s.push_back(rnd.OneIn(2) ? a : b);
  940. t.push_back(rnd.OneIn(2) ? a : b);
  941. }
  942. DataEndingAtUnreadablePage u(s);
  943. DataEndingAtUnreadablePage v(t);
  944. int matched = TestFindMatchLength(u.data(), v.data(), t.size());
  945. if (matched == t.size()) {
  946. EXPECT_EQ(s, t);
  947. } else {
  948. EXPECT_NE(s[matched], t[matched]);
  949. for (int j = 0; j < matched; j++) {
  950. EXPECT_EQ(s[j], t[j]);
  951. }
  952. }
  953. }
  954. }
  955. static uint16 MakeEntry(unsigned int extra,
  956. unsigned int len,
  957. unsigned int copy_offset) {
  958. // Check that all of the fields fit within the allocated space
  959. assert(extra == (extra & 0x7)); // At most 3 bits
  960. assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
  961. assert(len == (len & 0x7f)); // At most 7 bits
  962. return len | (copy_offset << 8) | (extra << 11);
  963. }
// Check that the decompression table is correct, and optionally print out
// the computed one.
TEST(Snappy, VerifyCharTable) {
  using snappy::internal::LITERAL;
  using snappy::internal::COPY_1_BYTE_OFFSET;
  using snappy::internal::COPY_2_BYTE_OFFSET;
  using snappy::internal::COPY_4_BYTE_OFFSET;
  using snappy::internal::char_table;

  // Independently rebuild the 256-entry tag-byte lookup table here, then
  // diff it against the table compiled into the library.
  uint16 dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; i++) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries.  We store (len-1) in the top 6 bits.
  for (unsigned int len = 1; len <= 60; len++) {
    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries.  We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (unsigned int len = 4; len < 12; len++) {
    for (unsigned int offset = 0; offset < 2048; offset += 256) {
      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
          MakeEntry(1, len, offset>>8);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contents len-1 in top 6 bits, and offset in next four bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
  for (int i = 0; i < 256; i++) {
    EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
  }

  // Optionally dump the computed table in source form, for regenerating
  // the hard-coded copy in the library.
  if (FLAGS_snappy_dump_decompression_table) {
    printf("static const uint16 char_table[256] = {\n  ");
    for (int i = 0; i < 256; i++) {
      printf("0x%04x%s",
             dst[i],
             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
    }
    printf("};\n");
  }

  // Check that computed table matched recorded table.
  for (int i = 0; i < 256; i++) {
    EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
  }
}
// Reads `fname`, snappy-compresses the whole contents, and writes the
// result next to it as "<fname>.comp".  Crashes on any file error.
static void CompressFile(const char* fname) {
  string fullinput;
  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));

  string compressed;
  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);

  CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
                             file::Defaults()));
}
// Reads snappy-compressed `fname`, decompresses it, and writes the result
// next to it as "<fname>.uncomp".  Crashes on any file error or on a
// corrupt stream.
static void UncompressFile(const char* fname) {
  string fullinput;
  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));

  // Validate the length header (and that both length APIs agree).
  size_t uncompLength;
  CHECK(CheckUncompressedLength(fullinput, &uncompLength));

  string uncompressed;
  uncompressed.resize(uncompLength);
  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));

  CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
                             file::Defaults()));
}
// Benchmarks the configured compressors (per FLAGS_zlib/lzo/fastlz/snappy)
// over prefixes of `fname`, one measurement per prefix length in
// [FLAGS_start_len, FLAGS_end_len].
static void MeasureFile(const char* fname) {
  string fullinput;
  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
  printf("%-40s :\n", fname);

  // Default (-1) means: measure only the full input length.
  int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
  int end_len = fullinput.size();
  if (FLAGS_end_len >= 0) {
    end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
  }
  for (int len = start_len; len <= end_len; len++) {
    const char* const input = fullinput.data();
    // Scale the repeat count so each measurement does ~FLAGS_bytes of work.
    int repeats = (FLAGS_bytes + len) / (len + 1);
    if (FLAGS_zlib)     Measure(input, len, ZLIB, repeats, 1024<<10);
    if (FLAGS_lzo)      Measure(input, len, LZO, repeats, 1024<<10);
    if (FLAGS_fastlz)   Measure(input, len, FASTLZ, repeats, 1024<<10);
    if (FLAGS_snappy)    Measure(input, len, SNAPPY, repeats, 4096<<10);

    // For block-size based measurements
    // NOTE(review): deliberately disabled via `0 &&`; flip to 1 to sweep
    // snappy over a range of block sizes.
    if (0 && FLAGS_snappy) {
      Measure(input, len, SNAPPY, repeats, 8<<10);
      Measure(input, len, SNAPPY, repeats, 16<<10);
      Measure(input, len, SNAPPY, repeats, 32<<10);
      Measure(input, len, SNAPPY, repeats, 64<<10);
      Measure(input, len, SNAPPY, repeats, 256<<10);
      Measure(input, len, SNAPPY, repeats, 1024<<10);
    }
  }
}
// Benchmark corpus: reporting label, test-data filename, and an optional
// byte limit on how much of the file to read (0 = whole file).
static struct {
  const char* label;
  const char* filename;
  size_t size_limit;
} files[] = {
  { "html", "html", 0 },
  { "urls", "urls.10K", 0 },
  { "jpg", "fireworks.jpeg", 0 },
  { "jpg_200", "fireworks.jpeg", 200 },  // first 200 bytes only
  { "pdf", "paper-100k.pdf", 0 },
  { "html4", "html_x_4", 0 },
  { "txt1", "alice29.txt", 0 },
  { "txt2", "asyoulik.txt", 0 },
  { "txt3", "lcet10.txt", 0 },
  { "txt4", "plrabn12.txt", 0 },
  { "pb", "geo.protodata", 0 },
  { "gaviota", "kppkn.gtb", 0 },
};
// Benchmarks RawUncompress into a flat pre-sized buffer; `arg` selects an
// entry of files[].  Bytes-processed is measured in uncompressed bytes.
static void BM_UFlat(int iters, int arg) {
  StopBenchmarkTiming();

  // Pick file to process based on "arg"
  CHECK_GE(arg, 0);
  CHECK_LT(arg, ARRAYSIZE(files));
  string contents = ReadTestDataFile(files[arg].filename,
                                     files[arg].size_limit);

  // Compress once outside the timed region.
  string zcontents;
  snappy::Compress(contents.data(), contents.size(), &zcontents);
  char* dst = new char[contents.size()];

  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
                             static_cast<int64>(contents.size()));
  SetBenchmarkLabel(files[arg].label);
  StartBenchmarkTiming();
  while (iters-- > 0) {
    CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
  }
  StopBenchmarkTiming();

  delete[] dst;
}
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
  1121. static void BM_UValidate(int iters, int arg) {
  1122. StopBenchmarkTiming();
  1123. // Pick file to process based on "arg"
  1124. CHECK_GE(arg, 0);
  1125. CHECK_LT(arg, ARRAYSIZE(files));
  1126. string contents = ReadTestDataFile(files[arg].filename,
  1127. files[arg].size_limit);
  1128. string zcontents;
  1129. snappy::Compress(contents.data(), contents.size(), &zcontents);
  1130. SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
  1131. static_cast<int64>(contents.size()));
  1132. SetBenchmarkLabel(files[arg].label);
  1133. StartBenchmarkTiming();
  1134. while (iters-- > 0) {
  1135. CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
  1136. }
  1137. StopBenchmarkTiming();
  1138. }
  1139. BENCHMARK(BM_UValidate)->DenseRange(0, 4);
  1140. static void BM_UIOVec(int iters, int arg) {
  1141. StopBenchmarkTiming();
  1142. // Pick file to process based on "arg"
  1143. CHECK_GE(arg, 0);
  1144. CHECK_LT(arg, ARRAYSIZE(files));
  1145. string contents = ReadTestDataFile(files[arg].filename,
  1146. files[arg].size_limit);
  1147. string zcontents;
  1148. snappy::Compress(contents.data(), contents.size(), &zcontents);
  1149. // Uncompress into an iovec containing ten entries.
  1150. const int kNumEntries = 10;
  1151. struct iovec iov[kNumEntries];
  1152. char *dst = new char[contents.size()];
  1153. int used_so_far = 0;
  1154. for (int i = 0; i < kNumEntries; ++i) {
  1155. iov[i].iov_base = dst + used_so_far;
  1156. if (used_so_far == contents.size()) {
  1157. iov[i].iov_len = 0;
  1158. continue;
  1159. }
  1160. if (i == kNumEntries - 1) {
  1161. iov[i].iov_len = contents.size() - used_so_far;
  1162. } else {
  1163. iov[i].iov_len = contents.size() / kNumEntries;
  1164. }
  1165. used_so_far += iov[i].iov_len;
  1166. }
  1167. SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
  1168. static_cast<int64>(contents.size()));
  1169. SetBenchmarkLabel(files[arg].label);
  1170. StartBenchmarkTiming();
  1171. while (iters-- > 0) {
  1172. CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
  1173. kNumEntries));
  1174. }
  1175. StopBenchmarkTiming();
  1176. delete[] dst;
  1177. }
  1178. BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
// Benchmarks decompression through the Source/Sink interface, then checks
// the round-trip output against the original contents.
static void BM_UFlatSink(int iters, int arg) {
  StopBenchmarkTiming();

  // Pick file to process based on "arg"
  CHECK_GE(arg, 0);
  CHECK_LT(arg, ARRAYSIZE(files));
  string contents = ReadTestDataFile(files[arg].filename,
                                     files[arg].size_limit);

  // Compress once outside the timed region.
  string zcontents;
  snappy::Compress(contents.data(), contents.size(), &zcontents);
  char* dst = new char[contents.size()];

  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
                             static_cast<int64>(contents.size()));
  SetBenchmarkLabel(files[arg].label);
  StartBenchmarkTiming();
  while (iters-- > 0) {
    // Source/Sink objects are cheap; construct fresh ones per iteration.
    snappy::ByteArraySource source(zcontents.data(), zcontents.size());
    snappy::UncheckedByteArraySink sink(dst);
    CHECK(snappy::Uncompress(&source, &sink));
  }
  StopBenchmarkTiming();

  // Verify correctness after timing stops.
  string s(dst, contents.size());
  CHECK_EQ(contents, s);

  delete[] dst;
}
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
// Benchmarks RawCompress into a flat buffer; the label reports the
// resulting compression ratio alongside the corpus name.
static void BM_ZFlat(int iters, int arg) {
  StopBenchmarkTiming();

  // Pick file to process based on "arg"
  CHECK_GE(arg, 0);
  CHECK_LT(arg, ARRAYSIZE(files));
  string contents = ReadTestDataFile(files[arg].filename,
                                     files[arg].size_limit);

  // Worst-case output size, so compression never overflows dst.
  char* dst = new char[snappy::MaxCompressedLength(contents.size())];

  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
                             static_cast<int64>(contents.size()));
  StartBenchmarkTiming();

  size_t zsize = 0;
  while (iters-- > 0) {
    snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
  }
  StopBenchmarkTiming();

  // max(1, size) guards against division by zero for empty inputs.
  const double compression_ratio =
      static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
  SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
                                 files[arg].label, 100.0 * compression_ratio));
  VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
                          files[arg].label, contents.size(), zsize);
  delete[] dst;
}
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
  1229. } // namespace snappy
  1230. int main(int argc, char** argv) {
  1231. InitGoogle(argv[0], &argc, &argv, true);
  1232. RunSpecifiedBenchmarks();
  1233. if (argc >= 2) {
  1234. for (int arg = 1; arg < argc; arg++) {
  1235. if (FLAGS_write_compressed) {
  1236. snappy::CompressFile(argv[arg]);
  1237. } else if (FLAGS_write_uncompressed) {
  1238. snappy::UncompressFile(argv[arg]);
  1239. } else {
  1240. snappy::MeasureFile(argv[arg]);
  1241. }
  1242. }
  1243. return 0;
  1244. }
  1245. return RUN_ALL_TESTS();
  1246. }