fossil-delta.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714
  1. /*
  2. ** Copyright (c) 2006 D. Richard Hipp
  3. **
  4. ** This program is free software; you can redistribute it and/or
  5. ** modify it under the terms of the Simplified BSD License (also
  6. ** known as the "2-Clause License" or "FreeBSD License".)
  7. ** This program is distributed in the hope that it will be useful,
  8. ** but without any warranty; without even the implied warranty of
  9. ** merchantability or fitness for a particular purpose.
  10. **
  11. ** Author contact information:
  12. ** [email protected]
  13. ** http://www.hwaci.com/drh/
  14. **
  15. *******************************************************************************
  16. **
  17. ** This module implements the delta compress algorithm.
  18. **
  19. ** Though developed specifically for fossil, the code in this file
  20. ** is generally applicable and is thus easily separated from the
  21. ** fossil source code base. Nothing in this file depends on anything
  22. ** else in fossil.
  23. */
  24. #include "config.h"
  25. #include <stdio.h>
  26. #include <assert.h>
  27. #include <stdlib.h>
  28. #include <string.h>
  29. #include "fossil-delta.h"
  30. /*
  31. ** Macros for turning debugging printfs on and off
  32. */
  33. #if 0
  34. # define DEBUG1(X) X
  35. #else
  36. # define DEBUG1(X)
  37. #endif
  38. #if 0
  39. #define DEBUG2(X) X
  40. /*
  41. ** For debugging:
  42. ** Print 16 characters of text from zBuf
  43. */
  44. static const char *print16(const char *z){
  45. int i;
  46. static char zBuf[20];
  47. for(i=0; i<16; i++){
  48. if( z[i]>=0x20 && z[i]<=0x7e ){
  49. zBuf[i] = z[i];
  50. }else{
  51. zBuf[i] = '.';
  52. }
  53. }
  54. zBuf[i] = 0;
  55. return zBuf;
  56. }
  57. #else
  58. # define DEBUG2(X)
  59. #endif
  60. //#if INTERFACE
  61. /*
  62. ** The "u32" type must be an unsigned 32-bit integer. Adjust this
  63. */
  64. typedef unsigned int u32;
  65. /*
  66. ** Must be a 16-bit value
  67. */
  68. typedef short int s16;
  69. typedef unsigned short int u16;
  70. //#endif /* INTERFACE */
  71. /*
  72. ** The width of a hash window in bytes. The algorithm only works if this
  73. ** is a power of 2.
  74. */
  75. #define NHASH 16
  76. /*
  77. ** The current state of the rolling hash.
  78. **
  79. ** z[] holds the values that have been hashed. z[] is a circular buffer.
  80. ** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
  81. ** the window.
  82. **
  83. ** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
  84. ** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
  85. ** (Each index for z[] should be module NHASH, of course. The %NHASH operator
  86. ** is omitted in the prior expression for brevity.)
  87. */
  88. typedef struct hash hash;
  89. struct hash {
  90. u16 a, b; /* Hash values */
  91. u16 i; /* Start of the hash window */
  92. char z[NHASH]; /* The values that have been hashed */
  93. };
  94. /*
  95. ** Initialize the rolling hash using the first NHASH characters of z[]
  96. */
  97. static void hash_init(hash *pHash, const char *z){
  98. u16 a, b, i;
  99. a = b = z[0];
  100. for(i=1; i<NHASH; i++){
  101. a += z[i];
  102. b += a;
  103. }
  104. memcpy(pHash->z, z, NHASH);
  105. pHash->a = a & 0xffff;
  106. pHash->b = b & 0xffff;
  107. pHash->i = 0;
  108. }
  109. /*
  110. ** Advance the rolling hash by a single character "c"
  111. */
  112. static void hash_next(hash *pHash, int c){
  113. u16 old = pHash->z[pHash->i];
  114. pHash->z[pHash->i] = c;
  115. pHash->i = (pHash->i+1)&(NHASH-1);
  116. pHash->a = pHash->a - old + c;
  117. pHash->b = pHash->b - NHASH*old + pHash->a;
  118. }
  119. /*
  120. ** Return a 32-bit hash value
  121. */
  122. static u32 hash_32bit(hash *pHash){
  123. return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
  124. }
  125. /*
  126. ** Compute a hash on NHASH bytes.
  127. **
  128. ** This routine is intended to be equivalent to:
  129. ** hash h;
  130. ** hash_init(&h, zInput);
  131. ** return hash_32bit(&h);
  132. */
  133. static u32 hash_once(const char *z){
  134. u16 a, b, i;
  135. a = b = z[0];
  136. for(i=1; i<NHASH; i++){
  137. a += z[i];
  138. b += a;
  139. }
  140. return a | (((u32)b)<<16);
  141. }
  142. /*
  143. ** Write an base-64 integer into the given buffer.
  144. */
  145. static void putInt(unsigned int v, char **pz){
  146. static const char zDigits[] =
  147. "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
  148. /* 123456789 123456789 123456789 123456789 123456789 123456789 123 */
  149. int i, j;
  150. char zBuf[20];
  151. if( v==0 ){
  152. *(*pz)++ = '0';
  153. return;
  154. }
  155. for(i=0; v>0; i++, v>>=6){
  156. zBuf[i] = zDigits[v&0x3f];
  157. }
  158. for(j=i-1; j>=0; j--){
  159. *(*pz)++ = zBuf[j];
  160. }
  161. }
  162. /*
  163. ** Read bytes from *pz and convert them into a positive integer. When
  164. ** finished, leave *pz pointing to the first character past the end of
  165. ** the integer. The *pLen parameter holds the length of the string
  166. ** in *pz and is decremented once for each character in the integer.
  167. */
  168. static unsigned int getInt(const char **pz, int *pLen){
  169. static const signed char zValue[] = {
  170. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  171. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  172. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  173. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
  174. -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
  175. 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36,
  176. -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
  177. 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1,
  178. };
  179. unsigned int v = 0;
  180. int c;
  181. unsigned char *z = (unsigned char*)*pz;
  182. unsigned char *zStart = z;
  183. while( (c = zValue[0x7f&*(z++)])>=0 ){
  184. v = (v<<6) + c;
  185. }
  186. z--;
  187. *pLen -= z - zStart;
  188. *pz = (char*)z;
  189. return v;
  190. }
  191. /*
  192. ** Return the number digits in the base-64 representation of a positive integer
  193. */
  194. static int digit_count(int v){
  195. unsigned int i, x;
  196. for(i=1, x=64; v>=x; i++, x <<= 6){}
  197. return i;
  198. }
  199. #ifdef __GNUC__
  200. # define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
  201. #else
  202. # define GCC_VERSION 0
  203. #endif
  204. /*
  205. ** Compute a 32-bit big-endian checksum on the N-byte buffer. If the
  206. ** buffer is not a multiple of 4 bytes length, compute the sum that would
  207. ** have occurred if the buffer was padded with zeros to the next multiple
  208. ** of four bytes.
  209. */
  210. static unsigned int checksum(const char *zIn, size_t N){
  211. static const int byteOrderTest = 1;
  212. const unsigned char *z = (const unsigned char *)zIn;
  213. const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
  214. unsigned sum = 0;
  215. assert( (z - (const unsigned char*)0)%4==0 ); /* Four-byte alignment */
  216. if( 0==*(char*)&byteOrderTest ){
  217. /* This is a big-endian machine */
  218. while( z<zEnd ){
  219. sum += *(unsigned*)z;
  220. z += 4;
  221. }
  222. }else{
  223. /* A little-endian machine */
  224. #if GCC_VERSION>=4003000
  225. while( z<zEnd ){
  226. sum += __builtin_bswap32(*(unsigned*)z);
  227. z += 4;
  228. }
  229. #elif defined(_MSC_VER) && _MSC_VER>=1300
  230. while( z<zEnd ){
  231. sum += _byteswap_ulong(*(unsigned*)z);
  232. z += 4;
  233. }
  234. #else
  235. unsigned sum0 = 0;
  236. unsigned sum1 = 0;
  237. unsigned sum2 = 0;
  238. while(N >= 16){
  239. sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
  240. sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
  241. sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
  242. sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
  243. z += 16;
  244. N -= 16;
  245. }
  246. while(N >= 4){
  247. sum0 += z[0];
  248. sum1 += z[1];
  249. sum2 += z[2];
  250. sum += z[3];
  251. z += 4;
  252. N -= 4;
  253. }
  254. sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
  255. #endif
  256. }
  257. switch(N&3){
  258. case 3: sum += (z[2] << 8);
  259. case 2: sum += (z[1] << 16);
  260. case 1: sum += (z[0] << 24);
  261. default: ;
  262. }
  263. return sum;
  264. }
  265. /*
  266. ** Create a new delta.
  267. **
  268. ** The delta is written into a preallocated buffer, zDelta, which
  269. ** should be at least 60 bytes longer than the target file, zOut.
  270. ** The delta string will be NUL-terminated, but it might also contain
  271. ** embedded NUL characters if either the zSrc or zOut files are
  272. ** binary. This function returns the length of the delta string
  273. ** in bytes, excluding the final NUL terminator character.
  274. **
  275. ** Output Format:
  276. **
  277. ** The delta begins with a base64 number followed by a newline. This
  278. ** number is the number of bytes in the TARGET file. Thus, given a
  279. ** delta file z, a program can compute the size of the output file
  280. ** simply by reading the first line and decoding the base-64 number
  281. ** found there. The delta_output_size() routine does exactly this.
  282. **
  283. ** After the initial size number, the delta consists of a series of
  284. ** literal text segments and commands to copy from the SOURCE file.
  285. ** A copy command looks like this:
  286. **
  287. ** NNN@MMM,
  288. **
  289. ** where NNN is the number of bytes to be copied and MMM is the offset
  290. ** into the source file of the first byte (both base-64). If NNN is 0
  291. ** it means copy the rest of the input file. Literal text is like this:
  292. **
  293. ** NNN:TTTTT
  294. **
  295. ** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
  296. **
  297. ** The last term is of the form
  298. **
  299. ** NNN;
  300. **
  301. ** In this case, NNN is a 32-bit bigendian checksum of the output file
  302. ** that can be used to verify that the delta applied correctly. All
  303. ** numbers are in base-64.
  304. **
  305. ** Pure text files generate a pure text delta. Binary files generate a
  306. ** delta that may contain some binary data.
  307. **
  308. ** Algorithm:
  309. **
  310. ** The encoder first builds a hash table to help it find matching
  311. ** patterns in the source file. 16-byte chunks of the source file
  312. ** sampled at evenly spaced intervals are used to populate the hash
  313. ** table.
  314. **
  315. ** Next we begin scanning the target file using a sliding 16-byte
  316. ** window. The hash of the 16-byte window in the target is used to
  317. ** search for a matching section in the source file. When a match
  318. ** is found, a copy command is added to the delta. An effort is
  319. ** made to extend the matching section to regions that come before
  320. ** and after the 16-byte hash window. A copy command is only issued
  321. ** if the result would use less space that just quoting the text
  322. ** literally. Literal text is added to the delta for sections that
  323. ** do not match or which can not be encoded efficiently using copy
  324. ** commands.
  325. */
  326. int delta_create(
  327. const char *zSrc, /* The source or pattern file */
  328. unsigned int lenSrc, /* Length of the source file */
  329. const char *zOut, /* The target file */
  330. unsigned int lenOut, /* Length of the target file */
  331. char *zDelta /* Write the delta into this buffer */
  332. ){
  333. int i, base;
  334. char *zOrigDelta = zDelta;
  335. hash h;
  336. int nHash; /* Number of hash table entries */
  337. int *landmark; /* Primary hash table */
  338. int *collide; /* Collision chain */
  339. int lastRead = -1; /* Last byte of zSrc read by a COPY command */
  340. /* Add the target file size to the beginning of the delta
  341. */
  342. putInt(lenOut, &zDelta);
  343. *(zDelta++) = '\n';
  344. /* If the source file is very small, it means that we have no
  345. ** chance of ever doing a copy command. Just output a single
  346. ** literal segment for the entire target and exit.
  347. */
  348. if( lenSrc<=NHASH ){
  349. putInt(lenOut, &zDelta);
  350. *(zDelta++) = ':';
  351. memcpy(zDelta, zOut, lenOut);
  352. zDelta += lenOut;
  353. putInt(checksum(zOut, lenOut), &zDelta);
  354. *(zDelta++) = ';';
  355. return zDelta - zOrigDelta;
  356. }
  357. /* Compute the hash table used to locate matching sections in the
  358. ** source file.
  359. */
  360. nHash = lenSrc/NHASH;
  361. collide = fossil_malloc( nHash*2*sizeof(int) );
  362. memset(collide, -1, nHash*2*sizeof(int));
  363. landmark = &collide[nHash];
  364. for(i=0; i<lenSrc-NHASH; i+=NHASH){
  365. int hv = hash_once(&zSrc[i]) % nHash;
  366. collide[i/NHASH] = landmark[hv];
  367. landmark[hv] = i/NHASH;
  368. }
  369. /* Begin scanning the target file and generating copy commands and
  370. ** literal sections of the delta.
  371. */
  372. base = 0; /* We have already generated everything before zOut[base] */
  373. while( base+NHASH<lenOut ){
  374. int iSrc, iBlock;
  375. unsigned int bestCnt, bestOfst=0, bestLitsz=0;
  376. hash_init(&h, &zOut[base]);
  377. i = 0; /* Trying to match a landmark against zOut[base+i] */
  378. bestCnt = 0;
  379. while( 1 ){
  380. int hv;
  381. int limit = 250;
  382. hv = hash_32bit(&h) % nHash;
  383. DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
  384. iBlock = landmark[hv];
  385. while( iBlock>=0 && (limit--)>0 ){
  386. /*
  387. ** The hash window has identified a potential match against
  388. ** landmark block iBlock. But we need to investigate further.
  389. **
  390. ** Look for a region in zOut that matches zSrc. Anchor the search
  391. ** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to
  392. ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
  393. **
  394. ** Set cnt equal to the length of the match and set ofst so that
  395. ** zSrc[ofst] is the first element of the match. litsz is the number
  396. ** of characters between zOut[base] and the beginning of the match.
  397. ** sz will be the overhead (in bytes) needed to encode the copy
  398. ** command. Only generate copy command if the overhead of the
  399. ** copy command is less than the amount of literal text to be copied.
  400. */
  401. int cnt, ofst, litsz;
  402. int j, k, x, y;
  403. int sz;
  404. int limitX;
  405. /* Beginning at iSrc, match forwards as far as we can. j counts
  406. ** the number of characters that match */
  407. iSrc = iBlock*NHASH;
  408. y = base+i;
  409. limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
  410. for(x=iSrc; x<limitX; x++, y++){
  411. if( zSrc[x]!=zOut[y] ) break;
  412. }
  413. j = x - iSrc - 1;
  414. /* Beginning at iSrc-1, match backwards as far as we can. k counts
  415. ** the number of characters that match */
  416. for(k=1; k<iSrc && k<=i; k++){
  417. if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
  418. }
  419. k--;
  420. /* Compute the offset and size of the matching region */
  421. ofst = iSrc-k;
  422. cnt = j+k+1;
  423. litsz = i-k; /* Number of bytes of literal text before the copy */
  424. DEBUG2( printf("MATCH %d bytes at %d: [%s] litsz=%d\n",
  425. cnt, ofst, print16(&zSrc[ofst]), litsz); )
  426. /* sz will hold the number of bytes needed to encode the "insert"
  427. ** command and the copy command, not counting the "insert" text */
  428. sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
  429. if( cnt>=sz && cnt>bestCnt ){
  430. /* Remember this match only if it is the best so far and it
  431. ** does not increase the file size */
  432. bestCnt = cnt;
  433. bestOfst = iSrc-k;
  434. bestLitsz = litsz;
  435. DEBUG2( printf("... BEST SO FAR\n"); )
  436. }
  437. /* Check the next matching block */
  438. iBlock = collide[iBlock];
  439. }
  440. /* We have a copy command that does not cause the delta to be larger
  441. ** than a literal insert. So add the copy command to the delta.
  442. */
  443. if( bestCnt>0 ){
  444. if( bestLitsz>0 ){
  445. /* Add an insert command before the copy */
  446. putInt(bestLitsz,&zDelta);
  447. *(zDelta++) = ':';
  448. memcpy(zDelta, &zOut[base], bestLitsz);
  449. zDelta += bestLitsz;
  450. base += bestLitsz;
  451. DEBUG2( printf("insert %d\n", bestLitsz); )
  452. }
  453. base += bestCnt;
  454. putInt(bestCnt, &zDelta);
  455. *(zDelta++) = '@';
  456. putInt(bestOfst, &zDelta);
  457. DEBUG2( printf("copy %d bytes from %d\n", bestCnt, bestOfst); )
  458. *(zDelta++) = ',';
  459. if( bestOfst + bestCnt -1 > lastRead ){
  460. lastRead = bestOfst + bestCnt - 1;
  461. DEBUG2( printf("lastRead becomes %d\n", lastRead); )
  462. }
  463. bestCnt = 0;
  464. break;
  465. }
  466. /* If we reach this point, it means no match is found so far */
  467. if( base+i+NHASH>=lenOut ){
  468. /* We have reached the end of the file and have not found any
  469. ** matches. Do an "insert" for everything that does not match */
  470. putInt(lenOut-base, &zDelta);
  471. *(zDelta++) = ':';
  472. memcpy(zDelta, &zOut[base], lenOut-base);
  473. zDelta += lenOut-base;
  474. base = lenOut;
  475. break;
  476. }
  477. /* Advance the hash by one character. Keep looking for a match */
  478. hash_next(&h, zOut[base+i+NHASH]);
  479. i++;
  480. }
  481. }
  482. /* Output a final "insert" record to get all the text at the end of
  483. ** the file that does not match anything in the source file.
  484. */
  485. if( base<lenOut ){
  486. putInt(lenOut-base, &zDelta);
  487. *(zDelta++) = ':';
  488. memcpy(zDelta, &zOut[base], lenOut-base);
  489. zDelta += lenOut-base;
  490. }
  491. /* Output the final checksum record. */
  492. putInt(checksum(zOut, lenOut), &zDelta);
  493. *(zDelta++) = ';';
  494. fossil_free(collide);
  495. return zDelta - zOrigDelta;
  496. }
  497. /*
  498. ** Return the size (in bytes) of the output from applying
  499. ** a delta.
  500. **
  501. ** This routine is provided so that an procedure that is able
  502. ** to call delta_apply() can learn how much space is required
  503. ** for the output and hence allocate nor more space that is really
  504. ** needed.
  505. */
  506. int delta_output_size(const char *zDelta, int lenDelta){
  507. int size;
  508. size = getInt(&zDelta, &lenDelta);
  509. if( *zDelta!='\n' ){
  510. /* ERROR: size integer not terminated by "\n" */
  511. return -1;
  512. }
  513. return size;
  514. }
  515. /*
  516. ** Apply a delta.
  517. **
  518. ** The output buffer should be big enough to hold the whole output
  519. ** file and a NUL terminator at the end. The delta_output_size()
  520. ** routine will determine this size for you.
  521. **
  522. ** The delta string should be null-terminated. But the delta string
  523. ** may contain embedded NUL characters (if the input and output are
  524. ** binary files) so we also have to pass in the length of the delta in
  525. ** the lenDelta parameter.
  526. **
  527. ** This function returns the size of the output file in bytes (excluding
  528. ** the final NUL terminator character). Except, if the delta string is
  529. ** malformed or intended for use with a source file other than zSrc,
  530. ** then this routine returns -1.
  531. **
  532. ** Refer to the delta_create() documentation above for a description
  533. ** of the delta file format.
  534. */
  535. int delta_apply(
  536. const char *zSrc, /* The source or pattern file */
  537. int lenSrc, /* Length of the source file */
  538. const char *zDelta, /* Delta to apply to the pattern */
  539. int lenDelta, /* Length of the delta */
  540. char *zOut /* Write the output into this preallocated buffer */
  541. ){
  542. unsigned int limit;
  543. unsigned int total = 0;
  544. #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
  545. char *zOrigOut = zOut;
  546. #endif
  547. limit = getInt(&zDelta, &lenDelta);
  548. if( *zDelta!='\n' ){
  549. /* ERROR: size integer not terminated by "\n" */
  550. return -1;
  551. }
  552. zDelta++; lenDelta--;
  553. while( *zDelta && lenDelta>0 ){
  554. unsigned int cnt, ofst;
  555. cnt = getInt(&zDelta, &lenDelta);
  556. switch( zDelta[0] ){
  557. case '@': {
  558. zDelta++; lenDelta--;
  559. ofst = getInt(&zDelta, &lenDelta);
  560. if( lenDelta>0 && zDelta[0]!=',' ){
  561. /* ERROR: copy command not terminated by ',' */
  562. return -1;
  563. }
  564. zDelta++; lenDelta--;
  565. DEBUG1( printf("COPY %d from %d\n", cnt, ofst); )
  566. total += cnt;
  567. if( total>limit ){
  568. /* ERROR: copy exceeds output file size */
  569. return -1;
  570. }
  571. if( ofst+cnt > lenSrc ){
  572. /* ERROR: copy extends past end of input */
  573. return -1;
  574. }
  575. memcpy(zOut, &zSrc[ofst], cnt);
  576. zOut += cnt;
  577. break;
  578. }
  579. case ':': {
  580. zDelta++; lenDelta--;
  581. total += cnt;
  582. if( total>limit ){
  583. /* ERROR: insert command gives an output larger than predicted */
  584. return -1;
  585. }
  586. DEBUG1( printf("INSERT %d\n", cnt); )
  587. if( cnt>lenDelta ){
  588. /* ERROR: insert count exceeds size of delta */
  589. return -1;
  590. }
  591. memcpy(zOut, zDelta, cnt);
  592. zOut += cnt;
  593. zDelta += cnt;
  594. lenDelta -= cnt;
  595. break;
  596. }
  597. case ';': {
  598. zDelta++; lenDelta--;
  599. zOut[0] = 0;
  600. #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
  601. if( cnt!=checksum(zOrigOut, total) ){
  602. /* ERROR: bad checksum */
  603. return -1;
  604. }
  605. #endif
  606. if( total!=limit ){
  607. /* ERROR: generated size does not match predicted size */
  608. return -1;
  609. }
  610. return total;
  611. }
  612. default: {
  613. /* ERROR: unknown delta operator */
  614. return -1;
  615. }
  616. }
  617. }
  618. /* ERROR: unterminated delta */
  619. return -1;
  620. }
  621. /*
  622. ** Analyze a delta. Figure out the total number of bytes copied from
  623. ** source to target, and the total number of bytes inserted by the delta,
  624. ** and return both numbers.
  625. */
  626. int delta_analyze(
  627. const char *zDelta, /* Delta to apply to the pattern */
  628. int lenDelta, /* Length of the delta */
  629. int *pnCopy, /* OUT: Number of bytes copied */
  630. int *pnInsert /* OUT: Number of bytes inserted */
  631. ){
  632. unsigned int nInsert = 0;
  633. unsigned int nCopy = 0;
  634. (void)getInt(&zDelta, &lenDelta);
  635. if( *zDelta!='\n' ){
  636. /* ERROR: size integer not terminated by "\n" */
  637. return -1;
  638. }
  639. zDelta++; lenDelta--;
  640. while( *zDelta && lenDelta>0 ){
  641. unsigned int cnt;
  642. cnt = getInt(&zDelta, &lenDelta);
  643. switch( zDelta[0] ){
  644. case '@': {
  645. zDelta++; lenDelta--;
  646. (void)getInt(&zDelta, &lenDelta);
  647. if( lenDelta>0 && zDelta[0]!=',' ){
  648. /* ERROR: copy command not terminated by ',' */
  649. return -1;
  650. }
  651. zDelta++; lenDelta--;
  652. nCopy += cnt;
  653. break;
  654. }
  655. case ':': {
  656. zDelta++; lenDelta--;
  657. nInsert += cnt;
  658. if( cnt>lenDelta ){
  659. /* ERROR: insert count exceeds size of delta */
  660. return -1;
  661. }
  662. zDelta += cnt;
  663. lenDelta -= cnt;
  664. break;
  665. }
  666. case ';': {
  667. *pnCopy = nCopy;
  668. *pnInsert = nInsert;
  669. return 0;
  670. }
  671. default: {
  672. /* ERROR: unknown delta operator */
  673. return -1;
  674. }
  675. }
  676. }
  677. /* ERROR: unterminated delta */
  678. return -1;
  679. }