  // diff.js 12 KB


  /* See LICENSE file for terms of use */
  /*
   * Text diff implementation.
   *
   * This library supports the following APIs:
   * JsDiff.diffChars: Character by character diff
   * JsDiff.diffWords: Word (as defined by \b regex) diff which ignores whitespace
   * JsDiff.diffWordsWithSpace: Word diff which treats whitespace as significant
   * JsDiff.diffLines: Line based diff
   *
   * JsDiff.diffCss: Diff targeted at CSS content
   *
   * These methods are based on the implementation proposed in
   * "An O(ND) Difference Algorithm and its Variations" (Myers, 1986).
   * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927
   */
  var JsDiff = (function() {
    /*jshint maxparams: 5*/

    // Matches any non-whitespace character. Hoisted so that Diff#equals does
    // not rebuild the regex on every token comparison. It has no `g` flag, so
    // `test` carries no state between calls.
    var NON_WHITESPACE = /\S/;

    // Unified-diff hunk header. The ",count" parts are optional: when a range
    // covers exactly one line the count may be omitted, in which case it is 1.
    var HUNK_HEADER = /@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;

    /**
     * Copies a search path so that it can be extended without touching the
     * path it was derived from. The components array is copied shallowly; the
     * component objects themselves are shared and must not be mutated.
     */
    function clonePath(path) {
      return { newPos: path.newPos, components: path.components.slice(0) };
    }

    /**
     * Returns a new array holding only the truthy entries of `array`
     * (used to drop the empty strings produced by String#split).
     */
    function removeEmpty(array) {
      var ret = [];
      for (var i = 0; i < array.length; i++) {
        if (array[i]) {
          ret.push(array[i]);
        }
      }
      return ret;
    }

    /**
     * Escapes &, <, > and " so that `s` can be embedded in markup.
     * The ampersand is replaced first so later entities are not double-escaped.
     */
    function escapeHTML(s) {
      var n = s;
      n = n.replace(/&/g, '&amp;');
      n = n.replace(/</g, '&lt;');
      n = n.replace(/>/g, '&gt;');
      n = n.replace(/"/g, '&quot;');
      return n;
    }

    /**
     * Generic diff engine. Instances customise behaviour by overriding
     * `tokenize`, `equals` and `join`.
     *
     * @param {boolean} [ignoreWhitespace] when truthy, two tokens that both
     *        consist solely of whitespace compare as equal.
     */
    var Diff = function(ignoreWhitespace) {
      this.ignoreWhitespace = ignoreWhitespace;
    };

    Diff.prototype = {
      /**
       * Computes the list of change components turning `oldString` into
       * `newString`.
       *
       * @returns {Array} components of the form {value, added, removed};
       *          `added`/`removed` are true for insertions/deletions and
       *          undefined (or absent) for unchanged text.
       */
      diff: function(oldString, newString) {
        // Handle the identity case (this is due to unrolling editLength == 0)
        if (newString === oldString) {
          return [{ value: newString }];
        }
        if (!newString) {
          return [{ value: oldString, removed: true }];
        }
        if (!oldString) {
          return [{ value: newString, added: true }];
        }

        newString = this.tokenize(newString);
        oldString = this.tokenize(oldString);

        var newLen = newString.length, oldLen = oldString.length;
        var maxEditLength = newLen + oldLen;
        // Indexed by diagonal number, which may be negative; negative indices
        // are stored as ordinary properties on the array object.
        var bestPath = [{ newPos: -1, components: [] }];

        // Seed editLength = 0
        var oldPos = this.extractCommon(bestPath[0], newString, oldString, 0);
        if (bestPath[0].newPos+1 >= newLen && oldPos+1 >= oldLen) {
          return bestPath[0].components;
        }

        for (var editLength = 1; editLength <= maxEditLength; editLength++) {
          for (var diagonalPath = -1*editLength; diagonalPath <= editLength; diagonalPath+=2) {
            var basePath;
            var addPath = bestPath[diagonalPath-1],
                removePath = bestPath[diagonalPath+1];
            oldPos = (removePath ? removePath.newPos : 0) - diagonalPath;
            if (addPath) {
              // No one else is going to attempt to use this value, clear it
              bestPath[diagonalPath-1] = undefined;
            }

            var canAdd = addPath && addPath.newPos+1 < newLen;
            var canRemove = removePath && 0 <= oldPos && oldPos < oldLen;
            if (!canAdd && !canRemove) {
              bestPath[diagonalPath] = undefined;
              continue;
            }

            // Select the diagonal that we want to branch from. We select the prior
            // path whose position in the new string is the farthest from the origin
            // and does not pass the bounds of the diff graph
            if (!canAdd || (canRemove && addPath.newPos < removePath.newPos)) {
              basePath = clonePath(removePath);
              this.pushComponent(basePath.components, oldString[oldPos], undefined, true);
            } else {
              basePath = clonePath(addPath);
              basePath.newPos++;
              this.pushComponent(basePath.components, newString[basePath.newPos], true, undefined);
            }

            // Follow the diagonal as far as the two inputs agree.
            oldPos = this.extractCommon(basePath, newString, oldString, diagonalPath);

            if (basePath.newPos+1 >= newLen && oldPos+1 >= oldLen) {
              // Both inputs are fully consumed: this is the shortest edit script.
              return basePath.components;
            } else {
              bestPath[diagonalPath] = basePath;
            }
          }
        }
      },

      /**
       * Appends `value` to `components`, merging it into the last component
       * when that one has the same added/removed flags.
       */
      pushComponent: function(components, value, added, removed) {
        var last = components[components.length-1];
        if (last && last.added === added && last.removed === removed) {
          // We need to clone here as the component clone operation is just
          // a shallow array clone
          components[components.length-1] =
            {value: this.join(last.value, value), added: added, removed: removed };
        } else {
          components.push({value: value, added: added, removed: removed });
        }
      },

      /**
       * Advances `basePath` along `diagonalPath` while the next tokens of both
       * inputs are equal, recording each as an unchanged component (taken from
       * the new input). Updates `basePath.newPos` and returns the matching
       * position in the old input.
       */
      extractCommon: function(basePath, newString, oldString, diagonalPath) {
        var newLen = newString.length,
            oldLen = oldString.length,
            newPos = basePath.newPos,
            oldPos = newPos - diagonalPath;
        while (newPos+1 < newLen && oldPos+1 < oldLen && this.equals(newString[newPos+1], oldString[oldPos+1])) {
          newPos++;
          oldPos++;

          this.pushComponent(basePath.components, newString[newPos], undefined, undefined);
        }
        basePath.newPos = newPos;
        return oldPos;
      },

      /**
       * Token comparison. With `ignoreWhitespace` set, two tokens that contain
       * no non-whitespace character are considered equal.
       */
      equals: function(left, right) {
        if (this.ignoreWhitespace && !NON_WHITESPACE.test(left) && !NON_WHITESPACE.test(right)) {
          return true;
        } else {
          return left === right;
        }
      },

      /** Concatenates two adjacent token values of the same kind. */
      join: function(left, right) {
        return left + right;
      },

      /** Splits the input into comparable tokens; the default compares characters. */
      tokenize: function(value) {
        return value;
      }
    };

    var CharDiff = new Diff();

    var WordDiff = new Diff(true);
    var WordWithSpaceDiff = new Diff();
    // Split on whitespace runs and word boundaries, keeping the separators.
    WordDiff.tokenize = WordWithSpaceDiff.tokenize = function(value) {
      return removeEmpty(value.split(/(\s+|\b)/));
    };

    var CssDiff = new Diff(true);
    // Split on CSS punctuation and whitespace runs, keeping the separators.
    CssDiff.tokenize = function(value) {
      return removeEmpty(value.split(/([{}:;,]|\s+)/));
    };

    var LineDiff = new Diff();
    // Split into lines, each keeping its own terminator.
    LineDiff.tokenize = function(value) {
      var retLines = [],
          lines = value.split(/^/m);

      for (var i = 0; i < lines.length; i++) {
        var line = lines[i],
            lastLine = lines[i - 1];

        // Merge lines that may contain windows new lines: the multiline split
        // also breaks between "\r" and "\n", so glue a lone "\n" back on.
        if (line === '\n' && lastLine && lastLine[lastLine.length - 1] === '\r') {
          retLines[retLines.length - 1] += '\n';
        } else if (line) {
          retLines.push(line);
        }
      }

      return retLines;
    };

    return {
      Diff: Diff,

      diffChars: function(oldStr, newStr) { return CharDiff.diff(oldStr, newStr); },
      diffWords: function(oldStr, newStr) { return WordDiff.diff(oldStr, newStr); },
      diffWordsWithSpace: function(oldStr, newStr) { return WordWithSpaceDiff.diff(oldStr, newStr); },
      diffLines: function(oldStr, newStr) { return LineDiff.diff(oldStr, newStr); },

      diffCss: function(oldStr, newStr) { return CssDiff.diff(oldStr, newStr); },

      /**
       * Builds a unified-diff style patch (with an "Index:" preamble and up to
       * four lines of context) that turns `oldStr` into `newStr`.
       *
       * @param {string} fileName name written to the Index/---/+++ lines
       * @param {string} oldStr original text
       * @param {string} newStr modified text
       * @param {string} [oldHeader] extra text appended (after a tab) to the --- line
       * @param {string} [newHeader] extra text appended (after a tab) to the +++ line
       * @returns {string} the patch text, terminated by a newline
       */
      createPatch: function(fileName, oldStr, newStr, oldHeader, newHeader) {
        var ret = [];

        ret.push('Index: ' + fileName);
        ret.push('===================================================================');
        ret.push('--- ' + fileName + (typeof oldHeader === 'undefined' ? '' : '\t' + oldHeader));
        ret.push('+++ ' + fileName + (typeof newHeader === 'undefined' ? '' : '\t' + newHeader));

        var diff = LineDiff.diff(oldStr, newStr);
        if (!diff[diff.length-1].value) {
          diff.pop();   // Remove trailing newline add
        }
        diff.push({value: '', lines: []});   // Append an empty value to make cleanup easier

        // Prefixes every line with the unified-diff context marker.
        function contextLines(lines) {
          return lines.map(function(entry) { return ' ' + entry; });
        }
        // Emits the "no newline" marker when `current` is the final chunk of
        // its file (old or new) and does not end with a line terminator.
        function eofNL(curRange, i, current) {
          var last = diff[diff.length-2],
              isLast = i === diff.length-2,
              isLastOfType = i === diff.length-3 && (current.added !== last.added || current.removed !== last.removed);

          // Figure out if this is the last line for the given file and missing NL
          if (!/\n$/.test(current.value) && (isLast || isLastOfType)) {
            curRange.push('\\ No newline at end of file');
          }
        }

        var oldRangeStart = 0, newRangeStart = 0, curRange = [],
            oldLine = 1, newLine = 1;
        for (var i = 0; i < diff.length; i++) {
          var current = diff[i],
              lines = current.lines || current.value.replace(/\n$/, '').split('\n');
          current.lines = lines;

          if (current.added || current.removed) {
            if (!oldRangeStart) {
              // A new hunk starts here; lead in with up to four context lines.
              var prev = diff[i-1];
              oldRangeStart = oldLine;
              newRangeStart = newLine;

              if (prev) {
                curRange = contextLines(prev.lines.slice(-4));
                oldRangeStart -= curRange.length;
                newRangeStart -= curRange.length;
              }
            }
            curRange.push.apply(curRange, lines.map(function(entry) { return (current.added?'+':'-') + entry; }));
            eofNL(curRange, i, current);

            if (current.added) {
              newLine += lines.length;
            } else {
              oldLine += lines.length;
            }
          } else {
            if (oldRangeStart) {
              // Close out any changes that have been output (or join overlapping)
              if (lines.length <= 8 && i < diff.length-2) {
                // Overlapping
                curRange.push.apply(curRange, contextLines(lines));
              } else {
                // end the range and output
                var contextSize = Math.min(lines.length, 4);
                ret.push(
                    '@@ -' + oldRangeStart + ',' + (oldLine-oldRangeStart+contextSize)
                    + ' +' + newRangeStart + ',' + (newLine-newRangeStart+contextSize)
                    + ' @@');
                ret.push.apply(ret, curRange);
                ret.push.apply(ret, contextLines(lines.slice(0, contextSize)));
                if (lines.length <= 4) {
                  eofNL(ret, i, current);
                }

                oldRangeStart = 0;  newRangeStart = 0; curRange = [];
              }
            }
            oldLine += lines.length;
            newLine += lines.length;
          }
        }

        return ret.join('\n') + '\n';
      },

      /**
       * Applies a single-file unified diff to `oldStr`.
       *
       * Everything before the first hunk header is treated as preamble and
       * ignored, so patches with or without the "Index:" block are accepted.
       * Hunk headers may omit a line count, which then defaults to 1.
       *
       * @param {string} oldStr text to patch
       * @param {string} uniDiff unified diff text
       * @returns {string|boolean} the patched text, or false when a hunk header
       *          is malformed or a hunk's old lines do not match `oldStr`.
       */
      applyPatch: function(oldStr, uniDiff) {
        var diffstr = uniDiff.split('\n');
        var hunks = [];       // hunks in the order they appear in the patch
        var hunk = null;      // hunk currently being filled, null while in the preamble

        var remEOFNL = false,
            addEOFNL = false;

        for (var i = 0; i < diffstr.length; i++) {
          var line = diffstr[i],
              marker = line[0];

          if (marker === '@') {
            var header = HUNK_HEADER.exec(line);
            if (!header) {
              return false;
            }
            hunk = {
              // Hunks are applied in order, so by the time this one is reached
              // the text above it already has its new-file line numbers.
              start: parseInt(header[3], 10),
              oldlength: typeof header[2] === 'undefined' ? 1 : parseInt(header[2], 10),
              oldlines: [],
              newlines: []
            };
            hunks.push(hunk);
          } else if (!hunk) {
            // Preamble ("Index:", "---", "+++", ...): nothing to apply.
            continue;
          } else if (marker === '+') {
            hunk.newlines.push(line.substr(1));
          } else if (marker === '-') {
            hunk.oldlines.push(line.substr(1));
          } else if (marker === ' ') {
            hunk.newlines.push(line.substr(1));
            hunk.oldlines.push(line.substr(1));
          } else if (marker === '\\') {
            // "\ No newline at end of file" refers to the line just before it.
            if (diffstr[i-1][0] === '+') {
              remEOFNL = true;
            } else if (diffstr[i-1][0] === '-') {
              addEOFNL = true;
            }
          }
        }

        var str = oldStr.split('\n');
        for (var h = 0; h < hunks.length; h++) {
          var d = hunks[h],
              offset = d.start - 1;
          for (var j = 0; j < d.oldlength; j++) {
            if (str[offset+j] !== d.oldlines[j]) {
              return false;
            }
          }
          Array.prototype.splice.apply(str, [offset, d.oldlength].concat(d.newlines));
        }

        if (remEOFNL) {
          // The length check stops the loop once every (empty) line is gone.
          while (str.length && !str[str.length-1]) {
            str.pop();
          }
        } else if (addEOFNL) {
          str.push('');
        }
        return str.join('\n');
      },

      /**
       * Renders a change list as markup: additions are wrapped in <ins>,
       * removals in <del>, and all values are HTML-escaped.
       */
      convertChangesToXML: function(changes){
        var ret = [];
        for ( var i = 0; i < changes.length; i++) {
          var change = changes[i];
          if (change.added) {
            ret.push('<ins>');
          } else if (change.removed) {
            ret.push('<del>');
          }

          ret.push(escapeHTML(change.value));

          if (change.added) {
            ret.push('</ins>');
          } else if (change.removed) {
            ret.push('</del>');
          }
        }
        return ret.join('');
      },

      // See: http://code.google.com/p/google-diff-match-patch/wiki/API
      /**
       * Converts a change list into [operation, text] pairs where the
       * operation is 1 (added), -1 (removed) or 0 (unchanged).
       */
      convertChangesToDMP: function(changes){
        var ret = [], change;
        for ( var i = 0; i < changes.length; i++) {
          change = changes[i];
          ret.push([(change.added ? 1 : change.removed ? -1 : 0), change.value]);
        }
        return ret;
      }
    };
  })();

  // Export for CommonJS environments; elsewhere JsDiff is left as a plain variable.
  if (typeof module !== 'undefined') {
    module.exports = JsDiff;
  }