Browse Source

- added Diff utility

Laurent Bedubourg 19 years ago
parent
commit
66735db148
1 changed files with 363 additions and 0 deletions
  1. 363 0
      std/mtwin/text/Diff.hx

+ 363 - 0
std/mtwin/text/Diff.hx

@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2006, Motion-Twin
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MOTION-TWIN "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+package mtwin.text;
+
+typedef LcsMatrix = Array<Array<Int>>
+
+/**
+	An haxe implementation of Diff/Patch/Unpatch.
+
+	The diff text format is the same as the gnu diff utility format.
+
+	Usage:
+
+	var myDiff = Diff.diff(sourceString, modifiedString);
+	var modified = Diff.patch(sourceString, myDiff); // modified == modifiedString
+	var origin = Diff.unpatch(modified, myDiff); // origin == sourceString
+**/
+class Diff {
+
+	static var END = 0;
+	static var DOWN = 1;
+	static var RIGHT = 2;
+	static var DOWN_RIGHT = 3;
+
+	/**
+		This class uses a longest common subsequence algorithm which computes the distance between 
+		two array of strings and creates an optimal modification cursor.
+
+		The cursor is stored in a int matrix and starts at index 0:0.
+
+		The matrix is filled of END, DOWN, DOWN_RIGHT, RIGHT movements.
+
+		DOWN => the src line must be removed
+		DOWN_RIGHT => lines are equals, no operation required
+		RIGHT => the dst line must be inserted
+		END => end of files reached, if END is reached before [m,n], some lines must be removed or inserted 
+	**/
+	static function longestCommonSubsequence( src:Array<String>, dst:Array<String> ) : LcsMatrix {
+		var m = src.length;
+		var n = dst.length;
+		var cursor = new Array();
+		var c = new Array();
+		for (i in 0...m+1) {
+			cursor[i] = new Array(); 
+			cursor[i][n] = 0;
+			c[i] = new Array(); 
+			c[i][n] = 0; 
+		}
+		for (j in 0...n+1) {
+			cursor[m][j] = 0; 
+			c[m][j] = 0;
+		}
+		var i = m-1;
+		while (i >= 0){
+			var j = n-1;
+			while (j >= 0){
+				if (src[i] == dst[j]){
+					c[i][j] = c[i+1][j+1]+1;
+					cursor[i][j] = DOWN_RIGHT;
+				}
+				else if (c[i+1][j] >= c[i][j+1]){
+					c[i][j] = c[i+1][j];
+					cursor[i][j] = DOWN;
+				}
+				else {
+					c[i][j] = c[i][j+1];
+					cursor[i][j] = RIGHT;
+				}
+				j--;
+			}
+			i--;
+		}
+		return  cursor;
+	}
+
+	/**
+		Returns an optimized cursor.
+
+		Compacts the cursor and transform it from a sequential operation cursor into a vector of operations.
+	**/
+	static function lcsToStack( c:LcsMatrix ) : List<Array<Int>> {
+		var stack = new List();
+		var i = 0;
+		var w = c.length-1;
+		var j = 0;
+		var h = c[0].length-1;
+		var prev = -1;
+		while (true){
+			// direction changes, store this position
+			if (c[i][j] != prev)
+				stack.add([i,j]); 
+			prev = c[i][j];
+			switch (c[i][j]){
+				case DOWN: i++;
+				case RIGHT: j++;
+				case DOWN_RIGHT: i++; j++; 
+				case END: break;
+				default: throw "Unknown "+c[i][j]+" at "+i+":"+j; i=0; j=0;
+			}
+		}
+		return stack;
+	}
+
+	/**
+		Returns the difference between two strings.
+
+		The returned string may be used by the gnu diff/patch utilities.
+	**/
+	public static function diff( source:String, dest:String ) : String {
+		var sourceLines = source.split("\n");
+		var destLines = dest.split("\n");
+
+		var m = sourceLines.length;
+		var n = destLines.length;
+		
+		var lcs = longestCommonSubsequence(sourceLines, destLines);
+		var stack = lcsToStack(lcs);
+
+		var cursorKind = function(p){
+			return lcs[p[0]][p[1]]; 
+		}
+
+		var operationAdd = function(after:Int, from:Int, to:Int){
+			var op = new StringBuf();
+			op.add(after); 
+			op.add("a"); 
+			if (from == to) op.add(from) else { op.add(from); op.add(","); op.add(to); }
+			op.add("\n");
+			for (i in (from-1)...to){
+				op.add("> "); 
+				op.add(destLines[i]); 
+				op.add("\n");
+			}
+			return op.toString();
+		}
+
+		var operationDel = function(from:Int, to:Int, dest:Int){
+			var op = new StringBuf();
+			if (from == to) op.add(from) else { op.add(from); op.add(","); op.add(to); }
+			op.add("d"); op.add(dest); op.add("\n");
+			for (i in (from-1)...to){
+				op.add("< ");
+				op.add(sourceLines[i]);
+				op.add("\n");
+			}
+			return op.toString();
+		}
+
+		var operationUpd = function(from:Int, to:Int, byFrom:Int, byTo:Int){
+			var op = new StringBuf();
+			if (from == to) op.add(from) else { op.add(from); op.add(","); op.add(to); }
+			op.add("c");
+			if (byFrom == byTo) op.add(byFrom) else { op.add(byFrom); op.add(","); op.add(byTo); }
+			op.add("\n");
+			for (i in (from-1)...to){
+				op.add("< ");
+				op.add(sourceLines[i]);
+				op.add("\n");
+			}
+			op.add("---\n");
+			for (i in (byFrom-1)...byTo){
+				op.add("> ");
+				op.add(destLines[i]);
+				op.add("\n");
+			}
+			return op.toString();
+		}
+
+		var result = new StringBuf();
+		while (stack.length > 0){
+			var pos = stack.pop();
+			switch (cursorKind(pos)){
+				case DOWN_RIGHT:
+
+				case DOWN:
+					var end = stack.pop(); 
+					if (cursorKind(end) == RIGHT){	
+						var del = stack.pop(); stack.push(del);
+						result.add(operationUpd(pos[0]+1, end[0], pos[1]+1, del[1]));
+					}
+					else {
+						stack.push(end);
+						result.add(operationDel(pos[0]+1, end[0], end[1]));
+					}
+
+				case RIGHT:
+					var end = stack.pop(); stack.push(end);
+					result.add(operationAdd(pos[0], end[1], pos[1]+1));
+
+				case END:
+					if (pos[0] < m){
+						result.add(operationDel(pos[0], m-1, n-1));
+					}
+					if (pos[1] < n){
+						result.add(operationAdd(m-1, pos[1], n-1));
+					}
+			}
+		}
+		return result.toString();
+	}
+
+	/**
+		Creates a patch operation structure.
+	**/
+	static function parsePatchOp( left:String, op:String, right:String ): {op:String, left:Array<Int>, right:Array<Int>, data:Array<String>}{
+		var l = Lambda.amap(left.split(","), function(v){ return Std.parseInt(v); });
+		if (l.length == 1) l.push(l[0]);
+		var r = Lambda.amap(right.split(","), function(v){ return Std.parseInt(v); });
+		if (r.length == 1) r.push(r[0]);
+		return { op:op, left:l, right:r, data:[] };
+	}
+
+	/**
+		Returns the patched string resulting of the application of the patch data on src.
+	**/
+	public static function patch( src:String, patch:String ) : String {
+		var opRegexp = ~/^([0-9,]+)([adc])([0-9,]+)$/;
+		var lines = src.split("\n");
+		var counter = 0;
+		var result = new StringBuf();
+		var patchLines = patch.split("\n");
+		while (patchLines.length > 0){
+			var patchLine = patchLines.shift();
+			if (opRegexp.match(patchLine)){
+				var op = parsePatchOp(opRegexp.matched(1), opRegexp.matched(2), opRegexp.matched(3));
+				var tmp = patchLines.shift();
+				while (tmp != "" && tmp != null && !opRegexp.match(tmp)){
+					op.data.push(tmp);
+					tmp = patchLines.shift();
+				}
+				if (tmp != null){
+					patchLines.unshift(tmp);
+				}
+				switch (op.op){
+					case "a":
+						while (counter < op.left[0]){
+							result.add(lines[counter]); result.add("\n");
+							counter++;
+						}
+						for (line in op.data){
+							result.add(line.substr(2, line.length)); result.add("\n");
+						}
+
+					case "d":
+						while (counter < op.left[0]-1){
+							result.add(lines[counter]); result.add("\n");
+							counter++;
+						}
+						counter = op.left[1];
+
+					case "c":
+						while (counter < op.left[0]-1){
+							result.add(lines[counter]); result.add("\n");
+							counter++;
+						}
+						for (line in op.data){
+							if (line.substr(0,2) == "> "){
+								result.add(line.substr(2, line.length)); result.add("\n");
+							}
+						}
+						counter = op.left[1];
+				}
+			}
+		}
+		for (i in counter...lines.length-1){
+			result.add(lines[i]); result.add("\n");
+		}
+		return result.toString();
+	}
+
+	/**
+		Returns the unpatched string resulting of the cancelation of the patch applied to src.
+	**/
+	public static function unpatch( src:String, patch:String ){
+		var opRegexp = ~/^([0-9,]+)([adc])([0-9,]+)$/;
+		var lines = src.split("\n");
+		var counter = 0;
+		var result = new StringBuf();
+		var patchLines = patch.split("\n");
+		while (patchLines.length > 0){
+			var patchLine = patchLines.shift();
+			if (opRegexp.match(patchLine)){
+				var op = parsePatchOp(opRegexp.matched(1), opRegexp.matched(2), opRegexp.matched(3));
+				var tmp = patchLines.shift();
+				while (tmp != "" && tmp != null && !opRegexp.match(tmp)){
+					op.data.push(tmp);
+					tmp = patchLines.shift();
+				}
+				if (tmp != null){
+					patchLines.unshift(tmp);
+				}
+				switch (op.op){
+					case "a": // unpatch => delete
+						while (counter < op.right[0]-1){
+							result.add(lines[counter]); result.add("\n");
+							counter++;
+						}
+						counter = op.right[1];
+
+					case "d": // unpatch => add
+						while (counter < op.right[0]){
+							result.add(lines[counter]); result.add("\n");
+							counter++;
+						}
+						for (line in op.data){
+							result.add(line.substr(2, line.length)); result.add("\n");
+						}
+
+					case "c": // unpatch => change reverse
+						while (counter < op.right[0]-1){
+							result.add(lines[counter]); result.add("\n");
+							counter++;
+						}
+						for (line in op.data){
+							if (line.substr(0,2) == "< "){
+								result.add(line.substr(2, line.length)); result.add("\n");
+							}
+						}
+						counter = op.right[1];
+				}
+			}
+		}
+		for (i in counter...lines.length-1){
+			result.add(lines[i]); result.add("\n");
+		}
+		return result.toString();
+	}
+
+	public static function main(){
+		var src = neko.File.getContent("src.txt");
+		var dst = neko.File.getContent("dst.txt");
+		var diff = diff(src,dst);
+		neko.Lib.print(diff);
+		var patched = patch(src, diff);
+		if (patched != dst) throw "Patch failed";
+		var unpatched = unpatch(patched, diff);
+		if (unpatched != src) throw "Unpatch failed";
+		neko.Lib.print("diff/patch/unpatch success");
+	}
+}