Browse Source

added levenshtein spell check (fixed issue #1425)

Simon Krajewski 12 years ago
parent
commit
3c048d4fc7
4 changed files with 58 additions and 3 deletions
  1. 2 0
      common.ml
  2. 5 1
      matcher.ml
  3. 49 0
      typecore.ml
  4. 2 2
      typer.ml

+ 2 - 0
common.ml

@@ -160,6 +160,7 @@ module Define = struct
 		| Interp
 		| JsClassic
 		| JsModern
+		| LevenshteinThreshold
 		| Macro
 		| MacroTimes
 		| MatchDebug
@@ -218,6 +219,7 @@ module Define = struct
 		| Interp -> ("interp","The code is compiled to be run with --interp")
 		| JsClassic -> ("js_classic","Don't use a function wrapper and strict mode in JS output")
 		| JsModern -> ("js_modern","Use function wrapper and strict mode in JS output")
+		| LevenshteinThreshold -> ("levenshtein-threshold","The maximum String distance allowed for spell-check suggestions")
 		| Macro -> ("macro","Defined when we compile code in the macro context")
 		| MacroTimes -> ("macro_times","Display per-macro timing when used with --times")
 		| MatchDebug -> ("match_debug","Show Pattern Matcher log")

+ 5 - 1
matcher.ml

@@ -363,7 +363,11 @@ let to_pattern ctx e t =
 				let tc = monomorphs ctx.type_params (t) in
 				let ec = match follow tc with
 					| TEnum(en,pl) ->
-						let ef = try PMap.find s en.e_constrs with Not_found when not (is_lower_ident s) -> error ("Expected constructor for enum " ^ (s_type_path en.e_path)) p in
+						let ef = try
+							PMap.find s en.e_constrs
+						with Not_found when not (is_lower_ident s) ->
+							error (string_error ctx s en.e_names ("Expected constructor for enum " ^ (s_type_path en.e_path))) p
+						in
 						(match ef.ef_type with
 							| TFun (args,_) ->
 								let msg = Printf.sprintf "Enum constructor %s.%s requires parameters %s"

+ 49 - 0
typecore.ml

@@ -298,6 +298,55 @@ let create_fake_module ctx file =
 	Hashtbl.replace ctx.g.modules mdep.m_path mdep;
 	mdep
 
+(* Source: http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#OCaml *)
+let levenshtein a b =
+	let x = Array.init (String.length a) (fun i -> a.[i]) in
+	let y = Array.init (String.length b) (fun i -> b.[i]) in
+	let minimum (x:int) y z =
+		let m' (a:int) b = if a < b then a else b in
+		m' (m' x y) z
+	in
+	let init_matrix n m =
+		let init_col = Array.init m in
+			Array.init n (function
+			| 0 -> init_col (function j -> j)
+			| i -> init_col (function 0 -> i | _ -> 0)
+		)
+	in
+	match Array.length x, Array.length y with
+		| 0, n -> n
+		| m, 0 -> m
+		| m, n ->
+			let matrix = init_matrix (m + 1) (n + 1) in
+			for i = 1 to m do
+				let s = matrix.(i) and t = matrix.(i - 1) in
+				for j = 1 to n do
+					let cost = abs (compare x.(i - 1) y.(j - 1)) in
+					s.(j) <- minimum (t.(j) + 1) (s.(j - 1) + 1) (t.(j - 1) + cost)
+				done
+			done;
+	matrix.(m).(n)
+
+let string_error ctx s sl msg =
+	let cl = List.map (fun s2 -> s2,levenshtein s s2) sl in
+	let cl = List.sort (fun (_,c1) (_,c2) -> compare c1 c2) cl in
+	let threshold = ref (try (int_of_string (Common.defined_value ctx.com Define.LevenshteinThreshold)) with
+		| Not_found -> 3
+		| _ -> error "Int expected for levenshtein-threshold=x" Ast.null_pos)
+	in
+	let last_threshold = ref 0 in
+	let rec loop sl = match sl with
+		| (s,i) :: sl when i <= !threshold || i = !last_threshold ->
+			last_threshold := i;
+			threshold := !threshold - 1;
+			s :: loop sl
+		| _ -> []
+	in
+	match loop cl with
+		| [] -> msg
+		| [s] -> Printf.sprintf "%s (Suggestion: %s)" msg s
+		| sl -> Printf.sprintf "%s (Suggestions: %s)" msg (String.concat ", " sl)
+
 (* -------------- debug functions to activate when debugging typer passes ------------------------------- *)
 (*/*
 

+ 2 - 2
typer.ml

@@ -1917,7 +1917,7 @@ and type_expr ctx (e,p) (with_type:with_type) =
 						mk (fast_enum_field e ef p) (apply_params e.e_types pl ef.ef_type) p
 					with Not_found ->
 						if ctx.untyped then raise Not_found;
-						with_type_error ctx ("Identifier '" ^ s ^ "' is not part of enum " ^ s_type_path e.e_path) p;
+						with_type_error ctx (string_error ctx s e.e_names ("Identifier '" ^ s ^ "' is not part of enum " ^ s_type_path e.e_path)) p;
 						mk (TConst TNull) t p)
 				| _ -> raise Not_found)
 			| _ ->
@@ -2320,7 +2320,7 @@ and type_expr ctx (e,p) (with_type:with_type) =
 				build_call ctx (AKExpr constr) el (WithType t) p
 			with Not_found ->
 				if ctx.untyped then raise Exit; (* __js__, etc. *)
-				with_type_error ctx ("Identifier '" ^ s ^ "' is not part of enum " ^ s_type_path e.e_path) p;
+				with_type_error ctx (string_error ctx s e.e_names "Identifier '" ^ s ^ "' is not part of enum " ^ s_type_path e.e_path) p;
 				mk (TConst TNull) t p
 		with Exit ->
 			type_call ctx e el with_type p)