瀏覽代碼

[jvm] use correct hash implementation

closes #10720
Simon Krajewski 3 年之前
父節點
當前提交
e239cf0fdb
共有 2 個文件被更改，包括 40 次插入和 8 次刪除
  1. 21 8
      src/generators/jvm/jvmMethod.ml
  2. 19 0
      tests/unit/src/unit/issues/Issue10720.hx

+ 21 - 8
src/generators/jvm/jvmMethod.ml

@@ -31,15 +31,28 @@ let rec pow a b = match b with
 	| _ -> Int32.mul a (pow a (b - 1))
 
 (* [java_hash s] folds [s] into an Int32 hash using h := 31 * h + unit,
    starting from zero; Int32 arithmetic wraps on overflow. Judging by the
    name and the commit title it is meant to reproduce the hashCode of
    java.lang.String - confirm against callers.
    Input accepted by UTF8.validate is hashed per code point, where a code
    point above 0xFFFF contributes two surrogate values; when
    UTF8.Malformed_code is raised the string is hashed byte by byte. *)
 let java_hash s =
+	let high_surrogate c = (c lsr 10) + 0xD7C0 in (* 0xD7C0 = 0xD800 - (0x10000 lsr 10): leading surrogate of c *)
+	let low_surrogate c = (c land 0x3FF) lor 0xDC00 in (* low 10 bits of c tagged with 0xDC00: trailing surrogate *)
 	let h = ref Int32.zero in
-	let l = UTF8.length s in
-	let i31 = Int32.of_int 31 in
-	let i = ref 0 in
-	UTF8.iter (fun char ->
-		let char = Int32.of_int (UCharExt.uint_code char) in
-		h := Int32.add !h (Int32.mul char (pow i31 (l - (!i + 1)))); (* removed form: whole code point weighted by 31^(l - i - 1), no surrogate split *)
-		incr i;
-	) s;
+	let thirtyone = Int32.of_int 31 in
+	(try
+		UTF8.validate s; (* done before any accumulation; presumably the source of Malformed_code below - confirm *)
+		UTF8.iter (fun c ->
+			let c = (UCharExt.code c) in
+			if c > 0xFFFF then (* too large for one 16-bit unit: hash both surrogates, leading one first *)
+				(h := Int32.add (Int32.mul thirtyone !h)
+					(Int32.of_int (high_surrogate c));
+				h := Int32.add (Int32.mul thirtyone !h)
+					(Int32.of_int (low_surrogate c)))
+			else
+				h := Int32.add (Int32.mul thirtyone !h)
+					(Int32.of_int c)
+			) s
+	with UTF8.Malformed_code -> (* fall back to hashing the raw bytes of s, one Char.code per step *)
+		String.iter (fun c ->
+			h := Int32.add (Int32.mul thirtyone !h)
+				(Int32.of_int (Char.code c))) s
+	);
 	!h
 
 module HashtblList = struct

+ 19 - 0
tests/unit/src/unit/issues/Issue10720.hx

@@ -0,0 +1,19 @@
+package unit.issues;
+
+class Issue10720 extends unit.Test { // regression test for issue #10720: switch on String with non-ASCII case literals
+	function test() {
+		eq(1, switcheroo("😀 😀")); // U+1F600 lies above 0xFFFF
+		eq(2, switcheroo("名 字")); // both ideographs are below 0x10000
+	}
+
+	function switcheroo(x:String) { // returns 1 or 2 for the two known literals, 0 for any other string
+		return switch (x) { // NOTE(review): presumably the JVM target dispatches this switch through the string hash - confirm
+			case '😀 😀':
+				1;
+			case '名 字':
+				2;
+			case _:
+				0;
+		}
+	}
+}