Browse Source

Restrict `\x` escape sequence to values less than `\x80` (#8141)

* restrict \x to values less than \x80 (closes #8119)

* minor tweak

* again

* disabled test of #7449
Alexander Kuzmenko 6 years ago
parent
commit
9f7428fee2

+ 9 - 6
src/core/ast.ml

@@ -479,7 +479,7 @@ let s_token = function
 	| At -> "@"
 	| Dollar v -> "$" ^ v
 
-exception Invalid_escape_sequence of char * int
+exception Invalid_escape_sequence of char * int * (string option)
 
 let unescape s =
 	let b = Buffer.create 0 in
@@ -488,7 +488,7 @@ let unescape s =
 			()
 		else
 			let c = s.[i] in
-			let fail () = raise (Invalid_escape_sequence(c,i)) in
+			let fail msg = raise (Invalid_escape_sequence(c,i,msg)) in
 			if esc then begin
 				let inext = ref (i + 1) in
 				(match c with
@@ -497,11 +497,14 @@ let unescape s =
 				| 't' -> Buffer.add_char b '\t'
 				| '"' | '\'' | '\\' -> Buffer.add_char b c
 				| '0'..'3' ->
-					let c = (try char_of_int (int_of_string ("0o" ^ String.sub s i 3)) with _ -> fail()) in
+					let c = (try char_of_int (int_of_string ("0o" ^ String.sub s i 3)) with _ -> fail None) in
 					Buffer.add_char b c;
 					inext := !inext + 2;
 				| 'x' ->
-					let u = (try (int_of_string ("0x" ^ String.sub s (i+1) 2)) with _ -> fail()) in
+					let hex = String.sub s (i+1) 2 in
+					let u = (try (int_of_string ("0x" ^ hex)) with _ -> fail None) in
+					if u > 127 then
+						fail (Some ("Values greater than \\x7f are not allowed. Use \\u00" ^ hex ^ " instead."));
 					UTF8.add_uchar b (UChar.uchar_of_int u);
 					inext := !inext + 2;
 				| 'u' ->
@@ -515,12 +518,12 @@ let unescape s =
 							assert (u <= 0x10FFFF);
 							(u, l+2)
 						with _ ->
-							fail()
+							fail None
 					in
 					UTF8.add_uchar b (UChar.uchar_of_int u);
 					inext := !inext + a;
 				| _ ->
-					fail());
+					fail None);
 				loop false !inext;
 			end else
 				match c with

+ 5 - 4
src/syntax/lexer.ml

@@ -28,7 +28,7 @@ type error_msg =
 	| Unterminated_regexp
 	| Unclosed_comment
 	| Unclosed_code
-	| Invalid_escape of char
+	| Invalid_escape of char * (string option)
 	| Invalid_option
 	| Unterminated_markup
 
@@ -47,7 +47,8 @@ let error_msg = function
 	| Unterminated_regexp -> "Unterminated regular expression"
 	| Unclosed_comment -> "Unclosed comment"
 	| Unclosed_code -> "Unclosed code string"
-	| Invalid_escape c -> Printf.sprintf "Invalid escape sequence \\%s" (Char.escaped c)
+	| Invalid_escape (c,None) -> Printf.sprintf "Invalid escape sequence \\%s" (Char.escaped c)
+	| Invalid_escape (c,Some msg) -> Printf.sprintf "Invalid escape sequence \\%s. %s" (Char.escaped c) msg
 	| Invalid_option -> "Invalid regular expression option"
 	| Unterminated_markup -> "Unterminated markup literal"
 
@@ -370,13 +371,13 @@ let rec token lexbuf =
 		reset();
 		let pmin = lexeme_start lexbuf in
 		let pmax = (try string lexbuf with Exit -> error Unterminated_string pmin) in
-		let str = (try unescape (contents()) with Invalid_escape_sequence(c,i) -> error (Invalid_escape c) (pmin + i)) in
+		let str = (try unescape (contents()) with Invalid_escape_sequence(c,i,msg) -> error (Invalid_escape (c,msg)) (pmin + i)) in
 		mk_tok (Const (String str)) pmin pmax;
 	| "'" ->
 		reset();
 		let pmin = lexeme_start lexbuf in
 		let pmax = (try string2 lexbuf with Exit -> error Unterminated_string pmin) in
-		let str = (try unescape (contents()) with Invalid_escape_sequence(c,i) -> error (Invalid_escape c) (pmin + i)) in
+		let str = (try unescape (contents()) with Invalid_escape_sequence(c,i,msg) -> error (Invalid_escape (c,msg)) (pmin + i)) in
 		let t = mk_tok (Const (String str)) pmin pmax in
 		fast_add_fmt_string (snd t);
 		t

+ 5 - 0
tests/misc/projects/Issue8119/Main.hx

@@ -0,0 +1,5 @@
+class Main {
+	static public function main() {
+		"\x80";
+	}
+}

+ 1 - 0
tests/misc/projects/Issue8119/compile-fail.hxml

@@ -0,0 +1 @@
+-main Main

+ 1 - 0
tests/misc/projects/Issue8119/compile-fail.hxml.stderr

@@ -0,0 +1 @@
+Main.hx:3: character 4 : Invalid escape sequence \x. Values greater than \x7f are not allowed. Use \u0080 instead.

+ 8 - 5
tests/unit/src/unit/issues/Issue7449.hx

@@ -1,9 +1,12 @@
 package unit.issues;
 
 class Issue7449 extends unit.Test {
-	#if !(neko || (cpp && !cppia && !hxcpp_smart_strings))
-	function test() {
-		eq(220, "\xDC".charCodeAt(0));
-	}
-	#end
+	/**
+	 * Can no longer happen since https://github.com/HaxeFoundation/haxe/pull/8141 (`\x` escapes above `\x7f` are now a compile-time error).
+	 */
+	// #if !(neko || (cpp && !cppia && !hxcpp_smart_strings))
+	// function test() {
+	// 	eq(220, "\xDC".charCodeAt(0));
+	// }
+	// #end
 }