浏览代码

Revert "removed haxe.Utf8; added neko.Utf8"

This reverts commit 018b395a5c789a34ba7067002b3e508ade701728.
Aleksandr Kuzmenko 6 年之前
父节点
当前提交
32152bd809

+ 87 - 0
std/cpp/_std/haxe/Utf8.hx

@@ -0,0 +1,87 @@
+/*
+ * Copyright (C)2005-2019 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+package haxe;
+
+using cpp.NativeString;
+
+@:coreApi
+class Utf8 // C++ target version; the utf8* calls below presumably resolve through `using cpp.NativeString` - TODO confirm
+{
+   var __s:Array<Int>; // values pushed by addChar; handed to the native conversion in toString
+
+	public function new( ?size : Null<Int> ) : Void {
+      __s = new Array<Int>();
+      if (size!=null && size>0) // only reserve capacity when a positive size was supplied
+         cpp.NativeArray.reserve(__s,size);
+	}
+
+	public function addChar( c : Int ) : Void {
+      __s.push(c);
+	}
+
+	public function toString() : String {
+		return untyped __global__.__hxcpp_char_array_to_utf8_string(__s);
+	}
+
+   // Input: a string treated as an array of bytes that may contain invalid UTF-8.
+   // Output: the same string with those bytes expanded into UTF-8 sequences.
+	public static function encode( s : String ) : String {
+		return untyped __global__.__hxcpp_char_bytes_to_utf8_string(s);
+	}
+
+   // Input: a string treated as an array of bytes representing valid UTF-8 characters.
+   // Output: a string containing the compressed bytes.
+	public static function decode( s : String ) : String {
+		return untyped __global__.__hxcpp_utf8_string_to_char_bytes(s);
+	}
+
+	public #if !cppia inline #end static function iter( s : String, chars : Int -> Void ) : Void { // `inline` is dropped when `cppia` is defined
+      var src = s.c_str();
+      var end = src.add( s.length ); // stop marker: s.length elements past the start
+
+      while(src.lt(end)) // NOTE(review): relies on utf8DecodeAdvance moving src.ptr forward - confirm
+         chars(src.ptr.utf8DecodeAdvance());
+	}
+
+	public static function charCodeAt( s : String, index : Int ) : Int {
+      return s.utf8CharCodeAt(index);
+	}
+
+	public static function validate( s : String ) : Bool {
+      return s.utf8IsValid();
+	}
+
+	public static function length( s : String ) : Int {
+      return s.utf8Length();
+	}
+
+	public static function compare( a : String, b : String ) : Int {
+      return a.compare(b);
+	}
+
+	public static function sub( s : String, pos : Int, len : Int ) : String {
+      return s.utf8Sub(pos,len);
+	}
+
+}
+
+

+ 37 - 0
std/eval/_std/haxe/Utf8.hx

@@ -0,0 +1,37 @@
+/*
+ * Copyright (C)2005-2019 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+package haxe;
+
+@:coreApi
+extern class Utf8 { // extern: signatures only, no method bodies are defined in this file
+	public function new( ?size : Int ):Void;
+	public function addChar( c : Int ) : Void;
+	public function toString() : String;
+	public static function iter( s : String, chars : Int -> Void ):Void;
+	public static function encode( s : String ) : String;
+	public static function decode( s : String ) : String;
+	public static function charCodeAt( s : String, index : Int ) : Int;
+	public static function validate( s : String ) : Bool;
+	public static function length( s : String ) : Int;
+	public static function compare( a : String, b : String ) : Int;
+	public static function sub( s : String, pos : Int, len : Int ) : String;
+}

+ 112 - 0
std/haxe/Utf8.hx

@@ -0,0 +1,112 @@
+/*
+ * Copyright (C)2005-2019 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+package haxe;
+
+/**
+	Since not all platforms guarantee that `String` always uses UTF-8 encoding, you
+	can use this cross-platform API to perform operations on such strings.
+**/
+class Utf8 {
+
+	var __b : String; // accumulated output: grown by addChar, returned by toString
+
+	/**
+		Allocate a new, empty Utf8 buffer. The optional `size` is accepted but not used by this implementation.
+	**/
+	public function new( ?size : Int ) {
+		__b = "";
+	}
+
+	/**
+		Append the character with code `c` to the buffer, using `String.fromCharCode`.
+	**/
+	public inline function addChar( c : Int ) : Void {
+		__b += String.fromCharCode(c);
+	}
+
+	/**
+		Returns the buffer contents as a String.
+	**/
+	public inline function toString() : String {
+		return __b;
+	}
+
+	/**
+		Call `chars` for each UTF8 char of the string; here that is `s.charCodeAt(i)` for every `i` in `0...s.length`.
+	**/
+	public static function iter( s : String, chars : Int -> Void ) {
+		for( i in 0...s.length )
+			chars(s.charCodeAt(i));
+	}
+
+	/**
+		Encode the input ISO string into the corresponding UTF8 one. This implementation always throws "Not implemented".
+	**/
+	public static function encode( s : String ) : String {
+		throw "Not implemented";
+	}
+
+	/**
+		Decode a UTF8 string back to an ISO string. This implementation always throws "Not implemented".
+		Other implementations throw an exception if a given UTF8 character is not supported by the decoder.
+	**/
+	public static function decode( s : String ) : String {
+		throw "Not implemented";
+	}
+
+	/**
+		Similar to `String.charCodeAt` but uses the UTF8 character position. This implementation forwards to `s.charCodeAt(index)`.
+	**/
+	public static inline function charCodeAt( s : String, index : Int ) : Int {
+		return s.charCodeAt(index);
+	}
+
+	/**
+		Tells if the String is correctly encoded as UTF8. This implementation performs no check and always returns `true`.
+	**/
+	public static inline function validate( s : String ) : Bool {
+		return true;
+	}
+
+	/**
+		Returns the number of UTF8 chars of the String. This implementation returns `s.length`.
+	**/
+	#if js extern #end
+	public static inline function length( s : String ) : Int {
+		return s.length;
+	}
+
+	/**
+		Compare two UTF8 strings using the `>` and `==` operators: returns 1 if `a > b`, 0 if `a == b`, otherwise -1.
+	**/
+	public static function compare( a : String, b : String ) : Int {
+		return a > b ? 1 : (a == b ? 0 : -1);
+	}
+
+	/**
+		Similar to `String.substr`, but `pos` and `len` count UTF8 characters. This implementation forwards to `s.substr(pos,len)`.
+	**/
+	public static inline function sub( s : String, pos : Int, len : Int ) : String {
+		return s.substr(pos,len);
+	}
+
+}

+ 5 - 5
std/haxe/format/JsonPrinter.hx

@@ -185,8 +185,8 @@ class JsonPrinter {
 	}
 
 	function quote( s : String ) {
-		#if neko
-		if( s.length != neko.Utf8.length(s) ) {
+		#if (neko || php || cpp)
+		if( s.length != haxe.Utf8.length(s) ) {
 			quoteUtf8(s);
 			return;
 		}
@@ -236,10 +236,10 @@ class JsonPrinter {
 		addChar('"'.code);
 	}
 
-	#if neko
+	#if (neko || php || cpp)
 	function quoteUtf8( s : String ) {
-		var u = new neko.Utf8();
-		neko.Utf8.iter(s,function(c) {
+		var u = new haxe.Utf8();
+		haxe.Utf8.iter(s,function(c) {
 			switch( c ) {
 			case '\\'.code, '"'.code: u.addChar('\\'.code); u.addChar(c);
 			case '\n'.code: u.addChar('\\'.code); u.addChar('n'.code);

+ 1 - 1
std/neko/Utf8.hx → std/neko/_std/haxe/Utf8.hx

@@ -19,7 +19,7 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-package neko;
+package haxe;
 
 @:coreApi
 class Utf8 {

+ 88 - 0
std/php/_std/haxe/Utf8.hx

@@ -0,0 +1,88 @@
+/*
+ * Copyright (C)2005-2019 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+package haxe;
+
+import php.Global;
+
+@:coreApi
+class Utf8 {
+
+	var __b : String; // text accumulated by addChar and returned by toString
+
+	public function new( ?size : Int ) : Void { // `size` is accepted but not used in this body
+		__b = '';
+	}
+
+	public function addChar( c : Int ) : Void {
+		__b += uchr(c); // append the UTF-8 text that uchr produces for code `c`
+	}
+
+	public function toString() : String {
+		return __b;
+	}
+
+	public static function encode( s : String ) : String {
+		return Global.utf8_encode(s); // direct call to PHP utf8_encode
+	}
+
+	public static function decode( s : String ) : String {
+		return Global.utf8_decode(s); // direct call to PHP utf8_decode
+	}
+
+	public static function iter(s : String, chars : Int -> Void ) : Void {
+		var len = length(s); // mb_strlen count for UTF-8, computed once before the loop
+		for(i in 0...len) {
+			chars(charCodeAt(s, i)); // every lookup goes through sub() and uord()
+		}
+	}
+
+	public static function charCodeAt( s : String, index : Int ) : Int {
+		return uord(sub(s, index, 1)); // take the one character at `index`, then convert it to its code
+	}
+
+	static function uchr(i : Int) : String { // character code -> UTF-8 text
+		return Global.mb_convert_encoding(Global.pack('N', i), 'UTF-8', 'UCS-4BE'); // pack with format 'N', then convert UCS-4BE to UTF-8
+	}
+
+	static function uord(s : String) : Int { // UTF-8 text -> character code (opposite direction of uchr)
+		var c = Global.unpack('N', Global.mb_convert_encoding(s, 'UCS-4BE', 'UTF-8'));
+		return c[1]; // the unpacked value is read from key 1 of the result
+	}
+
+	public static function validate( s : String ) : Bool {
+		return Global.mb_check_encoding(s, enc);
+	}
+
+	public static function length( s : String ) : Int {
+		return Global.mb_strlen(s, enc);
+	}
+
+	public static function compare( a : String, b : String ) : Int {
+		return Global.strcmp(a, b);
+	}
+
+	public static function sub( s : String, pos : Int, len : Int ) : String {
+		return Global.mb_substr(s, pos, len, enc);
+	}
+
+	private static inline var enc = "UTF-8"; // encoding name passed to the mb_* calls in validate, length and sub
+}

+ 56 - 0
tests/unit/src/unitstd/haxe/Utf8.unit.hx

@@ -0,0 +1,56 @@
+#if false
+// Tests using characters outside the BMP; disabled for now (to be re-enabled once such characters are supported).
+var str = "あ𠀀い";
+haxe.Utf8.length(str) == 3;
+haxe.Utf8.charCodeAt(str, 0) == 0x3042;
+haxe.Utf8.charCodeAt(str, 1) == 0x20000;
+haxe.Utf8.charCodeAt(str, 2) == 0x3044;
+var buf = new haxe.Utf8();
+buf.addChar(0x3042);
+buf.addChar(0x20000);
+buf.addChar(0x3044);
+buf.toString() == str;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 3), str) == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 2), "あ𠀀") == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 2), "𠀀い") == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 0), "") == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 0), "") == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 9, 0), "") == 0;
+#end
+
+
+// The same tests restricted to BMP characters (i.e. only UCS2 compliance is checked).
+var str = "あéい";
+haxe.Utf8.length(str) == 3;
+haxe.Utf8.charCodeAt(str, 0) == 0x3042;
+haxe.Utf8.charCodeAt(str, 1) == 0xE9;
+haxe.Utf8.charCodeAt(str, 2) == 0x3044;
+var big = new haxe.Utf8(10);
+big.toString().length == 0;
+var buf = new haxe.Utf8();
+buf.addChar(0x3042);
+buf.addChar(0xE9);
+buf.addChar(0x3044);
+buf.toString() == str;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 3), str) == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 2), "あé") == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 2), "éい") == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 0, 0), "") == 0;
+haxe.Utf8.compare(haxe.Utf8.sub(str, 1, 0), "") == 0;
+
+// The result of Utf8.sub with an out-of-range position is unspecified, so this case is left disabled:
+// haxe.Utf8.compare(haxe.Utf8.sub(str, 9, 0), "") == 0;
+
+// Intended condition once the other targets are covered: #if (neko || php || cpp || lua || macro)
+// TODO: enable the validate tests for neko, cpp and macro as well
+#if php
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("f0a9b8bde38182c3ab61").toString()) == true;
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("ed9fbf").toString()) == true;
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("ee8080").toString()) == true;
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("f48fbfbf").toString()) == true;
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("f0a9b8bde381c3ab61").toString()) == false;
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("c0af").toString()) == false; // overlong (non-shortest-form) sequence
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("eda080").toString()) == false; // surrogate byte sequence
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("edbfbf").toString()) == false; // surrogate byte sequence
+haxe.Utf8.validate(haxe.io.Bytes.ofHex("f4908080").toString()) == false; // U+110000
+#end