Forráskód Böngészése

Drop utf8 extern in favor of pure haxe version

Justin Donaldson 9 éve
szülő
commit
4b1ea0e0f9
2 módosított fájl, 76 hozzáadás és 38 törlés
  1. 0 30
      std/lua/Utf8.hx
  2. 76 8
      std/lua/_std/haxe/Utf8.hx

+ 0 - 30
std/lua/Utf8.hx

@@ -1,30 +0,0 @@
-package lua;
-#if (lua_ver > 5.3)
-@native("_G.utf8")
-#else
-@:luaRequire("lua-utf8")
-#end
-extern class  Utf8 {
-	public static function escape(str:String) : String;
-	public static function charpos(str:String, charpos:Int, offset:Int) : Int; //and Int
-	public static function next(str:String, charpos:Int, offset:Int) : Int; //and Int
-	public static function insert(str:String, idx:Int, substring:String) : String;
-	public static function remove(str:String, start:Int, stop:Int) : String;
-	public static function width(str:String, ambi_is_double:Bool, default_width:Int) : Int;
-	public static function widthindex(str:String, location:Int, ambi_is_double:Bool, default_width:Int) : Int;
-	public static function title(str:String) : String;
-	public static function fold(str:String) : String;
-	public static function nbasecmp(a:String, b:String) : Table<Int,Int>;
-	public static function byte(str:String, pos:Int) : Int;
-	public static function char(codes :haxe.extern.Rest<Int>) : String;
-	public static function len(str:String) : Int;
-	// public static function find() : String;
-	// public static function gmatch() : String;
-	// public static function gsub() : String;
-	// public static function len() : String;
-	// public static function lower() : String;
-	// public static function match() : String;
-	// public static function reverse() : String;
-	public static function sub(str:String, start:Int, end:Int) : String;
-	// public static function upper() : String;
-}

+ 76 - 8
std/lua/_std/haxe/Utf8.hx

@@ -21,6 +21,8 @@
  */
 package haxe;
 
+import lua.NativeStringTools;
+
 /**
   A Lua-specific implementation of Utf8, written directly in Haxe on top of Lua's byte-oriented strings (no external UTF-8 library required).
 **/
@@ -40,7 +42,7 @@ class Utf8 {
 		Add the given UTF8 character code to the buffer.
 	**/
 	public inline function addChar( c : Int ) : Void {
-		__b =  lua.Utf8.insert(__b, lua.Utf8.len(__b)+1, lua.Utf8.char(c));
+		__b += char(c); // char() yields the UTF-8 byte string for code point c; append it to the buffer
 	}
 
 	/**
@@ -54,15 +56,14 @@ class Utf8 {
 		Call the `chars` function for each UTF8 char of the string.
 	**/
 	public static function iter( s : String, chars : Int -> Void ) {
-		for( i in 0...s.length )
-			chars(s.charCodeAt(i));
+		// `s.length` and `s.charCodeAt` address raw bytes here (`length` walks
+		// the same bytes to count characters), so decode one UTF-8 sequence
+		// per step instead of handing individual bytes to the callback.
+		var pos = 0;
+		while( pos < s.length ) {
+			var lead = s.charCodeAt(pos);
+			var width = charWidth(lead);
+			// Unrecognised lead byte or truncated sequence: emit the byte itself
+			// so the walk always advances and never reads past the end.
+			if( width == null || pos + width > s.length ) width = 1;
+			switch( width ) {
+				case 2:
+					chars(((lead & 0x1F) << 6) | (s.charCodeAt(pos+1) & 0x3F));
+				case 3:
+					chars(((lead & 0x0F) << 12) | ((s.charCodeAt(pos+1) & 0x3F) << 6) | (s.charCodeAt(pos+2) & 0x3F));
+				case 4:
+					chars(((lead & 0x07) << 18) | ((s.charCodeAt(pos+1) & 0x3F) << 12) | ((s.charCodeAt(pos+2) & 0x3F) << 6) | (s.charCodeAt(pos+3) & 0x3F));
+				default:
+					chars(lead);
+			}
+			pos += width;
+		}
 	}
 
 	/**
 		Encode the input ISO string into the corresponding UTF8 one.
 	**/
 	public static function encode( s : String ) : String {
-		throw "Not implemented";
+		throw "Unimplemented"; // no implementation on this target: every call throws
 	}
 
 	/**
@@ -70,14 +71,30 @@ class Utf8 {
 		Throw an exception if a given UTF8 character is not supported by the decoder.
 	**/
 	public static function decode( s : String ) : String {
-		throw "Not implemented";
+		throw "Unimplemented"; // no implementation on this target: every call throws
 	}
 
 	/**
 		Similar to `String.charCodeAt` but uses the UTF8 character position.
 	**/
 	public static inline function charCodeAt( s : String, index : Int ) : Int {
-		return lua.Utf8.byte(s,index+1);
+		// Skip `index` whole characters to find the byte offset of the target.
+		// The walk is bounded by the byte length so an out-of-range index ends
+		// at the end of the string instead of feeding null to charWidth.
+		var pos = 0;
+		var skipped = 0;
+		while (skipped < index && pos < s.length){
+			pos += charWidth(s.charCodeAt(pos));
+			skipped++;
+		}
+		// `code` is null when `pos` is past the last byte; width 0 then routes
+		// to the final `null` branch, mirroring String.charCodeAt.
+		var code = s.charCodeAt(pos);
+		var bytes = if (code == null) 0 else charWidth(code);
+		// Returns stay in final position only, so the function remains inlinable.
+		if (bytes == 1){
+			return code;
+		} else if (bytes == 2){
+			// 110xxxxx 10xxxxxx
+			return ((code & 0x1F) << 6) | (s.charCodeAt(pos+1) & 0x3F);
+		} else if (bytes == 3){
+			// 1110xxxx 10xxxxxx 10xxxxxx
+			return ((code & 0x0F) << 12) | (((s.charCodeAt(pos+1) & 0x3F) << 6) | (s.charCodeAt(pos+2) & 0x3F));
+		} else if (bytes == 4){
+			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (code points above U+FFFF)
+			return ((code & 0x07) << 18) | ((s.charCodeAt(pos+1) & 0x3F) << 12) | ((s.charCodeAt(pos+2) & 0x3F) << 6) | (s.charCodeAt(pos+3) & 0x3F);
+		} else {
+			// Out of range, or a byte that is not a valid UTF-8 lead byte.
+			return null;
+		}
 	}
 
 	/**
@@ -91,7 +108,13 @@ class Utf8 {
 		Returns the number of UTF8 chars of the String.
 	**/
 	public static inline function length( s : String ) : Int {
-		return lua.Utf8.len(s);
+		// Hop from lead byte to lead byte; each hop accounts for one character.
+		var count = 0;
+		var offset = 0;
+		while (offset < s.length){
+			offset = offset + charWidth(s.charCodeAt(offset));
+			count += 1;
+		}
+		return count;
 	}
 
 	/**
@@ -105,7 +128,52 @@ class Utf8 {
 		This is similar to `String.substr` but the `pos` and `len` parts are considering UTF8 characters.
 	**/
 	public static inline function sub( s : String, pos : Int, len : Int ) : String {
-		return lua.Utf8.sub(s,pos+1,pos+len);
+		// Translate the character offset `pos` into a byte offset. The walk is
+		// bounded by the byte length so a `pos` past the end yields "" instead
+		// of passing null to charWidth.
+		var startpos = 0;
+		var skipped = 0;
+		while (skipped < pos && startpos < s.length){
+			startpos += charWidth(s.charCodeAt(startpos));
+			skipped++;
+		}
+		// Advance `len` more characters, again stopping at the end of the
+		// string so an oversized `len` returns the remaining tail.
+		var endpos = startpos;
+		var taken = 0;
+		while (taken < len && endpos < s.length){
+			endpos += charWidth(s.charCodeAt(endpos));
+			taken++;
+		}
+		return s.substring(startpos, endpos);
+	}
+
+	/**
+		Returns the length in bytes of the UTF-8 sequence introduced by lead
+		byte `c`: 1 for 0x00-0x7F, 2 for 0xC2-0xDF, 3 for 0xE0-0xEF and 4 for
+		0xF0-0xF4. Any other value (continuation bytes, the overlong lead
+		bytes 0xC0/0xC1, 0xF5 and above) yields null.
+	**/
+	private static function charWidth(c:Int) : Int {
+		// The lower bound is inclusive: byte 0x00 (U+0000) is a valid one-byte sequence.
+		return   if (c >= 0   && c <= 127) 1;
+			else if (c >= 194 && c <= 223) 2;
+			else if (c >= 224 && c <= 239) 3;
+			else if (c >= 240 && c <= 244) 4;
+			else null;
 	}
 
+	/**
+		Encodes one Unicode code point as its UTF-8 byte string (one to four
+		bytes). Throws for values above U+10FFFF.
+	**/
+	private static function char( unicode : Int ) : String {
+		if (unicode > 0x10FFFF) throw 'Unicode greater than U+10FFFF';
+
+		// One byte: the code point is its own encoding.
+		if (unicode <= 0x7F) return String.fromCharCode(unicode);
+
+		// Two bytes: 110xxxxx 10xxxxxx
+		if (unicode <= 0x7FF) {
+			return NativeStringTools.char(
+				0xC0 | (unicode >> 6),
+				0x80 | (unicode & 0x3F));
+		}
+
+		// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+		if (unicode <= 0xFFFF) {
+			return NativeStringTools.char(
+				0xE0 | (unicode >> 12),
+				0x80 | ((unicode >> 6) & 0x3F),
+				0x80 | (unicode & 0x3F));
+		}
+
+		// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		return NativeStringTools.char(
+			0xF0 | (unicode >> 18),
+			0x80 | ((unicode >> 12) & 0x3F),
+			0x80 | ((unicode >> 6) & 0x3F),
+			0x80 | (unicode & 0x3F));
+	}
+	// NOTE(review): 32 code points. The values look like the Unicode mappings
+	// for bytes 0x80-0x9F of Windows-1252 (8364 = U+20AC at index 0, with
+	// 65533 = U+FFFD filling the unassigned slots) - confirm. Nothing in the
+	// visible hunks reads this table.
+	static var iso2uni = [ 8364, 65533, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 65533, 381, 65533, 65533, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 65533, 382, 376];
 }