|
@@ -21,6 +21,8 @@
|
|
|
*/
|
|
|
package haxe;
|
|
|
|
|
|
+import lua.NativeStringTools;
|
|
|
+
|
|
|
/**
|
|
|
A Lua-specific implementation of Utf8, using a helper library.
|
|
|
**/
|
|
@@ -40,7 +42,7 @@ class Utf8 {
|
|
|
Add the given UTF8 character code to the buffer.
|
|
|
**/
|
|
|
public inline function addChar( c : Int ) : Void {
|
|
|
- __b = lua.Utf8.insert(__b, lua.Utf8.len(__b)+1, lua.Utf8.char(c));
|
|
|
+ __b += char(c);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -54,15 +56,14 @@ class Utf8 {
|
|
|
Call the `chars` function for each UTF8 char of the string.
|
|
|
**/
|
|
|
public static function iter( s : String, chars : Int -> Void ) {
|
|
|
- for( i in 0...s.length )
|
|
|
- chars(s.charCodeAt(i));
|
|
|
+ for( i in 0...s.length ) chars(s.charCodeAt(i));
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
Encode the input ISO string into the corresponding UTF8 one.
|
|
|
**/
|
|
|
public static function encode( s : String ) : String {
|
|
|
- throw "Not implemented";
|
|
|
+ throw "Unimplemented";
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -70,14 +71,30 @@ class Utf8 {
|
|
|
Throw an exception if a given UTF8 character is not supported by the decoder.
|
|
|
**/
|
|
|
public static function decode( s : String ) : String {
|
|
|
- throw "Not implemented";
|
|
|
+ throw "Unimplemented";
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
Similar to `String.charCodeAt` but uses the UTF8 character position.
|
|
|
**/
|
|
|
public static inline function charCodeAt( s : String, index : Int ) : Int {
|
|
|
- return lua.Utf8.byte(s,index+1);
|
|
|
+ var cur_idx = 0;
|
|
|
+ var pos = 0;
|
|
|
+ for (i in 0...index){
|
|
|
+ pos += charWidth(s.charCodeAt(pos));
|
|
|
+ }
|
|
|
+ var ret = 0;
|
|
|
+ var code = s.charCodeAt(pos);
|
|
|
+ var bytes = charWidth(code);
|
|
|
+ if (bytes == 1){
|
|
|
+ return code;
|
|
|
+ } else if (bytes == 2){
|
|
|
+ return ((code & 0x1F) << 6) | (s.charCodeAt(pos+1) & 0x3F);
|
|
|
+ } else if (bytes == 3){
|
|
|
+ return ((code & 0x0F) << 12) | (((s.charCodeAt(pos+1) & 0x3F) << 6) | (s.charCodeAt(pos+2) & 0x3F));
|
|
|
+ } else {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -91,7 +108,13 @@ class Utf8 {
|
|
|
Returns the number of UTF8 chars of the String.
|
|
|
**/
|
|
|
public static inline function length( s : String ) : Int {
|
|
|
- return lua.Utf8.len(s);
|
|
|
+ var pos = 0;
|
|
|
+ var len = 0;
|
|
|
+ while (pos < s.length){
|
|
|
+ pos += charWidth(s.charCodeAt(pos));
|
|
|
+ len++;
|
|
|
+ }
|
|
|
+ return len;
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -105,7 +128,52 @@ class Utf8 {
|
|
|
This is similar to `String.substr` but the `pos` and `len` parts are considering UTF8 characters.
|
|
|
**/
|
|
|
public static inline function sub( s : String, pos : Int, len : Int ) : String {
|
|
|
- return lua.Utf8.sub(s,pos+1,pos+len);
|
|
|
+ var startpos = 0;
|
|
|
+ var ret = new StringBuf();
|
|
|
+ for (i in 0...pos){
|
|
|
+ startpos += charWidth(s.charCodeAt(startpos));
|
|
|
+ }
|
|
|
+ var endpos = startpos;
|
|
|
+ for (i in 0...len){
|
|
|
+ endpos += charWidth(s.charCodeAt(endpos));
|
|
|
+ }
|
|
|
+ return s.substring(startpos, endpos);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static function charWidth(c:Int) : Int {
|
|
|
+ return if (c > 0 && c <= 127) 1;
|
|
|
+ else if (c >= 194 && c <= 223) 2;
|
|
|
+ else if (c >= 224 && c <= 239) 3;
|
|
|
+ else if (c >= 240 && c <= 244) 4;
|
|
|
+ else null;
|
|
|
}
|
|
|
|
|
|
+ private static function char( unicode : Int ) : String {
|
|
|
+ if (unicode <= 0x7F) {
|
|
|
+ return String.fromCharCode(unicode);
|
|
|
+ } else if (unicode <= 0x7FF) {
|
|
|
+ var b0 = 0xC0 + Math.floor(unicode / 0x40);
|
|
|
+ var b1 = 0x80 + (unicode % 0x40);
|
|
|
+ return NativeStringTools.char(b0, b1);
|
|
|
+ } else if (unicode <= 0xFFFF) {
|
|
|
+ var b0 = 0xE0 + Math.floor(unicode / 0x1000);
|
|
|
+ var b1 = 0x80 + (Math.floor(unicode / 0x40) % 0x40);
|
|
|
+ var b2 = 0x80 + (unicode % 0x40);
|
|
|
+ return NativeStringTools.char(b0, b1, b2);
|
|
|
+ } else if (unicode <= 0x10FFFF) {
|
|
|
+ var code = unicode;
|
|
|
+ var b3 = 0x80 + (code % 0x40);
|
|
|
+ code = Math.floor(code / 0x40);
|
|
|
+ var b2 = 0x80 + (code % 0x40);
|
|
|
+ code = Math.floor(code / 0x40);
|
|
|
+ var b1 = 0x80 + (code % 0x40);
|
|
|
+ code = Math.floor(code / 0x40);
|
|
|
+ var b0 = 0xF0 + code;
|
|
|
+
|
|
|
+ return NativeStringTools.char(b0, b1, b2, b3);
|
|
|
+ } else {
|
|
|
+ throw 'Unicode greater than U+10FFFF';
|
|
|
+ }
|
|
|
+ }
|
|
|
+ static var iso2uni = [ 8364, 65533, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 65533, 381, 65533, 65533, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 65533, 382, 376];
|
|
|
}
|