Sfoglia il codice sorgente

[lua] remove Utf8 documentation (problem with documentation merging) [ci-skip]

Simon Krajewski 9 anni fa
parent
commit
a550642e6f
1 file modificato con 165 aggiunte e 209 eliminazioni
  1. 165 209
      std/lua/_std/haxe/Utf8.hx

+ 165 - 209
std/lua/_std/haxe/Utf8.hx

@@ -23,263 +23,219 @@ package haxe;
 
 import lua.NativeStringTools;
 
-/**
-  A Lua-specific implementation of Utf8, using a helper library.
- **/
-
 class Utf8 {
 
     var __b : String;
 
-    /**
-      Allocate a new Utf8 buffer using an optional bytes size.
-     **/
     public function new( ?size : Int ) {
-	__b = "";
+		__b = "";
     }
 
-    /**
-      Add the given UTF8 character code to the buffer.
-     **/
     public inline function addChar( c : Int ) : Void {
-	__b += char(c);
+		__b += char(c);
     }
 
-    /**
-      Returns the buffer converted to a String;
-     **/
     public inline function toString() : String {
-	return __b;
+		return __b;
     }
 
-    /**
-      Call the `chars` function for each UTF8 char of the string.
-     **/
     public static function iter( s : String, chars : Int -> Void ) {
-	var cur = 0;
-	while (cur < s.length){
-	    var code = s.charCodeAt(cur);
-	    var width = charWidth(code);
-	    var l = (code << 6)  | s.charCodeAt(cur+1);
-	    trace(l + " is the value for l");
-	    switch(width){
-		case 1 : chars(code);
-		case 2 : chars((code << 6)  | s.charCodeAt(cur+1));
-		case 3 : chars((code << 12) | (s.charCodeAt(cur+1) << 6) | s.charCodeAt(cur+2));
-	    }
-	    cur += width;
-	}
+		var cur = 0;
+		while (cur < s.length){
+			var code = s.charCodeAt(cur);
+			var width = charWidth(code);
+			var l = (code << 6)  | s.charCodeAt(cur+1);
+			trace(l + " is the value for l");
+			switch(width){
+			case 1 : chars(code);
+			case 2 : chars((code << 6)  | s.charCodeAt(cur+1));
+			case 3 : chars((code << 12) | (s.charCodeAt(cur+1) << 6) | s.charCodeAt(cur+2));
+			}
+			cur += width;
+		}
     }
 
-    /**
-      Encode the input ISO string into the corresponding UTF8 one.
-     **/
     public static function encode( s : String ) : String {
-	// ported from : http://phpjs.org/functions/utf8_encode/
-	if (s == null ) {
-	    return '';
-	}
-	var string = (s + ''); // .replace(/\r\n/g, "\n").replace(/\r/g, "\n");
-	var utftext = '';
-	var start = 0;
-	var end = 0;
-	var n = 0;
-	while (n < s.length) {
-	    var c1 = string.charCodeAt(n);
-	    var enc = null;
-
-	    if (c1 < 128) {
-		end++;
-	    } else if (c1 > 127 && c1 < 2048) {
-		enc = String.fromCharCode( (c1 >> 6) | 192) 
-		    + String.fromCharCode( (c1 & 63) | 128);
-	    } else if ((c1 & 0xF800) != 0xD800) {
-		enc = String.fromCharCode( (c1 >> 12) | 224)
-		    + String.fromCharCode( ((c1 >> 6) & 63) | 128)
-		    + String.fromCharCode( (c1 & 63) | 128);
-	    } else { // surrogate pairs
-		if ((c1 & 0xFC00) != 0xD800) {
-		    throw 'Unmatched trail surrogate at ' + n;
+		// ported from : http://phpjs.org/functions/utf8_encode/
+		if (s == null ) {
+			return '';
 		}
-		var c2 = string.charCodeAt(++n);
-		if ((c2 & 0xFC00) != 0xDC00) {
-		    throw 'Unmatched lead surrogate at ' + (n - 1);
+		var string = (s + ''); // .replace(/\r\n/g, "\n").replace(/\r/g, "\n");
+		var utftext = '';
+		var start = 0;
+		var end = 0;
+		var n = 0;
+		while (n < s.length) {
+			var c1 = string.charCodeAt(n);
+			var enc = null;
+
+			if (c1 < 128) {
+			end++;
+			} else if (c1 > 127 && c1 < 2048) {
+			enc = String.fromCharCode( (c1 >> 6) | 192)
+				+ String.fromCharCode( (c1 & 63) | 128);
+			} else if ((c1 & 0xF800) != 0xD800) {
+			enc = String.fromCharCode( (c1 >> 12) | 224)
+				+ String.fromCharCode( ((c1 >> 6) & 63) | 128)
+				+ String.fromCharCode( (c1 & 63) | 128);
+			} else { // surrogate pairs
+			if ((c1 & 0xFC00) != 0xD800) {
+				throw 'Unmatched trail surrogate at ' + n;
+			}
+			var c2 = string.charCodeAt(++n);
+			if ((c2 & 0xFC00) != 0xDC00) {
+				throw 'Unmatched lead surrogate at ' + (n - 1);
+			}
+			c1 = ((c1 & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000;
+			enc = String.fromCharCode( (c1 >> 18) | 240)
+				+ String.fromCharCode( ((c1 >> 12) & 63) | 128)
+				+ String.fromCharCode(((c1 >> 6) & 63) | 128)
+				+ String.fromCharCode((c1 & 63) | 128);
+			}
+			if (enc != null) {
+			if (end > start) {
+				utftext += string.substring(start, end);
+			}
+			utftext += enc;
+			start = end = n + 1;
+			}
+			n++;
 		}
-		c1 = ((c1 & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000;
-		enc = String.fromCharCode( (c1 >> 18) | 240)
-		    + String.fromCharCode( ((c1 >> 12) & 63) | 128)
-		    + String.fromCharCode(((c1 >> 6) & 63) | 128)
-		    + String.fromCharCode((c1 & 63) | 128);
-	    }
-	    if (enc != null) {
+
 		if (end > start) {
-		    utftext += string.substring(start, end);
+			utftext += string.substring(start, s.length);
 		}
-		utftext += enc;
-		start = end = n + 1;
-	    }
-	    n++;
-	}
-
-	if (end > start) {
-	    utftext += string.substring(start, s.length);
-	}
 
-	return utftext;
+		return utftext;
 
     }
 
-    /**
-      Decode an UTF8 string back to an ISO string.
-      Throw an exception if a given UTF8 character is not supported by the decoder.
-     **/
     public static function decode( s : String ) : String {
-	var ret = new StringBuf();
-	iter(s, function(c){
-	    if( c == 8364 ) // euro symbol
-		c = 164;
-	    else if( c > 255 ){
-		// throw new RangeError('Utf8 decode invalid character ($c)');
-		throw 'Utf8::decode invalid character ($c)';
-	    }
-
-	    if (c != 0xFEFF) // BOM
-		ret.add(String.fromCharCode(c));
-	});
-	return ret.toString();
+		var ret = new StringBuf();
+		iter(s, function(c){
+			if( c == 8364 ) // euro symbol
+			c = 164;
+			else if( c > 255 ){
+			// throw new RangeError('Utf8 decode invalid character ($c)');
+			throw 'Utf8::decode invalid character ($c)';
+			}
+
+			if (c != 0xFEFF) // BOM
+			ret.add(String.fromCharCode(c));
+		});
+		return ret.toString();
     }
 
-    /**
-      Similar to `String.charCodeAt` but uses the UTF8 character position.
-     **/
     public static inline function charCodeAt( s : String, index : Int ) : Int {
-	var cur_idx = 0;
-	var pos = 0;
-	for (i in 0...index){
-	    pos += charWidth(s.charCodeAt(pos));
-	}
-	var ret = 0;
-	var code = s.charCodeAt(pos);
-	var bytes = charWidth(code);
-	if (bytes == 1){
-	    return code;
-	} else if (bytes == 2){
-	    return ((code & 0x1F) << 6) | (s.charCodeAt(pos+1) & 0x3F);
-	} else if (bytes == 3){
-	    return ((code & 0x0F) << 12) | (((s.charCodeAt(pos+1) & 0x3F) << 6) | (s.charCodeAt(pos+2) & 0x3F));
-	} else {
-	    return null;
-	}
+		var cur_idx = 0;
+		var pos = 0;
+		for (i in 0...index){
+			pos += charWidth(s.charCodeAt(pos));
+		}
+		var ret = 0;
+		var code = s.charCodeAt(pos);
+		var bytes = charWidth(code);
+		if (bytes == 1){
+			return code;
+		} else if (bytes == 2){
+			return ((code & 0x1F) << 6) | (s.charCodeAt(pos+1) & 0x3F);
+		} else if (bytes == 3){
+			return ((code & 0x0F) << 12) | (((s.charCodeAt(pos+1) & 0x3F) << 6) | (s.charCodeAt(pos+2) & 0x3F));
+		} else {
+			return null;
+		}
     }
 
-    /**
-      Tells if the String is correctly encoded as UTF8.
-     **/
     public static function validate( s : String ) : Bool {
-	if (s == null) return false;
-	var cur = 0;
-	while (cur < s.length){
-	    var code = s.charCodeAt(cur);
-	    var width = charWidth(code);
-	    var expectedLen = 0;
-
-		 if ((code & 0x10000000) == 0x00000000) expectedLen = 1;
-	    else if ((code & 0x11100000) == 0x11000000) expectedLen = 2;
-	    else if ((code & 0x11110000) == 0x11100000) expectedLen = 3;
-	    else if ((code & 0x11111000) == 0x11110000) expectedLen = 4;
-	    else if ((code & 0x11111100) == 0x11111000) expectedLen = 5;
-	    else if ((code & 0x11111110) == 0x11111100) expectedLen = 6;
-	    else return false;
-
-	    if (cur + expectedLen > s.length) return false;
-
-	    for (i in (cur + 1)...expectedLen) {
-		if ((s.charCodeAt(i) & 0x11000000) != 0x10000000) {
-		    return false;
+		if (s == null) return false;
+		var cur = 0;
+		while (cur < s.length){
+			var code = s.charCodeAt(cur);
+			var width = charWidth(code);
+			var expectedLen = 0;
+
+			 if ((code & 0x10000000) == 0x00000000) expectedLen = 1;
+			else if ((code & 0x11100000) == 0x11000000) expectedLen = 2;
+			else if ((code & 0x11110000) == 0x11100000) expectedLen = 3;
+			else if ((code & 0x11111000) == 0x11110000) expectedLen = 4;
+			else if ((code & 0x11111100) == 0x11111000) expectedLen = 5;
+			else if ((code & 0x11111110) == 0x11111100) expectedLen = 6;
+			else return false;
+
+			if (cur + expectedLen > s.length) return false;
+
+			for (i in (cur + 1)...expectedLen) {
+			if ((s.charCodeAt(i) & 0x11000000) != 0x10000000) {
+				return false;
+			}
+			}
+
+			cur += width;
 		}
-	    }
-
-	    cur += width;
-	}
-	return true;
+		return true;
     }
 
-    /**
-      Returns the number of UTF8 chars of the String.
-     **/
     public static inline function length( s : String ) : Int {
-	var pos = 0;
-	var len = 0;
-	while (pos < s.length){
-	    pos += charWidth(s.charCodeAt(pos));
-	    len++;
-	}
-	return len;
+		var pos = 0;
+		var len = 0;
+		while (pos < s.length){
+			pos += charWidth(s.charCodeAt(pos));
+			len++;
+		}
+		return len;
     }
 
-    /**
-      Compare two UTF8 strings, character by character.
-     **/
     public static function compare( a : String, b : String ) : Int {
-	return a > b ? 1 : (a == b ? 0 : -1);
+		return a > b ? 1 : (a == b ? 0 : -1);
     }
 
-    /**
-      This is similar to `String.substr` but the `pos` and `len` parts are considering UTF8 characters.
-     **/
     public static inline function sub( s : String, pos : Int, len : Int ) : String {
-	var startpos = 0;
-	var ret = new StringBuf();
-	for (i in 0...pos){
-	    startpos += charWidth(s.charCodeAt(startpos));
-	}
-	var endpos = startpos;
-	for (i in 0...len){
-	    endpos += charWidth(s.charCodeAt(endpos));
-	}
-	return s.substring(startpos, endpos);
+		var startpos = 0;
+		var ret = new StringBuf();
+		for (i in 0...pos){
+			startpos += charWidth(s.charCodeAt(startpos));
+		}
+		var endpos = startpos;
+		for (i in 0...len){
+			endpos += charWidth(s.charCodeAt(endpos));
+		}
+		return s.substring(startpos, endpos);
     }
 
-    /**
-      Determines the expected character width of the utf8 codepoint
-     **/
     static function charWidth(c:Int) : Int {
-	return   if (c >  0   && c <= 127) 1;
-	    else if (c >= 194 && c <= 223) 2;
-	    else if (c >= 224 && c <= 239) 3;
-	    else if (c >= 240 && c <= 244) 4;
-	    else null;
+		return   if (c >  0   && c <= 127) 1;
+			else if (c >= 194 && c <= 223) 2;
+			else if (c >= 224 && c <= 239) 3;
+			else if (c >= 240 && c <= 244) 4;
+			else null;
     }
 
-    /**
-      Returns the string representation of the unicode codepoint
-     **/
     public static function char( unicode : Int ) : String {
-	if (unicode <= 0x7F) {
-	    return String.fromCharCode(unicode);
-	} else if (unicode <= 0x7FF) {
-	    var b0 = 0xC0 + Math.floor(unicode / 0x40);
-	    var b1 = 0x80 + (unicode % 0x40);
-	    return NativeStringTools.char(b0, b1);
-	} else if (unicode <= 0xFFFF) {
-	    var b0 = 0xE0 +  Math.floor(unicode / 0x1000);
-	    var b1 = 0x80 + (Math.floor(unicode / 0x40) % 0x40);
-	    var b2 = 0x80 + (unicode % 0x40);
-	    return NativeStringTools.char(b0, b1, b2);
-	} else if (unicode <= 0x10FFFF) {
-	    var code = unicode;
-	    var b3   = 0x80 + (code % 0x40);
-	    code     = Math.floor(code / 0x40);
-	    var b2   = 0x80 + (code % 0x40);
-	    code     = Math.floor(code / 0x40);
-	    var b1   = 0x80 + (code % 0x40);
-	    code     = Math.floor(code / 0x40);
-	    var b0   = 0xF0 + code;
-
-	    return NativeStringTools.char(b0, b1, b2, b3);
-	} else {
-	    throw 'Unicode greater than U+10FFFF';
-	}
+		if (unicode <= 0x7F) {
+			return String.fromCharCode(unicode);
+		} else if (unicode <= 0x7FF) {
+			var b0 = 0xC0 + Math.floor(unicode / 0x40);
+			var b1 = 0x80 + (unicode % 0x40);
+			return NativeStringTools.char(b0, b1);
+		} else if (unicode <= 0xFFFF) {
+			var b0 = 0xE0 +  Math.floor(unicode / 0x1000);
+			var b1 = 0x80 + (Math.floor(unicode / 0x40) % 0x40);
+			var b2 = 0x80 + (unicode % 0x40);
+			return NativeStringTools.char(b0, b1, b2);
+		} else if (unicode <= 0x10FFFF) {
+			var code = unicode;
+			var b3   = 0x80 + (code % 0x40);
+			code     = Math.floor(code / 0x40);
+			var b2   = 0x80 + (code % 0x40);
+			code     = Math.floor(code / 0x40);
+			var b1   = 0x80 + (code % 0x40);
+			code     = Math.floor(code / 0x40);
+			var b0   = 0xF0 + code;
+
+			return NativeStringTools.char(b0, b1, b2, b3);
+		} else {
+			throw 'Unicode greater than U+10FFFF';
+		}
     }
 }