|
@@ -37,14 +37,14 @@ abstract UnicodeString(String) from String to String {
|
|
|
/**
|
|
|
Tells whether `b` is a correctly encoded UTF-8 byte sequence.
|
|
|
**/
|
|
|
- static public function validate(b:Bytes, encoding:Encoding) : Bool {
|
|
|
+ static public function validate(b:Bytes, encoding:Encoding):Bool {
|
|
|
switch(encoding) {
|
|
|
case RawNative: throw "UnicodeString.validate: RawNative encoding is not supported";
|
|
|
case UTF8:
|
|
|
var data = b.getData();
|
|
|
var pos = 0;
|
|
|
var max = b.length;
|
|
|
- while( pos < max) {
|
|
|
+ while(pos < max) {
|
|
|
var c:Int = Bytes.fastGet(data, pos++);
|
|
|
if(c < 0x80) {
|
|
|
} else if(c < 0xC2) {
|
|
@@ -105,11 +105,6 @@ abstract UnicodeString(String) from String to String {
|
|
|
|
|
|
#if target.unicode
|
|
|
|
|
|
- /**
|
|
|
- The number of characters in `this` String.
|
|
|
- **/
|
|
|
- public var length(get,never):Int;
|
|
|
-
|
|
|
/**
|
|
|
Creates an instance of UnicodeString.
|
|
|
**/
|
|
@@ -117,32 +112,48 @@ abstract UnicodeString(String) from String to String {
|
|
|
this = string;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ Returns a `StringIteratorUnicode` over the Unicode code points of `this` String.
|
|
|
+ **/
|
|
|
+ public inline function iterator():StringIteratorUnicode {
|
|
|
+ return new StringIteratorUnicode(this);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ Returns a `StringKeyValueIteratorUnicode` over the code point indices and Unicode code points of `this` String.
|
|
|
+ **/
|
|
|
+ public inline function keyValueIterator():StringKeyValueIteratorUnicode {
|
|
|
+ return new StringKeyValueIteratorUnicode(this);
|
|
|
+ }
|
|
|
+
|
|
|
+ #if target.utf16
|
|
|
+
|
|
|
+ /**
|
|
|
+ The number of Unicode code points in `this` String.
|
|
|
+ **/
|
|
|
+ public var length(get,never):Int;
|
|
|
+
|
|
|
/**
|
|
|
Returns the character at position `index` of `this` String, where `index` counts decoded code points rather than native String offsets.
|
|
|
|
|
|
If `index` is negative or exceeds `this.length`, the empty String `""`
|
|
|
is returned.
|
|
|
**/
|
|
|
- #if !utf16 inline #end
|
|
|
public function charAt(index:Int):String {
|
|
|
- #if utf16
|
|
|
- if(index < 0) return '';
|
|
|
- var unicodeOffset = 0;
|
|
|
- var nativeOffset = 0;
|
|
|
- while(nativeOffset < this.length) {
|
|
|
- var c = StringTools.utf16CodePointAt(this, nativeOffset++);
|
|
|
- if(unicodeOffset == index) {
|
|
|
- return String.fromCharCode(c);
|
|
|
- }
|
|
|
- if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
|
|
|
- nativeOffset++;
|
|
|
- }
|
|
|
- unicodeOffset++;
|
|
|
+ if(index < 0) return ''; // negative index: return the empty String without scanning
|
|
|
+ var unicodeOffset = 0; // position counted in decoded code points
|
|
|
+ var nativeOffset = 0; // position in the underlying native String
|
|
|
+ while(nativeOffset < this.length) { // `this` is the underlying String, so this is the native length
|
|
|
+ var c = StringTools.utf16CodePointAt(this, nativeOffset++); // presumably decodes the code point starting at this native offset - TODO confirm in StringTools
|
|
|
+ if(unicodeOffset == index) {
|
|
|
+ return String.fromCharCode(c); // reached the requested position
|
|
|
}
|
|
|
- return '';
|
|
|
- #else
|
|
|
- return this.charAt(index);
|
|
|
- #end
|
|
|
+ if(c >= StringTools.MIN_SURROGATE_CODE_POINT) { // presumably true when c was read from a surrogate pair - TODO confirm the constant
|
|
|
+ nativeOffset++; // consume one extra native unit for this code point
|
|
|
+ }
|
|
|
+ unicodeOffset++;
|
|
|
+ }
|
|
|
+ return ''; // loop ended without reaching `index`
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -150,55 +161,33 @@ abstract UnicodeString(String) from String to String {
|
|
|
|
|
|
If `index` is negative or exceeds `this.length`, `null` is returned.
|
|
|
**/
|
|
|
- #if !utf16 inline #end
|
|
|
public function charCodeAt(index:Int):Null<Int> {
|
|
|
- #if utf16
|
|
|
- if(index < 0) return null;
|
|
|
- var unicodeOffset = 0;
|
|
|
- var nativeOffset = 0;
|
|
|
- while(nativeOffset < this.length) {
|
|
|
- var c = StringTools.utf16CodePointAt(this, nativeOffset++);
|
|
|
- if(unicodeOffset == index) {
|
|
|
- return c;
|
|
|
- }
|
|
|
- if(c >= StringTools.MIN_SURROGATE_CODE_POINT) {
|
|
|
- nativeOffset++;
|
|
|
- }
|
|
|
- unicodeOffset++;
|
|
|
+ if(index < 0) return null; // negative index: return null without scanning
|
|
|
+ var unicodeOffset = 0; // position counted in decoded code points
|
|
|
+ var nativeOffset = 0; // position in the underlying native String
|
|
|
+ while(nativeOffset < this.length) { // `this` is the underlying String, so this is the native length
|
|
|
+ var c = StringTools.utf16CodePointAt(this, nativeOffset++); // presumably decodes the code point starting at this native offset - TODO confirm in StringTools
|
|
|
+ if(unicodeOffset == index) {
|
|
|
+ return c; // reached the requested position: return the decoded value itself
|
|
|
}
|
|
|
- return null;
|
|
|
- #else
|
|
|
- return this.charCodeAt(index);
|
|
|
- #end
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- Returns an iterator of the unicode code points.
|
|
|
- **/
|
|
|
- public inline function iterator():StringIteratorUnicode {
|
|
|
- return new StringIteratorUnicode(this);
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- Returns an iterator of the code point indices and unicode code points.
|
|
|
- **/
|
|
|
- public inline function keyValueIterator():StringKeyValueIteratorUnicode {
|
|
|
- return new StringKeyValueIteratorUnicode(this);
|
|
|
+ if(c >= StringTools.MIN_SURROGATE_CODE_POINT) { // presumably true when c was read from a surrogate pair - TODO confirm the constant
|
|
|
+ nativeOffset++; // consume one extra native unit for this code point
|
|
|
+ }
|
|
|
+ unicodeOffset++;
|
|
|
+ }
|
|
|
+ return null; // loop ended without reaching `index`
|
|
|
}
|
|
|
|
|
|
- #if !utf16 inline #end
|
|
|
function get_length():Int {
|
|
|
- #if utf16
|
|
|
- var l = 0;
|
|
|
- for(c in new StringIteratorUnicode(this)) {
|
|
|
- l++;
|
|
|
- }
|
|
|
- return l;
|
|
|
- #else
|
|
|
- return this.length;
|
|
|
- #end
|
|
|
+ var l = 0; // running count of elements yielded by the iterator
|
|
|
+ for(c in new StringIteratorUnicode(this)) { // walks the whole String on every call; `c` itself is unused
|
|
|
+ l++;
|
|
|
+ }
|
|
|
+ return l; // getter result for the `length` property declared with (get,never)
|
|
|
}
|
|
|
|
|
|
+ #end
|
|
|
+
|
|
|
#end
|
|
|
|
|
|
@:op(A < B) static function lt(a:UnicodeString, b:UnicodeString):Bool;
|