123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215 |
- /*
- * Copyright (C)2005-2019 Haxe Foundation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
- import haxe.io.Bytes;
- import haxe.io.Encoding;
- import haxe.iterators.StringIteratorUnicode;
- import haxe.iterators.StringKeyValueIteratorUnicode;
/**
	This abstract provides consistent cross-target unicode support.

	When `target.unicode` is defined, it supplies its own `length`, `charAt`
	and `charCodeAt` and unicode-aware iterators; every other field is
	forwarded to the underlying `String`.

	@see https://haxe.org/manual/std-UnicodeString.html
**/
@:forward
@:access(StringTools)
abstract UnicodeString(String) from String to String {
	/**
		Tells if the bytes of `b` form a correctly encoded UTF8 byte sequence.

		Only `UTF8` can be validated. If `encoding` is `RawNative`, a `String`
		error is thrown.
	**/
	static public function validate(b:Bytes, encoding:Encoding):Bool {
		switch (encoding) {
			case RawNative:
				throw "UnicodeString.validate: RawNative encoding is not supported";
			case UTF8:
				var raw = b.getData();
				var total = b.length;
				var cursor = 0;
				while (cursor < total) {
					var lead:Int = Bytes.fastGet(raw, cursor++);
					// Single-byte (ASCII) sequence: nothing more to check.
					if (lead < 0x80) {
						continue;
					}
					// 0x80..0xC1 and 0xF5..0xFF are never accepted as a first byte.
					if (lead < 0xC2 || lead > 0xF4) {
						return false;
					}
					// Number of bytes that must follow `lead`, and the range
					// allowed for the first of them. Some lead bytes narrow
					// that range.
					var trailing:Int;
					var secondMin = 0x80;
					var secondMax = 0xBF;
					if (lead < 0xE0) {
						trailing = 1;
					} else if (lead < 0xF0) {
						trailing = 2;
						if (lead == 0xE0) {
							secondMin = 0xA0;
						} else if (lead == 0xED) {
							// ED A0..BF xx is the surrogate range, which is rejected.
							secondMax = 0x9F;
						}
					} else {
						trailing = 3;
						if (lead == 0xF0) {
							secondMin = 0x90;
						} else if (lead == 0xF4) {
							secondMax = 0x8F;
						}
					}
					// The sequence must not run past the end of the buffer.
					if (cursor + trailing > total) {
						return false;
					}
					var second:Int = Bytes.fastGet(raw, cursor++);
					if (second < secondMin || second > secondMax) {
						return false;
					}
					// Every remaining byte must lie in 0x80..0xBF.
					for (_ in 1...trailing) {
						var next:Int = Bytes.fastGet(raw, cursor++);
						if (!(next >= 0x80 && next <= 0xBF)) {
							return false;
						}
					}
				}
				return true;
		}
	}

	#if target.unicode
	/**
		The number of characters in `this` String.
	**/
	public var length(get, never):Int;

	/**
		Creates an instance of UnicodeString from `string`.
	**/
	public inline function new(string:String):Void {
		this = string;
	}

	/**
		Returns the character at position `index` of `this` String.

		If `index` is negative or exceeds `this.length`, the empty String `""`
		is returned.
	**/
	#if !utf16 inline #end
	public function charAt(index:Int):String {
		#if utf16
		if (index < 0) {
			return '';
		}
		var nativeLength = this.length;
		var nativePos = 0;
		var charPos = 0;
		while (nativePos < nativeLength) {
			var codePoint = StringTools.utf16CodePointAt(this, nativePos);
			if (charPos == index) {
				return String.fromCharCode(codePoint);
			}
			// Code points at or above MIN_SURROGATE_CODE_POINT are stepped
			// over as two native positions, all others as one.
			nativePos += (codePoint >= StringTools.MIN_SURROGATE_CODE_POINT) ? 2 : 1;
			charPos++;
		}
		return '';
		#else
		return this.charAt(index);
		#end
	}

	/**
		Returns the character code at position `index` of `this` String.

		If `index` is negative or exceeds `this.length`, `null` is returned.
	**/
	#if !utf16 inline #end
	public function charCodeAt(index:Int):Null<Int> {
		#if utf16
		if (index < 0) {
			return null;
		}
		var nativeLength = this.length;
		var nativePos = 0;
		var charPos = 0;
		while (nativePos < nativeLength) {
			var codePoint = StringTools.utf16CodePointAt(this, nativePos);
			if (charPos == index) {
				return codePoint;
			}
			// Code points at or above MIN_SURROGATE_CODE_POINT are stepped
			// over as two native positions, all others as one.
			nativePos += (codePoint >= StringTools.MIN_SURROGATE_CODE_POINT) ? 2 : 1;
			charPos++;
		}
		return null;
		#else
		return this.charCodeAt(index);
		#end
	}

	/**
		Returns an iterator of the unicode code points.
	**/
	public inline function iterator():StringIteratorUnicode {
		return new StringIteratorUnicode(this);
	}

	/**
		Returns an iterator of the code point indices and unicode code points.
	**/
	public inline function keyValueIterator():StringKeyValueIteratorUnicode {
		return new StringKeyValueIteratorUnicode(this);
	}

	// Getter behind `length`: on `utf16` it counts the items produced by the
	// unicode iterator, otherwise it returns the underlying String's length.
	#if !utf16 inline #end
	function get_length():Int {
		#if utf16
		var count = 0;
		var walker = new StringIteratorUnicode(this);
		while (walker.hasNext()) {
			walker.next();
			count++;
		}
		return count;
		#else
		return this.length;
		#end
	}
	#end

	// Body-less operator declarations for comparison and concatenation.
	@:op(A < B) static function lt(a:UnicodeString, b:UnicodeString):Bool;
	@:op(A <= B) static function lte(a:UnicodeString, b:UnicodeString):Bool;
	@:op(A > B) static function gt(a:UnicodeString, b:UnicodeString):Bool;
	@:op(A >= B) static function gte(a:UnicodeString, b:UnicodeString):Bool;
	@:op(A == B) static function eq(a:UnicodeString, b:UnicodeString):Bool;
	@:op(A != B) static function neq(a:UnicodeString, b:UnicodeString):Bool;
	@:op(A + B) static function add(a:UnicodeString, b:UnicodeString):UnicodeString;
	@:op(A += B) static function assignAdd(a:UnicodeString, b:UnicodeString):UnicodeString;
	// Mixed UnicodeString/String concatenation, usable in either operand order.
	@:op(A + B) @:commutative static function add(a:UnicodeString, b:String):UnicodeString;
	@:op(A += B) @:commutative static function assignAdd(a:UnicodeString, b:String):UnicodeString;
}
|