haxe
/
HaxeFoundation.haxe
镜像来自 https://github.com/HaxeFoundation/haxe.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
							/*
 * Copyright (C)2005-2019 Haxe Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

import haxe.io.Bytes;
import haxe.io.Encoding;
import haxe.iterators.StringIteratorUnicode;
import haxe.iterators.StringKeyValueIteratorUnicode;

/**
	This abstract provides consistent cross-target unicode support for characters of any width.

	Due to differing internal representations of strings across targets, only the basic
	multilingual plane (BMP) is supported consistently by the `String` class.

	This abstract provides an API to consistently handle all characters, even those beyond
	the BMP. All positions and lengths taken or returned by its methods are counted in
	unicode code points, not in native string units.

	@see https://haxe.org/manual/std-String-unicode.html
**/
@:forward
@:access(StringTools)
abstract UnicodeString(String) from String to String {
	/**
		Tells if `b` is a correctly encoded byte sequence for the given `encoding`.

		Only `UTF8` can be checked: the whole content of `b` must consist of
		well-formed UTF-8 sequences (no stray continuation bytes, no overlong
		forms, no encoded surrogates, nothing above U+10FFFF, no truncated
		sequence at the end).

		@param b The bytes to check.
		@param encoding The encoding `b` is expected to be in.
		@return `true` if every byte of `b` belongs to a valid sequence.
		@throws String if `encoding` is `RawNative`.
	**/
	static public function validate(b:Bytes, encoding:Encoding):Bool {
		switch (encoding) {
			case RawNative:
				throw "UnicodeString.validate: RawNative encoding is not supported";
			case UTF8:
				var data = b.getData();
				var pos = 0;
				var max = b.length;
				while (pos < max) {
					// `pos` points just past the lead byte from here on.
					var c:Int = Bytes.fastGet(data, pos++);
					if (c < 0x80) {
						// Single-byte (ASCII) sequence, nothing more to check.
					} else if (c < 0xC2) {
						// 0x80..0xBF are continuation bytes, 0xC0/0xC1 could only start overlong forms.
						return false;
					} else if (c < 0xE0) {
						// Two-byte sequence: one continuation byte must follow.
						if (pos + 1 > max) {
							return false;
						}
						var c2:Int = Bytes.fastGet(data, pos++);
						if (c2 < 0x80 || c2 > 0xBF) {
							return false;
						}
					} else if (c < 0xF0) {
						// Three-byte sequence: two continuation bytes must follow.
						if (pos + 2 > max) {
							return false;
						}
						var c2:Int = Bytes.fastGet(data, pos++);
						if (c == 0xE0) {
							// A second byte below 0xA0 would be an overlong encoding.
							if (c2 < 0xA0 || c2 > 0xBF)
								return false;
						} else {
							if (c2 < 0x80 || c2 > 0xBF)
								return false;
						}
						var c3:Int = Bytes.fastGet(data, pos++);
						if (c3 < 0x80 || c3 > 0xBF) {
							return false;
						}
						// Pack the three bytes to range-check the whole sequence at once.
						c = (c << 16) | (c2 << 8) | c3;
						if (0xEDA080 <= c && c <= 0xEDBFBF) { // surrogate pairs
							return false;
						}
					} else if (c > 0xF4) {
						// Lead bytes above 0xF4 would encode values beyond U+10FFFF.
						return false;
					} else {
						// Four-byte sequence: three continuation bytes must follow.
						if (pos + 3 > max) {
							return false;
						}
						var c2:Int = Bytes.fastGet(data, pos++);
						if (c == 0xF0) {
							// A second byte below 0x90 would be an overlong encoding.
							if (c2 < 0x90 || c2 > 0xBF)
								return false;
						} else if (c == 0xF4) {
							// A second byte above 0x8F would exceed U+10FFFF.
							if (c2 < 0x80 || c2 > 0x8F)
								return false;
						} else {
							if (c2 < 0x80 || c2 > 0xBF)
								return false;
						}
						var c3:Int = Bytes.fastGet(data, pos++);
						if (c3 < 0x80 || c3 > 0xBF) {
							return false;
						}
						var c4:Int = Bytes.fastGet(data, pos++);
						if (c4 < 0x80 || c4 > 0xBF) {
							return false;
						}
					}
				}
				return true;
		}
	}

	#if target.unicode
	/**
		Creates an instance of UnicodeString wrapping `string`.
	**/
	public inline function new(string:String):Void {
		this = string;
	}

	/**
		Returns an iterator of the unicode code points.
	**/
	public inline function iterator():StringIteratorUnicode {
		return new StringIteratorUnicode(this);
	}

	/**
		Returns an iterator of the code point indices and unicode code points.
	**/
	public inline function keyValueIterator():StringKeyValueIteratorUnicode {
		return new StringKeyValueIteratorUnicode(this);
	}

	#if target.utf16
	/**
		The number of characters (unicode code points) in `this` String.

		The value is computed by iterating over the whole string on every access.
	**/
	public var length(get, never):Int;

	/**
		Returns the character at position `index` of `this` String.

		If `index` is negative or exceeds `this.length`, the empty String `""`
		is returned.
	**/
	public function charAt(index:Int):String {
		if (index < 0)
			return '';
		var unicodeOffset = 0;
		var nativeOffset = 0;
		while (nativeOffset < this.length) {
			var c = StringTools.utf16CodePointAt(this, nativeOffset++);
			if (unicodeOffset == index) {
				return String.fromCharCode(c);
			}
			// Code points at or above this threshold span a second native unit.
			if (c >= StringTools.MIN_SURROGATE_CODE_POINT) {
				nativeOffset++;
			}
			unicodeOffset++;
		}
		return '';
	}

	/**
		Returns the character code at position `index` of `this` String.

		If `index` is negative or exceeds `this.length`, `null` is returned.
	**/
	public function charCodeAt(index:Int):Null<Int> {
		if (index < 0)
			return null;
		var unicodeOffset = 0;
		var nativeOffset = 0;
		while (nativeOffset < this.length) {
			var c = StringTools.utf16CodePointAt(this, nativeOffset++);
			if (unicodeOffset == index) {
				return c;
			}
			// Code points at or above this threshold span a second native unit.
			if (c >= StringTools.MIN_SURROGATE_CODE_POINT) {
				nativeOffset++;
			}
			unicodeOffset++;
		}
		return null;
	}

	/**
		Tells if `needle` occurs in `haystack` starting at native offset `nativeOffset`.

		The comparison is done unit by unit on the native representation. The
		caller must guarantee that `nativeOffset + needle.length` does not exceed
		`haystack.length`, so no unit is read out of bounds.
	**/
	static function nativeMatchAt(haystack:String, needle:String, nativeOffset:Int):Bool {
		var i = 0;
		var needleLength = needle.length;
		while (i < needleLength) {
			if (StringTools.fastCodeAt(haystack, nativeOffset + i) != StringTools.fastCodeAt(needle, i)) {
				return false;
			}
			i++;
		}
		return true;
	}

	/**
		Returns the position of the leftmost occurrence of `str` within `this`
		String, counted in unicode code points.

		If `startIndex` is given, the search starts at that code point position.
		A `null` or negative `startIndex` is treated as 0.

		If `str` is the empty String `""`, `startIndex` is returned, clamped to
		`this.length`.

		If `str` cannot be found, -1 is returned.

		@see String.indexOf
	**/
	public function indexOf(str:String, ?startIndex:Int):Int {
		var fromIndex:Int = (startIndex == null || startIndex < 0) ? 0 : startIndex;
		var strLength = str.length;
		if (strLength == 0) {
			// `length` walks the whole string, so read it only once.
			var unicodeLength = length;
			return (fromIndex > unicodeLength ? unicodeLength : fromIndex);
		}

		var nativeLength = this.length;
		var unicodeOffset = 0;
		var nativeOffset = 0;
		// Every code point boundary is tried as a candidate start, so a failed
		// partial match can never hide an overlapping occurrence. Positions with
		// fewer than `strLength` native units left cannot match.
		while (nativeOffset + strLength <= nativeLength) {
			if (unicodeOffset >= fromIndex && nativeMatchAt(this, str, nativeOffset)) {
				return unicodeOffset;
			}
			var c = StringTools.utf16CodePointAt(this, nativeOffset);
			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
			unicodeOffset++;
		}
		return -1;
	}

	/**
		Returns the position of the rightmost occurrence of `str` within `this`
		String, counted in unicode code points.

		If `startIndex` is given, the search is performed within the substring
		of `this` String from 0 to `startIndex + str.length`. Otherwise the search
		is performed within `this` String. In either case, the returned position
		is relative to the beginning of `this` String.

		A negative `startIndex` is treated as 0.

		If `str` is the empty String `""`, `startIndex` is returned, clamped to
		`this.length` (`this.length` if `startIndex` is omitted).

		If `str` cannot be found, -1 is returned.
	**/
	public function lastIndexOf(str:String, ?startIndex:Int):Int {
		// Highest code point position at which a match is allowed to start.
		// The native length is never smaller than the unicode length, so it
		// serves as "no limit" when `startIndex` is omitted.
		var lastStart:Int = if (startIndex == null) {
			this.length;
		} else if (startIndex < 0) {
			0;
		} else {
			startIndex;
		}

		var strLength = str.length;
		if (strLength == 0) {
			var unicodeLength = length;
			return (lastStart > unicodeLength ? unicodeLength : lastStart);
		}

		var nativeLength = this.length;
		var unicodeOffset = 0;
		var nativeOffset = 0;
		var result = -1;
		// Every code point boundary up to `lastStart` is tried as a candidate
		// start; the last one that matches wins, including overlapping matches.
		while (unicodeOffset <= lastStart && nativeOffset + strLength <= nativeLength) {
			if (nativeMatchAt(this, str, nativeOffset)) {
				result = unicodeOffset;
			}
			var c = StringTools.utf16CodePointAt(this, nativeOffset);
			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
			unicodeOffset++;
		}
		return result;
	}

	/**
		Returns `len` characters of `this` String, starting at position `pos`.

		If `len` is omitted, all characters from position `pos` to the end of
		`this` String are included.

		If `pos` is negative, its value is calculated from the end of `this`
		String by `this.length + pos`. If this yields a negative value, 0 is
		used instead.

		If the calculated position + `len` exceeds `this.length`, the characters
		from that position to the end of `this` String are returned.

		If `len` is negative, the result is unspecified.
	**/
	public function substr(pos:Int, ?len:Int):String {
		if (pos < 0) {
			pos = (this : UnicodeString).length + pos;
			if (pos < 0) {
				pos = 0;
			}
		}
		if (len != null) {
			if (len < 0) {
				len = (this : UnicodeString).length + len;
			}
			if (len <= 0) {
				return "";
			}
		}
		var unicodeOffset = 0;
		var nativeOffset = 0;
		// Native offset of the first requested character, -1 until it is reached.
		var fromOffset = -1;
		var subLength = 0;
		while (nativeOffset < this.length) {
			var c = StringTools.utf16CodePointAt(this, nativeOffset);

			if (unicodeOffset >= pos) {
				if (fromOffset < 0) {
					if (len == null) {
						// No length limit: the native tail is the answer.
						return this.substr(nativeOffset);
					}
					fromOffset = nativeOffset;
				}
				subLength++;
				if (subLength >= len) {
					// Include the second native unit of the last character if it has one.
					var lastOffset = (c < StringTools.MIN_SURROGATE_CODE_POINT ? nativeOffset : nativeOffset + 1);
					return this.substr(fromOffset, lastOffset - fromOffset + 1);
				}
			}

			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
			unicodeOffset++;
		}
		// Reached the end before collecting `len` characters (or before `pos`).
		return (fromOffset < 0 ? "" : this.substr(fromOffset));
	}

	/**
		Returns the part of `this` String from `startIndex` to but not including `endIndex`.

		If `startIndex` or `endIndex` are negative, 0 is used instead.

		If `startIndex` exceeds `endIndex`, they are swapped.

		If the (possibly swapped) `endIndex` is omitted or exceeds
		`this.length`, `this.length` is used instead.

		If the (possibly swapped) `startIndex` exceeds `this.length`, the empty
		String `""` is returned.
	**/
	public function substring(startIndex:Int, ?endIndex:Int):String {
		if (startIndex < 0) {
			startIndex = 0;
		}
		if (endIndex != null) {
			if (endIndex < 0) {
				endIndex = 0;
			}
			if (startIndex == endIndex) {
				return "";
			}
			if (startIndex > endIndex) {
				var tmp = startIndex;
				startIndex = endIndex;
				endIndex = tmp;
			}
		}

		var unicodeOffset = 0;
		var nativeOffset = 0;
		// Native offset of the first requested character, -1 until it is reached.
		var fromOffset = -1;
		var subLength = 0;
		while (nativeOffset < this.length) {
			var c = StringTools.utf16CodePointAt(this, nativeOffset);

			if (startIndex <= unicodeOffset) {
				if (fromOffset < 0) {
					if (endIndex == null) {
						// No end limit: the native tail is the answer.
						return this.substr(nativeOffset);
					}
					fromOffset = nativeOffset;
				}
				subLength++;
				if (subLength >= endIndex - startIndex) {
					// Include the second native unit of the last character if it has one.
					var lastOffset = (c < StringTools.MIN_SURROGATE_CODE_POINT ? nativeOffset : nativeOffset + 1);
					return this.substr(fromOffset, lastOffset - fromOffset + 1);
				}
			}

			nativeOffset += (c >= StringTools.MIN_SURROGATE_CODE_POINT ? 2 : 1);
			unicodeOffset++;
		}
		// Reached the end before `endIndex` (or before `startIndex`).
		return (fromOffset < 0 ? "" : this.substr(fromOffset));
	}

	/**
		Getter of `length`: counts the code points yielded by a
		`StringIteratorUnicode` over `this` String.
	**/
	function get_length():Int {
		var l = 0;
		for (_ in new StringIteratorUnicode(this)) {
			l++;
		}
		return l;
	}
	#end
	#end
	// The operators below are declared without bodies; they are listed so that
	// comparison and concatenation keep working on `UnicodeString` values.
	@:op(A < B) static function lt(a:UnicodeString, b:UnicodeString):Bool;

	@:op(A <= B) static function lte(a:UnicodeString, b:UnicodeString):Bool;

	@:op(A > B) static function gt(a:UnicodeString, b:UnicodeString):Bool;

	@:op(A >= B) static function gte(a:UnicodeString, b:UnicodeString):Bool;

	@:op(A == B) static function eq(a:UnicodeString, b:UnicodeString):Bool;

	@:op(A != B) static function neq(a:UnicodeString, b:UnicodeString):Bool;

	@:op(A + B) static function add(a:UnicodeString, b:UnicodeString):UnicodeString;

	@:op(A += B) static function assignAdd(a:UnicodeString, b:UnicodeString):UnicodeString;

	@:op(A + B) @:commutative static function addString(a:UnicodeString, b:String):UnicodeString;

	@:op(A += B) @:commutative static function assignAddString(a:UnicodeString, b:String):UnicodeString;
}