123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596 |
- /*
- * Copyright (C)2005-2017 Haxe Foundation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
- /**
- This class provides advanced methods on Strings. It is ideally used with
- `using StringTools` and then acts as an [extension](https://haxe.org/manual/lf-static-extension.html)
- to the `String` class.
- If the first argument to any of the methods is null, the result is
- unspecified.
- **/
- #if cpp
- using cpp.NativeString;
- #end
- class StringTools {
/**
	Percent-encodes `s` so that it can be embedded in a URL.

	The work is delegated to whatever facility the compilation target
	offers; each conditional branch below wraps one of them. A target that
	matches none of the branches gets `null` back.
**/
#if (!java && !cpp && !lua && !eval) inline #end public static function urlEncode( s : String ) : String {
	#if flash
		return untyped __global__["encodeURIComponent"](s);
	#elseif neko
		return untyped new String(_urlEncode(s.__s));
	#elseif js
		return untyped encodeURIComponent(s);
	#elseif cpp
		return untyped s.__URLEncode();
	#elseif java
		// URLEncoder's output is adjusted afterwards by postProcessUrlEncode.
		return postProcessUrlEncode(java.net.URLEncoder.encode(s, "UTF-8"));
	#elseif cs
		return untyped cs.system.Uri.EscapeDataString(s);
	#elseif python
		return python.lib.urllib.Parse.quote(s, "");
	#elseif hl
		// The native call reports the size of the encoded data through `outLen`.
		var outLen = 0;
		var encoded = @:privateAccess s.bytes.urlEncode(outLen);
		return @:privateAccess String.__alloc__(encoded, outLen);
	#elseif lua
		// 1. turn "\n" into "\r\n"
		var withCrlf : String = lua.NativeStringTools.gsub(s, "\n", "\r\n");
		// 2. replace every character matched by the pattern with %XX
		var escaped : String = lua.NativeStringTools.gsub(withCrlf, "([^%w %-%_%.%~])", function (ch) {
			return lua.NativeStringTools.format("%%%02X", lua.NativeStringTools.byte(ch) + '');
		});
		// 3. spaces become "+"
		return lua.NativeStringTools.gsub(escaped, " ", "+");
	#else
		return null;
	#end
}
#if java
/**
	Adjusts a string produced by `java.net.URLEncoder.encode` (see the java
	branch of `urlEncode`).

	- `+` is rewritten as `%20`.
	- The escapes `%21`, `%27`, `%28`, `%29` and `%7E`/`%7e` are turned back
	  into the literal characters `!`, `'`, `(`, `)` and `~`.
	- Every other escape sequence and every other character is copied
	  through unchanged.
**/
private static function postProcessUrlEncode( s : String ) : String {
	var out = new StringBuf();
	var total = s.length;
	var pos = 0;
	while (pos < total) {
		switch (_charAt(s, pos++)) {
			case '+'.code:
				out.add('%20');
			// `pos` already points past the '%'; only decode when two more
			// characters are available.
			case '%'.code if (pos <= total - 2):
				var first = _charAt(s, pos++);
				var second = _charAt(s, pos++);
				switch [first, second] {
					case ['2'.code, '1'.code]:
						out.addChar('!'.code);
					case ['2'.code, '7'.code]:
						out.addChar('\''.code);
					case ['2'.code, '8'.code]:
						out.addChar('('.code);
					case ['2'.code, '9'.code]:
						out.addChar(')'.code);
					case ['7'.code, 'E'.code] | ['7'.code, 'e'.code]:
						out.addChar('~'.code);
					case _:
						// Not one of the special escapes: emit it verbatim.
						out.addChar('%'.code);
						out.addChar(cast first);
						out.addChar(cast second);
				}
			case other:
				out.addChar(cast other);
		}
	}
	return out.toString();
}
#end
/**
	Decodes a percent-encoded URL component `s`.

	Like `urlEncode`, this forwards to the native facility of the
	compilation target. The flash, js and lua branches additionally turn
	`+` into a space. A target that matches none of the branches gets
	`null` back.
**/
#if (!java && !cpp && !lua && !eval) inline #end public static function urlDecode( s : String ) : String {
	#if flash
		return untyped __global__["decodeURIComponent"](s.split("+").join(" "));
	#elseif neko
		return untyped new String(_urlDecode(s.__s));
	#elseif js
		return untyped decodeURIComponent(s.split("+").join(" "));
	#elseif cpp
		return untyped s.__URLDecode();
	#elseif java
		// Any exception raised by the decoder is caught and rethrown as-is.
		try {
			return untyped __java__("java.net.URLDecoder.decode(s, \"UTF-8\")");
		} catch (e:Dynamic) {
			throw e;
		}
	#elseif cs
		return untyped cs.system.Uri.UnescapeDataString(s);
	#elseif python
		return python.lib.urllib.Parse.unquote(s);
	#elseif hl
		// The native call reports the size of the decoded data through `outLen`.
		var outLen = 0;
		var decoded = @:privateAccess s.bytes.urlDecode(outLen);
		return @:privateAccess String.__alloc__(decoded, outLen);
	#elseif lua
		// 1. "+" back to a space
		var spaced : String = lua.NativeStringTools.gsub (s, "+", " ");
		// 2. %XX back to the character with that hexadecimal code
		var unescaped : String = lua.NativeStringTools.gsub (spaced, "%%(%x%x)",
			function(hexPair) {return lua.NativeStringTools.char(lua.Lua.tonumber(hexPair,16));});
		// 3. "\r\n" back to "\n"
		return lua.NativeStringTools.gsub (unescaped, "\r\n", "\n");
	#else
		return null;
	#end
}
/**
	Escapes HTML special characters of the string `s`.

	The following replacements are made:

	- `&` becomes `&amp;`;
	- `<` becomes `&lt;`;
	- `>` becomes `&gt;`;

	If `quotes` is true, the following characters are also replaced:

	- `"` becomes `&quot;`;
	- `'` becomes `&#039;`;

	@param s the text to escape.
	@param quotes whether to escape single and double quotes as well.
	@return the escaped text.
**/
public static function htmlEscape( s : String, ?quotes : Bool ) : String {
	// `&` has to be escaped first; otherwise the ampersands introduced by
	// the other entities would themselves be escaped.
	s = s.split("&").join("&amp;").split("<").join("&lt;").split(">").join("&gt;");
	return quotes ? s.split('"').join("&quot;").split("'").join("&#039;") : s;
}
/**
	Unescapes HTML special characters of the string `s`.

	This is the inverse operation to htmlEscape, i.e. the following always
	holds: `htmlUnescape(htmlEscape(s)) == s`

	The replacements follow:

	- `&amp;` becomes `&`
	- `&lt;` becomes `<`
	- `&gt;` becomes `>`
	- `&quot;` becomes `"`
	- `&#039;` becomes `'`

	@param s the text to unescape.
	@return the unescaped text.
**/
public static function htmlUnescape( s : String ) : String {
	// `&amp;` is decoded last so that an escaped entity such as `&amp;lt;`
	// yields the text `&lt;` instead of being decoded twice into `<`.
	return s.split("&gt;").join(">").split("&lt;").join("<").split("&quot;").join('"').split("&#039;").join("'").split("&amp;").join("&");
}
/**
	Checks whether `start` is a prefix of `s`.

	The empty String `""` is a prefix of every string, so the result is
	true in that case. If `start` is `null`, the result is unspecified.
**/
public static #if (cs || java || python) inline #end function startsWith( s : String, start : String ) : Bool {
	#if java
		return untyped s.startsWith(start);
	#elseif cs
		return untyped s.StartsWith(start);
	#elseif cpp
		var prefixLen = start.length;
		if (s.length < prefixLen)
			return false;
		// Compare the native character data position by position.
		var haystack = s.c_str();
		var needle = start.c_str();
		for (k in 0...prefixLen) {
			if (haystack.at(k) != needle.at(k))
				return false;
		}
		return true;
	#elseif hl
		// Lengths and offsets passed to compare are shifted left by one.
		return @:privateAccess (s.length >= start.length && s.bytes.compare(0, start.bytes, 0, start.length << 1) == 0);
	#elseif python
		return python.NativeStringTools.startswith(s, start);
	#else
		if (s.length < start.length)
			return false;
		return s.substr(0, start.length) == start;
	#end
}
/**
	Checks whether `end` is a suffix of `s`.

	The empty String `""` is a suffix of every string, so the result is
	true in that case. If `end` is `null`, the result is unspecified.
**/
public static #if (cs || java || python) inline #end function endsWith( s : String, end : String ) : Bool {
	#if java
		return untyped s.endsWith(end);
	#elseif cs
		return untyped s.EndsWith(end);
	#elseif cpp
		var suffixLen = end.length;
		if (s.length < suffixLen)
			return false;
		// Start the comparison at the position where the suffix would begin.
		var tail = s.c_str().add( s.length - suffixLen );
		var needle = end.c_str();
		for (k in 0...suffixLen) {
			if (tail.at(k) != needle.at(k))
				return false;
		}
		return true;
	#elseif hl
		var suffixLen = end.length;
		var totalLen = s.length;
		// Lengths and offsets passed to compare are shifted left by one.
		return @:privateAccess (totalLen >= suffixLen && s.bytes.compare((totalLen - suffixLen) << 1, end.bytes, 0, suffixLen << 1) == 0);
	#elseif python
		return python.NativeStringTools.endswith(s, end);
	#else
		var suffixLen = end.length;
		var totalLen = s.length;
		if (totalLen < suffixLen)
			return false;
		return s.substr(totalLen - suffixLen, suffixLen) == end;
	#end
}
/**
	Tells whether the character of `s` at position `pos` is a space
	character.

	Space characters are those with character code 9, 10, 11, 12, 13
	or 32.

	The result is false when `s` is the empty String `""` or when `pos` is
	not a valid position within `s`.
**/
public static function isSpace( s : String, pos : Int ) : Bool {
	#if (python || lua)
	// These targets get an explicit range check before the lookup.
	if (s.length == 0 || pos < 0 || pos >= s.length) return false;
	#end
	var code = s.charCodeAt( pos );
	return code == 32 || (code > 8 && code < 14);
}
/**
	Returns `s` without its leading space characters.

	On cs the native `TrimStart` is used; everywhere else `isSpace()`
	decides which characters are removed.

	If `s` is the empty String `""` or consists only of space characters,
	the result is the empty String `""`.
**/
public #if cs inline #end static function ltrim( s : String ) : String {
	#if cs
		return untyped s.TrimStart();
	#else
		var total = s.length;
		// Index of the first character that is not a space.
		var first = 0;
		while (first < total && isSpace(s, first))
			first++;
		return (first > 0) ? s.substr(first, total - first) : s;
	#end
}
/**
	Returns `s` without its trailing space characters.

	On cs the native `TrimEnd` is used; everywhere else `isSpace()`
	decides which characters are removed.

	If `s` is the empty String `""` or consists only of space characters,
	the result is the empty String `""`.
**/
public #if cs inline #end static function rtrim( s : String ) : String {
	#if cs
		return untyped s.TrimEnd();
	#else
		var total = s.length;
		// Number of characters to keep; shrinks while the last kept
		// character is a space.
		var keep = total;
		while (keep > 0 && isSpace(s, keep - 1))
			keep--;
		return (keep < total) ? s.substr(0, keep) : s;
	#end
}
/**
	Returns `s` without its leading and trailing space characters.

	On cs and java the native trim method of the string is used; on all
	other targets this is `ltrim(rtrim(s))`.
**/
public #if (cs || java) inline #end static function trim( s : String ) : String {
	#if cs
		return untyped s.Trim();
	#elseif java
		return untyped s.trim();
	#else
		// Strip the right side first, then the left side.
		var rightTrimmed = rtrim(s);
		return ltrim(rightTrimmed);
	#end
}
/**
	Prepends `c` to `s` repeatedly until the length of the result is at
	least `l`.

	If `c` is the empty String `""` or if `l` does not exceed `s.length`,
	the result equals `s`.

	If `c.length` is 1, the resulting String length is exactly `l`.
	Otherwise the length may exceed `l`.

	If `c` is null, the result is unspecified.
**/
public static function lpad( s : String, c : String, l : Int ) : String {
	if (c.length <= 0)
		return s;
	// Build the whole padding first, then put it in front of `s`.
	var missing = l - s.length;
	var padding = "";
	while (padding.length < missing)
		padding += c;
	return padding + s;
}
/**
	Appends `c` to `s` repeatedly until the length of the result is at
	least `l`.

	If `c` is the empty String `""` or if `l` does not exceed `s.length`,
	the result equals `s`.

	If `c.length` is 1, the resulting String length is exactly `l`.
	Otherwise the length may exceed `l`.

	If `c` is null, the result is unspecified.
**/
public static function rpad( s : String, c : String, l : Int ) : String {
	if (c.length <= 0)
		return s;
	// Build the whole padding first, then put it behind `s`.
	var missing = l - s.length;
	var padding = "";
	while (padding.length < missing)
		padding += c;
	return s + padding;
}
/**
	Replaces every occurrence of the String `sub` in the String `s` by the
	String `by`.

	The result is that of `s.split(sub).join(by)`. On java and cs the
	native replace method of the string is used instead whenever `sub` is
	not empty.

	If `sub` is the empty String `""`, the result is
	`s.split("").join(by)`; if `by` is empty as well, `s` remains
	unchanged.

	If `sub` or `by` are null, the result is unspecified.
**/
public static function replace( s : String, sub : String, by : String ) : String {
	#if java
		// The native call is only used for a non-empty `sub`.
		return (sub.length == 0) ? s.split(sub).join(by) : untyped s.replace(sub, by);
	#elseif cs
		// The native call is only used for a non-empty `sub`.
		return (sub.length == 0) ? s.split(sub).join(by) : untyped s.Replace(sub, by);
	#else
		var pieces = s.split(sub);
		return pieces.join(by);
	#end
}
/**
	Encodes `n` as an upper-case hexadecimal String.

	If `digits` is specified, the result is left-padded with "0" until its
	length is at least `digits`; a result that is already longer is
	returned as is.
**/
public static function hex( n : Int, ?digits : Int ) {
	#if flash
		// Flash: let the runtime format the value as an unsigned integer.
		var unsigned : UInt = n;
		var out : String = untyped unsigned.toString(16);
		out = out.toUpperCase();
	#else
		// Emit one digit per 4 bits, lowest digit first, prepending as we go.
		// The unsigned shift is what lets the loop end for negative input.
		var alphabet = "0123456789ABCDEF";
		var out = "";
		do {
			out = alphabet.charAt(n & 15) + out;
			n >>>= 4;
		} while (n > 0);
	#end
	#if python
		if (digits != null && out.length < digits) {
			var missing = digits - out.length;
			for (_ in 0...missing) {
				out = "0" + out;
			}
		}
	#else
		if (digits != null) {
			while (out.length < digits) {
				out = "0" + out;
			}
		}
	#end
	return out;
}
/**
	Returns the character code at position `index` of String `s`, or an
	end-of-file indicator if `index` equals `s.length`.

	This method is faster than `String.charCodeAt()` on some platforms, but
	the result is unspecified if `index` is negative or greater than
	`s.length`.

	End of file status can be checked by calling `StringTools.isEof()` with
	the returned value as argument.

	This operation is not guaranteed to work if `s` contains the `\0`
	character.
**/
public static #if !eval inline #end function fastCodeAt( s : String, index : Int ) : Int {
	#if neko
		return untyped __dollar__sget(s.__s, index);
	#elseif (cpp || flash)
		return untyped s.cca(index);
	#elseif java
		// -1 is the end-of-file value checked by isEof on this target.
		return ( index < s.length ) ? cast(_charAt(s, index), Int) : -1;
	#elseif cs
		// -1 is the end-of-file value checked by isEof on this target.
		return ( cast(index, UInt) < s.length ) ? cast(s[index], Int) : -1;
	#elseif js
		return (untyped s).charCodeAt(index);
	#elseif python
		// -1 is the end-of-file value checked by isEof on this target.
		return (index >= s.length) ? -1 : python.internal.UBuiltins.ord(python.Syntax.arrayAccess(s, index));
	#elseif hl
		// The offset passed to getUI16 is the index shifted left by one.
		return @:privateAccess s.bytes.getUI16(index << 1);
	#elseif lua
		// The native byte lookup is called with index + 1.
		return lua.NativeStringTools.byte(s, index + 1);
	#else
		return untyped s.cca(index);
	#end
}
/**
	Tells if `c` represents the end-of-file (EOF) character.

	The comparison used depends on the target: `0` on flash, cpp and hl,
	NaN on js, `null` on neko, lua and eval, and `-1` on cs, java and
	python. Any other target always gets false.
**/
@:noUsing public static inline function isEof( c : Int ) : Bool {
	#if (flash || cpp || hl)
		return c == 0;
	#elseif js
		return c != c; // fast NaN
	#elseif (neko || lua || eval)
		return c == null;
	#elseif (cs || java || python)
		return c == -1;
	#else
		return false;
	#end
}
/**
	Returns a String that can be used as a single command line argument
	on Unix.

	The empty String becomes `''`. An argument made up only of the
	characters `a-z A-Z 0-9 _ @ % + = : , . / -` is returned unchanged.
	Anything else is wrapped in single quotes, with every single quote
	inside it written as `'"'"'`.
**/
public static function quoteUnixArg(argument:String):String {
	// Based on cpython's shlex.quote().
	// https://hg.python.org/cpython/file/a3f076d4f54f/Lib/shlex.py#l278

	if (argument == "")
		return "''";

	// Matches any character outside the set that needs no quoting.
	var needsQuoting = ~/[^a-zA-Z0-9_@%+=:,.\/-]/;
	if (!needsQuoting.match(argument))
		return argument;

	// Use single quotes, and put single quotes into double quotes:
	// the string $'b is then quoted as '$'"'"'b'
	var body = argument.split("'").join("'\"'\"'");
	return "'" + body + "'";
}
/**
	Character codes of the characters that will be escaped by `quoteWinArg(_, true)`.
*/
public static var winMetaCharacters = [
	" ".code, "(".code, ")".code, "%".code, "!".code,
	"^".code, "\"".code, "<".code, ">".code, "&".code,
	"|".code, "\n".code, "\r".code, ",".code, ";".code
];

/**
	Returns a String that can be used as a single command line argument
	on Windows.

	The input will be quoted, or escaped if necessary, such that the output
	will be parsed as a single argument using the rule specified in
	http://msdn.microsoft.com/en-us/library/ms880421

	If `escapeMetaCharacters` is true, every character of the result whose
	code is listed in `winMetaCharacters` is additionally prefixed with `^`.

	Examples:
	```
	quoteWinArg("abc", false) == "abc";
	quoteWinArg("ab c", false) == '"ab c"';
	```
*/
public static function quoteWinArg(argument:String, escapeMetaCharacters:Bool):String {
	// Skip the quoting pass only for a non-empty argument without any
	// space, tab, back-slash or double-quote.
	if (!~/^[^ \t\\"]+$/.match(argument)) {

		// Based on cpython's subprocess.list2cmdline().
		// https://hg.python.org/cpython/file/50741316dd3a/Lib/subprocess.py#l620

		var quoted = new StringBuf();
		var wrap = argument == "" || argument.indexOf(" ") != -1 || argument.indexOf("\t") != -1;

		if (wrap)
			quoted.add('"');

		// Backslashes are held back until the following character is known.
		var pendingSlashes = new StringBuf();

		for (i in 0...argument.length) {
			switch (argument.charCodeAt(i)) {
				case "\\".code:
					// Not yet known whether this one has to be doubled.
					pendingSlashes.add("\\");
				case '"'.code:
					// Backslashes in front of a quote are doubled, then
					// the quote itself is escaped.
					var run = pendingSlashes.toString();
					quoted.add(run);
					quoted.add(run);
					pendingSlashes = new StringBuf();
					quoted.add('\\"');
				case ch:
					// Ordinary character: pending backslashes go out as they are.
					if (pendingSlashes.length > 0) {
						quoted.add(pendingSlashes.toString());
						pendingSlashes = new StringBuf();
					}
					quoted.addChar(ch);
			}
		}

		// Trailing backslashes: written once, and a second time when the
		// closing quote follows them.
		var trailing = pendingSlashes.toString();
		quoted.add(trailing);
		if (wrap) {
			quoted.add(trailing);
			quoted.add('"');
		}

		argument = quoted.toString();
	}

	if (!escapeMetaCharacters)
		return argument;

	// Prefix every meta character with a caret.
	var escaped = new StringBuf();
	for (i in 0...argument.length) {
		var code = argument.charCodeAt(i);
		if (winMetaCharacters.indexOf(code) >= 0)
			escaped.addChar("^".code);
		escaped.addChar(code);
	}
	return escaped.toString();
}
#if java
/**
	Returns the `Char16` found at position `idx` of `str` by calling the
	string's native `_charAt`.
**/
private static inline function _charAt(str:String, idx:Int):java.StdTypes.Char16 {
	return untyped str._charAt(idx);
}
#end
#if neko
// Primitives loaded from the Neko "std" library, each taking one argument.
// They are called by the neko branches of urlEncode and urlDecode.
private static var _urlEncode : Dynamic = neko.Lib.load("std", "url_encode", 1);
private static var _urlDecode : Dynamic = neko.Lib.load("std", "url_decode", 1);
#end
- }
|