StringTools.hx 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /*
  2. * Copyright (C)2005-2018 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. /**
  23. This class provides advanced methods on Strings. It is ideally used with
  24. `using StringTools` and then acts as an [extension](https://haxe.org/manual/lf-static-extension.html)
  25. to the `String` class.
  26. If the first argument to any of the methods is null, the result is
  27. unspecified.
  28. **/
  29. #if cpp
  30. using cpp.NativeString;
  31. #end
  32. class StringTools {
  /**
    Percent-encodes `s` so that it can be embedded in a URL.

    The work is delegated to the URL-encoding facility of the compilation
    target. Targets that have no dedicated branch below return `null`.
  **/
  #if (!java && !cpp && !lua && !eval) inline #end
  public static function urlEncode( s : String ) : String {
    #if flash
    return untyped __global__["encodeURIComponent"](s);
    #elseif neko
    return untyped new String(_urlEncode(s.__s));
    #elseif js
    return untyped encodeURIComponent(s);
    #elseif cpp
    return untyped s.__URLEncode();
    #elseif java
    // URLEncoder output is rewritten afterwards by postProcessUrlEncode.
    var formEncoded = java.net.URLEncoder.encode(s, "UTF-8");
    return postProcessUrlEncode(formEncoded);
    #elseif cs
    return untyped cs.system.Uri.EscapeDataString(s);
    #elseif python
    return python.lib.urllib.Parse.quote(s, "");
    #elseif hl
    // `size` receives the length of the encoded buffer.
    var size = 0;
    var encoded = @:privateAccess s.bytes.urlEncode(size);
    return @:privateAccess String.__alloc__(encoded, size);
    #elseif lua
    // 1. line feeds become CR LF
    s = lua.NativeStringTools.gsub(s, "\n", "\r\n");
    // 2. everything except alphanumerics, space, `-`, `_`, `.` and `~` becomes %XX
    s = lua.NativeStringTools.gsub(s, "([^%w %-%_%.%~])", function (c) {
      return lua.NativeStringTools.format("%%%02X", lua.NativeStringTools.byte(c) + '');
    });
    // 3. spaces become `+`
    s = lua.NativeStringTools.gsub(s, " ", "+");
    return s;
    #else
    return null;
    #end
  }
  #if java
  /**
    Rewrites a string produced by `java.net.URLEncoder.encode`:

    - `+` becomes `%20`
    - the escapes `%21`, `%27`, `%28`, `%29` and `%7E`/`%7e` are turned back
      into the literal characters `!`, `'`, `(`, `)` and `~`
    - every other escape sequence and every other character is copied as is
  **/
  private static function postProcessUrlEncode( s : String ) : String {
    var out = new StringBuf();
    var total = s.length;
    var pos = 0;
    while (pos < total) {
      switch (_charAt(s, pos++)) {
        case '+'.code:
          out.add('%20');
        // Only treat `%` as an escape when two more characters are available.
        case '%'.code if (pos <= total - 2):
          var hi = _charAt(s, pos++);
          var lo = _charAt(s, pos++);
          switch [hi, lo] {
            case ['2'.code, '1'.code]:
              out.addChar('!'.code);
            case ['2'.code, '7'.code]:
              out.addChar('\''.code);
            case ['2'.code, '8'.code]:
              out.addChar('('.code);
            case ['2'.code, '9'.code]:
              out.addChar(')'.code);
            case ['7'.code, 'E'.code] | ['7'.code, 'e'.code]:
              out.addChar('~'.code);
            case _:
              // Unknown escape: emit it unchanged.
              out.addChar('%'.code);
              out.addChar(cast hi);
              out.addChar(cast lo);
          }
        case var other:
          out.addChar(cast other);
      }
    }
    return out.toString();
  }
  #end
  /**
    Decodes a percent-encoded URL component `s`.

    The work is delegated to the URL-decoding facility of the compilation
    target. Targets that have no dedicated branch below return `null`.
  **/
  #if (!java && !cpp && !lua && !eval) inline #end
  public static function urlDecode( s : String ) : String {
    #if flash
    // `+` is turned into a space before the native decoder runs.
    return untyped __global__["decodeURIComponent"](s.split("+").join(" "));
    #elseif neko
    return untyped new String(_urlDecode(s.__s));
    #elseif js
    // `+` is turned into a space before the native decoder runs.
    return untyped decodeURIComponent(s.split("+").join(" "));
    #elseif cpp
    return untyped s.__URLDecode();
    #elseif java
    try
      return untyped __java__("java.net.URLDecoder.decode(s, \"UTF-8\")")
    catch (e:Dynamic) throw e;
    #elseif cs
    return untyped cs.system.Uri.UnescapeDataString(s);
    #elseif python
    return python.lib.urllib.Parse.unquote(s);
    #elseif hl
    // `size` receives the length of the decoded buffer.
    var size = 0;
    var decoded = @:privateAccess s.bytes.urlDecode(size);
    return @:privateAccess String.__alloc__(decoded, size);
    #elseif lua
    // 1. `+` becomes a space
    s = lua.NativeStringTools.gsub (s, "+", " ");
    // 2. %XX escapes become the character with that hexadecimal code
    s = lua.NativeStringTools.gsub (s, "%%(%x%x)",
      function(h) {return lua.NativeStringTools.char(lua.Lua.tonumber(h,16));});
    // 3. CR LF becomes a plain line feed
    s = lua.NativeStringTools.gsub (s, "\r\n", "\n");
    return s;
    #else
    return null;
    #end
  }
  /**
    Escapes HTML special characters of the string `s`.

    The following replacements are made:

    - `&` becomes `&amp;`
    - `<` becomes `&lt;`
    - `>` becomes `&gt;`

    If `quotes` is true, the following characters are also replaced:

    - `"` becomes `&quot;`
    - `'` becomes `&#039;`

    If `quotes` is omitted or `null`, quote characters are left untouched.
  **/
  public static function htmlEscape( s : String, ?quotes : Bool ) : String {
    // `quotes` is optional and therefore nullable; only an explicit `true`
    // switches quote escaping on.
    var escapeQuotes = quotes == true;
    var buf = new StringBuf();
    // Iterate by Unicode code point so that characters outside the BMP are
    // copied as a whole.
    for (code in new haxe.iterators.StringIteratorUnicode(s)) {
      switch (code) {
        case '&'.code: buf.add("&amp;");
        case '<'.code: buf.add("&lt;");
        case '>'.code: buf.add("&gt;");
        case '"'.code if (escapeQuotes): buf.add("&quot;");
        case '\''.code if (escapeQuotes): buf.add("&#039;");
        case _: buf.addChar(code);
      }
    }
    return buf.toString();
  }
  /**
    Reverses `htmlEscape`: `htmlUnescape(htmlEscape(s)) == s` always holds.

    Replacements performed:

    - `&gt;` becomes `>`
    - `&lt;` becomes `<`
    - `&quot;` becomes `"`
    - `&#039;` becomes `'`
    - `&amp;` becomes `&`
  **/
  public static function htmlUnescape( s : String ) : String {
    var text = s;
    text = text.split("&gt;").join(">");
    text = text.split("&lt;").join("<");
    text = text.split("&quot;").join('"');
    text = text.split("&#039;").join("'");
    // `&amp;` is handled last so that a freshly produced `&` cannot be
    // picked up as the start of another entity.
    text = text.split("&amp;").join("&");
    return text;
  }
  /**
    Checks whether `s` begins with `start`.

    An empty `start` always yields true. The result is unspecified when
    `start` is `null`.
  **/
  public static #if (cs || java || python) inline #end function startsWith( s : String, start : String ) : Bool {
    #if java
    return untyped s.startsWith(start);
    #elseif cs
    return untyped s.StartsWith(start);
    #elseif cpp
    if (s.length<start.length)
      return false;
    // Compare the two native character buffers position by position.
    var haystack = s.c_str();
    var needle = start.c_str();
    for (k in 0...start.length) {
      if ( haystack.at(k) != needle.at(k) )
        return false;
    }
    return true;
    #elseif hl
    // The byte count passed to `compare` is twice the character count.
    return @:privateAccess (s.length >= start.length && s.bytes.compare(0,start.bytes,0,start.length<<1) == 0);
    #elseif python
    return python.NativeStringTools.startswith(s, start);
    #else
    var prefixLen = start.length;
    return s.length >= prefixLen && s.substr(0, prefixLen) == start;
    #end
  }
  /**
    Checks whether `s` finishes with `end`.

    An empty `end` always yields true. The result is unspecified when `end`
    is `null`.
  **/
  public static #if (cs || java || python) inline #end function endsWith( s : String, end : String ) : Bool {
    #if java
    return untyped s.endsWith(end);
    #elseif cs
    return untyped s.EndsWith(end);
    #elseif cpp
    if (s.length<end.length)
      return false;
    // Start comparing at the position where the suffix would begin.
    var tail = s.c_str().add( s.length-end.length );
    var needle = end.c_str();
    for (k in 0...end.length) {
      if ( tail.at(k) != needle.at(k) )
        return false;
    }
    return true;
    #elseif hl
    var suffixLen = end.length;
    var offset = s.length - suffixLen;
    // Offsets and sizes handed to `compare` are doubled character counts.
    return @:privateAccess (offset >= 0 && s.bytes.compare(offset << 1, end.bytes, 0, suffixLen << 1) == 0);
    #elseif python
    return python.NativeStringTools.endswith(s, end);
    #else
    var suffixLen = end.length;
    var offset = s.length - suffixLen;
    return offset >= 0 && s.substr(offset, suffixLen) == end;
    #end
  }
  /**
    Reports whether the character of `s` at position `pos` is a space.

    Space characters are those with character code 9, 10, 11, 12, 13 or 32.

    The result is false when `s` is the empty String `""` or when `pos` is
    not a valid position within `s`.
  **/
  public static function isSpace( s : String, pos : Int ) : Bool {
    #if (python || lua)
    // These targets get an explicit bounds check before the lookup.
    if (s.length == 0 || pos < 0 || pos >= s.length) return false;
    #end
    var code = s.charCodeAt( pos );
    // 9..13 covers tab, line feed, vertical tab, form feed, carriage return.
    var isControlSpace = code > 8 && code < 14;
    return isControlSpace || code == 32;
  }
  /**
    Strips leading space characters from `s`.

    Except on the cs target, which delegates to the native `TrimStart`,
    `isSpace()` decides which characters are removed.

    If `s` is the empty String `""` or consists only of space characters,
    the result is the empty String `""`.
  **/
  public #if cs inline #end static function ltrim( s : String ) : String {
    #if cs
    return untyped s.TrimStart();
    #else
    var total = s.length;
    // Number of leading characters to drop.
    var skip = 0;
    while (skip < total && isSpace(s, skip))
      skip++;
    return (skip > 0) ? s.substr(skip, total - skip) : s;
    #end
  }
  /**
    Strips trailing space characters from `s`.

    Except on the cs target, which delegates to the native `TrimEnd`,
    `isSpace()` decides which characters are removed.

    If `s` is the empty String `""` or consists only of space characters,
    the result is the empty String `""`.
  **/
  public #if cs inline #end static function rtrim( s : String ) : String {
    #if cs
    return untyped s.TrimEnd();
    #else
    var total = s.length;
    // Number of trailing characters to drop.
    var drop = 0;
    while (drop < total && isSpace(s, total - drop - 1))
      drop++;
    return (drop > 0) ? s.substr(0, total - drop) : s;
    #end
  }
  /**
    Strips both leading and trailing space characters from `s`.

    The cs and java targets delegate to the native trim method of the
    platform string; every other target evaluates `ltrim(rtrim(s))`.
  **/
  public #if (cs || java) inline #end static function trim( s : String ) : String {
    #if cs
    return untyped s.Trim();
    #elseif java
    return untyped s.trim();
    #else
    var withoutTail = rtrim(s);
    return ltrim(withoutTail);
    #end
  }
  /**
    Prepends `c` to `s` until the length of the result is at least `l`.

    If `c` is the empty String `""` or if `l` does not exceed `s.length`,
    the result equals `s`.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`.

    If `c` is null, the result is unspecified.
  **/
  public static function lpad( s : String, c : String, l : Int ) : String {
    if (c.length <= 0)
      return s;
    // Number of characters the padding has to reach before `s` is appended.
    var missing = l - s.length;
    var out = new StringBuf();
    while (out.length < missing)
      out.add(c);
    out.add(s);
    return out.toString();
  }
  /**
    Appends `c` to `s` until the length of the result is at least `l`.

    If `c` is the empty String `""` or if `l` does not exceed `s.length`,
    the result equals `s`.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`.

    If `c` is null, the result is unspecified.
  **/
  public static function rpad( s : String, c : String, l : Int ) : String {
    if (c.length <= 0)
      return s;
    var out = new StringBuf();
    out.add(s);
    // Keep appending the filler until the target length is reached.
    while (out.length < l)
      out.add(c);
    return out.toString();
  }
  /**
    Substitutes every occurrence of `sub` inside `s` with `by`.

    If `sub` is the empty String `""`, `by` is inserted after each character
    of `s` except the last one. If `by` is also the empty String `""`, `s`
    remains unchanged.

    If `sub` or `by` are null, the result is unspecified.
  **/
  public static function replace( s : String, sub : String, by : String ) : String {
    #if (java || cs)
    // A non-empty `sub` is handed to the native replace method; an empty
    // one falls through to the split/join path shared with other targets.
    if (sub.length != 0) {
      #if java
      return untyped s.replace(sub, by);
      #else
      return untyped s.Replace(sub, by);
      #end
    }
    #end
    return s.split(sub).join(by);
  }
  /**
    Converts `n` to its upper-case hexadecimal representation.

    If `digits` is specified, the resulting String is left-padded with "0"
    until its `length` equals `digits`. A result that is already longer is
    returned as is.
  **/
  public static function hex( n : Int, ?digits : Int ) {
    #if flash
    var n : UInt = n;
    var s : String = untyped n.toString(16);
    s = s.toUpperCase();
    #else
    var s = "";
    var alphabet = "0123456789ABCDEF";
    // Emit the lowest nibble first and prepend, so the most significant
    // digit ends up on the left.
    do {
      var nibble = n & 15;
      s = alphabet.charAt(nibble) + s;
      n >>>= 4;
    } while ( n > 0 );
    #end
    #if python
    if (digits != null && s.length < digits) {
      var missing = digits - s.length;
      for (_ in 0...missing) {
        s = "0" + s;
      }
    }
    #else
    if ( digits != null ) {
      while ( s.length < digits ) {
        s = "0" + s;
      }
    }
    #end
    return s;
  }
  /**
    Returns the character code at position `index` of String `s`, or an
    end-of-file indicator if `index` equals `s.length`.

    This method is faster than `String.charCodeAt()` on some platforms, but
    the result is unspecified if `index` is negative or greater than
    `s.length`.

    End of file status can be checked by calling `StringTools.isEof()` with
    the returned value as argument.

    This operation is not guaranteed to work if `s` contains the `\0`
    character.
  **/
  public static #if !eval inline #end function fastCodeAt( s : String, index : Int ) : Int {
    #if neko
    return untyped __dollar__sget(s.__s, index);
    #elseif cpp
    return untyped s.cca(index);
    #elseif flash
    return untyped s.cca(index);
    #elseif java
    return ( index < s.length ) ? cast(_charAt(s, index), Int) : -1;
    #elseif cs
    // The unsigned cast makes a negative `index` fail the bounds test.
    return ( cast(index, UInt) < s.length ) ? cast(s[index], Int) : -1;
    #elseif js
    return (cast s).charCodeAt(index);
    #elseif python
    return if (index >= s.length) -1 else python.internal.UBuiltins.ord(python.Syntax.arrayAccess(s, index));
    #elseif hl
    // Byte offset is twice the character index.
    return @:privateAccess s.bytes.getUI16(index << 1);
    #elseif lua
    // The Lua call takes `index + 1`.
    return lua.lib.luautf8.Utf8.byte(s,index+1);
    #else
    return untyped s.cca(index);
    #end
  }
  /**
    Tells if `c` represents the end-of-file (EOF) character.

    `c` is expected to be a value returned by `StringTools.fastCodeAt()`.
    The EOF marker differs per target: `0` on flash, cpp and hl, `NaN` on
    js, `null` on neko, lua and eval, and `-1` on cs, java and python. On
    any other target this function always returns false.
  **/
  @:noUsing public static inline function isEof( c : Int ) : Bool {
    #if (flash || cpp || hl)
    return c == 0;
    #elseif js
    return c != c; // fast NaN
    #elseif (neko || lua || eval)
    return c == null;
    #elseif (cs || java || python)
    return c == -1;
    #else
    return false;
    #end
  }
  /**
    Produces a String that a Unix shell reads as one single command line
    argument.

    The empty String becomes `''`. An argument consisting solely of the
    characters `a-z A-Z 0-9 _ @ % + = : , . / -` is returned unchanged.
    Anything else is wrapped in single quotes.
  **/
  public static function quoteUnixArg(argument:String):String {
    // Based on cpython's shlex.quote().
    // https://hg.python.org/cpython/file/a3f076d4f54f/Lib/shlex.py#l278

    if (argument == "")
      return "''";

    var hasUnsafeChar = ~/[^a-zA-Z0-9_@%+=:,.\/-]/.match(argument);
    if (!hasUnsafeChar)
      return argument;

    // Wrap in single quotes; an embedded single quote closes the quoted
    // run, is emitted inside double quotes, and the quoted run is reopened.
    // The string $'b is therefore quoted as '$'"'"'b'.
    var escaped = argument.split("'").join("'\"'\"'");
    return "'" + escaped + "'";
  }
  /**
    Character codes that `quoteWinArg(_, true)` prefixes with `^`.
  **/
  public static var winMetaCharacters = [
    " ".code,
    "(".code,
    ")".code,
    "%".code,
    "!".code,
    "^".code,
    "\"".code,
    "<".code,
    ">".code,
    "&".code,
    "|".code,
    "\n".code,
    "\r".code,
    ",".code,
    ";".code
  ];
  /**
    Returns a String that can be used as a single command line argument
    on Windows.

    The input will be quoted, or escaped if necessary, such that the output
    will be parsed as a single argument using the rule specified in
    http://msdn.microsoft.com/en-us/library/ms880421

    If `escapeMetaCharacters` is true, every character of the result whose
    code is listed in `winMetaCharacters` is additionally prefixed with `^`.

    Examples:
    ```
    quoteWinArg("abc", false) == "abc";
    quoteWinArg("ab c", false) == '"ab c"';
    quoteWinArg("ab c", true) == '^"ab^ c^"';
    ```
  **/
  public static function quoteWinArg(argument:String, escapeMetaCharacters:Bool):String {
    // The pattern matches arguments that are non-empty and contain no space,
    // tab, backslash or double quote. Only arguments that do NOT match need
    // the quoting pass below.
    if (!~/^[^ \t\\"]+$/.match(argument)) {
      // Based on cpython's subprocess.list2cmdline().
      // https://hg.python.org/cpython/file/50741316dd3a/Lib/subprocess.py#l620

      var result = new StringBuf();
      // Surrounding quotes are needed for whitespace and for the empty string.
      var needquote = argument.indexOf(" ") != -1 || argument.indexOf("\t") != -1 || argument == "";

      if (needquote)
        result.add('"');

      // Pending run of backslashes whose treatment depends on what follows.
      var bs_buf = new StringBuf();
      for (i in 0...argument.length) {
        switch (argument.charCodeAt(i)) {
          case "\\".code:
            // Don't know if we need to double yet.
            bs_buf.add("\\");
          case '"'.code:
            // Backslashes in front of a quote are doubled, then the quote
            // itself is escaped.
            var bs = bs_buf.toString();
            result.add(bs);
            result.add(bs);
            bs_buf = new StringBuf();
            result.add('\\"');
          case var c:
            // Normal char: pending backslashes are emitted unchanged.
            if (bs_buf.length > 0) {
              result.add(bs_buf.toString());
              bs_buf = new StringBuf();
            }
            result.addChar(c);
        }
      }

      // Add remaining backslashes, if any.
      result.add(bs_buf.toString());

      if (needquote) {
        // Trailing backslashes precede the closing quote, so they are
        // emitted a second time.
        result.add(bs_buf.toString());
        result.add('"');
      }

      argument = result.toString();
    }

    if (escapeMetaCharacters) {
      var result = new StringBuf();
      for (i in 0...argument.length) {
        var c = argument.charCodeAt(i);
        if (winMetaCharacters.indexOf(c) >= 0) {
          result.addChar("^".code);
        }
        result.addChar(c);
      }
      return result.toString();
    } else {
      return argument;
    }
  }
  #if java
  /**
    Reads the UTF-16 unit of `str` at `idx` through the untyped `_charAt`
    call on the string.
  **/
  private static inline function _charAt(str:String, idx:Int):java.StdTypes.Char16 {
    return untyped str._charAt(idx);
  }
  #end
  #if neko
  // One-argument primitives loaded from the neko "std" library; used by the
  // neko branches of `urlEncode` and `urlDecode`.
  private static var _urlEncode = neko.Lib.load("std", "url_encode", 1);

  private static var _urlDecode = neko.Lib.load("std", "url_decode", 1);
  #end
  541. }