StringTools.hx 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. /*
  2. * Copyright (C)2005-2017 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. /**
  23. This class provides advanced methods on Strings. It is ideally used with
  24. `using StringTools` and then acts as an [extension](https://haxe.org/manual/lf-static-extension.html)
  25. to the `String` class.
  26. If the first argument to any of the methods is null, the result is
  27. unspecified.
  28. **/
  29. #if cpp
  30. using cpp.NativeString;
  31. #end
  32. class StringTools {
  /**
    Encodes `s` for use inside a URL by delegating to the facility of the
    current target:

    - flash / js: the global `encodeURIComponent`
    - neko: the `_urlEncode` primitive applied to the native string
    - cpp: `__URLEncode` on the string
    - java: `java.net.URLEncoder.encode` with "UTF-8", then
      `postProcessUrlEncode`
    - cs: `Uri.EscapeDataString`
    - python: `urllib` `quote` with an empty second argument
    - hl: the `urlEncode` primitive on the string bytes
    - lua: `\n` becomes `\r\n`, every character that is not alphanumeric,
      a space or one of `-_.~` becomes `%XX`, and spaces become `+`

    On any other target the result is `null`.
  **/
  #if (!java && !cpp && !lua) inline #end public static function urlEncode( s : String ) : String {
      #if flash
          return untyped __global__["encodeURIComponent"](s);
      #elseif neko
          return untyped new String(_urlEncode(s.__s));
      #elseif js
          return untyped encodeURIComponent(s);
      #elseif cpp
          return untyped s.__URLEncode();
      #elseif java
          return postProcessUrlEncode(java.net.URLEncoder.encode(s, "UTF-8"));
      #elseif cs
          return untyped cs.system.Uri.EscapeDataString(s);
      #elseif python
          return python.lib.urllib.Parse.quote(s, "");
      #elseif hl
          // `size` receives the length of the encoded buffer
          var size = 0;
          var encoded = @:privateAccess s.bytes.urlEncode(size);
          return @:privateAccess String.__alloc__(encoded, size);
      #elseif lua
          var withCrLf = lua.NativeStringTools.gsub(s, "\n", "\r\n");
          var escaped = lua.NativeStringTools.gsub(withCrLf, "([^%w %-%_%.%~])", function (ch) {
              return lua.NativeStringTools.format("%%%02X", lua.NativeStringTools.byte(ch) + '');
          });
          // spaces were left untouched by the pattern above and are encoded last
          return lua.NativeStringTools.gsub(escaped, " ", "+");
      #else
          return null;
      #end
  }
  #if java
  /**
    Rewrites the output of `java.net.URLEncoder.encode` as used by
    `urlEncode`:

    - `+` becomes `%20`
    - the escapes `%21`, `%27`, `%28`, `%29` and `%7E` / `%7e` are turned
      back into `!`, `'`, `(`, `)` and `~`
    - every other escape and every other character is copied unchanged
  **/
  private static function postProcessUrlEncode( s : String ) : String {
      var out = new StringBuf();
      var pos = 0;
      var total = s.length;
      while (pos < total) {
          switch (_charAt(s, pos++)) {
              case '+'.code:
                  out.add('%20');
              // a '%' is only handled as an escape when two more characters follow it
              case '%'.code if (pos <= total - 2):
                  var hi = _charAt(s, pos++);
                  var lo = _charAt(s, pos++);
                  switch [hi, lo] {
                      case ['2'.code, '1'.code]:
                          out.addChar('!'.code);
                      case ['2'.code, '7'.code]:
                          out.addChar('\''.code);
                      case ['2'.code, '8'.code]:
                          out.addChar('('.code);
                      case ['2'.code, '9'.code]:
                          out.addChar(')'.code);
                      case ['7'.code, 'E'.code] | ['7'.code, 'e'.code]:
                          out.addChar('~'.code);
                      case _:
                          // not one of the rewritten escapes: emit it as it was
                          out.addChar('%'.code);
                          out.addChar(cast hi);
                          out.addChar(cast lo);
                  }
              case other:
                  out.addChar(cast other);
          }
      }
      return out.toString();
  }
  #end
  /**
    Decodes a URL-encoded String `s` by delegating to the facility of the
    current target:

    - flash / js: every `+` is turned into a space, then the global
      `decodeURIComponent` is applied
    - neko: the `_urlDecode` primitive applied to the native string
    - cpp: `__URLDecode` on the string
    - java: `java.net.URLDecoder.decode` with "UTF-8"
    - cs: `Uri.UnescapeDataString`
    - python: `urllib` `unquote`
    - hl: the `urlDecode` primitive on the string bytes
    - lua: `+` becomes a space, every `%XX` becomes the character with
      that hexadecimal code, and `\r\n` becomes `\n`

    On any other target the result is `null`.
  **/
  #if (!java && !cpp && !lua) inline #end public static function urlDecode( s : String ) : String {
      #if flash
          return untyped __global__["decodeURIComponent"](s.split("+").join(" "));
      #elseif neko
          return untyped new String(_urlDecode(s.__s));
      #elseif js
          return untyped decodeURIComponent(s.split("+").join(" "));
      #elseif cpp
          return untyped s.__URLDecode();
      #elseif java
          // the embedded Java snippet refers to the parameter `s` by name
          try {
              return untyped __java__("java.net.URLDecoder.decode(s, \"UTF-8\")");
          } catch (e:Dynamic) {
              throw e;
          }
      #elseif cs
          return untyped cs.system.Uri.UnescapeDataString(s);
      #elseif python
          return python.lib.urllib.Parse.unquote(s);
      #elseif hl
          // `size` receives the length of the decoded buffer
          var size = 0;
          var decoded = @:privateAccess s.bytes.urlDecode(size);
          return @:privateAccess String.__alloc__(decoded, size);
      #elseif lua
          var spaced = lua.NativeStringTools.gsub(s, "+", " ");
          var unescaped = lua.NativeStringTools.gsub(spaced, "%%(%x%x)", function (hexPair) {
              return lua.NativeStringTools.char(lua.Lua.tonumber(hexPair, 16));
          });
          return lua.NativeStringTools.gsub(unescaped, "\r\n", "\n");
      #else
          return null;
      #end
  }
  /**
    Escapes HTML special characters of the string `s`.

    The following replacements are always made, in this order:

    - `&` becomes `&amp;`
    - `<` becomes `&lt;`
    - `>` becomes `&gt;`

    If `quotes` is true, the following characters are also replaced:

    - `"` becomes `&quot;`
    - `'` becomes `&#039;`
  **/
  public static function htmlEscape( s : String, ?quotes : Bool ) : String {
      // `&` has to go first so the entities produced below are not escaped again
      var escaped = s.split("&").join("&amp;");
      escaped = escaped.split("<").join("&lt;");
      escaped = escaped.split(">").join("&gt;");
      if (quotes) {
          escaped = escaped.split('"').join("&quot;");
          escaped = escaped.split("'").join("&#039;");
      }
      return escaped;
  }
  /**
    Unescapes HTML special characters of the string `s`.

    This is the inverse operation to `htmlEscape`, i.e. the following
    always holds: `htmlUnescape(htmlEscape(s)) == s`

    The replacements are applied in this order:

    - `&gt;` becomes `>`
    - `&lt;` becomes `<`
    - `&quot;` becomes `"`
    - `&#039;` becomes `'`
    - `&amp;` becomes `&`
  **/
  public static function htmlUnescape( s : String ) : String {
      var plain = s.split("&gt;").join(">");
      plain = plain.split("&lt;").join("<");
      plain = plain.split("&quot;").join('"');
      plain = plain.split("&#039;").join("'");
      // `&amp;` is handled last so that e.g. `&amp;lt;` yields `&lt;`, not `<`
      plain = plain.split("&amp;").join("&");
      return plain;
  }
  /**
    Tells if the string `s` starts with the string `start`.

    If `start` is `null`, the result is unspecified.

    If `start` is the empty String `""`, the result is true.

    The java and cs targets forward to the native `startsWith` /
    `StartsWith` method; the other targets compare the first
    `start.length` characters of `s` with `start`.
  **/
  public static #if (cs || java) inline #end function startsWith( s : String, start : String ) : Bool {
      #if java
          return untyped s.startsWith(start);
      #elseif cs
          return untyped s.StartsWith(start);
      #elseif cpp
          var prefixLen = start.length;
          if (prefixLen > s.length)
              return false;
          var strPtr = s.c_str();
          var prefixPtr = start.c_str();
          for (offset in 0...prefixLen) {
              if (strPtr.at(offset) != prefixPtr.at(offset))
                  return false;
          }
          return true;
      #elseif hl
          // the number of bytes compared is twice the character count
          return @:privateAccess (start.length <= s.length && s.bytes.compare(0, start.bytes, 0, start.length << 1) == 0);
      #else
          if (s.length < start.length)
              return false;
          return s.substr(0, start.length) == start;
      #end
  }
  /**
    Tells if the string `s` ends with the string `end`.

    If `end` is `null`, the result is unspecified.

    If `end` is the empty String `""`, the result is true.

    The java and cs targets forward to the native `endsWith` / `EndsWith`
    method; the other targets compare the last `end.length` characters of
    `s` with `end`.
  **/
  public static #if (cs || java) inline #end function endsWith( s : String, end : String ) : Bool {
      #if java
          return untyped s.endsWith(end);
      #elseif cs
          return untyped s.EndsWith(end);
      #elseif cpp
          var suffixLen = end.length;
          if (suffixLen > s.length)
              return false;
          // start comparing at the position where the suffix would begin
          var tailPtr = s.c_str().add( s.length - suffixLen );
          var suffixPtr = end.c_str();
          for (offset in 0...suffixLen) {
              if (tailPtr.at(offset) != suffixPtr.at(offset))
                  return false;
          }
          return true;
      #elseif hl
          var suffixLen = end.length;
          var totalLen = s.length;
          // offsets and sizes are given in bytes, twice the character counts
          return @:privateAccess (suffixLen <= totalLen && s.bytes.compare((totalLen - suffixLen) << 1, end.bytes, 0, suffixLen << 1) == 0);
      #else
          var suffixLen = end.length;
          var totalLen = s.length;
          if (totalLen < suffixLen)
              return false;
          return s.substr(totalLen - suffixLen, suffixLen) == end;
      #end
  }
  /**
    Tells if the character in the string `s` at position `pos` is a space.

    A character is considered to be a space character if its character
    code is 9, 10, 11, 12, 13 or 32.

    If `s` is the empty String `""`, or if `pos` is not a valid position
    within `s`, the result is false.
  **/
  public static function isSpace( s : String, pos : Int ) : Bool {
      #if (python || lua)
          // these targets reject invalid positions explicitly before reading the code
          if (pos < 0 || pos >= s.length || s.length == 0)
              return false;
      #end
      var code = s.charCodeAt( pos );
      // 32 is the blank; 9 to 13 are the codes strictly between 8 and 14
      return code == 32 || (code > 8 && code < 14);
  }
  /**
    Removes leading space characters of `s`.

    Except on the cs target, which forwards to the native `TrimStart`,
    this function calls `isSpace()` to decide which characters to remove.

    If `s` is the empty String `""` or consists only of space characters,
    the result is the empty String `""`.
  **/
  public #if cs inline #end static function ltrim( s : String ) : String {
      #if cs
          return untyped s.TrimStart();
      #else
          var total = s.length;
          // number of leading space characters found so far
          var skipped = 0;
          while (skipped < total && isSpace(s, skipped))
              skipped++;
          return (skipped > 0) ? s.substr(skipped, total - skipped) : s;
      #end
  }
  /**
    Removes trailing space characters of `s`.

    Except on the cs target, which forwards to the native `TrimEnd`, this
    function calls `isSpace()` to decide which characters to remove.

    If `s` is the empty String `""` or consists only of space characters,
    the result is the empty String `""`.
  **/
  public #if cs inline #end static function rtrim( s : String ) : String {
      #if cs
          return untyped s.TrimEnd();
      #else
          var total = s.length;
          // number of trailing space characters found so far
          var dropped = 0;
          while (dropped < total && isSpace(s, total - dropped - 1))
              dropped++;
          return (dropped > 0) ? s.substr(0, total - dropped) : s;
      #end
  }
  /**
    Removes leading and trailing space characters of `s`.

    On the cs and java targets this forwards to the native `Trim` /
    `trim` method; everywhere else it is `ltrim(rtrim(s))`.
  **/
  public #if (cs || java) inline #end static function trim( s : String ) : String {
      #if cs
          return untyped s.Trim();
      #elseif java
          return untyped s.trim();
      #else
          var withoutTail = rtrim(s);
          return ltrim(withoutTail);
      #end
  }
  /**
    Concatenates `c` to the front of `s` until the length of the result is
    at least `l`.

    If `c` is the empty String `""` or if `l` does not exceed `s.length`,
    `s` is returned unchanged.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`.

    If `c` is null, the result is unspecified.

    @param s the String to pad
    @param c the String that is repeated in front of `s`
    @param l the minimum length of the result
    @return the padded String
  **/
  public static function lpad( s : String, c : String, l : Int ) : String {
      if (c.length <= 0 || s.length >= l)
          return s;
      // Build only the padding in a buffer and append `s` once, instead of
      // re-copying the whole accumulated string on every iteration.
      var missing = l - s.length;
      var pad = new StringBuf();
      while (pad.length < missing)
          pad.add(c);
      pad.add(s);
      return pad.toString();
  }
  /**
    Appends `c` to `s` until the length of the result is at least `l`.

    If `c` is the empty String `""` or if `l` does not exceed `s.length`,
    `s` is returned unchanged.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`.

    If `c` is null, the result is unspecified.

    @param s the String to pad
    @param c the String that is repeated after `s`
    @param l the minimum length of the result
    @return the padded String
  **/
  public static function rpad( s : String, c : String, l : Int ) : String {
      if (c.length <= 0 || s.length >= l)
          return s;
      // Accumulate in a buffer instead of re-copying the whole string on
      // every iteration.
      var out = new StringBuf();
      out.add(s);
      while (out.length < l)
          out.add(c);
      return out.toString();
  }
  /**
    Replaces all occurrences of the String `sub` in the String `s` by the
    String `by`.

    If `sub` is the empty String `""`, `s` is split into its characters
    and `by` is inserted between them. If `by` is also the empty String
    `""`, `s` remains unchanged.

    This is a convenience function for `s.split(sub).join(by)`. The java
    and cs targets use the native `replace` / `Replace` method when `sub`
    is not empty.

    If `sub` or `by` are null, the result is unspecified.
  **/
  public static function replace( s : String, sub : String, by : String ) : String {
      #if java
          if (sub.length != 0)
              return untyped s.replace(sub, by);
      #elseif cs
          if (sub.length != 0)
              return untyped s.Replace(sub, by);
      #end
      // generic path, also used for an empty `sub` on java and cs
      return s.split(sub).join(by);
  }
  /**
    Encodes `n` into an uppercase hexadecimal representation.

    If `digits` is specified, the resulting String is left-padded with
    "0" until its `length` is at least `digits`; a longer result is not
    truncated.
  **/
  public static function hex( n : Int, ?digits : Int ) {
      #if flash
          var n : UInt = n;
          var out : String = untyped n.toString(16);
          out = out.toUpperCase();
      #else
          var out = "";
          var alphabet = "0123456789ABCDEF";
          // emit the lowest four bits first, prepending so the order comes out right
          do {
              out = alphabet.charAt(n & 15) + out;
              n = n >>> 4;
          } while (n > 0);
      #end
      #if python
          if (digits != null && out.length < digits) {
              var missing = digits - out.length;
              for (_ in 0...missing) {
                  out = "0" + out;
              }
          }
      #else
          if (digits != null) {
              while (out.length < digits) {
                  out = "0" + out;
              }
          }
      #end
      return out;
  }
  /**
    Returns the character code at position `index` of String `s`, or an
    end-of-file indicator if `index` equals `s.length`.

    This method is faster than `String.charCodeAt()` on some platforms,
    but the result is unspecified if `index` is negative or greater than
    `s.length`.

    End of file status can be checked by calling `StringTools.isEof()`
    with the returned value as argument.

    This operation is not guaranteed to work if `s` contains the `\0`
    character.
  **/
  public static inline function fastCodeAt( s : String, index : Int ) : Int {
      #if neko
          return untyped __dollar__sget(s.__s, index);
      #elseif java
          return if (index < s.length) cast(_charAt(s, index), Int) else -1;
      #elseif cs
          return if (cast(index, UInt) < s.length) cast(s[index], Int) else -1;
      #elseif js
          return (untyped s).charCodeAt(index);
      #elseif python
          return if (index >= s.length) -1 else python.internal.UBuiltins.ord(python.Syntax.arrayAccess(s, index));
      #elseif hl
          // the byte offset is twice the character index
          return @:privateAccess s.bytes.getUI16(index << 1);
      #elseif lua
          // the native `byte` is called with `index + 1`
          return lua.NativeStringTools.byte(s, index + 1);
      #else
          // cpp, flash and every remaining target
          return untyped s.cca(index);
      #end
  }
  /**
    Tells if `c` represents the end-of-file (EOF) indicator.

    The value that counts as EOF depends on the target:

    - flash, cpp, hl: `0`
    - js: `NaN`
    - neko, lua: `null`
    - cs, java, python: `-1`

    On any other target the result is always false.
  **/
  @:noUsing public static inline function isEof( c : Int ) : Bool {
      #if (flash || cpp || hl)
          return c == 0;
      #elseif js
          // NaN is the only value that is not equal to itself
          return c != c;
      #elseif (neko || lua)
          return c == null;
      #elseif (cs || java || python)
          return c == -1;
      #else
          return false;
      #end
  }
  /**
    Returns a String that can be used as a single command line argument
    on Unix.

    The empty String becomes `''`. An argument made only of the
    characters `a-z A-Z 0-9 _ @ % + = : , . / -` is returned unchanged.
    Anything else is wrapped in single quotes, with every embedded single
    quote written as `'"'"'`.
  */
  public static function quoteUnixArg(argument:String):String {
      // Based on cpython's shlex.quote().
      // https://hg.python.org/cpython/file/a3f076d4f54f/Lib/shlex.py#l278
      if (argument == "")
          return "''";

      var hasUnsafeChar = ~/[^a-zA-Z0-9_@%+=:,.\/-]/.match(argument);
      if (!hasUnsafeChar)
          return argument;

      // Close the single-quoted run, emit the quote inside double quotes,
      // then reopen it: a'b is written as 'a'"'"'b'.
      var body = argument.split("'").join("'\"'\"'");
      return "'" + body + "'";
  }
  /**
    Character codes of the characters that will be escaped by `quoteWinArg(_, true)`.
  */
  public static var winMetaCharacters = [
      " ".code, "(".code, ")".code, "%".code, "!".code, "^".code, "\"".code,
      "<".code, ">".code, "&".code, "|".code, "\n".code, "\r".code, ",".code,
      ";".code
  ];

  /**
    Returns a String that can be used as a single command line argument
    on Windows.

    The input will be quoted, or escaped if necessary, such that the
    output will be parsed as a single argument using the rule specified in
    http://msdn.microsoft.com/en-us/library/ms880421

    If `escapeMetaCharacters` is true, every character of the quoted
    result whose code is listed in `winMetaCharacters` is additionally
    prefixed with `^`.

    Examples:
    ```
    quoteWinArg("abc", false) == "abc";
    quoteWinArg("ab c", false) == '"ab c"';
    ```
  */
  public static function quoteWinArg(argument:String, escapeMetaCharacters:Bool):String {
      // A non-empty argument without space, tab, backslash or double quote is left as it is.
      var isPlain = ~/^[^ \t\\"]+$/.match(argument);
      if (!isPlain) {
          // Based on cpython's subprocess.list2cmdline().
          // https://hg.python.org/cpython/file/50741316dd3a/Lib/subprocess.py#l620
          var mustWrap = argument == "" || argument.indexOf(" ") != -1 || argument.indexOf("\t") != -1;
          var quoted = new StringBuf();
          if (mustWrap)
              quoted.add('"');

          // Backslashes are held back until the character following them is known.
          var pending = new StringBuf();
          for (i in 0...argument.length) {
              var code = argument.charCodeAt(i);
              if (code == "\\".code) {
                  pending.add("\\");
              } else if (code == '"'.code) {
                  // Backslashes in front of a double quote are doubled, then the quote is escaped.
                  var run = pending.toString();
                  quoted.add(run);
                  quoted.add(run);
                  pending = new StringBuf();
                  quoted.add('\\"');
              } else {
                  // Backslashes in front of any other character are emitted as they are.
                  if (pending.length > 0) {
                      quoted.add(pending.toString());
                      pending = new StringBuf();
                  }
                  quoted.addChar(code);
              }
          }

          // Trailing backslashes are emitted once, and once more when a closing quote follows.
          var tail = pending.toString();
          quoted.add(tail);
          if (mustWrap) {
              quoted.add(tail);
              quoted.add('"');
          }
          argument = quoted.toString();
      }

      if (!escapeMetaCharacters)
          return argument;

      var escaped = new StringBuf();
      for (i in 0...argument.length) {
          var code = argument.charCodeAt(i);
          if (winMetaCharacters.indexOf(code) >= 0)
              escaped.addChar("^".code);
          escaped.addChar(code);
      }
      return escaped.toString();
  }
  #if java
  /**
    Java-only helper: returns the result of the native `_charAt` call on
    `str` at `idx`, typed as a `Char16`.
  **/
  private static inline function _charAt(str:String, idx:Int):java.StdTypes.Char16 {
      return untyped str._charAt(idx);
  }
  #end
  #if neko
  /** The `url_encode` primitive loaded from the `std` library; called by `urlEncode`. **/
  private static var _urlEncode = neko.Lib.load("std", "url_encode", 1);

  /** The `url_decode` primitive loaded from the `std` library; called by `urlDecode`. **/
  private static var _urlDecode = neko.Lib.load("std", "url_decode", 1);
  #end
  522. }