StringTools.hx 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. /*
  2. * Copyright (C)2005-2017 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. /**
  23. This class provides advanced methods on Strings. It is ideally used with
  24. `using StringTools` and then acts as an [extension](https://haxe.org/manual/lf-static-extension.html)
  25. to the `String` class.
  26. If the first argument to any of the methods is null, the result is
  27. unspecified.
  28. **/
  29. #if cpp
  30. using cpp.NativeString;
  31. #end
  32. class StringTools {
  /**
    Encodes `s` so that it can be embedded in a URL.

    The work is delegated to the encoder available on the current target
    (see the per-target branches below). On a target that matches none of
    the branches, `null` is returned.
  **/
  #if (!java && !cpp && !lua && !eval) inline #end
  public static function urlEncode( s : String ) : String {
    #if flash
      return untyped __global__["encodeURIComponent"](s);
    #elseif neko
      return untyped new String(_urlEncode(s.__s));
    #elseif js
      return untyped encodeURIComponent(s);
    #elseif cpp
      return untyped s.__URLEncode();
    #elseif java
      // The raw java.net.URLEncoder output is adjusted by postProcessUrlEncode.
      return postProcessUrlEncode(java.net.URLEncoder.encode(s, "UTF-8"));
    #elseif cs
      return untyped cs.system.Uri.EscapeDataString(s);
    #elseif python
      return python.lib.urllib.Parse.quote(s, "");
    #elseif hl
      var size = 0;
      var encoded = @:privateAccess s.bytes.urlEncode(size);
      return @:privateAccess String.__alloc__(encoded, size);
    #elseif lua
      // Step 1: every "\n" is rewritten as "\r\n".
      s = lua.NativeStringTools.gsub(s, "\n", "\r\n");
      // Step 2: every character that is not alphanumeric, a space or one of
      // `-`, `_`, `.`, `~` is replaced by the "%%%02X" rendering of its byte.
      s = lua.NativeStringTools.gsub(s, "([^%w %-%_%.%~])", function (ch) {
        return lua.NativeStringTools.format("%%%02X", lua.NativeStringTools.byte(ch) + '');
      });
      // Step 3: the spaces left alone by step 2 become `+`.
      s = lua.NativeStringTools.gsub(s, " ", "+");
      return s;
    #else
      return null;
    #end
  }
  #if java
  /**
    Rewrites the already-encoded string `s` (used by `urlEncode` on the
    Java target):

    - `+` becomes `%20`
    - `%21`, `%27`, `%28`, `%29` and `%7E`/`%7e` are turned back into the
      literal characters `!`, `'`, `(`, `)` and `~`
    - every other character or escape is copied through unchanged
  **/
  private static function postProcessUrlEncode( s : String ) : String {
    var out = new StringBuf();
    var pos = 0;
    var total = s.length;
    while (pos < total) {
      switch (_charAt(s, pos++)) {
        case '+'.code:
          out.add('%20');
        // Only treat `%` as an escape when two more characters follow it.
        case '%'.code if (pos <= total - 2):
          var hi = _charAt(s, pos++);
          var lo = _charAt(s, pos++);
          switch [hi, lo] {
            case ['2'.code, '1'.code]:
              out.addChar('!'.code);
            case ['2'.code, '7'.code]:
              out.addChar('\''.code);
            case ['2'.code, '8'.code]:
              out.addChar('('.code);
            case ['2'.code, '9'.code]:
              out.addChar(')'.code);
            case ['7'.code, 'E'.code] | ['7'.code, 'e'.code]:
              out.addChar('~'.code);
            case _:
              // Any other escape is re-emitted exactly as it was read.
              out.addChar('%'.code);
              out.addChar(cast hi);
              out.addChar(cast lo);
          }
        case other:
          out.addChar(cast other);
      }
    }
    return out.toString();
  }
  #end
  /**
    Decodes the URL-encoded string `s`.

    The work is delegated to the decoder available on the current target
    (see the per-target branches below). On a target that matches none of
    the branches, `null` is returned.
  **/
  #if (!java && !cpp && !lua && !eval) inline #end
  public static function urlDecode( s : String ) : String {
    #if flash
      // `+` is turned into a space before the native decoder runs.
      return untyped __global__["decodeURIComponent"](s.split("+").join(" "));
    #elseif neko
      return untyped new String(_urlDecode(s.__s));
    #elseif js
      // `+` is turned into a space before the native decoder runs.
      return untyped decodeURIComponent(s.split("+").join(" "));
    #elseif cpp
      return untyped s.__URLDecode();
    #elseif java
      try
        return untyped __java__("java.net.URLDecoder.decode(s, \"UTF-8\")")
      catch (e:Dynamic) throw e;
    #elseif cs
      return untyped cs.system.Uri.UnescapeDataString(s);
    #elseif python
      return python.lib.urllib.Parse.unquote(s);
    #elseif hl
      var size = 0;
      var decoded = @:privateAccess s.bytes.urlDecode(size);
      return @:privateAccess String.__alloc__(decoded, size);
    #elseif lua
      // Step 1: `+` becomes a space.
      s = lua.NativeStringTools.gsub (s, "+", " ");
      // Step 2: each `%XX` escape becomes the character with that hex code.
      s = lua.NativeStringTools.gsub (s, "%%(%x%x)",
        function(hexPair) {return lua.NativeStringTools.char(lua.Lua.tonumber(hexPair,16));});
      // Step 3: "\r\n" is collapsed to "\n".
      s = lua.NativeStringTools.gsub (s, "\r\n", "\n");
      return s;
    #else
      return null;
    #end
  }
  /**
    Escapes HTML special characters of the string `s`.

    The following replacements are always made:

    - `&` becomes `&amp;`
    - `<` becomes `&lt;`
    - `>` becomes `&gt;`

    If `quotes` is true, the following characters are replaced as well:

    - `"` becomes `&quot;`
    - `'` becomes `&#039;`
  **/
  public static function htmlEscape( s : String, ?quotes : Bool ) : String {
    // `&` has to go first, otherwise the ampersands introduced by the other
    // entities would be escaped a second time.
    var escaped = s.split("&").join("&amp;");
    escaped = escaped.split("<").join("&lt;");
    escaped = escaped.split(">").join("&gt;");
    if (quotes) {
      escaped = escaped.split('"').join("&quot;");
      escaped = escaped.split("'").join("&#039;");
    }
    return escaped;
  }
  /**
    Unescapes HTML special characters of the string `s`.

    This reverses `htmlEscape`, i.e. `htmlUnescape(htmlEscape(s)) == s`.

    The replacements are:

    - `&gt;` becomes `>`
    - `&lt;` becomes `<`
    - `&quot;` becomes `"`
    - `&#039;` becomes `'`
    - `&amp;` becomes `&`
  **/
  public static function htmlUnescape( s : String ) : String {
    var plain = s.split("&gt;").join(">");
    plain = plain.split("&lt;").join("<");
    plain = plain.split("&quot;").join('"');
    plain = plain.split("&#039;").join("'");
    // `&amp;` is handled last so that an input such as `&amp;lt;` yields
    // `&lt;` rather than `<`.
    return plain.split("&amp;").join("&");
  }
  /**
    Tells whether the string `s` begins with the string `start`.

    If `start` is `null`, the result is unspecified.

    If `start` is the empty String `""`, the result is true.
  **/
  public static #if (cs || java || python) inline #end function startsWith( s : String, start : String ) : Bool {
    #if java
      return untyped s.startsWith(start);
    #elseif cs
      return untyped s.StartsWith(start);
    #elseif cpp
      // A prefix longer than the subject can never match.
      if (s.length < start.length)
        return false;
      var subject = s.c_str();
      var prefix = start.c_str();
      for (k in 0...start.length) {
        if (subject.at(k) != prefix.at(k))
          return false;
      }
      return true;
    #elseif hl
      // The last argument to `compare` is `start.length << 1`.
      return @:privateAccess (s.length >= start.length && s.bytes.compare(0,start.bytes,0,start.length<<1) == 0);
    #elseif python
      return python.NativeStringTools.startswith(s, start);
    #else
      if (s.length < start.length)
        return false;
      return s.substr(0, start.length) == start;
    #end
  }
  /**
    Tells whether the string `s` finishes with the string `end`.

    If `end` is `null`, the result is unspecified.

    If `end` is the empty String `""`, the result is true.
  **/
  public static #if (cs || java || python) inline #end function endsWith( s : String, end : String ) : Bool {
    #if java
      return untyped s.endsWith(end);
    #elseif cs
      return untyped s.EndsWith(end);
    #elseif cpp
      // A suffix longer than the subject can never match.
      if (s.length < end.length)
        return false;
      // Start comparing at the position where the suffix would have to begin.
      var tail = s.c_str().add( s.length-end.length );
      var suffix = end.c_str();
      for (k in 0...end.length) {
        if (tail.at(k) != suffix.at(k))
          return false;
      }
      return true;
    #elseif hl
      var suffixLen = end.length;
      var subjectLen = s.length;
      // Offset and size handed to `compare` are the character counts shifted left by one.
      return @:privateAccess (subjectLen >= suffixLen && s.bytes.compare((subjectLen - suffixLen) << 1, end.bytes, 0, suffixLen << 1) == 0);
    #elseif python
      return python.NativeStringTools.endswith(s, end);
    #else
      var suffixLen = end.length;
      var subjectLen = s.length;
      if (subjectLen < suffixLen)
        return false;
      return s.substr(subjectLen - suffixLen, suffixLen) == end;
    #end
  }
  /**
    Tells whether the character of `s` at position `pos` is a space.

    A character counts as a space if its character code is 9, 10, 11, 12,
    13 or 32.

    If `s` is the empty String `""`, or if `pos` is not a valid position
    within `s`, the result is false.
  **/
  public static function isSpace( s : String, pos : Int ) : Bool {
    #if (python || lua)
      // These targets get an explicit bounds check before the lookup.
      if (s.length == 0 || pos < 0 || pos >= s.length) return false;
    #end
    var code = s.charCodeAt( pos );
    // Codes 9..13 are written as the open interval (8, 14).
    var inControlRange = code > 8 && code < 14;
    return inControlRange || code == 32;
  }
  /**
    Removes the leading space characters of `s`.

    Except on the C# target, which calls the native `TrimStart`, `isSpace()`
    decides which characters are removed.

    If `s` is the empty String `""` or consists only of space characters,
    the result is the empty String `""`.
  **/
  public #if cs inline #end static function ltrim( s : String ) : String {
    #if cs
      return untyped s.TrimStart();
    #else
      var total = s.length;
      // Number of leading characters to drop.
      var skip = 0;
      while (skip < total && isSpace(s, skip))
        skip++;
      // Return the very same string when nothing has to be removed.
      return (skip > 0) ? s.substr(skip, total - skip) : s;
    #end
  }
  /**
    Removes the trailing space characters of `s`.

    Except on the C# target, which calls the native `TrimEnd`, `isSpace()`
    decides which characters are removed.

    If `s` is the empty String `""` or consists only of space characters,
    the result is the empty String `""`.
  **/
  public #if cs inline #end static function rtrim( s : String ) : String {
    #if cs
      return untyped s.TrimEnd();
    #else
      var total = s.length;
      // Index just past the last character that is kept.
      var keep = total;
      while (keep > 0 && isSpace(s, keep - 1))
        keep--;
      // Return the very same string when nothing has to be removed.
      return (keep < total) ? s.substr(0, keep) : s;
    #end
  }
  /**
    Removes the leading and trailing space characters of `s`.

    On the C# and Java targets the native `Trim` / `trim` method is called;
    everywhere else this is `ltrim(rtrim(s))`.
  **/
  public #if (cs || java) inline #end static function trim( s : String ) : String {
    #if cs
      return untyped s.Trim();
    #elseif java
      return untyped s.trim();
    #else
      var withoutTrailing = rtrim(s);
      return ltrim(withoutTrailing);
    #end
  }
  /**
    Prepends `c` to `s` until `s.length` is at least `l`.

    If `c` is the empty String `""` or if `l` does not exceed `s.length`,
    `s` is returned unchanged.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`, because `c` is only ever added as
    a whole.

    If `c` is null, the result is unspecified.

    @param s the String to pad
    @param c the padding String that is repeated in front of `s`
    @param l the minimum length of the result
    @return the padded String
  **/
  public static function lpad( s : String, c : String, l : Int ) : String {
    if (c.length <= 0)
      return s;
    var missing = l - s.length;
    if (missing <= 0)
      return s;
    // Collect the padding in a buffer instead of rebuilding the whole
    // string on every iteration.
    var buf = new StringBuf();
    var added = 0;
    while (added < missing) {
      buf.add(c);
      added += c.length;
    }
    buf.add(s);
    return buf.toString();
  }
  /**
    Appends `c` to `s` until `s.length` is at least `l`.

    If `c` is the empty String `""` or if `l` does not exceed `s.length`,
    `s` is returned unchanged.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`, because `c` is only ever added as
    a whole.

    If `c` is null, the result is unspecified.

    @param s the String to pad
    @param c the padding String that is repeated after `s`
    @param l the minimum length of the result
    @return the padded String
  **/
  public static function rpad( s : String, c : String, l : Int ) : String {
    if (c.length <= 0)
      return s;
    var missing = l - s.length;
    if (missing <= 0)
      return s;
    // Collect everything in a buffer instead of rebuilding the whole
    // string on every iteration.
    var buf = new StringBuf();
    buf.add(s);
    var added = 0;
    while (added < missing) {
      buf.add(c);
      added += c.length;
    }
    return buf.toString();
  }
  /**
    Replaces every occurrence of the String `sub` in the String `s` with
    the String `by`.

    This is a convenience function for `s.split(sub).join(by)`. The Java
    and C# targets call their native replace method instead, except when
    `sub` is the empty String `""`, in which case they also use the
    `split`/`join` form.

    If `sub` or `by` are null, the result is unspecified.
  **/
  public static function replace( s : String, sub : String, by : String ) : String {
    #if (java || cs)
      // Native replacement is only used for a non-empty search string.
      if (sub.length != 0) {
        #if java
          return untyped s.replace(sub, by);
        #else
          return untyped s.Replace(sub, by);
        #end
      }
    #end
    return s.split(sub).join(by);
  }
  /**
    Encodes `n` into an upper-case hexadecimal representation.

    If `digits` is specified, the result is left-padded with "0" until its
    `length` is at least `digits`; a longer result is never shortened.
  **/
  public static function hex( n : Int, ?digits : Int ) {
    #if flash
      var n : UInt = n;
      var out : String = untyped n.toString(16);
      out = out.toUpperCase();
    #else
      var out = "";
      var alphabet = "0123456789ABCDEF";
      // Emit the lowest nibble first and prepend, so the most significant
      // digit ends up on the left. The loop runs at least once, so 0 yields "0".
      do {
        out = alphabet.charAt(n&15) + out;
        n >>>= 4;
      } while( n > 0 );
    #end
    #if python
      // The Python target pads with a counted loop.
      if (digits != null && out.length < digits) {
        var missing = digits - out.length;
        for (_ in 0...missing) {
          out = "0" + out;
        }
      }
    #else
      if( digits != null ) {
        while( out.length < digits )
          out = "0"+out;
      }
    #end
    return out;
  }
  /**
    Returns the character code at position `index` of String `s`, or an
    end-of-file indicator if `index` equals `s.length`.

    This method is faster than `String.charCodeAt()` on some platforms, but
    the result is unspecified if `index` is negative or greater than
    `s.length`.

    End of file status can be checked by calling `StringTools.isEof()` with
    the returned value as argument.

    This operation is not guaranteed to work if `s` contains the `\0`
    character.
  **/
  public static #if !eval inline #end function fastCodeAt( s : String, index : Int ) : Int {
    #if neko
      return untyped __dollar__sget(s.__s, index);
    #elseif (cpp || flash)
      return untyped s.cca(index);
    #elseif java
      return if (index < s.length) cast(_charAt(s, index), Int) else -1;
    #elseif cs
      return if (cast(index, UInt) < s.length) cast(s[index], Int) else -1;
    #elseif js
      return (untyped s).charCodeAt(index);
    #elseif python
      return (index >= s.length) ? -1 : python.internal.UBuiltins.ord(python.Syntax.arrayAccess(s, index));
    #elseif hl
      return @:privateAccess s.bytes.getUI16(index << 1);
    #elseif lua
      // The native call is given `index + 1`.
      return lua.NativeStringTools.byte(s,index+1);
    #else
      return untyped s.cca(index);
    #end
  }
  /**
    Tells whether `c` represents the end-of-file (EOF) character.

    The value that counts as EOF depends on the target: `0` on flash, cpp
    and hl, NaN on js, `null` on neko, lua and eval, and `-1` on cs, java
    and python. On any other target the result is always false.
  **/
  @:noUsing public static inline function isEof( c : Int ) : Bool {
    #if (flash || cpp || hl)
      return c == 0;
    #elseif js
      return c != c; // fast NaN
    #elseif (neko || lua || eval)
      return c == null;
    #elseif (cs || java || python)
      return c == -1;
    #else
      return false;
    #end
  }
  /**
    Returns a String that can be used as a single command line argument
    on Unix.

    The empty String becomes `''`. An argument made up only of the
    characters `a-z A-Z 0-9 _ @ % + = : , . / -` is returned as is. Anything
    else is wrapped in single quotes, with every embedded single quote
    written as `'"'"'`.
  **/
  public static function quoteUnixArg(argument:String):String {
    // Based on cpython's shlex.quote().
    // https://hg.python.org/cpython/file/a3f076d4f54f/Lib/shlex.py#l278

    if (argument == "")
      return "''";

    // Matches as soon as one character outside the safe set is present.
    var unsafeChar = ~/[^a-zA-Z0-9_@%+=:,.\/-]/;
    if (unsafeChar.match(argument)) {
      // use single quotes, and put single quotes into double quotes
      // the string $'b is then quoted as '$'"'"'b'
      return "'" + replace(argument, "'", "'\"'\"'") + "'";
    }

    return argument;
  }
  /**
    Character codes of the characters that will be escaped by
    `quoteWinArg(_, true)`.
  **/
  public static var winMetaCharacters = [
    " ".code, "(".code, ")".code, "%".code, "!".code,
    "^".code, "\"".code, "<".code, ">".code, "&".code,
    "|".code, "\n".code, "\r".code, ",".code, ";".code
  ];
  /**
    Returns a String that can be used as a single command line argument
    on Windows.

    The input will be quoted, or escaped if necessary, such that the output
    will be parsed as a single argument using the rule specified in
    http://msdn.microsoft.com/en-us/library/ms880421

    If `escapeMetaCharacters` is true, every character of the quoted result
    whose code is listed in `winMetaCharacters` is additionally prefixed
    with `^`.

    Examples:
    ```
    quoteWinArg("abc", false) == "abc";
    quoteWinArg("ab c", false) == '"ab c"';
    ```
  **/
  public static function quoteWinArg(argument:String, escapeMetaCharacters:Bool):String {
    // A non-empty argument without space, tab, backslash or double quote
    // needs no quoting pass at all.
    var plainArgument = ~/^[^ \t\\"]+$/;
    if (!plainArgument.match(argument)) {
      // Based on cpython's subprocess.list2cmdline().
      // https://hg.python.org/cpython/file/50741316dd3a/Lib/subprocess.py#l620

      var quoted = new StringBuf();
      var wrap = argument.indexOf(" ") != -1 || argument.indexOf("\t") != -1 || argument == "";

      if (wrap)
        quoted.add('"');

      // Backslashes are held back until the following character is known.
      var pendingSlashes = new StringBuf();

      for (idx in 0...argument.length) {
        switch (argument.charCodeAt(idx)) {
          case "\\".code:
            // Don't know if we need to double yet.
            pendingSlashes.add("\\");
          case '"'.code:
            // Double backslashes.
            var slashes = pendingSlashes.toString();
            quoted.add(slashes);
            quoted.add(slashes);
            pendingSlashes = new StringBuf();
            quoted.add('\\"');
          case code:
            // Normal char
            if (pendingSlashes.length > 0) {
              quoted.add(pendingSlashes.toString());
              pendingSlashes = new StringBuf();
            }
            quoted.addChar(code);
        }
      }

      // Add remaining backslashes, if any.
      var trailing = pendingSlashes.toString();
      quoted.add(trailing);

      if (wrap) {
        // Added a second time so they are doubled ahead of the closing quote.
        quoted.add(trailing);
        quoted.add('"');
      }

      argument = quoted.toString();
    }

    if (!escapeMetaCharacters)
      return argument;

    var escaped = new StringBuf();
    for (idx in 0...argument.length) {
      var code = argument.charCodeAt(idx);
      if (winMetaCharacters.indexOf(code) >= 0) {
        escaped.addChar("^".code);
      }
      escaped.addChar(code);
    }
    return escaped.toString();
  }
  #if java
  /**
    Java-only helper: forwards to the untyped `_charAt` call on `str` and
    returns the result as a `Char16`.
  **/
  private static inline function _charAt(str:String, idx:Int):java.StdTypes.Char16 {
    return untyped str._charAt(idx);
  }
  #end
  #if neko
  // Neko-only: primitives loaded from the "std" library, each declared
  // with one argument. They back urlEncode() and urlDecode().
  private static var _urlEncode = neko.Lib.load("std", "url_encode", 1);
  private static var _urlDecode = neko.Lib.load("std", "url_decode", 1);
  #end
  526. }