StringTools.hx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /*
  2. * Copyright (C)2005-2012 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. /**
  23. This class provides advanced methods on Strings. It is ideally used with
  24. 'using StringTools' and then acts as an extension to the String class.
  25. If the first argument to any of the methods is null, the result is
  26. unspecified.
  27. **/
  28. #if cpp
  29. using cpp.NativeString;
  30. #end
  31. #if cs
  32. @:keep
  33. #end
  34. class StringTools {
  /**
    Percent-encodes the String `s` so it can be embedded in a URL.

    The work is delegated, at compile time, to the encoder of the target
    platform:

    - Flash / JS: the global `encodeURIComponent`
    - Neko: the `_urlEncode` primitive loaded by this class
    - C++: the native `__URLEncode` String method
    - Java: `java.net.URLEncoder.encode` with the UTF-8 charset
    - C#: `Uri.EscapeDataString`
    - Python: `python.lib.urllib.Parse.quote`

    On any other target the function returns null.

    If `s` is null, the result is unspecified.
  **/
  #if (!java && !cpp) inline #end public static function urlEncode( s : String ) : String {
    #if flash
    return untyped __global__["encodeURIComponent"](s);
    #elseif neko
    return untyped new String(_urlEncode(s.__s));
    #elseif js
    return untyped encodeURIComponent(s);
    #elseif cpp
    return untyped s.__URLEncode();
    #elseif java
    // NOTE(review): the catch-and-rethrow presumably exists only to satisfy
    // Java's checked-exception rules for URLEncoder.encode - confirm.
    try
      return untyped __java__("java.net.URLEncoder.encode(s, \"UTF-8\")")
    catch (e:Dynamic) throw e;
    #elseif cs
    // EscapeDataString (unlike EscapeUriString) also escapes reserved
    // characters such as `&`, `=`, `?` and `/`, and it is the counterpart
    // of the UnescapeDataString call made by urlDecode.
    return untyped cs.system.Uri.EscapeDataString(s);
    #elseif python
    // NOTE(review): Python's quote() leaves "/" unescaped by default, which
    // differs from the other targets - confirm whether that is intended.
    return python.lib.urllib.Parse.quote(s);
    #else
    return null;
    #end
  }
  /**
    Decodes a percent-encoded URL String `s`.

    The work is delegated, at compile time, to the decoder of the target
    platform. On Flash and JS every "+" is turned into a space before the
    global `decodeURIComponent` is called; on the other targets the
    treatment of "+" is whatever the native decoder does.

    On targets without a dedicated branch the function returns null.

    If `s` is null, the result is unspecified.
  **/
  #if (!java && !cpp) inline #end public static function urlDecode( s : String ) : String {
    #if flash
    var spaced = s.split("+").join(" ");
    return untyped __global__["decodeURIComponent"](spaced);
    #elseif neko
    return untyped new String(_urlDecode(s.__s));
    #elseif js
    var spaced = s.split("+").join(" ");
    return untyped decodeURIComponent(spaced);
    #elseif cpp
    return untyped s.__URLDecode();
    #elseif java
    try {
      return untyped __java__("java.net.URLDecoder.decode(s, \"UTF-8\")");
    } catch (e:Dynamic) {
      throw e;
    }
    #elseif cs
    return untyped cs.system.Uri.UnescapeDataString(s);
    #elseif python
    return python.lib.urllib.Parse.unquote(s);
    #else
    return null;
    #end
  }
  /**
    Escapes the HTML special characters of the String `s`.

    The following replacements are always made:

    - `&` becomes `&amp;`
    - `<` becomes `&lt;`
    - `>` becomes `&gt;`

    If `quotes` is true, the following characters are replaced as well:

    - `"` becomes `&quot;`
    - `'` becomes `&#039;`

    If `s` is null, the result is unspecified.
  **/
  public static function htmlEscape( s : String, ?quotes : Bool ) : String {
    // `&` has to go first, otherwise the ampersands introduced by the
    // later replacements would be escaped a second time.
    var escaped = s.split("&").join("&amp;");
    escaped = escaped.split("<").join("&lt;");
    escaped = escaped.split(">").join("&gt;");
    if (quotes) {
      escaped = escaped.split('"').join("&quot;");
      escaped = escaped.split("'").join("&#039;");
    }
    return escaped;
  }
  /**
    Unescapes the HTML special characters of the String `s`.

    This reverses htmlEscape, i.e. `htmlUnescape(htmlEscape(s)) == s`.

    The following replacements are made:

    - `&amp;` becomes `&`
    - `&lt;` becomes `<`
    - `&gt;` becomes `>`
    - `&quot;` becomes `"`
    - `&#039;` becomes `'`

    If `s` is null, the result is unspecified.
  **/
  public static function htmlUnescape( s : String ) : String {
    var plain = s.split("&gt;").join(">");
    plain = plain.split("&lt;").join("<");
    plain = plain.split("&quot;").join('"');
    plain = plain.split("&#039;").join("'");
    // `&amp;` goes last so that an escaped entity such as "&amp;lt;" ends
    // up as the text "&lt;" instead of being unescaped twice into "<".
    return plain.split("&amp;").join("&");
  }
  /**
    Tells whether the String `s` begins with the String `start`.

    If `start` is the empty String "", the result is true.

    If `s` or `start` is null, the result is unspecified.
  **/
  public static #if (cs || java) inline #end function startsWith( s : String, start : String ) : Bool {
    #if java
    return untyped s.startsWith(start);
    #elseif cs
    return untyped s.StartsWith(start);
    #elseif cpp
    var prefixLen = start.length;
    if (prefixLen > s.length)
      return false;
    // Compare position by position through the native character pointers.
    var subject = s.c_str();
    var prefix = start.c_str();
    for (k in 0...prefixLen) {
      if (subject.at(k) != prefix.at(k))
        return false;
    }
    return true;
    #else
    // A prefix longer than the subject can never match.
    if (s.length < start.length)
      return false;
    return s.substr(0, start.length) == start;
    #end
  }
  /**
    Tells whether the String `s` finishes with the String `end`.

    If `end` is the empty String "", the result is true.

    If `s` or `end` is null, the result is unspecified.
  **/
  public static #if (cs || java) inline #end function endsWith( s : String, end : String ) : Bool {
    #if java
    return untyped s.endsWith(end);
    #elseif cs
    return untyped s.EndsWith(end);
    #elseif cpp
    var suffixLen = end.length;
    if (suffixLen > s.length)
      return false;
    // Point at the tail of `s` that has the same length as `end`, then
    // compare position by position.
    var tail = s.c_str().add( s.length - suffixLen );
    var suffix = end.c_str();
    for (k in 0...suffixLen) {
      if (tail.at(k) != suffix.at(k))
        return false;
    }
    return true;
    #else
    var suffixLen = end.length;
    var totalLen = s.length;
    // A suffix longer than the subject can never match.
    if (totalLen < suffixLen)
      return false;
    return s.substr(totalLen - suffixLen, suffixLen) == end;
    #end
  }
  /**
    Tells whether the character of `s` at position `pos` is a space.

    A character counts as a space when its character code is 9, 10, 11,
    12, 13 or 32.

    If `s` is the empty String "", or if `pos` is not a valid position
    within `s`, the result is false.
  **/
  public static function isSpace( s : String, pos : Int ) : Bool {
    #if python
    // Python gets an explicit bounds check before the character is read.
    if (pos < 0 || pos >= s.length || s.length == 0) return false;
    #end
    var code = s.charCodeAt( pos );
    return code == 32 || (code > 8 && code < 14);
  }
  /**
    Strips the leading space characters from `s`.

    Except on C#, where the native `TrimStart` is used, isSpace() decides
    which characters are removed.

    If `s` is the empty String "" or consists only of space characters,
    the result is the empty String "".
  **/
  public #if cs inline #end static function ltrim( s : String ) : String {
    #if cs
    return untyped s.TrimStart();
    #else
    var total = s.length;
    // Index of the first character that is not a space.
    var first = 0;
    while (first < total && isSpace(s, first))
      first++;
    // Nothing to strip: hand back the original String.
    return (first == 0) ? s : s.substr(first, total - first);
    #end
  }
  /**
    Strips the trailing space characters from `s`.

    Except on C#, where the native `TrimEnd` is used, isSpace() decides
    which characters are removed.

    If `s` is the empty String "" or consists only of space characters,
    the result is the empty String "".
  **/
  public #if cs inline #end static function rtrim( s : String ) : String {
    #if cs
    return untyped s.TrimEnd();
    #else
    var total = s.length;
    // Length of the part that is kept; shrinks while the last kept
    // character is a space.
    var keep = total;
    while (keep > 0 && isSpace(s, keep - 1))
      keep--;
    // Nothing to strip: hand back the original String.
    if (keep == total)
      return s;
    return s.substr(0, keep);
    #end
  }
  /**
    Strips both the leading and the trailing space characters from `s`.

    On C# and Java the native `Trim` / `trim` String method is used; on
    every other target this is `ltrim(rtrim(s))`.

    NOTE(review): the native methods may not treat exactly the same set of
    characters as spaces as isSpace() does - confirm if that matters.
  **/
  public #if (cs || java) inline #end static function trim( s : String ) : String {
    #if cs
    return untyped s.Trim();
    #elseif java
    return untyped s.trim();
    #else
    var withoutTail = rtrim(s);
    return ltrim(withoutTail);
    #end
  }
  /**
    Prepends `c` to `s` until `s.length` is at least `l`.

    If `c` is the empty String "" or if `l` does not exceed `s.length`,
    `s` is returned unchanged.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`, because `c` is only ever added
    as a whole.

    If `s` or `c` is null, the result is unspecified.
  **/
  public static function lpad( s : String, c : String, l : Int ) : String {
    if (c.length <= 0)
      return s;
    var missing = l - s.length;
    if (missing <= 0)
      return s;
    // Collect the padding in a buffer instead of rebuilding the whole
    // String on every iteration.
    var buf = new StringBuf();
    var added = 0;
    while (added < missing) {
      buf.add(c);
      added += c.length;
    }
    buf.add(s);
    return buf.toString();
  }
  /**
    Appends `c` to `s` until `s.length` is at least `l`.

    If `c` is the empty String "" or if `l` does not exceed `s.length`,
    `s` is returned unchanged.

    If `c.length` is 1, the resulting String length is exactly `l`.
    Otherwise the length may exceed `l`, because `c` is only ever added
    as a whole.

    If `s` or `c` is null, the result is unspecified.
  **/
  public static function rpad( s : String, c : String, l : Int ) : String {
    if (c.length <= 0)
      return s;
    var missing = l - s.length;
    if (missing <= 0)
      return s;
    // Collect the result in a buffer instead of rebuilding the whole
    // String on every iteration.
    var buf = new StringBuf();
    buf.add(s);
    var added = 0;
    while (added < missing) {
      buf.add(c);
      added += c.length;
    }
    return buf.toString();
  }
  /**
    Replaces every occurrence of the String `sub` in the String `s` with
    the String `by`.

    If `sub` is the empty String "", `by` is inserted between the
    characters of `s`. If `by` is also the empty String "", `s` remains
    unchanged.

    This is a convenience function for `s.split(sub).join(by)`.

    If `sub` or `by` are null, the result is unspecified.
  **/
  public static function replace( s : String, sub : String, by : String ) : String {
    #if java
    // The native method is used only for a non-empty `sub`; the empty
    // case goes through split/join.
    if (sub.length != 0)
      return untyped s.replace(sub, by);
    return s.split(sub).join(by);
    #elseif cs
    // The native method is used only for a non-empty `sub`; the empty
    // case goes through split/join.
    if (sub.length != 0)
      return untyped s.Replace(sub, by);
    return s.split(sub).join(by);
    #else
    var pieces = s.split(sub);
    return pieces.join(by);
    #end
  }
  /**
    Encodes `n` as an upper-case hexadecimal String.

    If `digits` is specified, the result is left-padded with "0" until
    its length is at least `digits`. A result that is already longer
    than `digits` is not truncated.
  **/
  public static function hex( n : Int, ?digits : Int ) {
    #if flash
    var unsigned : UInt = n;
    var out : String = untyped unsigned.toString(16);
    out = out.toUpperCase();
    #else
    var out = "";
    var table = "0123456789ABCDEF";
    // Emit the lowest four bits first and shift them out (unsigned shift)
    // until nothing is left; always runs once so that 0 yields "0".
    do {
      out = table.charAt(n & 15) + out;
      n >>>= 4;
    } while (n > 0);
    #end
    #if python
    if (digits != null && out.length < digits) {
      var missing = digits - out.length;
      for (_ in 0...missing) {
        out = "0" + out;
      }
    }
    #else
    if (digits != null) {
      while (out.length < digits) {
        out = "0" + out;
      }
    }
    #end
    return out;
  }
  /**
    Returns the character code at position `index` of the String `s`, or
    an end-of-file indicator if `index` equals `s.length`.

    This method is faster than String.charCodeAt() on some platforms, but
    the result is unspecified if `index` is negative or greater than
    `s.length`.

    The end-of-file indicator differs between platforms; check for it by
    passing the returned value to `StringTools.isEof`.

    This operation is not guaranteed to work if `s` contains the \0
    character.
  **/
  public static inline function fastCodeAt( s : String, index : Int ) : Int {
    #if neko
    return untyped __dollar__sget(s.__s, index);
    #elseif (cpp || flash)
    return untyped s.cca(index);
    #elseif java
    return if (index < s.length) cast(_charAt(s, index), Int) else -1;
    #elseif cs
    // Comparing as UInt sends a negative `index` to the -1 branch too.
    return if (cast(index, UInt) < s.length) cast(s[index], Int) else -1;
    #elseif js
    return (untyped s).charCodeAt(index);
    #elseif python
    return (index >= s.length) ? -1 : python.internal.UBuiltins.ord(python.Syntax.arrayAccess(s, index));
    #else
    return untyped s.cca(index);
    #end
  }
  /**
    Tells whether `c` is the end-of-file (EOF) indicator.

    The indicator is platform specific: 0 on Flash and C++, NaN on JS,
    null on Neko and -1 on C#, Java and Python. On any other target the
    result is always false.
  **/
  @:noUsing public static inline function isEof( c : Int ) : Bool {
    #if (flash || cpp)
    return c == 0;
    #elseif js
    return c != c; // fast NaN
    #elseif neko
    return c == null;
    #elseif (cs || java || python)
    return c == -1;
    #else
    return false;
    #end
  }
  #if java
  // Java only: forwards to the native `_charAt` of the String; called by
  // fastCodeAt.
  private static inline function _charAt(str:String, idx:Int):java.StdTypes.Char16 {
    return untyped str._charAt(idx);
  }
  #end
  #if neko
  // Neko only: the `url_encode` / `url_decode` primitives of the "std"
  // library (one argument each), called by urlEncode and urlDecode.
  private static var _urlEncode = neko.Lib.load("std","url_encode",1);
  private static var _urlDecode = neko.Lib.load("std","url_decode",1);
  #end
  375. }