EReg.hx 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /*
  2. * Copyright (C)2005-2012 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. import java.util.regex.Regex;
  23. /*
  24. * Copyright (c) 2005, The haXe Project Contributors
  25. * All rights reserved.
  26. * Redistribution and use in source and binary forms, with or without
  27. * modification, are permitted provided that the following conditions are met:
  28. *
  29. * - Redistributions of source code must retain the above copyright
  30. * notice, this list of conditions and the following disclaimer.
  31. * - Redistributions in binary form must reproduce the above copyright
  32. * notice, this list of conditions and the following disclaimer in the
  33. * documentation and/or other materials provided with the distribution.
  34. *
  35. * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
  36. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  37. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
  39. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  40. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  41. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  42. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  43. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  44. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  45. * DAMAGE.
  46. */
  /**
      Regular expressions are a way to find regular patterns in Strings.
      Have a look at the tutorial on the Haxe website to learn how to use them.

      This is the Java-target implementation: the pattern is compiled with
      [java.util.regex.Pattern] and every operation is carried out by a single
      [java.util.regex.Matcher] that is re-used (reset) for each new input.
  **/
  @:coreApi
  class EReg {

      // The pattern source exactly as it was passed to the constructor.
      private var pattern:String;
      // Matcher shared by all operations; reset with the new input on every call.
      private var matcher:Matcher;
      // The complete string handed to the last match()/matchSub() call.
      private var cur:String;
      // Offset of the matcher's input inside [cur] (non-zero only after matchSub()).
      private var sub:Int;
      // True when the 'g' option was supplied.
      private var isGlobal:Bool;

      /**
          Creates a new regular expression with pattern [r] and
          options [opt].

          Recognized option characters are [i] (case insensitive),
          [m] (multiline), [s] (dot matches line terminators) and
          [g] (global). Any other character is ignored.
      **/
      public function new( r : String, opt : String ) {
          var flags = 0;
          for (i in 0...opt.length)
          {
              switch(StringTools.fastCodeAt(opt, i))
              {
                  case 'i'.code:
                      flags |= Pattern.CASE_INSENSITIVE;
                  case 'm'.code:
                      flags |= Pattern.MULTILINE;
                  case 's'.code:
                      flags |= Pattern.DOTALL;
                  case 'g'.code:
                      isGlobal = true;
              }
          }

          // The matcher starts on an empty input; match()/matchSub() reset it.
          matcher = Pattern.compile(convert(r), flags).matcher("");
          pattern = r;
      }

      /**
          Hook for translating a Haxe pattern into java.util.regex syntax.
          The translation is not implemented yet, so [r] is returned unchanged.
      **/
      private static function convert(r:String):String
      {
          //some references of the implementation:
          //http://stackoverflow.com/questions/809647/java-vs-javascript-regex-problem
          //http://stackoverflow.com/questions/4788413/how-to-convert-javascript-regex-to-safe-java-regex

          //Some necessary changes:
          //
          // \0 -> \x00
          // \v -> \x0b
          // [^] -> [\s\S]
          // unescaped ', " -> \', \"

          /* FIXME
          var pat = new StringBuf();
          var len = r.length;
          var i = 0;
          while (i < len)
          {
              var c = StringTools.fastCodeAt(r, i++);
              switch(c)
              {
                  case '\\'.code: //escape-sequence
              }
          }
          */
          return r;
      }

      /**
          Tells if the regular expression matches the String.
          Updates the internal state accordingly.
      **/
      public function match( s : String ) : Bool {
          sub = 0;
          cur = s;
          matcher = matcher.reset(s);
          return matcher.find();
      }

      /**
          Returns a matched group or throws an exception if there
          is no such group. If [n = 0], the whole matched substring
          is returned.
      **/
      public function matched( n : Int ) : String
      {
          if (n == 0)
              return matcher.group();
          else
              return matcher.group(n);
      }

      /**
          Returns the part of the string that was at the left of
          the matched substring.
      **/
      public function matchedLeft() : String
      {
          // matcher offsets are relative to the matcher input, hence "+ sub"
          return untyped cur.substring(0, sub + matcher.start());
      }

      /**
          Returns the part of the string that was at the right of
          the matched substring.
      **/
      public function matchedRight() : String
      {
          return untyped cur.substring(sub + matcher.end(), cur.length);
      }

      /**
          Returns the position of the matched substring within the
          original matched string.
      **/
      public function matchedPos() : { pos : Int, len : Int } {
          var start = matcher.start();
          return { pos : sub + start, len : matcher.end() - start };
      }

      /**
          Tells if the regular expression matches inside the part of [s]
          that starts at [pos] and is [len] characters long (up to the end
          of [s] when [len] is negative).

          The matcher only sees that part of [s]; positions reported by
          matchedPos(), matchedLeft() and matchedRight() are nevertheless
          relative to the complete string [s].
      **/
      public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
          var s2 = (len < 0 ? s.substr(pos) : s.substr(pos, len));
          sub = pos;
          matcher = matcher.reset(s2);
          cur = s;
          return matcher.find();
      }

      /**
          Split a string by using the regular expression to match
          the separators.

          Without the [g] option only the first separator is used, so the
          result has at most two elements. When nothing matches, the result
          is an array that contains [s] alone.
      **/
      public function split( s : String ) : Array<String>
      {
          var m = matcher;
          m.reset(s);

          var ret = new Array<String>();
          var last = 0; // index just after the previous separator

          // One scan over [s]: find() always moves forward, even after a
          // zero-length match, so this loop is guaranteed to terminate.
          while (m.find())
          {
              ret.push(s.substr(last, m.start() - last));
              last = m.end();
              if (!isGlobal)
                  break;
          }

          ret.push(s.substr(last));
          return ret;
      }

      /**
          Replaces a pattern by another string. The [by] format can
          contain [$1] to [$9] that will correspond to groups matched
          while replacing. [$$] means the [$] character.

          Only the first occurrence is replaced unless the [g] option was
          given. When nothing matches, [s] is returned unchanged.
      **/
      public function replace( s : String, by : String ) : String {
          var m = matcher;
          m.reset(s);

          var out = new StringBuf();
          var last = 0; // index just after the previous match

          while (m.find())
          {
              out.add(s.substr(last, m.start() - last));
              expandReplacement(by, m, out);
              last = m.end();
              if (!isGlobal)
                  break;
          }

          out.add(s.substr(last));
          return out.toString();
      }

      /**
          Appends the replacement format [by] to [out], using the current
          match of [m]: [$$] becomes [$], [$0] to [$9] become the text of the
          corresponding group (nothing if the group did not take part in the
          match) and every other character is copied verbatim.
          Referencing a group the pattern does not define makes the
          underlying group lookup throw.
      **/
      private static function expandReplacement( by : String, m : Matcher, out : StringBuf ) : Void {
          var len = by.length;
          var i = 0;
          while (i < len)
          {
              var d = by.indexOf("$", i);
              if (d < 0 || d == len - 1)
              {
                  // no '$' left, or a trailing '$' that cannot start a reference
                  out.add(by.substr(i));
                  break;
              }

              out.add(by.substr(i, d - i));
              var next = StringTools.fastCodeAt(by, d + 1);
              if (next == "$".code)
              {
                  out.add("$");
                  i = d + 2;
              }
              else if (next >= "0".code && next <= "9".code)
              {
                  var n = next - "0".code;
                  var g:String = (n == 0) ? m.group() : m.group(n);
                  if (g != null)
                      out.add(g);
                  i = d + 2;
              }
              else
              {
                  // a '$' followed by anything else is an ordinary character
                  out.add("$");
                  i = d + 1;
              }
          }
      }

      /**
          For each occurrence of the pattern in the string [s], the function [f] is called and
          can return the string that needs to be replaced. Without the [g] flag only the
          first occurrence is processed. While [f] runs, the match state of this EReg
          describes the current occurrence.
      **/
      public function map( s : String, f : EReg -> String ) : String {
          var offset = 0;
          var buf = new StringBuf();
          do {
              if (offset >= s.length)
                  break;
              else if (!matchSub(s, offset)) {
                  buf.add(s.substr(offset));
                  break;
              }
              var p = matchedPos();
              buf.add(s.substr(offset, p.pos - offset));
              buf.add(f(this));
              if (p.len == 0) {
                  // empty match: copy one character so that the scan keeps advancing
                  buf.add(s.substr(p.pos, 1));
                  offset = p.pos + 1;
              }
              else
                  offset = p.pos + p.len;
          } while (isGlobal);

          // Non-global: append what follows the single processed match.
          // offset > 0 means a match was processed; when there was none the
          // loop above has already copied the whole string.
          if (!isGlobal && offset > 0 && offset < s.length)
              buf.add(s.substr(offset));
          return buf.toString();
      }
  }