EReg.hx 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. import java.util.regex.Regex;
  2. /*
  3. * Copyright (c) 2005, The haXe Project Contributors
  4. * All rights reserved.
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * - Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * - Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
  15. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  16. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
  18. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  20. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  21. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  24. * DAMAGE.
  25. */
  /**
      Regular expressions are a way to find regular patterns in
      Strings. Have a look at the tutorial on the haXe website to learn
      how to use them.

      This implementation wraps a single [Matcher] that is reset and reused
      by every operation, so calling [match], [split], [replace] or
      [customReplace] invalidates the result of any previous match.
  **/
  class EReg {
      /** Pattern source exactly as given to the constructor. **/
      private var pattern:String;
      /** Shared matcher; reset against a new input by each operation. **/
      private var matcher:Matcher;
      /** The string the matcher was last reset against. **/
      private var cur:String;
      /** True when the [g] option was supplied. **/
      private var isGlobal:Bool;

      /**
          Creates a new regular expression with pattern [r] and
          options [opt].

          Recognised option characters: [i] (case insensitive),
          [m] (multiline), [s] (dot matches newline) and [g] (global).
          Any other character is ignored.
      **/
      public function new( r : String, opt : String ) {
          var flags = 0;
          isGlobal = false;
          for (i in 0...opt.length)
          {
              switch(StringTools.fastCodeAt(opt, i))
              {
                  case 'i'.code:
                      flags |= Pattern.CASE_INSENSITIVE;
                  case 'm'.code:
                      flags |= Pattern.MULTILINE;
                  case 's'.code:
                      flags |= Pattern.DOTALL;
                  case 'g'.code:
                      // "global" is not a Pattern flag; it only changes how
                      // split and replace iterate over the matches.
                      isGlobal = true;
              }
          }
          matcher = Pattern.compile(convert(r), flags).matcher("");
          pattern = r;
      }

      /**
          Hook for translating a haXe/JavaScript-flavoured pattern into
          Java regex syntax. Currently returns [r] unchanged.
      **/
      private static function convert(r:String):String
      {
          //some references of the implementation:
          //http://stackoverflow.com/questions/809647/java-vs-javascript-regex-problem
          //http://stackoverflow.com/questions/4788413/how-to-convert-javascript-regex-to-safe-java-regex
          //Some necessary changes:
          //
          // \0 -> \x00
          // \v -> \x0b
          // [^] -> [\s\S]
          // unescaped ', " -> \', \"
          // FIXME: none of the conversions above are implemented yet; the
          // pattern is handed to Pattern.compile as-is.
          return r;
      }

      /**
          Translates a haXe replacement string into Java's replacement
          syntax: [$$] becomes an escaped dollar, [$] followed by a digit
          is kept as a group reference, and every other [$] or backslash is
          escaped so that Java inserts it literally instead of throwing or
          swallowing it.
      **/
      private static function escapeReplacement( by : String ) : String {
          var out = new StringBuf();
          var len = by.length;
          var i = 0;
          while (i < len) {
              var c = StringTools.fastCodeAt(by, i);
              i++;
              if (c == "\\".code) {
                  // a backslash is an escape character for Java, not for haXe
                  out.add("\\\\");
              } else if (c != "$".code) {
                  out.add(by.charAt(i - 1));
              } else {
                  var next = (i < len) ? StringTools.fastCodeAt(by, i) : -1;
                  if (next == "$".code) {
                      // "$$" is the documented way to write a literal dollar
                      out.add("\\$");
                      i++;
                  } else if (next >= "0".code && next <= "9".code) {
                      // group reference: the digit is copied by the next iteration
                      out.add("$");
                  } else {
                      // stray dollar: Java would reject it, so emit it literally
                      out.add("\\$");
                  }
              }
          }
          return out.toString();
      }

      /**
          Tells if the regular expression matches the String.
          Updates the internal state accordingly.
      **/
      public function match( s : String ) : Bool {
          cur = s;
          matcher = matcher.reset(s);
          return matcher.find();
      }

      /**
          Returns a matched group or throws an exception if there
          is no such group. If [n = 0], the whole matched substring
          is returned.
      **/
      public function matched( n : Int ) : String
      {
          // group(0) is the whole match, exactly like the no-argument group()
          return matcher.group(n);
      }

      /**
          Returns the part of the string that was at the left of
          the matched substring.
      **/
      public function matchedLeft() : String
      {
          return untyped cur.substring(0, matcher.start());
      }

      /**
          Returns the part of the string that was at the right of
          the matched substring.
      **/
      public function matchedRight() : String
      {
          return untyped cur.substring(matcher.end(), cur.length);
      }

      /**
          Returns the position of the matched substring within the
          original matched string.
      **/
      public function matchedPos() : { pos : Int, len : Int } {
          var start = matcher.start();
          return { pos : start, len : matcher.end() - start };
      }

      /**
          Split a string by using the regular expression to match
          the separators.

          Without the [g] option only the first separator is used, giving at
          most two parts. If the separator is not found, the result is an
          array holding [s] alone. Empty parts, including trailing ones, are
          kept.
      **/
      public function split( s : String ) : Array<String>
      {
          var m = matcher;
          m.reset(s);
          // keep cur in step with the matcher so matchedLeft/Right stay coherent
          cur = s;
          if (!isGlobal)
          {
              if (!m.find())
                  return [s];
              return untyped [s.substring(0, m.start()), s.substring(m.end(), s.length)];
          }
          // Pattern.split() silently drops trailing empty strings, so the
          // parts are collected by walking the matches instead.
          var parts = new Array<String>();
          var last = 0;
          while (m.find())
          {
              parts.push(untyped s.substring(last, m.start()));
              last = m.end();
          }
          parts.push(untyped s.substring(last, s.length));
          return parts;
      }

      /**
          Replaces a pattern by another string. The [by] format can
          contain [$1] to [$9] that will correspond to groups matched
          while replacing. [$$] means the [$] character.

          With the [g] option every occurrence is replaced, otherwise only
          the first one. If nothing matches, [s] is returned unchanged.
      **/
      public function replace( s : String, by : String ) : String {
          var m = matcher;
          m.reset(s);
          cur = s;
          var replacement = escapeReplacement(by);
          return isGlobal ? m.replaceAll(replacement) : m.replaceFirst(replacement);
      }

      /**
          For each occurrence of the pattern in the string [s], the function [f] is called and
          can return the string that needs to be replaced. All occurrences are matched anyway,
          and setting the [g] flag might cause some incorrect behavior on some platforms.

          Each match is searched in the part of [s] that follows the previous
          match, so positions seen by [f] are relative to that remainder.
      **/
      public function customReplace( s : String, f : EReg -> String ) : String {
          var buf = new StringBuf();
          while( match(s) ) {
              // Read everything needed from the match before calling f,
              // which is free to reuse this EReg and clobber its state.
              var left = matchedLeft();
              var right = matchedRight();
              var isEmpty = matchedPos().len == 0;
              buf.add(left);
              buf.add(f(this));
              if( !isEmpty ) {
                  s = right;
                  continue;
              }
              // A zero-width match consumes nothing: without this step the
              // same position would match forever.
              if( right.length == 0 ) {
                  s = right;
                  break;
              }
              var head : String = untyped right.substring(0, 1);
              var tail : String = untyped right.substring(1, right.length);
              buf.add(head);
              s = tail;
          }
          buf.add(s);
          return buf.toString();
      }
  }