EReg.hx 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. /*
  2. * Copyright (C)2005-2019 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. import lua.Table;
  23. import lua.Lib;
  24. import lua.lib.lrexlib.Rex;
  25. // Note: lrexlib reports byte-based (not character-based) offsets, so the byte-oriented native string tools are used.
  26. import lua.NativeStringTools.*;
  /**
      Regular expression implementation for the Lua target, built on the
      lrexlib `Rex` extern.

      All offsets stored in the match table are byte offsets as handed back by
      `Rex.exec`; they are therefore only ever combined with the byte-oriented
      functions imported from `lua.NativeStringTools` (`sub`, `len`). Offsets
      that cross the public API (`matchSub`, `matchedPos`) are expressed in
      `String` index units and are converted at the boundary.
  **/
  @:coreApi
  class EReg {
      /** The compiled Rex extern instance. **/
      var r:Rex;

      /** Whether the expression was created with the `g` (global) option. **/
      var global:Bool;

      /** The subject string of the last match attempt that was recorded. **/
      var s:String;

      /**
          Packed result of the last `Rex.exec` call: `[1]` start byte, `[2]` end
          byte, `[3]` table holding a (start, end) pair per capture group, with
          boolean entries for groups that did not participate in the match.
      **/
      var m:Table<Int, Int>;

      /** Compile-flag table provided by the regex library. **/
      static var FLAGS:Table<String, Int> = Rex.flags();

      /**
          Compiles the pattern `r` with the options listed in `opt`.

          Recognised option characters are `i` (caseless), `m` (multiline),
          `s` (dot matches newline) and `g` (global); any other character is
          ignored. UTF validation and Unicode character properties are always
          switched on.
      **/
      public function new(r:String, opt:String):Void {
          var ropt = 0;
          // Fields start out as nil on Lua, so give `global` a real value up front.
          global = false;
          if (opt != null) {
              for (i in 0...opt.length) {
                  switch (opt.charAt(i)) {
                      case "i":
                          ropt |= FLAGS.CASELESS;
                      case "m":
                          ropt |= FLAGS.MULTILINE;
                      case "s":
                          ropt |= FLAGS.DOTALL;
                      case "g":
                          global = true;
                      default:
                          null;
                  }
              }
          }
          ropt |= FLAGS.UTF; // always check validity of utf8 string
          ropt |= FLAGS.UCP; // always enable utf8 character properties
          this.r = Rex.create(r, ropt);
      }

      /**
          Tells whether the expression matches somewhere in `s`.
          Returns `false` when `s` is `null`.
      **/
      public function match(s:String):Bool {
          return matchFromByte(s, 1);
      }

      /**
          Runs the expression against `s` starting at the 1-based byte `offset`
          and records the result for the `matched*` accessors.
      **/
      inline function matchFromByte(s:String, offset:Int):Bool {
          if (s == null)
              return false;
          this.m = lua.TableTools.pack(r.exec(s, offset));
          this.s = s;
          return m[1] != null;
      }

      /**
          Returns the text of capture group `n` of the last match; group `0` is
          the whole match. A group that exists in the pattern but did not take
          part in the match yields `null`.

          Throws "EReg::matched" when there is no successful match, when `n` is
          negative, or when `n` is larger than the number of recorded groups.
      **/
      public function matched(n:Int):String {
          if (m == null || m[1] == null || n < 0)
              throw "EReg::matched";
          if (n == 0)
              return sub(s, m[1], m[2]).match;
          if (!Std.isOfType(m[3], lua.Table))
              throw "EReg::matched";
          // Each group occupies two consecutive slots (start, end) in m[3].
          var slot = 2 * (n - 1);
          var from:Dynamic = untyped m[3][slot + 1];
          if (from == null)
              throw "EReg::matched"; // no such group was recorded
          if (Std.isOfType(from, Bool))
              return null; // the group did not participate in the match
          return sub(s, from, untyped m[3][slot + 2]).match;
      }

      /**
          Returns the part of the subject that precedes the last match.
          Throws "No string matched" when there is no successful match.
      **/
      public function matchedLeft():String {
          if (m == null || m[1] == null)
              throw "No string matched";
          return sub(s, 1, m[1] - 1).match;
      }

      /**
          Returns the part of the subject that follows the last match.
          Throws "No string matched" when there is no successful match.
      **/
      public function matchedRight():String {
          if (m == null || m[1] == null)
              throw "No string matched";
          return sub(s, m[2] + 1).match;
      }

      /**
          Returns position and length of the last match, both measured with
          `String.length` (not in bytes).
          Throws "No string matched" when there is no successful match.
      **/
      public function matchedPos():{pos:Int, len:Int} {
          if (m == null || m[1] == null)
              throw "No string matched";
          var before = matchedLeft();
          var whole = matched(0);
          return {
              pos: before.length,
              len: whole.length
          }
      }

      /**
          Returns the number of groups available from the last match, counting
          the whole match as one; `0` when nothing has matched.
      **/
      public function matchedNum():Int {
          if (m == null || m[1] == null)
              return 0;
          // m[3] holds two entries per group; Std.int keeps the result integral
          // because `/` produces a float on Lua.
          return 1 + Std.int((untyped __lua_length__(m[3])) / 2);
      }

      /**
          Matches against the part of `s` that starts at index `pos` and spans
          `len` units (the rest of the string when `len` is negative).

          The subject is remembered for the `matched*` accessors only when the
          match succeeds.
      **/
      public function matchSub(s:String, pos:Int, len:Int = -1):Bool {
          var ss = s.substr(0, len < 0 ? s.length : pos + len);
          // `exec` expects a 1-based byte offset while `pos` is a String index:
          // measure the bytes of the skipped prefix. The parameter `len` hides
          // the imported native `len`, hence the fully qualified call.
          var init = pos > 0 ? lua.NativeStringTools.len(s.substr(0, pos)) + 1 : pos + 1;
          m = lua.TableTools.pack(r.exec(ss, init));
          var found = m[1] != null;
          if (found)
              this.s = s;
          return found;
      }

      /**
          Splits `s` around matches of the expression: around every match in
          global mode, around the first match only otherwise.
      **/
      public function split(s:String):Array<String> {
          if (global)
              return Lib.fillArray(Rex.split(s, r));
          // Rex.split always splits on every match, so the single split is done
          // by hand from the offsets of the first match. A local table is used
          // so that the recorded match state is left untouched.
          var hit:Table<Int, Int> = lua.TableTools.pack(r.exec(s, 1));
          if (hit[1] == null)
              return [s];
          return [sub(s, 1, hit[1] - 1).match, sub(s, hit[2] + 1).match];
      }

      /**
          Replaces the first match (every match in global mode) in `s` with
          `by`. Inside `by`, `$1`..`$9` insert the corresponding capture group
          and `$$` inserts a literal dollar sign.
      **/
      public function replace(s:String, by:String):String {
          // "$$" is split away first so that it cannot be mistaken for a group.
          var chunks = by.split("$$");
          chunks = [
              for (chunk in chunks)
                  // `%` is the escape character of the replacement template, so
                  // literal ones are doubled before every `$N` becomes `%N`.
                  Rex.gsub(StringTools.replace(chunk, "%", "%%"), "\\$(\\d)", "%%%1", null)
          ];
          by = chunks.join("$");
          return Rex.gsub(s, r, by, global ? null : 1);
      }

      /**
          Rebuilds `s` with every match (only the first one when not global)
          replaced by the result of `f`, which receives this instance so that it
          can query `matched` and friends.
      **/
      public function map(s:String, f:EReg->String):String {
          var bytesOffset = 1;
          var buf = new StringBuf();
          do {
              if (bytesOffset > len(s)) {
                  break;
              } else if (!matchFromByte(s, bytesOffset)) {
                  // nothing left to match: copy the remainder verbatim
                  buf.add(sub(s, bytesOffset).match);
                  break;
              }
              // Read both offsets before running the callback, which is free to
              // use this instance and thereby replace the match table.
              var first = m[1];
              var last = m[2];
              buf.add(sub(s, bytesOffset, first - 1).match);
              buf.add(f(this));
              if (last - first < 0) {
                  // Empty match: copy one whole character and step over it so
                  // the loop makes progress.
                  var charBytes = len(sub(s, first).match.charAt(0));
                  buf.add(sub(s, first, first + charBytes - 1).match);
                  bytesOffset = first + charBytes;
              } else {
                  bytesOffset = last + 1;
              }
          } while (global);
          // In non-global mode the text after the single match is still pending.
          if (!global && bytesOffset > 1 && bytesOffset - 1 < len(s))
              buf.add(sub(s, bytesOffset).match);
          return buf.toString();
      }

      /**
          Earlier implementation of `map` that works through `matchSub` and
          `matchedPos`. Nothing in this class calls it.
      **/
      function map_old(s:String, f:EReg->String):String {
          var offset = 0;
          var buf = new StringBuf();
          do {
              if (offset >= s.length) {
                  break;
              } else if (!matchSub(s, offset)) {
                  buf.add(s.substr(offset));
                  break;
              }
              var p = matchedPos();
              buf.add(s.substr(offset, p.pos - offset));
              buf.add(f(this));
              if (p.len == 0) {
                  buf.add(s.substr(p.pos, 1));
                  offset = p.pos + 1;
              } else
                  offset = p.pos + p.len;
          } while (global);
          if (!global && offset > 0 && offset < s.length)
              buf.add(s.substr(offset));
          return buf.toString();
      }

      /**
          Returns `s` with a backslash placed in front of every character
          matched by `escapeRegExpRe`.
      **/
      public static function escape(s:String):String {
          return escapeRegExpRe.map(s, function(r) return "\\" + r.matched(0));
      }

      /** Matches each single character that `escape` has to prefix. **/
      static var escapeRegExpRe = ~/[\[\]{}()*+?.\\\^$|]/g;

      /** Fails early with a readable message when the Rex extern is unavailable. **/
      static function __init__():Void {
          if (Rex == null) {
              throw "Rex is missing. Please install lrexlib-pcre2.";
          }
      }
  }