regex.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
//
// assembly: System
// namespace: System.Text.RegularExpressions
// file: regex.cs
//
// author: Dan Lewis ([email protected])
// (c) 2002
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
  28. using System;
  29. using System.Text;
  30. using System.Collections;
  31. using System.Reflection;
  32. using System.Reflection.Emit;
  33. using System.Runtime.Serialization;
  34. using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
  35. using Parser = System.Text.RegularExpressions.Syntax.Parser;
  36. using System.Diagnostics;
  37. namespace System.Text.RegularExpressions {
  // Callback consumed by the Replace overloads that take an evaluator: it is
  // invoked with each successful Match and returns the text appended to the
  // result in place of that match.
  public delegate string MatchEvaluator (Match match);
  // Bit flags selecting how a pattern is parsed and matched. Each member
  // occupies its own bit, so members can be combined with bitwise OR.
  // Bit 7 (0x080) has no member.
  [Flags]
  public enum RegexOptions
  {
      None                    = 0,
      IgnoreCase              = 1 << 0,
      Multiline               = 1 << 1,
      ExplicitCapture         = 1 << 2,
      Compiled                = 1 << 3,
      Singleline              = 1 << 4,
      IgnorePatternWhitespace = 1 << 5,
      RightToLeft             = 1 << 6,
      ECMAScript              = 1 << 8,
      CultureInvariant        = 1 << 9
  }
  52. [Serializable]
  53. public class Regex : ISerializable {
  // Forwards to the four-argument overload with an empty attribute array
  // and no resource file.
  public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
  {
      CustomAttributeBuilder [] noAttributes = new CustomAttributeBuilder [0];
      Regex.CompileToAssembly (regexes, aname, noAttributes, null);
  }

  // Forwards to the four-argument overload with no resource file.
  public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                        CustomAttributeBuilder [] attribs)
  {
      string noResourceFile = null;
      Regex.CompileToAssembly (regexes, aname, attribs, noResourceFile);
  }

  // Not implemented: always throws NotImplementedException.
  //
  // TODO: make use of the attribs and resourceFile parameters.
  //
  // An earlier, disabled sketch of the implementation did the following:
  //   - defined a dynamic assembly (RunAndSave) and a module named
  //     "InnerRegexModule" inside it;
  //   - for every entry in regexes, parsed its Pattern/Options with Parser
  //     and compiled the result with a CILCompiler (modBuilder, i), passing
  //     whether RegexOptions.RightToLeft was set;
  //   - defined a placeholder type "ITest" and saved the assembly under
  //     aname.Name.
  [MonoTODO]
  public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                        CustomAttributeBuilder [] attribs, string resourceFile)
  {
      throw new NotImplementedException ();
  }
  // Returns str in escaped form; the work is delegated to Parser.Escape.
  public static string Escape (string str)
  {
      string escaped = Parser.Escape (str);
      return escaped;
  }
  // Returns str in unescaped form; the work is delegated to Parser.Unescape.
  public static string Unescape (string str)
  {
      string unescaped = Parser.Unescape (str);
      return unescaped;
  }
  // Static convenience form: tests input against pattern with
  // RegexOptions.None.
  public static bool IsMatch (string input, string pattern)
  {
      return Regex.IsMatch (input, pattern, RegexOptions.None);
  }

  // Static convenience form: constructs a Regex from pattern and options
  // and returns the result of its instance IsMatch (input).
  public static bool IsMatch (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).IsMatch (input);
  }
  // Static convenience form: matches input against pattern with
  // RegexOptions.None.
  public static Match Match (string input, string pattern)
  {
      return Match (input, pattern, RegexOptions.None);
  }

  // Static convenience form: constructs a Regex from pattern and options
  // and returns the result of its instance Match (input).
  public static Match Match (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).Match (input);
  }
  // Static convenience form: collects the matches of pattern in input
  // using RegexOptions.None.
  public static MatchCollection Matches (string input, string pattern)
  {
      return Regex.Matches (input, pattern, RegexOptions.None);
  }

  // Static convenience form: constructs a Regex from pattern and options
  // and returns the result of its instance Matches (input).
  public static MatchCollection Matches (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).Matches (input);
  }
  // Static convenience form: evaluator-based replacement with
  // RegexOptions.None.
  public static string Replace (string input, string pattern, MatchEvaluator evaluator)
  {
      return Replace (input, pattern, evaluator, RegexOptions.None);
  }

  // Static convenience form: constructs a Regex from pattern and options
  // and returns the result of its instance Replace (input, evaluator).
  public static string Replace (string input, string pattern, MatchEvaluator evaluator,
                                RegexOptions options)
  {
      return new Regex (pattern, options).Replace (input, evaluator);
  }

  // Static convenience form: replacement-string based replacement with
  // RegexOptions.None.
  public static string Replace (string input, string pattern, string replacement)
  {
      return Replace (input, pattern, replacement, RegexOptions.None);
  }

  // Static convenience form: constructs a Regex from pattern and options
  // and returns the result of its instance Replace (input, replacement).
  public static string Replace (string input, string pattern, string replacement,
                                RegexOptions options)
  {
      return new Regex (pattern, options).Replace (input, replacement);
  }
  // Static convenience form: splits input on pattern with
  // RegexOptions.None.
  public static string [] Split (string input, string pattern)
  {
      return Split (input, pattern, RegexOptions.None);
  }

  // Static convenience form: constructs a Regex from pattern and options
  // and returns the result of its instance Split (input).
  public static string [] Split (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).Split (input);
  }
  151. // private
  // Shared (static) cache of machine factories, consulted and filled by the
  // Regex (pattern, options) constructor. The reference is never reassigned,
  // hence readonly.
  // TODO: 200 is a placeholder (presumably the cache capacity - confirm
  // against FactoryCache); put some meaningful number here.
  private static readonly FactoryCache cache = new FactoryCache (200);
  153. // constructors
  // Parameterless constructor for derived classes. Per the original note it
  // is used when compiling to an assembly: custom regexes inherit from Regex
  // and use this constructor. It leaves every field at its default value.
  protected Regex ()
  {
  }

  // Creates a regex for pattern using RegexOptions.None.
  public Regex (string pattern) : this (pattern, RegexOptions.None)
  {
  }

  // Creates a regex for pattern with the given options.
  //
  // The shared factory cache is consulted first; on a miss the pattern is
  // parsed and compiled, and the resulting machine factory is added to the
  // cache for later instances with the same pattern and options.
  //
  // Throws ArgumentNullException when pattern is null.
  public Regex (string pattern, RegexOptions options)
  {
      // Fail early with a descriptive exception instead of letting a null
      // pattern reach the cache lookup or the parser.
      if (pattern == null)
          throw new ArgumentNullException ("pattern");

      this.pattern = pattern;
      this.roptions = options;
      this.machineFactory = cache.Lookup (pattern, options);

      if (this.machineFactory != null) {
          // Cache hit: group information travels with the cached factory.
          this.group_count = this.machineFactory.GroupCount;
          this.mapping = this.machineFactory.Mapping;
          return;
      }

      // Cache miss: parse and install the group mapping.
      Parser psr = new Parser ();
      RegularExpression re = psr.ParseRegularExpression (pattern, options);
      this.group_count = re.GroupCount;
      this.mapping = psr.GetMapping ();

      // Compile. RegexOptions.Compiled is not honoured here: the CILCompiler
      // path is disabled, so the PatternCompiler is always used.
      ICompiler cmp = new PatternCompiler ();
      re.Compile (cmp, RightToLeft);

      // Install the machine factory and add it to the pattern cache.
      this.machineFactory = cmp.GetMachineFactory ();
      this.machineFactory.Mapping = mapping;
      cache.Add (pattern, options, this.machineFactory);
  }

  // Deserialization constructor: rebuilds the regex from the "pattern" and
  // "options" entries written by ISerializable.GetObjectData.
  private Regex (SerializationInfo info, StreamingContext context) :
      this (info.GetString ("pattern"),
            (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
  {
  }

  // Intentionally empty finalizer; per the original note it exists only so
  // that the public API signature matches.
  ~Regex ()
  {
  }
  195. // public instance properties
  // The options stored in roptions (set by the constructor).
  public RegexOptions Options
  {
      get {
          return this.roptions;
      }
  }
  // True when the RegexOptions.RightToLeft bit is set in roptions.
  public bool RightToLeft
  {
      get {
          RegexOptions masked = roptions & RegexOptions.RightToLeft;
          return masked != 0;
      }
  }
  202. // public instance methods
  // Returns a fresh array holding every key of the group mapping, in the
  // enumeration order of the mapping's key collection.
  public string [] GetGroupNames ()
  {
      string [] result = new string [mapping.Count];
      ICollection keys = mapping.Keys;
      keys.CopyTo (result, 0);
      return result;
  }
  // Returns a fresh array holding every value of the group mapping, in the
  // enumeration order of the mapping's value collection.
  public int [] GetGroupNumbers ()
  {
      int [] result = new int [mapping.Count];
      ICollection values = mapping.Values;
      values.CopyTo (result, 0);
      return result;
  }
  // Reverse lookup in the group mapping: returns the first key whose mapped
  // number equals i. Returns "" when i exceeds the group count or when no
  // key maps to i.
  public string GroupNameFromNumber (int i)
  {
      if (i <= group_count) {
          foreach (string candidate in mapping.Keys) {
              int number = (int) mapping [candidate];
              if (number == i)
                  return candidate;
          }
      }

      return "";
  }
  // Returns the number the group mapping holds for name, or -1 when the
  // mapping has no such key.
  public int GroupNumberFromName (string name)
  {
      return mapping.Contains (name) ? (int) mapping [name] : -1;
  }
  227. // match methods
  // Tests input for a match, starting at the end of the string for
  // right-to-left regexes and at index 0 otherwise.
  public bool IsMatch (string input)
  {
      int start = RightToLeft ? input.Length : 0;
      return IsMatch (input, start);
  }

  // Tests input for a match, with the search starting at startat.
  public bool IsMatch (string input, int startat)
  {
      Match m = Match (input, startat);
      return m.Success;
  }
  // Finds a match in input, starting at the end of the string for
  // right-to-left regexes and at index 0 otherwise.
  public Match Match (string input)
  {
      int start = RightToLeft ? input.Length : 0;
      return Match (input, start);
  }

  // Runs a new machine over input from startat, with input.Length passed
  // as the end bound of the scan.
  public Match Match (string input, int startat)
  {
      IMachine machine = CreateMachine ();
      return machine.Scan (this, input, startat, input.Length);
  }

  // Runs a new machine over input from startat, with startat + length
  // passed as the end bound of the scan.
  public Match Match (string input, int startat, int length)
  {
      IMachine machine = CreateMachine ();
      int end = startat + length;
      return machine.Scan (this, input, startat, end);
  }
  // Collects all matches in input, starting at the end of the string for
  // right-to-left regexes and at index 0 otherwise.
  public MatchCollection Matches (string input)
  {
      int start = RightToLeft ? input.Length : 0;
      return Matches (input, start);
  }

  // Collects all matches eagerly: takes the first match from startat and
  // keeps following NextMatch () until an unsuccessful match is returned.
  public MatchCollection Matches (string input, int startat)
  {
      MatchCollection found = new MatchCollection ();

      for (Match m = Match (input, startat); m.Success; m = m.NextMatch ())
          found.Add (m);

      return found;
  }
  264. // replace methods
  // Replaces every match in input with the text returned by evaluator.
  // Throws ArgumentNullException when input is null.
  public string Replace (string input, MatchEvaluator evaluator)
  {
      if (input == null)
          throw new ArgumentNullException ("input");

      return Replace (input, evaluator, Int32.MaxValue, RightToLeft ? input.Length : 0);
  }

  // Replaces at most count matches in input with the text returned by
  // evaluator. Throws ArgumentNullException when input is null.
  public string Replace (string input, MatchEvaluator evaluator, int count)
  {
      if (input == null)
          throw new ArgumentNullException ("input");

      return Replace (input, evaluator, count, RightToLeft ? input.Length : 0);
  }

  // Replaces at most count matches, searching from startat, with the text
  // returned by evaluator.
  //
  // input     - the string to search; never modified.
  // evaluator - called once per replaced match, in the order matches are found.
  // count     - maximum number of replacements; -1 means "replace all",
  //             0 (or any other negative value) replaces nothing.
  // startat   - position the search starts from. Text outside the searched
  //             region is copied to the result unchanged.
  //
  // Throws ArgumentNullException when input or evaluator is null.
  public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
  {
      if (input == null)
          throw new ArgumentNullException ("input");
      if (evaluator == null)
          throw new ArgumentNullException ("evaluator");

      // -1 is the documented "no limit" value of the .NET API.
      if (count == -1)
          count = Int32.MaxValue;

      Match m = Match (input, startat);

      if (!RightToLeft) {
          // Left to right: copy from index 0 (not from startat) so that the
          // text in front of the search start is preserved.
          StringBuilder result = new StringBuilder ();
          int ptr = 0;
          while (m.Success && count-- > 0) {
              result.Append (input.Substring (ptr, m.Index - ptr));
              result.Append (evaluator (m));
              ptr = m.Index + m.Length;
              m = m.NextMatch ();
          }
          result.Append (input.Substring (ptr));
          return result.ToString ();
      }

      // Right to left: matches arrive from the end of the string towards the
      // start, so pieces are gathered back to front (same bookkeeping as the
      // right-to-left branch of Split) and stitched together in reverse.
      ArrayList pieces = new ArrayList ();
      int right = input.Length;
      while (m.Success && count-- > 0) {
          int matchEnd = m.Index + m.Length;
          pieces.Add (input.Substring (matchEnd, right - matchEnd));
          pieces.Add (evaluator (m));
          right = m.Index;
          m = m.NextMatch ();
      }
      pieces.Add (input.Substring (0, right));

      StringBuilder reversed = new StringBuilder ();
      for (int i = pieces.Count - 1; i >= 0; i--)
          reversed.Append ((string) pieces [i]);
      return reversed.ToString ();
  }
  // Replaces matches in input using a replacement string, with
  // Int32.MaxValue as the count. The search starts at the end of the string
  // for right-to-left regexes and at index 0 otherwise.
  public string Replace (string input, string replacement)
  {
      int start = RightToLeft ? input.Length : 0;
      return Replace (input, replacement, Int32.MaxValue, start);
  }

  // Same as above with a caller-supplied count.
  public string Replace (string input, string replacement, int count)
  {
      int start = RightToLeft ? input.Length : 0;
      return Replace (input, replacement, count, start);
  }

  // Wraps replacement in a ReplacementEvaluator and forwards to the
  // MatchEvaluator-based overload, using the evaluator's Evaluate method
  // as the callback.
  public string Replace (string input, string replacement, int count, int startat)
  {
      ReplacementEvaluator expander = new ReplacementEvaluator (this, replacement);
      MatchEvaluator callback = new MatchEvaluator (expander.Evaluate);
      return Replace (input, callback, count, startat);
  }
  307. // split methods
  // Splits input at every match. Throws ArgumentNullException when input
  // is null.
  public string [] Split (string input)
  {
      if (input == null)
          throw new ArgumentNullException ("input");

      return Split (input, Int32.MaxValue, RightToLeft ? input.Length : 0);
  }

  // Splits input into at most count pieces (0 means no limit). Throws
  // ArgumentNullException when input is null.
  public string [] Split (string input, int count)
  {
      if (input == null)
          throw new ArgumentNullException ("input");

      return Split (input, count, RightToLeft ? input.Length : 0);
  }

  // Splits input at the matches found when searching from startat.
  //
  // input   - the string to split.
  // count   - maximum number of pieces cut at matches plus the remainder;
  //           0 means no limit. With count == 1 no match is consumed and
  //           the whole input is returned as the only element.
  // startat - position the search starts from. Text outside the searched
  //           region stays attached to the neighbouring piece.
  //
  // For every match, the text of each group from index 1 upwards is added
  // right after the piece that precedes the match.
  //
  // Throws ArgumentNullException when input is null.
  public string [] Split (string input, int count, int startat)
  {
      if (input == null)
          throw new ArgumentNullException ("input");
      if (count == 0)
          count = Int32.MaxValue;

      bool rtl = RightToLeft;
      ArrayList splits = new ArrayList ();

      // Boundary of the text not yet emitted. It starts at the edge of the
      // string rather than at startat, so text in front of (or, right to
      // left, behind) the search start is not lost.
      int ptr = rtl ? input.Length : 0;

      Match m = null;
      while (--count > 0) {
          // Continue with NextMatch () instead of re-running the search from
          // ptr: after a zero-length match ptr does not move, so a fresh
          // search would return the same match over and over.
          m = (m == null) ? Match (input, startat) : m.NextMatch ();
          if (!m.Success)
              break;

          int matchEnd = m.Index + m.Length;
          if (rtl)
              splits.Add (input.Substring (matchEnd, ptr - matchEnd));
          else
              splits.Add (input.Substring (ptr, m.Index - ptr));

          int gcount = m.Groups.Count;
          for (int gindex = 1; gindex < gcount; gindex++) {
              Group grp = m.Groups [gindex];
              splits.Add (input.Substring (grp.Index, grp.Length));
          }

          ptr = rtl ? m.Index : matchEnd;
      }

      // Remainder on the far side of the last consumed match.
      // NOTE(review): in right-to-left mode the pieces are returned in the
      // order they were found (end of string first), as before - confirm
      // whether callers expect them in string order instead.
      if (rtl)
          splits.Add (input.Substring (0, ptr));
      else
          splits.Add (input.Substring (ptr));

      return (string []) splits.ToArray (typeof (string));
  }
  // MS undocumented methods
  // Placeholder for an undocumented MS member; not implemented, always
  // throws NotImplementedException.
  [MonoTODO]
  protected void InitializeReferences ()
  {
      throw new NotImplementedException ();
  }
  // Placeholder for an undocumented MS member; not implemented, always
  // throws NotImplementedException.
  [MonoTODO]
  protected bool UseOptionC ()
  {
      throw new NotImplementedException ();
  }
  // Placeholder for an undocumented MS member; not implemented, always
  // throws NotImplementedException.
  [MonoTODO]
  protected bool UseOptionR ()
  {
      throw new NotImplementedException ();
  }
  368. // object methods
  // The string form of a Regex is the content of its pattern field.
  public override string ToString ()
  {
      return this.pattern;
  }
  372. // ISerializable interface
  // ISerializable implementation: stores the pattern text (via ToString)
  // and the options under the keys "pattern" and "options", the same keys
  // the deserialization constructor reads back.
  void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
  {
      string text = this.ToString ();
      info.AddValue ("pattern", text, typeof (string));

      RegexOptions opts = this.Options;
      info.AddValue ("options", opts, typeof (RegexOptions));
  }
  377. // internal
  // Assembly-internal read access to the group_count field.
  internal int GroupCount
  {
      get {
          return this.group_count;
      }
  }
  381. // private
  // Asks the machine factory for a new machine instance; called once per
  // Match invocation.
  private IMachine CreateMachine ()
  {
      IMachine machine = machineFactory.NewInstance ();
      return machine;
  }
  // Source of matcher instances for this pattern; taken from the shared
  // cache or produced by the compiler in the constructor.
  private IMachineFactory machineFactory;

  // Group mapping; keys are read as strings and values as ints by the
  // group name/number lookup methods.
  private IDictionary mapping;

  // Group count taken from the parsed expression or the cached factory.
  private int group_count;

  // protected members

  // Pattern text as passed to the constructor.
  protected internal string pattern;

  // Options as passed to the constructor.
  protected internal RegexOptions roptions;

  // MS undocumented members; declared only, nothing in this class reads or
  // writes them.
  [MonoTODO]
  protected internal Hashtable capnames;

  [MonoTODO]
  protected internal Hashtable caps;

  [MonoTODO]
  protected internal int capsize;

  [MonoTODO]
  protected internal string [] capslist;

  [MonoTODO]
  protected internal RegexRunnerFactory factory;
  402. }
  // Plain data holder describing one regular expression handed to
  // Regex.CompileToAssembly: the pattern and its options, plus the name,
  // namespace and visibility given for it. No value is validated.
  [Serializable]
  public class RegexCompilationInfo
  {
      // backing fields
      private string pattern;
      private string name;
      private string nspace;
      private RegexOptions options;
      private bool isPublic;

      // Stores the five values as given.
      public RegexCompilationInfo (string pattern, RegexOptions options, string name, string nspace, bool isPublic)
      {
          Pattern = pattern;
          Options = options;
          Name = name;
          Namespace = nspace;
          IsPublic = isPublic;
      }

      // Visibility flag given for this entry.
      public bool IsPublic
      {
          get { return this.isPublic; }
          set { this.isPublic = value; }
      }

      // Name given for this entry.
      public string Name
      {
          get { return this.name; }
          set { this.name = value; }
      }

      // Namespace given for this entry.
      public string Namespace
      {
          get { return this.nspace; }
          set { this.nspace = value; }
      }

      // Options to use with the pattern.
      public RegexOptions Options
      {
          get { return this.options; }
          set { this.options = value; }
      }

      // The regular expression pattern text.
      public string Pattern
      {
          get { return this.pattern; }
          set { this.pattern = value; }
      }
  }
  438. }