Regex.cs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: regex.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. using System;
  29. using System.Text;
  30. using System.Collections;
  31. using System.Reflection;
  32. using System.Reflection.Emit;
  33. using System.Runtime.Serialization;
  34. using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
  35. using Parser = System.Text.RegularExpressions.Syntax.Parser;
  36. using System.Diagnostics;
  37. namespace System.Text.RegularExpressions {
  38. [Serializable]
  39. public partial class Regex : ISerializable {
  40. #if !TARGET_JVM
  // Compiles the given expressions into the assembly described by aname,
  // passing an empty attribute list and no resource file. Forwards to the
  // four-argument overload, which is not implemented yet.
  [MonoTODO]
  public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
  {
      CustomAttributeBuilder [] noAttributes = new CustomAttributeBuilder [0];
      CompileToAssembly (regexes, aname, noAttributes, null);
  }

  // Same as above but with caller-supplied custom attributes; still no
  // resource file. Forwards to the four-argument overload.
  [MonoTODO]
  public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                        CustomAttributeBuilder [] attribs)
  {
      CompileToAssembly (regexes, aname, attribs, null);
  }

  // Full overload. Not implemented: always throws NotImplementedException.
  [MonoTODO]
  public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                        CustomAttributeBuilder [] attribs, string resourceFile)
  {
      // TODO : Make use of attribs and resourceFile parameters
      //
      // Disabled sketch of the intended implementation, kept for reference:
      //
      // AssemblyBuilder asmBuilder = AppDomain.CurrentDomain.DefineDynamicAssembly (aname, AssemblyBuilderAccess.RunAndSave);
      // ModuleBuilder modBuilder = asmBuilder.DefineDynamicModule("InnerRegexModule",aname.Name);
      // Parser psr = new Parser ();
      // System.Console.WriteLine("CompileToAssembly");
      // for(int i=0; i < regexes.Length; i++)
      // {
      //     System.Console.WriteLine("Compiling expression :" + regexes[i].Pattern);
      //     RegularExpression re = psr.ParseRegularExpression (regexes[i].Pattern, regexes[i].Options);
      //     // compile
      //     CILCompiler cmp = new CILCompiler (modBuilder, i);
      //     bool reverse = (regexes[i].Options & RegexOptions.RightToLeft) !=0;
      //     re.Compile (cmp, reverse);
      //     cmp.Close();
      // }
      // // Define a runtime class with specified name and attributes.
      // TypeBuilder builder = modBuilder.DefineType("ITest");
      // builder.CreateType();
      // asmBuilder.Save(aname.Name);
      throw new NotImplementedException ();
  }
  79. #endif
  // Returns Parser.Escape applied to str.
  // Throws ArgumentNullException ("str") when str is null.
  public static string Escape (string str)
  {
      if (str != null) {
          return Parser.Escape (str);
      }
      throw new ArgumentNullException ("str");
  }

  // Returns Parser.Unescape applied to str.
  // Throws ArgumentNullException ("str") when str is null.
  public static string Unescape (string str)
  {
      if (str != null) {
          return Parser.Unescape (str);
      }
      throw new ArgumentNullException ("str");
  }
  // Static convenience form: tests input against pattern with
  // RegexOptions.None.
  public static bool IsMatch (string input, string pattern)
  {
      return IsMatch (input, pattern, RegexOptions.None);
  }

  // Static convenience form: builds a Regex for (pattern, options) and
  // returns the result of its instance IsMatch on input.
  public static bool IsMatch (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).IsMatch (input);
  }
  // Static convenience form: matches input against pattern with
  // RegexOptions.None.
  public static Match Match (string input, string pattern)
  {
      return Regex.Match (input, pattern, RegexOptions.None);
  }

  // Static convenience form: builds a Regex for (pattern, options) and
  // returns the result of its instance Match on input.
  public static Match Match (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).Match (input);
  }
  // Static convenience form: collects matches of pattern in input with
  // RegexOptions.None.
  public static MatchCollection Matches (string input, string pattern)
  {
      return Matches (input, pattern, RegexOptions.None);
  }

  // Static convenience form: builds a Regex for (pattern, options) and
  // returns the result of its instance Matches on input.
  public static MatchCollection Matches (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).Matches (input);
  }
  // Static convenience form: evaluator-based replacement with
  // RegexOptions.None.
  public static string Replace (string input, string pattern, MatchEvaluator evaluator)
  {
      return Regex.Replace (input, pattern, evaluator, RegexOptions.None);
  }

  // Static convenience form: builds a Regex for (pattern, options) and
  // returns the result of its instance Replace (input, evaluator).
  public static string Replace (string input, string pattern, MatchEvaluator evaluator,
                                RegexOptions options)
  {
      return new Regex (pattern, options).Replace (input, evaluator);
  }

  // Static convenience form: string-based replacement with
  // RegexOptions.None.
  public static string Replace (string input, string pattern, string replacement)
  {
      return Regex.Replace (input, pattern, replacement, RegexOptions.None);
  }

  // Static convenience form: builds a Regex for (pattern, options) and
  // returns the result of its instance Replace (input, replacement).
  public static string Replace (string input, string pattern, string replacement,
                                RegexOptions options)
  {
      return new Regex (pattern, options).Replace (input, replacement);
  }
  // Static convenience form: splits input around pattern with
  // RegexOptions.None.
  public static string [] Split (string input, string pattern)
  {
      return Regex.Split (input, pattern, RegexOptions.None);
  }

  // Static convenience form: builds a Regex for (pattern, options) and
  // returns the result of its instance Split on input.
  public static string [] Split (string input, string pattern, RegexOptions options)
  {
      return new Regex (pattern, options).Split (input);
  }
  #if NET_2_0
  // Shared cache of machine factories, looked up by (pattern, options) in
  // Init and filled in InitNewRegex. Initial capacity is 15 on this profile.
  static FactoryCache cache = new FactoryCache (15);

  // Capacity of the shared factory cache.
  // Throws ArgumentOutOfRangeException when a negative value is assigned.
  public static int CacheSize {
      get { return cache.Capacity; }
      set {
          // The rejected argument of a property setter is the implicit
          // "value" parameter, so that is the name reported to the caller
          // (not the property name).
          if (value < 0)
              throw new ArgumentOutOfRangeException ("value");
          cache.Capacity = value;
      }
  }
  #else
  // Shared cache of machine factories; fixed capacity of 200 on profiles
  // that do not expose CacheSize.
  static FactoryCache cache = new FactoryCache (200);
  #endif
  161. // private
  162. // constructors
  // Parameterless constructor for compiled regular expressions, i.e.
  // classes derived from Regex. It deliberately performs no initialization.
  protected Regex ()
  {
  }

  // Builds a regex for pattern using RegexOptions.None.
  public Regex (string pattern)
      : this (pattern, RegexOptions.None)
  {
  }

  // Builds a regex for pattern with the given options.
  // Throws ArgumentNullException ("pattern") for a null pattern; the options
  // are checked by validate_options before any state is stored, after which
  // Init () sets up the matching machinery.
  public Regex (string pattern, RegexOptions options)
  {
      if (pattern == null) {
          throw new ArgumentNullException ("pattern");
      }
      validate_options (options);

      this.roptions = options;
      this.pattern = pattern;
      Init ();
  }
  // Rejects option combinations this implementation does not accept.
  // Throws ArgumentOutOfRangeException ("options") when options contains a
  // flag outside the supported set, or when ECMAScript is combined with any
  // flag outside the ECMAScript-compatible set.
  static void validate_options (RegexOptions options)
  {
      // Every flag that may be passed at all.
      const RegexOptions supported =
          RegexOptions.None |
          RegexOptions.IgnoreCase |
          RegexOptions.Multiline |
          RegexOptions.ExplicitCapture |
  #if MOBILE || !NET_2_1
          RegexOptions.Compiled |
  #endif
          RegexOptions.Singleline |
          RegexOptions.IgnorePatternWhitespace |
          RegexOptions.RightToLeft |
          RegexOptions.ECMAScript |
          RegexOptions.CultureInvariant;

      // Flags that may accompany RegexOptions.ECMAScript.
      const RegexOptions ecmaCompatible =
          RegexOptions.IgnoreCase |
          RegexOptions.Multiline |
  #if MOBILE || !NET_2_1
          RegexOptions.Compiled |
  #endif
          RegexOptions.ECMAScript;

      bool hasUnsupportedFlag = (options & ~supported) != 0;
      bool isEcmaScript = (options & RegexOptions.ECMAScript) != 0;
      bool breaksEcmaRules = isEcmaScript && (options & ~ecmaCompatible) != 0;

      // Both failures are reported identically, so one throw covers them.
      if (hasUnsupportedFlag || breaksEcmaRules) {
          throw new ArgumentOutOfRangeException ("options");
      }
  }
  207. #if !TARGET_JVM
  // Attaches a machine factory to this instance. A factory cached for the
  // same (pattern, options) pair is reused; otherwise InitNewRegex builds
  // and registers a new one. Either way the group metadata fields end up
  // copied from the factory.
  private void Init ()
  {
      IMachineFactory cached = cache.Lookup (this.pattern, this.roptions);
      this.machineFactory = cached;

      if (cached == null) {
          // Cache miss: InitNewRegex fills in every field, including
          // machineFactory.
          InitNewRegex ();
          return;
      }

      this.group_count = cached.GroupCount;
      this.gap = cached.Gap;
      this.mapping = cached.Mapping;
      this.group_names = cached.NamesMapping;
  }
  220. #endif
  // Builds a new machine factory for this pattern/options pair, adds it to
  // the shared cache, and copies its group metadata into this instance.
  private void InitNewRegex ()
  {
      IMachineFactory created = CreateMachineFactory (this.pattern, this.roptions);
      this.machineFactory = created;
      cache.Add (this.pattern, this.roptions, created);

      this.group_count = created.GroupCount;
      this.gap = created.Gap;
      this.mapping = created.Mapping;
      this.group_names = created.NamesMapping;
  }
  #if !NET_2_1
  // The new rx engine seems to be working now, but potential problems are
  // being tracked down here:
  // https://bugzilla.novell.com/show_bug.cgi?id=470827
  //
  // True unless the MONO_NEW_RX environment variable is set; while true,
  // CreateMachineFactory uses PatternCompiler.
  static readonly bool old_rx =
      Environment.GetEnvironmentVariable ("MONO_NEW_RX") == null;
  #endif

  // Parses pattern, compiles it with the compiler selected for this build
  // and these options, and returns the resulting machine factory with its
  // Gap, Mapping and NamesMapping filled in from the parser's group mapping.
  private static IMachineFactory CreateMachineFactory (string pattern, RegexOptions options)
  {
      Parser parser = new Parser ();
      RegularExpression parsed = parser.ParseRegularExpression (pattern, options);

  #if NET_2_1
      ICompiler compiler = new PatternCompiler ();
  #else
      // Compiler choice: PatternCompiler while old_rx is in effect;
      // otherwise CILCompiler for RegexOptions.Compiled and RxCompiler for
      // everything else.
      ICompiler compiler;
      if (old_rx)
          compiler = new PatternCompiler ();
      else if ((options & RegexOptions.Compiled) == 0)
          compiler = new RxCompiler ();
      else
          compiler = new CILCompiler ();
  #endif

      bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;
      parsed.Compile (compiler, rightToLeft);

      IMachineFactory result = compiler.GetMachineFactory ();

      // GetMapping fills the table and returns the value stored as Gap.
      Hashtable groupMap = new Hashtable ();
      result.Gap = parser.GetMapping (groupMap);
      result.Mapping = groupMap;
      result.NamesMapping = GetGroupNamesArray (result.GroupCount, result.Mapping);
      return result;
  }
  // Deserialization constructor: reads the "pattern" and "options" entries
  // (the same keys ISerializable.GetObjectData writes) and hands them to the
  // regular (pattern, options) constructor. Protected on NET_2_0, private
  // otherwise.
  #if NET_2_0
  protected
  #else
  private
  #endif
  Regex (SerializationInfo info, StreamingContext context)
      : this (info.GetString ("pattern"),
              (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
  {
  }

  #if ONLY_1_1 && !TARGET_JVM
  // Empty finalizer; present only to fix the public API signature.
  ~Regex ()
  {
  }
  #endif
  278. // public instance properties
  // The options this regex was constructed with.
  public RegexOptions Options {
      get { return this.roptions; }
  }

  // True when RegexOptions.RightToLeft is among the options.
  public bool RightToLeft {
      get {
          RegexOptions rtlBit = this.roptions & RegexOptions.RightToLeft;
          return rtlBit != RegexOptions.None;
      }
  }
  285. // public instance methods
  // Returns a fresh copy of the group-name table (1 + group_count entries),
  // so callers cannot modify the internal array.
  public string [] GetGroupNames ()
  {
      int length = group_count + 1;
      string [] copy = new string [length];
      Array.Copy (group_names, copy, length);
      return copy;
  }

  // Returns a fresh copy of the group-number table (1 + group_count
  // entries), so callers cannot modify the cached array.
  public int [] GetGroupNumbers ()
  {
      int length = group_count + 1;
      int [] copy = new int [length];
      Array.Copy (GroupNumbers, copy, length);
      return copy;
  }
  // Returns the name stored for group number i, or the empty string when
  // GetGroupIndex reports no such group (negative index).
  public string GroupNameFromNumber (int i)
  {
      int index = GetGroupIndex (i);
      return index < 0 ? "" : group_names [index];
  }
  // Returns the group number associated with name, or -1 when the name is
  // not in the mapping.
  // Throws ArgumentNullException ("name") when name is null.
  public int GroupNumberFromName (string name)
  {
      // Guard explicitly, like the other public entry points do, so the
      // exception names this method's parameter instead of surfacing from
      // inside the dictionary lookup.
      if (name == null)
          throw new ArgumentNullException ("name");

      if (!mapping.Contains (name))
          return -1;

      int i = (int) mapping [name];
      // Entries at or beyond the gap carry their number as their name, so
      // the number is recovered by parsing the name itself.
      if (i >= gap)
          i = Int32.Parse (name);
      return i;
  }
  // Maps a group number to its index in the group tables. Numbers below the
  // gap are their own index. Otherwise the number is looked up by binary
  // search in GroupNumbers [gap .. group_count]; the result is negative when
  // it is not found, and -1 when there are no entries at or beyond the gap.
  internal int GetGroupIndex (int number)
  {
      if (number >= gap) {
          if (gap > group_count) {
              return -1;
          }
          int searchLength = group_count - gap + 1;
          return Array.BinarySearch (GroupNumbers, gap, searchLength, number);
      }
      return number;
  }
  // Default scan start: the end of input for right-to-left regexes, 0
  // otherwise. A null input also yields 0, leaving the null check to the
  // method that receives the value.
  int default_startat (string input)
  {
      if (!RightToLeft || input == null) {
          return 0;
      }
      return input.Length;
  }
  326. // match methods
  // True when the regex matches input, scanning from the default start
  // position.
  public bool IsMatch (string input)
  {
      int start = default_startat (input);
      return IsMatch (input, start);
  }

  // True when Match (input, startat) succeeds; argument validation is done
  // by that Match overload.
  public bool IsMatch (string input, int startat)
  {
      Match found = Match (input, startat);
      return found.Success;
  }
  // Scans input from the default start position.
  public Match Match (string input)
  {
      int start = default_startat (input);
      return Match (input, start);
  }

  // Scans input starting at startat, with the scan end set to input.Length.
  // Throws ArgumentNullException ("input") for a null input and
  // ArgumentOutOfRangeException ("startat") when startat is outside
  // [0, input.Length].
  public Match Match (string input, int startat)
  {
      if (input == null) {
          throw new ArgumentNullException ("input");
      }
      bool startInRange = 0 <= startat && startat <= input.Length;
      if (!startInRange) {
          throw new ArgumentOutOfRangeException ("startat");
      }

      IMachine machine = CreateMachine ();
      return machine.Scan (this, input, startat, input.Length);
  }

  // Scans the window of input that begins at beginning and is length
  // characters long.
  // Throws ArgumentNullException ("input") for a null input,
  // ArgumentOutOfRangeException ("beginning") when beginning is outside
  // [0, input.Length], and ArgumentOutOfRangeException ("length") when the
  // window would be negative or run past the end of input.
  // NOTE(review): unlike the other overloads this one does not consult
  // RightToLeft when choosing the start position - confirm that starting at
  // `beginning` is what the machine expects for right-to-left patterns.
  public Match Match (string input, int beginning, int length)
  {
      if (input == null) {
          throw new ArgumentNullException ("input");
      }
      if (beginning < 0 || beginning > input.Length) {
          throw new ArgumentOutOfRangeException ("beginning");
      }
      int available = input.Length - beginning;
      if (length < 0 || length > available) {
          throw new ArgumentOutOfRangeException ("length");
      }

      int end = beginning + length;
      IMachine machine = CreateMachine ();
      return machine.Scan (this, input, beginning, end);
  }
  // Collects matches in input, starting from the default start position.
  public MatchCollection Matches (string input)
  {
      int start = default_startat (input);
      return Matches (input, start);
  }

  // Runs the first match at startat (which also validates the arguments)
  // and wraps it in a MatchCollection.
  public MatchCollection Matches (string input, int startat)
  {
      Match first = Match (input, startat);
      return new MatchCollection (first);
  }
  366. // replace methods
  // Replaces matches using evaluator, with a count of Int32.MaxValue,
  // starting from the default start position.
  public string Replace (string input, MatchEvaluator evaluator)
  {
      int start = default_startat (input);
      return Replace (input, evaluator, Int32.MaxValue, start);
  }

  // Replaces matches using evaluator with the given count, starting from
  // the default start position.
  public string Replace (string input, MatchEvaluator evaluator, int count)
  {
      int start = default_startat (input);
      return Replace (input, evaluator, count, start);
  }

  // Bridges a MatchEvaluator (match -> string) to the append-style callback
  // used by the left-to-right replace path: the evaluator's result is
  // appended to the supplied StringBuilder.
  class Adapter {
      MatchEvaluator evaluator;

      public Adapter (MatchEvaluator ev)
      {
          this.evaluator = ev;
      }

      public void Evaluate (Match m, StringBuilder sb)
      {
          sb.Append (evaluator (m));
      }
  }

  // Core evaluator-based replace.
  // Throws ArgumentNullException for a null input or evaluator, and
  // ArgumentOutOfRangeException when count is below -1 or startat is
  // outside [0, input.Length].
  public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
  {
      if (input == null) {
          throw new ArgumentNullException ("input");
      }
      if (evaluator == null) {
          throw new ArgumentNullException ("evaluator");
      }
      if (count < -1) {
          throw new ArgumentOutOfRangeException ("count");
      }
      if (startat < 0 || startat > input.Length) {
          throw new ArgumentOutOfRangeException ("startat");
      }

      BaseMachine machine = (BaseMachine) CreateMachine ();
      if (!RightToLeft) {
          // NOTE: If this is a cause of a lot of allocations, we can convert
          // it to use a ThreadStatic allocation mitigator
          Adapter adapter = new Adapter (evaluator);
          BaseMachine.MatchAppendEvaluator append =
              new BaseMachine.MatchAppendEvaluator (adapter.Evaluate);
          return machine.LTRReplace (this, input, append, count, startat);
      }
      return machine.RTLReplace (this, input, evaluator, count, startat);
  }
  // Replaces matches with replacement, with a count of Int32.MaxValue,
  // starting from the default start position.
  public string Replace (string input, string replacement)
  {
      int start = default_startat (input);
      return Replace (input, replacement, Int32.MaxValue, start);
  }

  // Replaces matches with replacement using the given count, starting from
  // the default start position.
  public string Replace (string input, string replacement, int count)
  {
      int start = default_startat (input);
      return Replace (input, replacement, count, start);
  }

  // Core string-based replace; the work is delegated to a new machine.
  // Throws ArgumentNullException for a null input or replacement, and
  // ArgumentOutOfRangeException when count is below -1 or startat is
  // outside [0, input.Length].
  public string Replace (string input, string replacement, int count, int startat)
  {
      if (input == null) {
          throw new ArgumentNullException ("input");
      }
      if (replacement == null) {
          throw new ArgumentNullException ("replacement");
      }
      if (count < -1) {
          throw new ArgumentOutOfRangeException ("count");
      }
      bool startInRange = 0 <= startat && startat <= input.Length;
      if (!startInRange) {
          throw new ArgumentOutOfRangeException ("startat");
      }

      IMachine machine = CreateMachine ();
      return machine.Replace (this, input, replacement, count, startat);
  }
  419. // split methods
  // Splits input with a count of Int32.MaxValue, starting from the default
  // start position.
  public string [] Split (string input)
  {
      int start = default_startat (input);
      return Split (input, Int32.MaxValue, start);
  }

  // Splits input with the given count, starting from the default start
  // position.
  public string [] Split (string input, int count)
  {
      int start = default_startat (input);
      return Split (input, count, start);
  }

  // Core split; the work is delegated to a new machine.
  // Throws ArgumentNullException ("input") for a null input, and
  // ArgumentOutOfRangeException when count is negative or startat is
  // outside [0, input.Length].
  public string [] Split (string input, int count, int startat)
  {
      if (input == null) {
          throw new ArgumentNullException ("input");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }
      bool startInRange = 0 <= startat && startat <= input.Length;
      if (!startInRange) {
          throw new ArgumentOutOfRangeException ("startat");
      }

      IMachine machine = CreateMachine ();
      return machine.Split (this, input, count, startat);
  }
  // Called at the end of the constructor of compiled regular expression
  // classes to perform internal initialization. May run only once per
  // object; a second call throws NotSupportedException.
  protected void InitializeReferences ()
  {
      if (!refsInitialized) {
          refsInitialized = true;

          // Compile pattern that results in performance loss as existing
          // CIL code is ignored but provides support for regular
          // expressions compiled to assemblies.
          Init ();
          return;
      }
      throw new NotSupportedException ("This operation is only allowed once per object.");
  }
  #if !NET_2_1
  // True when RegexOptions.Compiled is set on this regex.
  protected bool UseOptionC ()
  {
      RegexOptions compiledBit = roptions & RegexOptions.Compiled;
      return compiledBit != RegexOptions.None;
  }
  #endif

  // True when RegexOptions.RightToLeft is set on this regex.
  protected bool UseOptionR ()
  {
      RegexOptions rtlBit = roptions & RegexOptions.RightToLeft;
      return rtlBit != RegexOptions.None;
  }
  460. // object methods
  // The string form of a regex is the pattern text stored in its
  // pattern field.
  public override string ToString ()
  {
      return this.pattern;
  }
  465. // ISerializable interface
  // Serializes the regex as two entries: "pattern" (the ToString () text)
  // and "options" (the Options value). The deserialization constructor
  // reads the same two keys.
  void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
  {
      string patternText = this.ToString ();
      RegexOptions flags = this.Options;

      info.AddValue ("pattern", patternText, typeof (string));
      info.AddValue ("options", flags, typeof (RegexOptions));
  }
  471. // internal
  // Value of group_count, as copied from the machine factory.
  internal int GroupCount {
      get { return this.group_count; }
  }

  // Value of gap, as copied from the machine factory.
  internal int Gap {
      get { return this.gap; }
  }

  // private

  // Asks the machine factory for a new machine instance; every public
  // operation obtains its machine through this method.
  private IMachine CreateMachine ()
  {
      IMachine machine = machineFactory.NewInstance ();
      return machine;
  }
  // Inverts a name -> index mapping into an array of groupCount + 1 slots
  // where slot [index] holds the name. Slots with no mapping entry stay
  // null.
  private static string [] GetGroupNamesArray (int groupCount, IDictionary mapping)
  {
      string [] namesByIndex = new string [groupCount + 1];
      foreach (DictionaryEntry entry in mapping) {
          int index = (int) entry.Value;
          namesByIndex [index] = (string) entry.Key;
      }
      return namesByIndex;
  }
  // Lazily built table of group numbers, indexed like group_names.
  // Indexes below the gap map to themselves; from the gap onwards the
  // number is parsed from the group's name.
  private int [] GroupNumbers {
      get {
          int [] numbers = group_numbers;
          if (numbers == null) {
              // Fill a local array and store it in the field only once it
              // is complete. Assigning the field first would let a
              // concurrent caller - or a later call after Int32.Parse
              // threw part-way - see a non-null but partially filled table.
              // At worst two threads each build an identical array.
              numbers = new int [1 + group_count];
              for (int i = 0; i < gap; ++i)
                  numbers [i] = i;
              for (int i = gap; i <= group_count; ++i)
                  numbers [i] = Int32.Parse (group_names [i]);
              group_numbers = numbers;
          }
          return numbers;
      }
  }
  // Factory for matching machines; set by Init / InitNewRegex and used by
  // CreateMachine.
  private IMachineFactory machineFactory;
  // Group name -> index table, copied from the machine factory.
  private IDictionary mapping;
  // Copied from the factory's GroupCount; the group tables hold
  // 1 + group_count entries.
  private int group_count;
  // Copied from the factory's Gap; group numbers below it are their own
  // index.
  private int gap;
  // Set by InitializeReferences so that it can only run once.
  private bool refsInitialized;
  // Group names by index, copied from the factory's NamesMapping.
  private string [] group_names;
  // Cache behind the GroupNumbers property; null until first use.
  private int [] group_numbers;

  // protected members
  protected internal string pattern;
  protected internal RegexOptions roptions;

  // MS undocumented members; nothing in the code visible here reads or
  // writes them.
  #if NET_2_1
  [MonoTODO]
  internal System.Collections.Generic.Dictionary<string, int> capnames;
  [MonoTODO]
  internal System.Collections.Generic.Dictionary<int, int> caps;
  #else
  [MonoTODO]
  protected internal Hashtable capnames;
  [MonoTODO]
  protected internal Hashtable caps;
  [MonoTODO]
  protected internal RegexRunnerFactory factory;
  #endif
  [MonoTODO]
  protected internal int capsize;
  [MonoTODO]
  protected internal string [] capslist;
  532. }
  533. }