compiler.cs 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: compiler.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. using System;
  29. using System.Collections;
  30. namespace System.Text.RegularExpressions {
  31. abstract class LinkRef {
  32. // empty
  33. }
  34. interface ICompiler {
  35. void Reset ();
  36. IMachineFactory GetMachineFactory ();
  37. // instruction emission
  38. void EmitFalse ();
  39. void EmitTrue ();
  40. // character matching
  41. void EmitCharacter (char c, bool negate, bool ignore, bool reverse);
  42. void EmitCategory (Category cat, bool negate, bool reverse);
  43. void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);
  44. void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);
  45. // other operators
  46. void EmitString (string str, bool ignore, bool reverse);
  47. void EmitPosition (Position pos);
  48. void EmitOpen (int gid);
  49. void EmitClose (int gid);
  50. void EmitBalanceStart(int gid, int balance, bool capture, LinkRef tail);
  51. void EmitBalance ();
  52. void EmitReference (int gid, bool ignore, bool reverse);
  53. // constructs
  54. void EmitIfDefined (int gid, LinkRef tail);
  55. void EmitSub (LinkRef tail);
  56. void EmitTest (LinkRef yes, LinkRef tail);
  57. void EmitBranch (LinkRef next);
  58. void EmitJump (LinkRef target);
  59. void EmitRepeat (int min, int max, bool lazy, LinkRef until);
  60. void EmitUntil (LinkRef repeat);
  61. void EmitIn (LinkRef tail);
  62. void EmitInfo (int count, int min, int max);
  63. void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);
  64. void EmitAnchor (bool reverse, int offset, LinkRef tail);
  65. // event for the CILCompiler
  66. void EmitBranchEnd();
  67. void EmitAlternationEnd();
  68. LinkRef NewLink ();
  69. void ResolveLink (LinkRef link);
  70. }
  71. class InterpreterFactory : IMachineFactory {
  72. public InterpreterFactory (ushort[] pattern) {
  73. this.pattern = pattern;
  74. }
  75. public IMachine NewInstance () {
  76. return new Interpreter (pattern);
  77. }
  78. public int GroupCount {
  79. get { return pattern[1]; }
  80. }
  81. public IDictionary Mapping {
  82. get { return mapping; }
  83. set { mapping = value; }
  84. }
  85. private IDictionary mapping;
  86. private ushort[] pattern;
  87. }
  88. class PatternCompiler : ICompiler {
  89. public static ushort EncodeOp (OpCode op, OpFlags flags) {
  90. return (ushort)((int)op | ((int)flags & 0xff00));
  91. }
  92. public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {
  93. op = (OpCode)(word & 0x00ff);
  94. flags = (OpFlags)(word & 0xff00);
  95. }
  96. public PatternCompiler () {
  97. pgm = new ArrayList ();
  98. }
  99. // ICompiler implementation
  100. public void Reset () {
  101. pgm.Clear ();
  102. }
  103. public IMachineFactory GetMachineFactory () {
  104. ushort[] image = new ushort[pgm.Count];
  105. pgm.CopyTo (image);
  106. return new InterpreterFactory (image);
  107. }
  108. public void EmitFalse () {
  109. Emit (OpCode.False);
  110. }
  111. public void EmitTrue () {
  112. Emit (OpCode.True);
  113. }
  114. public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
  115. Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));
  116. if (ignore)
  117. c = Char.ToLower (c);
  118. Emit ((ushort)c);
  119. }
  120. public void EmitCategory (Category cat, bool negate, bool reverse) {
  121. Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));
  122. Emit ((ushort)cat);
  123. }
  124. public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
  125. Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));
  126. Emit ((ushort)lo);
  127. Emit ((ushort)hi);
  128. }
  129. public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {
  130. Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));
  131. Emit ((ushort)lo);
  132. int len = (set.Length + 0xf) >> 4;
  133. Emit ((ushort)len);
  134. int b = 0;
  135. while (len -- != 0) {
  136. ushort word = 0;
  137. for (int i = 0; i < 16; ++ i) {
  138. if (b >= set.Length)
  139. break;
  140. if (set[b ++])
  141. word |= (ushort)(1 << i);
  142. }
  143. Emit (word);
  144. }
  145. }
  146. public void EmitString (string str, bool ignore, bool reverse) {
  147. Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));
  148. int len = str.Length;
  149. Emit ((ushort)len);
  150. if (ignore)
  151. str = str.ToLower ();
  152. for (int i = 0; i < len; ++ i)
  153. Emit ((ushort)str[i]);
  154. }
  155. public void EmitPosition (Position pos) {
  156. Emit (OpCode.Position, 0);
  157. Emit ((ushort)pos);
  158. }
  159. public void EmitOpen (int gid) {
  160. Emit (OpCode.Open);
  161. Emit ((ushort)gid);
  162. }
  163. public void EmitClose (int gid) {
  164. Emit (OpCode.Close);
  165. Emit ((ushort)gid);
  166. }
  167. public void EmitBalanceStart (int gid, int balance, bool capture, LinkRef tail) {
  168. BeginLink (tail);
  169. Emit (OpCode.BalanceStart);
  170. Emit ((ushort)gid);
  171. Emit ((ushort)balance);
  172. Emit ((ushort)(capture ? 1 : 0));
  173. EmitLink (tail);
  174. }
  175. public void EmitBalance () {
  176. Emit (OpCode.Balance);
  177. }
  178. public void EmitReference (int gid, bool ignore, bool reverse) {
  179. Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));
  180. Emit ((ushort)gid);
  181. }
  182. public void EmitIfDefined (int gid, LinkRef tail) {
  183. BeginLink (tail);
  184. Emit (OpCode.IfDefined);
  185. EmitLink (tail);
  186. Emit ((ushort)gid);
  187. }
  188. public void EmitSub (LinkRef tail) {
  189. BeginLink (tail);
  190. Emit (OpCode.Sub);
  191. EmitLink (tail);
  192. }
  193. public void EmitTest (LinkRef yes, LinkRef tail) {
  194. BeginLink (yes);
  195. BeginLink (tail);
  196. Emit (OpCode.Test);
  197. EmitLink (yes);
  198. EmitLink (tail);
  199. }
  200. public void EmitBranch (LinkRef next) {
  201. BeginLink (next);
  202. Emit (OpCode.Branch, 0);
  203. EmitLink (next);
  204. }
  205. public void EmitJump (LinkRef target) {
  206. BeginLink (target);
  207. Emit (OpCode.Jump, 0);
  208. EmitLink (target);
  209. }
  210. public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {
  211. BeginLink (until);
  212. Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));
  213. EmitLink (until);
  214. Emit ((ushort)min);
  215. Emit ((ushort)max);
  216. }
  217. public void EmitUntil (LinkRef repeat) {
  218. ResolveLink (repeat);
  219. Emit (OpCode.Until);
  220. }
  221. public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {
  222. BeginLink (tail);
  223. Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy));
  224. EmitLink (tail);
  225. Emit ((ushort)min);
  226. Emit ((ushort)max);
  227. }
  228. public void EmitIn (LinkRef tail) {
  229. BeginLink (tail);
  230. Emit (OpCode.In);
  231. EmitLink (tail);
  232. }
  233. public void EmitAnchor (bool reverse, int offset, LinkRef tail) {
  234. BeginLink (tail);
  235. Emit (OpCode.Anchor, MakeFlags(false, false, reverse, false));
  236. EmitLink (tail);
  237. Emit ((ushort)offset);
  238. }
  239. public void EmitInfo (int count, int min, int max) {
  240. Emit (OpCode.Info);
  241. Emit ((ushort)count);
  242. Emit ((ushort)min);
  243. Emit ((ushort)max);
  244. }
  245. public LinkRef NewLink () {
  246. return new PatternLinkStack ();
  247. }
  248. public void ResolveLink (LinkRef lref) {
  249. PatternLinkStack stack = (PatternLinkStack)lref;
  250. while (stack.Pop ())
  251. pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress);
  252. }
  253. public void EmitBranchEnd(){}
  254. public void EmitAlternationEnd(){}
  255. // private members
  256. private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
  257. OpFlags flags = 0;
  258. if (negate) flags |= OpFlags.Negate;
  259. if (ignore) flags |= OpFlags.IgnoreCase;
  260. if (reverse) flags |= OpFlags.RightToLeft;
  261. if (lazy) flags |= OpFlags.Lazy;
  262. return flags;
  263. }
  264. private void Emit (OpCode op) {
  265. Emit (op, (OpFlags)0);
  266. }
  267. private void Emit (OpCode op, OpFlags flags) {
  268. Emit (EncodeOp (op, flags));
  269. }
  270. private void Emit (ushort word) {
  271. pgm.Add (word);
  272. }
  273. private int CurrentAddress {
  274. get { return pgm.Count; }
  275. }
  276. private void BeginLink (LinkRef lref) {
  277. PatternLinkStack stack = (PatternLinkStack)lref;
  278. stack.BaseAddress = CurrentAddress;
  279. }
  280. private void EmitLink (LinkRef lref) {
  281. PatternLinkStack stack = (PatternLinkStack)lref;
  282. stack.OffsetAddress = CurrentAddress;
  283. Emit ((ushort)0); // placeholder
  284. stack.Push ();
  285. }
  286. private class PatternLinkStack : LinkStack {
  287. public PatternLinkStack () {
  288. }
  289. public int BaseAddress {
  290. set { link.base_addr = value; }
  291. }
  292. public int OffsetAddress {
  293. get { return link.offset_addr; }
  294. set { link.offset_addr = value; }
  295. }
  296. public int GetOffset (int target_addr) {
  297. return target_addr - link.base_addr;
  298. }
  299. // LinkStack implementation
  300. protected override object GetCurrent () { return link; }
  301. protected override void SetCurrent (object l) { link = (Link)l; }
  302. private struct Link {
  303. public int base_addr;
  304. public int offset_addr;
  305. }
  306. Link link;
  307. }
  308. private ArrayList pgm;
  309. }
  310. abstract class LinkStack : LinkRef {
  311. public LinkStack () {
  312. stack = new Stack ();
  313. }
  314. public void Push () {
  315. stack.Push (GetCurrent ());
  316. }
  317. public bool Pop () {
  318. if (stack.Count > 0) {
  319. SetCurrent (stack.Pop ());
  320. return true;
  321. }
  322. return false;
  323. }
  324. protected abstract object GetCurrent ();
  325. protected abstract void SetCurrent (object l);
  326. private Stack stack;
  327. }
  328. //Used by CILCompiler and Interpreter
  329. internal struct Mark {
  330. public int Start, End;
  331. public int Previous;
  332. public bool IsDefined {
  333. get { return Start >= 0 && End >= 0; }
  334. }
  335. public int Index {
  336. get { return Start < End ? Start : End; }
  337. }
  338. public int Length {
  339. get { return Start < End ? End - Start : Start - End; }
  340. }
  341. }
  342. }