compiler.cs 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: compiler.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. using System;
  9. using System.Collections;
  10. namespace System.Text.RegularExpressions {
  11. abstract class LinkRef {
  12. // empty
  13. }
  14. interface ICompiler {
  15. void Reset ();
  16. IMachineFactory GetMachineFactory ();
  17. // instruction emission
  18. void EmitFalse ();
  19. void EmitTrue ();
  20. // character matching
  21. void EmitCharacter (char c, bool negate, bool ignore, bool reverse);
  22. void EmitCategory (Category cat, bool negate, bool reverse);
  23. void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);
  24. void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);
  25. // other operators
  26. void EmitString (string str, bool ignore, bool reverse);
  27. void EmitPosition (Position pos);
  28. void EmitOpen (int gid);
  29. void EmitClose (int gid);
  30. void EmitBalance (int gid, int balance);
  31. void EmitReference (int gid, bool ignore, bool reverse);
  32. // constructs
  33. void EmitIfDefined (int gid, LinkRef tail);
  34. void EmitSub (LinkRef tail);
  35. void EmitTest (LinkRef yes, LinkRef tail);
  36. void EmitBranch (LinkRef next);
  37. void EmitJump (LinkRef target);
  38. void EmitRepeat (int min, int max, bool lazy, LinkRef until);
  39. void EmitUntil (LinkRef repeat);
  40. void EmitIn (LinkRef tail);
  41. void EmitInfo (int count, int min, int max);
  42. void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);
  43. void EmitAnchor (int offset, LinkRef tail);
  44. LinkRef NewLink ();
  45. void ResolveLink (LinkRef link);
  46. }
  47. class InterpreterFactory : IMachineFactory {
  48. public InterpreterFactory (ushort[] pattern) {
  49. this.pattern = pattern;
  50. }
  51. public IMachine NewInstance () {
  52. return new Interpreter (pattern);
  53. }
  54. public int GroupCount {
  55. get { return pattern[0]; }
  56. }
  57. public IDictionary Mapping {
  58. get { return mapping; }
  59. set { mapping = value; }
  60. }
  61. private IDictionary mapping;
  62. private ushort[] pattern;
  63. }
  64. class PatternCompiler : ICompiler {
  65. public static ushort EncodeOp (OpCode op, OpFlags flags) {
  66. return (ushort)((int)op | ((int)flags & 0xff00));
  67. }
  68. public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {
  69. op = (OpCode)(word & 0x00ff);
  70. flags = (OpFlags)(word & 0xff00);
  71. }
  72. public PatternCompiler () {
  73. pgm = new ArrayList ();
  74. }
  75. // ICompiler implementation
  76. public void Reset () {
  77. pgm.Clear ();
  78. }
  79. public IMachineFactory GetMachineFactory () {
  80. ushort[] image = new ushort[pgm.Count];
  81. pgm.CopyTo (image);
  82. return new InterpreterFactory (image);
  83. }
  84. public void EmitFalse () {
  85. Emit (OpCode.False);
  86. }
  87. public void EmitTrue () {
  88. Emit (OpCode.True);
  89. }
  90. public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
  91. Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));
  92. if (ignore)
  93. c = Char.ToLower (c);
  94. Emit ((ushort)c);
  95. }
  96. public void EmitCategory (Category cat, bool negate, bool reverse) {
  97. Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));
  98. Emit ((ushort)cat);
  99. }
  100. public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
  101. Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));
  102. Emit ((ushort)lo);
  103. Emit ((ushort)hi);
  104. }
  105. public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {
  106. Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));
  107. Emit ((ushort)lo);
  108. int len = (set.Length + 0xf) >> 4;
  109. Emit ((ushort)len);
  110. int b = 0;
  111. while (len -- != 0) {
  112. ushort word = 0;
  113. for (int i = 0; i < 16; ++ i) {
  114. if (b >= set.Length)
  115. break;
  116. if (set[b ++])
  117. word |= (ushort)(1 << i);
  118. }
  119. Emit (word);
  120. }
  121. }
  122. public void EmitString (string str, bool ignore, bool reverse) {
  123. Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));
  124. int len = str.Length;
  125. Emit ((ushort)len);
  126. if (ignore)
  127. str = str.ToLower ();
  128. for (int i = 0; i < len; ++ i)
  129. Emit ((ushort)str[i]);
  130. }
  131. public void EmitPosition (Position pos) {
  132. Emit (OpCode.Position, 0);
  133. Emit ((ushort)pos);
  134. }
  135. public void EmitOpen (int gid) {
  136. Emit (OpCode.Open);
  137. Emit ((ushort)gid);
  138. }
  139. public void EmitClose (int gid) {
  140. Emit (OpCode.Close);
  141. Emit ((ushort)gid);
  142. }
  143. public void EmitBalance (int gid, int balance) {
  144. Emit (OpCode.Balance);
  145. Emit ((ushort)gid);
  146. Emit ((ushort)balance);
  147. }
  148. public void EmitReference (int gid, bool ignore, bool reverse) {
  149. Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));
  150. Emit ((ushort)gid);
  151. }
  152. public void EmitIfDefined (int gid, LinkRef tail) {
  153. BeginLink (tail);
  154. Emit (OpCode.IfDefined);
  155. EmitLink (tail);
  156. Emit ((ushort)gid);
  157. }
  158. public void EmitSub (LinkRef tail) {
  159. BeginLink (tail);
  160. Emit (OpCode.Sub);
  161. EmitLink (tail);
  162. }
  163. public void EmitTest (LinkRef yes, LinkRef tail) {
  164. BeginLink (yes);
  165. BeginLink (tail);
  166. Emit (OpCode.Test);
  167. EmitLink (yes);
  168. EmitLink (tail);
  169. }
  170. public void EmitBranch (LinkRef next) {
  171. BeginLink (next);
  172. Emit (OpCode.Branch, 0);
  173. EmitLink (next);
  174. }
  175. public void EmitJump (LinkRef target) {
  176. BeginLink (target);
  177. Emit (OpCode.Jump, 0);
  178. EmitLink (target);
  179. }
  180. public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {
  181. BeginLink (until);
  182. Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));
  183. EmitLink (until);
  184. Emit ((ushort)min);
  185. Emit ((ushort)max);
  186. }
  187. public void EmitUntil (LinkRef repeat) {
  188. ResolveLink (repeat);
  189. Emit (OpCode.Until);
  190. }
  191. public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {
  192. BeginLink (tail);
  193. Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy));
  194. EmitLink (tail);
  195. Emit ((ushort)min);
  196. Emit ((ushort)max);
  197. }
  198. public void EmitIn (LinkRef tail) {
  199. BeginLink (tail);
  200. Emit (OpCode.In);
  201. EmitLink (tail);
  202. }
  203. public void EmitAnchor (int offset, LinkRef tail) {
  204. BeginLink (tail);
  205. Emit (OpCode.Anchor);
  206. EmitLink (tail);
  207. Emit ((ushort)offset);
  208. }
  209. public void EmitInfo (int count, int min, int max) {
  210. Emit (OpCode.Info);
  211. Emit ((ushort)count);
  212. Emit ((ushort)min);
  213. Emit ((ushort)max);
  214. }
  215. public LinkRef NewLink () {
  216. return new PatternLinkStack ();
  217. }
  218. public void ResolveLink (LinkRef lref) {
  219. PatternLinkStack stack = (PatternLinkStack)lref;
  220. while (stack.Pop ())
  221. pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress);
  222. }
  223. // private members
  224. private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
  225. OpFlags flags = 0;
  226. if (negate) flags |= OpFlags.Negate;
  227. if (ignore) flags |= OpFlags.IgnoreCase;
  228. if (reverse) flags |= OpFlags.RightToLeft;
  229. if (lazy) flags |= OpFlags.Lazy;
  230. return flags;
  231. }
  232. private void Emit (OpCode op) {
  233. Emit (op, (OpFlags)0);
  234. }
  235. private void Emit (OpCode op, OpFlags flags) {
  236. Emit (EncodeOp (op, flags));
  237. }
  238. private void Emit (ushort word) {
  239. pgm.Add (word);
  240. }
  241. private int CurrentAddress {
  242. get { return pgm.Count; }
  243. }
  244. private void BeginLink (LinkRef lref) {
  245. PatternLinkStack stack = (PatternLinkStack)lref;
  246. stack.BaseAddress = CurrentAddress;
  247. }
  248. private void EmitLink (LinkRef lref) {
  249. PatternLinkStack stack = (PatternLinkStack)lref;
  250. stack.OffsetAddress = CurrentAddress;
  251. Emit ((ushort)0); // placeholder
  252. stack.Push ();
  253. }
  254. private class PatternLinkStack : LinkStack {
  255. public PatternLinkStack () {
  256. }
  257. public int BaseAddress {
  258. set { link.base_addr = value; }
  259. }
  260. public int OffsetAddress {
  261. get { return link.offset_addr; }
  262. set { link.offset_addr = value; }
  263. }
  264. public int GetOffset (int target_addr) {
  265. return target_addr - link.base_addr;
  266. }
  267. // LinkStack implementation
  268. protected override object GetCurrent () { return link; }
  269. protected override void SetCurrent (object l) { link = (Link)l; }
  270. private struct Link {
  271. public int base_addr;
  272. public int offset_addr;
  273. }
  274. Link link;
  275. }
  276. private ArrayList pgm;
  277. }
  278. abstract class LinkStack : LinkRef {
  279. public LinkStack () {
  280. stack = new Stack ();
  281. }
  282. public void Push () {
  283. stack.Push (GetCurrent ());
  284. }
  285. public bool Pop () {
  286. if (stack.Count > 0) {
  287. SetCurrent (stack.Pop ());
  288. return true;
  289. }
  290. return false;
  291. }
  292. protected abstract object GetCurrent ();
  293. protected abstract void SetCurrent (object l);
  294. private Stack stack;
  295. }
  296. }