interpreter.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: interpreter.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. using System;
  9. using System.Collections;
  10. using System.Diagnostics;
  11. using System.Globalization;
  12. namespace System.Text.RegularExpressions {
  13. class Interpreter : IMachine {
  14. public Interpreter (ushort[] program) {
  15. this.program = program;
  16. this.qs = null;
  17. // process info block
  18. Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
  19. this.group_count = program[1] + 1;
  20. this.match_min = program[2];
  21. this.match_max = program[3];
  22. // setup
  23. this.program_start = 4;
  24. this.groups = new int [group_count];
  25. }
  26. // IMachine implementation
  27. public Match Scan (Regex regex, string text, int start, int end) {
  28. this.text = text;
  29. this.text_end = end;
  30. this.scan_ptr = start;
  31. if (Eval (Mode.Match, ref scan_ptr, program_start))
  32. return GenerateMatch (regex);
  33. return Match.Empty;
  34. }
  35. // private methods
  36. private void Reset () {
  37. ResetGroups ();
  38. fast = repeat = null;
  39. }
  40. private bool Eval (Mode mode, ref int ref_ptr, int pc) {
  41. int ptr = ref_ptr;
  42. Begin:
  43. for (;;) {
  44. ushort word = program[pc];
  45. OpCode op = (OpCode)(word & 0x00ff);
  46. OpFlags flags = (OpFlags)(word & 0xff00);
  47. switch (op) {
  48. case OpCode.Anchor: {
  49. int skip = program[pc + 1];
  50. int anch_offset = program[pc + 2];
  51. int anch_ptr = ptr + anch_offset;
  52. int anch_end = text_end - match_min + anch_offset; // maximum anchor position
  53. // the general case for an anchoring expression is at the bottom, however we
  54. // do some checks for the common cases before to save processing time. the current
  55. // optimizer only outputs three types of anchoring expressions: fixed position,
  56. // fixed substring, and no anchor.
  57. OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);
  58. if (anch_op == OpCode.Position && skip == 6) { // position anchor
  59. // Anchor
  60. // Position
  61. // True
  62. switch ((Position)program[pc + 4]) {
  63. case Position.StartOfString:
  64. if (anch_ptr == 0) {
  65. ptr = 0;
  66. if (TryMatch (ref ptr, pc + skip))
  67. goto Pass;
  68. }
  69. break;
  70. case Position.StartOfLine:
  71. if (anch_ptr == 0) {
  72. ptr = 0;
  73. if (TryMatch (ref ptr, pc + skip))
  74. goto Pass;
  75. ++ anch_ptr;
  76. }
  77. while (anch_ptr <= anch_end) {
  78. if (text[anch_ptr - 1] == '\n') {
  79. ptr = anch_ptr - anch_offset;
  80. if (TryMatch (ref ptr, pc + skip))
  81. goto Pass;
  82. }
  83. ++ anch_ptr;
  84. }
  85. break;
  86. case Position.StartOfScan:
  87. if (anch_ptr == scan_ptr) {
  88. ptr = scan_ptr - anch_offset;
  89. if (TryMatch (ref ptr, pc + skip))
  90. goto Pass;
  91. }
  92. break;
  93. default:
  94. // FIXME
  95. break;
  96. }
  97. }
  98. else if (qs != null ||
  99. (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor
  100. // Anchor
  101. // String
  102. // True
  103. if (qs == null) {
  104. bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
  105. string substring = GetString (pc + 3);
  106. qs = new QuickSearch (substring, ignore);
  107. }
  108. while (anch_ptr <= anch_end) {
  109. anch_ptr = qs.Search (text, anch_ptr, anch_end);
  110. if (anch_ptr < 0)
  111. break;
  112. ptr = anch_ptr - anch_offset;
  113. if (TryMatch (ref ptr, pc + skip))
  114. goto Pass;
  115. ++ anch_ptr;
  116. }
  117. }
  118. else if (anch_op == OpCode.True) { // no anchor
  119. // Anchor
  120. // True
  121. while (anch_ptr <= anch_end) {
  122. ptr = anch_ptr;
  123. if (TryMatch (ref ptr, pc + skip))
  124. goto Pass;
  125. ++ anch_ptr;
  126. }
  127. }
  128. else { // general case
  129. // Anchor
  130. // <expr>
  131. // True
  132. while (anch_ptr <= anch_end) {
  133. ptr = anch_ptr;
  134. if (Eval (Mode.Match, ref ptr, pc + 3)) {
  135. // anchor expression passed: try real expression at the correct offset
  136. ptr = anch_ptr - anch_offset;
  137. if (TryMatch (ref ptr, pc + skip))
  138. goto Pass;
  139. }
  140. ++ anch_ptr;
  141. }
  142. }
  143. goto Fail;
  144. }
  145. case OpCode.False: {
  146. goto Fail;
  147. }
  148. case OpCode.True: {
  149. goto Pass;
  150. }
  151. case OpCode.Position: {
  152. if (!IsPosition ((Position)program[pc + 1], ptr))
  153. goto Fail;
  154. pc += 2;
  155. break;
  156. }
  157. case OpCode.String: {
  158. bool reverse = (flags & OpFlags.RightToLeft) != 0;
  159. bool ignore = (flags & OpFlags.IgnoreCase) != 0;
  160. int len = program[pc + 1];
  161. if (reverse) {
  162. ptr -= len;
  163. if (ptr < 0)
  164. goto Fail;
  165. }
  166. else if (ptr + len > text_end)
  167. goto Fail;
  168. pc += 2;
  169. for (int i = 0; i < len; ++ i) {
  170. char c = text[ptr + i];
  171. if (ignore)
  172. c = Char.ToLower (c);
  173. if (c != (char)program[pc ++])
  174. goto Fail;
  175. }
  176. if (!reverse)
  177. ptr += len;
  178. break;
  179. }
  180. case OpCode.Reference: {
  181. bool reverse = (flags & OpFlags.RightToLeft) != 0;
  182. bool ignore = (flags & OpFlags.IgnoreCase) != 0;
  183. int m = GetLastDefined (program [pc + 1]);
  184. if (m < 0)
  185. goto Fail;
  186. int str = marks [m].Index;
  187. int len = marks [m].Length;
  188. if (reverse) {
  189. ptr -= len;
  190. if (ptr < 0)
  191. goto Fail;
  192. }
  193. else if (ptr + len > text_end)
  194. goto Fail;
  195. pc += 2;
  196. for (int i = 0; i < len; ++ i) {
  197. if (ignore) {
  198. if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
  199. goto Fail;
  200. }
  201. else {
  202. if (text[ptr + i] != text[str + i])
  203. goto Fail;
  204. }
  205. }
  206. if (!reverse)
  207. ptr += len;
  208. break;
  209. }
  210. case OpCode.Character: case OpCode.Category:
  211. case OpCode.Range: case OpCode.Set: {
  212. if (!EvalChar (mode, ref ptr, ref pc, false))
  213. goto Fail;
  214. break;
  215. }
  216. case OpCode.In: {
  217. int target = pc + program[pc + 1];
  218. pc += 2;
  219. if (!EvalChar (mode, ref ptr, ref pc, true))
  220. goto Fail;
  221. pc = target;
  222. break;
  223. }
  224. case OpCode.Open: {
  225. Open (program[pc + 1], ptr);
  226. pc += 2;
  227. break;
  228. }
  229. case OpCode.Close: {
  230. Close (program[pc + 1], ptr);
  231. pc += 2;
  232. break;
  233. }
  234. case OpCode.Balance: {
  235. Balance (program[pc + 1], program[pc + 2], ptr);
  236. break;
  237. }
  238. case OpCode.IfDefined: {
  239. int m = GetLastDefined (program [pc + 2]);
  240. if (m < 0)
  241. pc += program[pc + 1];
  242. else
  243. pc += 3;
  244. break;
  245. }
  246. case OpCode.Sub: {
  247. if (!Eval (Mode.Match, ref ptr, pc + 2))
  248. goto Fail;
  249. pc += program[pc + 1];
  250. break;
  251. }
  252. case OpCode.Test: {
  253. int cp = Checkpoint ();
  254. int test_ptr = ptr;
  255. if (Eval (Mode.Match, ref test_ptr, pc + 3))
  256. pc += program[pc + 1];
  257. else {
  258. Backtrack (cp);
  259. pc += program[pc + 2];
  260. }
  261. break;
  262. }
  263. case OpCode.Branch: {
  264. OpCode branch_op;
  265. do {
  266. int cp = Checkpoint ();
  267. if (Eval (Mode.Match, ref ptr, pc + 2))
  268. goto Pass;
  269. Backtrack (cp);
  270. pc += program[pc + 1];
  271. branch_op = (OpCode)(program[pc] & 0xff);
  272. } while (branch_op != OpCode.False);
  273. goto Fail;
  274. }
  275. case OpCode.Jump: {
  276. pc += program[pc + 1];
  277. break;
  278. }
  279. case OpCode.Repeat: {
  280. this.repeat = new RepeatContext (
  281. this.repeat, // previous context
  282. program[pc + 2], // minimum
  283. program[pc + 3], // maximum
  284. (flags & OpFlags.Lazy) != 0, // lazy
  285. pc + 4 // subexpression
  286. );
  287. if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
  288. goto Pass;
  289. else {
  290. this.repeat = this.repeat.Previous;
  291. goto Fail;
  292. }
  293. }
  294. case OpCode.Until: {
  295. RepeatContext current = this.repeat;
  296. int start = current.Start;
  297. if (!current.IsMinimum) {
  298. ++ current.Count;
  299. current.Start = ptr;
  300. if (Eval (Mode.Match, ref ptr, repeat.Expression))
  301. goto Pass;
  302. current.Start = start;
  303. -- current.Count;
  304. goto Fail;
  305. }
  306. if (ptr == current.Start) {
  307. // degenerate match ... match tail or fail
  308. this.repeat = current.Previous;
  309. if (Eval (Mode.Match, ref ptr, pc + 1))
  310. goto Pass;
  311. goto Fail;
  312. }
  313. if (current.IsLazy) {
  314. // match tail first ...
  315. this.repeat = current.Previous;
  316. int cp = Checkpoint ();
  317. if (Eval (Mode.Match, ref ptr, pc + 1))
  318. goto Pass;
  319. Backtrack (cp);
  320. // ... then match more
  321. this.repeat = current;
  322. if (!current.IsMaximum) {
  323. ++ current.Count;
  324. current.Start = ptr;
  325. if (Eval (Mode.Match, ref ptr, current.Expression))
  326. goto Pass;
  327. current.Start = start;
  328. -- current.Count;
  329. goto Fail;
  330. }
  331. return false;
  332. }
  333. else {
  334. // match more first ...
  335. if (!current.IsMaximum) {
  336. int cp = Checkpoint ();
  337. ++ current.Count;
  338. current.Start = ptr;
  339. if (Eval (Mode.Match, ref ptr, current.Expression))
  340. goto Pass;
  341. current.Start = start;
  342. -- current.Count;
  343. Backtrack (cp);
  344. }
  345. // ... then match tail
  346. this.repeat = current.Previous;
  347. if (Eval (Mode.Match, ref ptr, pc + 1))
  348. goto Pass;
  349. this.repeat = current;
  350. goto Fail;
  351. }
  352. }
  353. case OpCode.FastRepeat: {
  354. this.fast = new RepeatContext (
  355. fast,
  356. program[pc + 2], // minimum
  357. program[pc + 3], // maximum
  358. (flags & OpFlags.Lazy) != 0, // lazy
  359. pc + 4 // subexpression
  360. );
  361. fast.Start = ptr;
  362. int cp = Checkpoint ();
  363. pc += program[pc + 1]; // tail expression
  364. ushort tail_word = program[pc];
  365. int c1, c2; // first character of tail operator
  366. int coff; // 0 or -1 depending on direction
  367. OpCode tail_op = (OpCode)(tail_word & 0xff);
  368. if (tail_op == OpCode.Character || tail_op == OpCode.String) {
  369. OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
  370. if (tail_op == OpCode.String)
  371. c1 = program[pc + 2]; // first char of string
  372. else
  373. c1 = program[pc + 1]; // character
  374. if ((tail_flags & OpFlags.IgnoreCase) != 0)
  375. c2 = Char.ToUpper ((char)c1); // ignore case
  376. else
  377. c2 = c1;
  378. if ((tail_flags & OpFlags.RightToLeft) != 0)
  379. coff = -1; // reverse
  380. else
  381. coff = 0;
  382. }
  383. else {
  384. c1 = c2 = -1;
  385. coff = 0;
  386. }
  387. if (fast.IsLazy) {
  388. if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
  389. //Console.WriteLine ("lazy fast: failed mininum.");
  390. fast = fast.Previous;
  391. goto Fail;
  392. }
  393. while (true) {
  394. int p = ptr + coff;
  395. if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
  396. Eval (Mode.Match, ref ptr, pc))
  397. break;
  398. if (fast.IsMaximum) {
  399. //Console.WriteLine ("lazy fast: failed with maximum.");
  400. fast = fast.Previous;
  401. goto Fail;
  402. }
  403. Backtrack (cp);
  404. if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
  405. //Console.WriteLine ("lazy fast: no more.");
  406. fast = fast.Previous;
  407. goto Fail;
  408. }
  409. }
  410. fast = fast.Previous;
  411. goto Pass;
  412. }
  413. else {
  414. if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
  415. fast = fast.Previous;
  416. goto Fail;
  417. }
  418. int width;
  419. if (fast.Count > 0)
  420. width = (ptr - fast.Start) / fast.Count;
  421. else
  422. width = 0;
  423. while (true) {
  424. int p = ptr + coff;
  425. if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
  426. Eval (Mode.Match, ref ptr, pc))
  427. break;
  428. -- fast.Count;
  429. if (!fast.IsMinimum) {
  430. fast = fast.Previous;
  431. goto Fail;
  432. }
  433. ptr -= width;
  434. Backtrack (cp);
  435. }
  436. fast = fast.Previous;
  437. goto Pass;
  438. }
  439. }
  440. case OpCode.Info: {
  441. Debug.Assert (false, "Regex", "Info block found in pattern");
  442. goto Fail;
  443. }
  444. }
  445. }
  446. Pass:
  447. ref_ptr = ptr;
  448. switch (mode) {
  449. case Mode.Match:
  450. return true;
  451. case Mode.Count: {
  452. ++ fast.Count;
  453. if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
  454. return true;
  455. pc = fast.Expression;
  456. goto Begin;
  457. }
  458. }
  459. Fail:
  460. switch (mode) {
  461. case Mode.Match:
  462. return false;
  463. case Mode.Count: {
  464. if (!fast.IsLazy && fast.IsMinimum)
  465. return true;
  466. ref_ptr = fast.Start;
  467. return false;
  468. }
  469. }
  470. return false;
  471. }
  472. private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
  473. bool consumed = false;
  474. char c = '\0';
  475. bool negate;
  476. bool ignore;
  477. do {
  478. ushort word = program[pc];
  479. OpCode op = (OpCode)(word & 0x00ff);
  480. OpFlags flags = (OpFlags)(word & 0xff00);
  481. ++ pc;
  482. ignore = (flags & OpFlags.IgnoreCase) != 0;
  483. // consume character: the direction of an In construct is
  484. // determined by the direction of its first op
  485. if (!consumed) {
  486. if ((flags & OpFlags.RightToLeft) != 0) {
  487. if (ptr <= 0)
  488. return false;
  489. c = text[-- ptr];
  490. }
  491. else {
  492. if (ptr >= text_end)
  493. return false;
  494. c = text[ptr ++];
  495. }
  496. if (ignore)
  497. c = Char.ToLower (c);
  498. consumed = true;
  499. }
  500. // negate flag
  501. negate = (flags & OpFlags.Negate) != 0;
  502. // execute op
  503. switch (op) {
  504. case OpCode.True:
  505. return true;
  506. case OpCode.False:
  507. return false;
  508. case OpCode.Character: {
  509. if (c == (char)program[pc ++])
  510. return !negate;
  511. break;
  512. }
  513. case OpCode.Category: {
  514. if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
  515. return !negate;
  516. break;
  517. }
  518. case OpCode.Range: {
  519. int lo = (char)program[pc ++];
  520. int hi = (char)program[pc ++];
  521. if (lo <= c && c <= hi)
  522. return !negate;
  523. break;
  524. }
  525. case OpCode.Set: {
  526. int lo = (char)program[pc ++];
  527. int len = (char)program[pc ++];
  528. int bits = pc;
  529. pc += len;
  530. int i = (int)c - lo;
  531. if (i < 0 || i >= len << 4)
  532. break;
  533. if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
  534. return !negate;
  535. break;
  536. }
  537. }
  538. } while (multi);
  539. return negate;
  540. }
  541. private bool TryMatch (ref int ref_ptr, int pc) {
  542. Reset ();
  543. int ptr = ref_ptr;
  544. marks [groups [0]].Start = ptr;
  545. if (Eval (Mode.Match, ref ptr, pc)) {
  546. marks [groups [0]].End = ptr;
  547. ref_ptr = ptr;
  548. return true;
  549. }
  550. return false;
  551. }
  552. private bool IsPosition (Position pos, int ptr) {
  553. switch (pos) {
  554. case Position.Start: case Position.StartOfString:
  555. return ptr == 0;
  556. case Position.StartOfLine:
  557. return ptr == 0 || text[ptr - 1] == '\n';
  558. case Position.StartOfScan:
  559. return ptr == scan_ptr;
  560. case Position.End:
  561. return ptr == text_end ||
  562. (ptr == text_end - 1 && text[ptr] == '\n');
  563. case Position.EndOfLine:
  564. return ptr == text_end || text[ptr] == '\n';
  565. case Position.EndOfString:
  566. return ptr == text_end;
  567. case Position.Boundary:
  568. if (text_end == 0)
  569. return false;
  570. if (ptr == 0)
  571. return IsWordChar (text[ptr]);
  572. else if (ptr == text_end)
  573. return IsWordChar (text[ptr - 1]);
  574. else
  575. return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
  576. case Position.NonBoundary:
  577. if (text_end == 0)
  578. return false;
  579. if (ptr == 0)
  580. return !IsWordChar (text[ptr]);
  581. else if (ptr == text_end)
  582. return !IsWordChar (text[ptr - 1]);
  583. else
  584. return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
  585. default:
  586. return false;
  587. }
  588. }
  589. private bool IsWordChar (char c) {
  590. return CategoryUtils.IsCategory (Category.Word, c);
  591. }
  592. private string GetString (int pc) {
  593. int len = program[pc + 1];
  594. int str = pc + 2;
  595. char[] cs = new char[len];
  596. for (int i = 0; i < len; ++ i)
  597. cs[i] = (char)program[str ++];
  598. return new string (cs);
  599. }
  600. // capture management
  601. private void Open (int gid, int ptr) {
  602. int m = groups [gid];
  603. if (m < mark_start || marks [m].IsDefined) {
  604. m = CreateMark (m);
  605. groups [gid] = m;
  606. }
  607. marks [m].Start = ptr;
  608. }
  609. private void Close (int gid, int ptr) {
  610. marks [groups [gid]].End = ptr;
  611. }
  612. private void Balance (int gid, int balance_gid, int ptr) {
  613. int b = groups [balance_gid];
  614. Debug.Assert (marks [b].IsDefined, "Regex", "Balancing group not closed");
  615. if (gid > 0) {
  616. Open (gid, marks [b].Index + marks [b].Length);
  617. Close (gid, ptr);
  618. }
  619. groups [balance_gid] = marks [b].Previous;
  620. }
  621. private int Checkpoint () {
  622. mark_start = mark_end;
  623. return mark_start;
  624. }
  625. private void Backtrack (int cp) {
  626. Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
  627. for (int i = 0; i < groups.Length; ++ i) {
  628. int m = groups [i];
  629. while (cp <= m)
  630. m = marks [m].Previous;
  631. groups [i] = m;
  632. }
  633. }
  634. private void ResetGroups () {
  635. int n = groups.Length;
  636. if (marks == null)
  637. marks = new Mark [n * 10];
  638. for (int i = 0; i < n; ++ i) {
  639. groups [i] = i;
  640. marks [i].Start = -1;
  641. marks [i].End = -1;
  642. marks [i].Previous = -1;
  643. }
  644. mark_start = 0;
  645. mark_end = n;
  646. }
  647. private int GetLastDefined (int gid) {
  648. int m = groups [gid];
  649. while (m >= 0 && !marks [m].IsDefined)
  650. m = marks [m].Previous;
  651. return m;
  652. }
  653. private int CreateMark (int previous) {
  654. if (mark_end == marks.Length) {
  655. Mark [] dest = new Mark [marks.Length * 2];
  656. marks.CopyTo (dest, 0);
  657. marks = dest;
  658. }
  659. int m = mark_end ++;
  660. marks [m].Start = marks [m].End = -1;
  661. marks [m].Previous = previous;
  662. return m;
  663. }
  664. private Match GenerateMatch (Regex regex) {
  665. int[][] grps = new int[groups.Length][];
  666. ArrayList caps = new ArrayList ();
  667. for (int gid = 0; gid < groups.Length; ++ gid) {
  668. caps.Clear ();
  669. for (int m = groups[gid]; m >= 0; m = marks[m].Previous) {
  670. if (!marks[m].IsDefined)
  671. continue;
  672. caps.Add (marks[m].Index);
  673. caps.Add (marks[m].Length);
  674. }
  675. grps[gid] = (int[])caps.ToArray (typeof (int));
  676. }
  677. return new Match (regex, this, text, text_end, grps);
  678. }
  679. // interpreter attributes
  680. private ushort[] program; // regex program
  681. private int program_start; // first instruction after info block
  682. private string text; // input text
  683. private int text_end; // end of input text (last character + 1)
  684. private int group_count; // number of capturing groups
  685. private int match_min, match_max; // match width information
  686. private QuickSearch qs; // fast substring matcher
  687. // match state
  688. private int scan_ptr; // start of scan
  689. private RepeatContext repeat; // current repeat context
  690. private RepeatContext fast; // fast repeat context
  691. private Mark[] marks = null; // mark stack
  692. private int mark_start; // start of current checkpoint
  693. private int mark_end; // end of checkpoint/next free mark
  694. private int[] groups; // current group definitions
  695. // private classes
  696. private struct Mark {
  697. public int Start, End;
  698. public int Previous;
  699. public bool IsDefined {
  700. get { return Start >= 0 && End >= 0; }
  701. }
  702. public int Index {
  703. get { return Start < End ? Start : End; }
  704. }
  705. public int Length {
  706. get { return Start < End ? End - Start : Start - End; }
  707. }
  708. }
  709. private class RepeatContext {
  710. public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
  711. this.previous = previous;
  712. this.min = min;
  713. this.max = max;
  714. this.lazy = lazy;
  715. this.expr_pc = expr_pc;
  716. this.start = -1;
  717. this.count = 0;
  718. }
  719. public int Count {
  720. get { return count; }
  721. set { count = value; }
  722. }
  723. public int Start {
  724. get { return start; }
  725. set { start = value; }
  726. }
  727. public bool IsMinimum {
  728. get { return min <= count; }
  729. }
  730. public bool IsMaximum {
  731. get { return max <= count; }
  732. }
  733. public bool IsLazy {
  734. get { return lazy; }
  735. }
  736. public int Expression {
  737. get { return expr_pc; }
  738. }
  739. public RepeatContext Previous {
  740. get { return previous; }
  741. }
  742. private int start;
  743. private int min, max;
  744. private bool lazy;
  745. private int expr_pc;
  746. private RepeatContext previous;
  747. private int count;
  748. }
  749. private enum Mode {
  750. Search,
  751. Match,
  752. Count
  753. }
  754. }
  755. }