interpreter.cs 23 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: interpreter.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. using System;
  29. using System.Collections;
  30. using System.Diagnostics;
  31. using System.Globalization;
  32. namespace System.Text.RegularExpressions {
  33. class Interpreter : IMachine {
  34. public Interpreter (ushort[] program) {
  35. this.program = program;
  36. this.qs = null;
  37. // process info block
  38. Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
  39. this.group_count = program[1] + 1;
  40. this.match_min = program[2];
  41. this.match_max = program[3];
  42. // setup
  43. this.program_start = 4;
  44. this.groups = new int [group_count];
  45. }
  46. // IMachine implementation
  47. public Match Scan (Regex regex, string text, int start, int end) {
  48. this.text = text;
  49. this.text_end = end;
  50. this.scan_ptr = start;
  51. if (Eval (Mode.Match, ref scan_ptr, program_start))
  52. return GenerateMatch (regex);
  53. return Match.Empty;
  54. }
  55. // private methods
  56. private void Reset () {
  57. ResetGroups ();
  58. fast = repeat = null;
  59. }
  60. private bool Eval (Mode mode, ref int ref_ptr, int pc) {
  61. int ptr = ref_ptr;
  62. Begin:
  63. for (;;) {
  64. ushort word = program[pc];
  65. OpCode op = (OpCode)(word & 0x00ff);
  66. OpFlags flags = (OpFlags)(word & 0xff00);
  67. switch (op) {
  68. case OpCode.Anchor: {
  69. int skip = program[pc + 1];
  70. int anch_offset = program[pc + 2];
  71. bool anch_reverse = (flags & OpFlags.RightToLeft) != 0;
  72. int anch_ptr = anch_reverse ? ptr - anch_offset : ptr + anch_offset;
  73. int anch_end = text_end - match_min + anch_offset; // maximum anchor position
  74. int anch_begin = 0;
  75. // the general case for an anchoring expression is at the bottom, however we
  76. // do some checks for the common cases before to save processing time. the current
  77. // optimizer only outputs three types of anchoring expressions: fixed position,
  78. // fixed substring, and no anchor.
  79. OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);
  80. if (anch_op == OpCode.Position && skip == 6) { // position anchor
  81. // Anchor
  82. // Position
  83. // True
  84. switch ((Position)program[pc + 4]) {
  85. case Position.StartOfString:
  86. if (anch_reverse || anch_offset == 0) {
  87. ptr = anch_offset;
  88. if (TryMatch (ref ptr, pc + skip))
  89. goto Pass;
  90. }
  91. break;
  92. case Position.StartOfLine:
  93. if (anch_ptr == 0) {
  94. ptr = 0;
  95. if (TryMatch (ref ptr, pc + skip))
  96. goto Pass;
  97. ++ anch_ptr;
  98. }
  99. while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {
  100. if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
  101. if (anch_reverse)
  102. ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
  103. else
  104. ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
  105. if (TryMatch (ref ptr, pc + skip))
  106. goto Pass;
  107. }
  108. if (anch_reverse)
  109. -- anch_ptr;
  110. else
  111. ++ anch_ptr;
  112. }
  113. break;
  114. case Position.StartOfScan:
  115. if (anch_ptr == scan_ptr) {
  116. ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
  117. if (TryMatch (ref ptr, pc + skip))
  118. goto Pass;
  119. }
  120. break;
  121. default:
  122. // FIXME
  123. break;
  124. }
  125. }
  126. else if (qs != null ||
  127. (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor
  128. // Anchor
  129. // String
  130. // True
  131. bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
  132. string substring = GetString (pc + 3);
  133. if (qs == null) {
  134. bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
  135. qs = new QuickSearch (substring, ignore, reverse);
  136. }
  137. while ((anch_reverse && anch_ptr >= anch_begin)
  138. || (!anch_reverse && anch_ptr <= anch_end)) {
  139. if (reverse)
  140. {
  141. anch_ptr = qs.Search (text, anch_ptr, anch_begin);
  142. if (anch_ptr != -1)
  143. anch_ptr += substring.Length ;
  144. }
  145. else
  146. anch_ptr = qs.Search (text, anch_ptr, anch_end);
  147. if (anch_ptr < 0)
  148. break;
  149. ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
  150. if (TryMatch (ref ptr, pc + skip))
  151. goto Pass;
  152. if (reverse)
  153. anch_ptr -= 2;
  154. else
  155. ++ anch_ptr;
  156. }
  157. }
  158. else if (anch_op == OpCode.True) { // no anchor
  159. // Anchor
  160. // True
  161. while ((anch_reverse && anch_ptr >= anch_begin)
  162. || (!anch_reverse && anch_ptr <= anch_end)) {
  163. ptr = anch_ptr;
  164. if (TryMatch (ref ptr, pc + skip))
  165. goto Pass;
  166. if (anch_reverse)
  167. -- anch_ptr;
  168. else
  169. ++ anch_ptr;
  170. }
  171. }
  172. else { // general case
  173. // Anchor
  174. // <expr>
  175. // True
  176. while ((anch_reverse && anch_ptr >= anch_begin)
  177. || (!anch_reverse && anch_ptr <= anch_end)) {
  178. ptr = anch_ptr;
  179. if (Eval (Mode.Match, ref ptr, pc + 3)) {
  180. // anchor expression passed: try real expression at the correct offset
  181. ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
  182. if (TryMatch (ref ptr, pc + skip))
  183. goto Pass;
  184. }
  185. if (anch_reverse)
  186. -- anch_ptr;
  187. else
  188. ++ anch_ptr;
  189. }
  190. }
  191. goto Fail;
  192. }
  193. case OpCode.False: {
  194. goto Fail;
  195. }
  196. case OpCode.True: {
  197. goto Pass;
  198. }
  199. case OpCode.Position: {
  200. if (!IsPosition ((Position)program[pc + 1], ptr))
  201. goto Fail;
  202. pc += 2;
  203. break;
  204. }
  205. case OpCode.String: {
  206. bool reverse = (flags & OpFlags.RightToLeft) != 0;
  207. bool ignore = (flags & OpFlags.IgnoreCase) != 0;
  208. int len = program[pc + 1];
  209. if (reverse) {
  210. ptr -= len;
  211. if (ptr < 0)
  212. goto Fail;
  213. }
  214. else
  215. if (ptr + len > text_end)
  216. goto Fail;
  217. pc += 2;
  218. for (int i = 0; i < len; ++ i) {
  219. char c = text[ptr + i];
  220. if (ignore)
  221. c = Char.ToLower (c);
  222. if (c != (char)program[pc ++])
  223. goto Fail;
  224. }
  225. if (!reverse)
  226. ptr += len;
  227. break;
  228. }
  229. case OpCode.Reference: {
  230. bool reverse = (flags & OpFlags.RightToLeft) != 0;
  231. bool ignore = (flags & OpFlags.IgnoreCase) != 0;
  232. int m = GetLastDefined (program [pc + 1]);
  233. if (m < 0)
  234. goto Fail;
  235. int str = marks [m].Index;
  236. int len = marks [m].Length;
  237. if (reverse) {
  238. ptr -= len;
  239. if (ptr < 0)
  240. goto Fail;
  241. }
  242. else if (ptr + len > text_end)
  243. goto Fail;
  244. pc += 2;
  245. for (int i = 0; i < len; ++ i) {
  246. if (ignore) {
  247. if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
  248. goto Fail;
  249. }
  250. else {
  251. if (text[ptr + i] != text[str + i])
  252. goto Fail;
  253. }
  254. }
  255. if (!reverse)
  256. ptr += len;
  257. break;
  258. }
  259. case OpCode.Character: case OpCode.Category:
  260. case OpCode.Range: case OpCode.Set: {
  261. if (!EvalChar (mode, ref ptr, ref pc, false))
  262. goto Fail;
  263. break;
  264. }
  265. case OpCode.In: {
  266. int target = pc + program[pc + 1];
  267. pc += 2;
  268. if (!EvalChar (mode, ref ptr, ref pc, true))
  269. goto Fail;
  270. pc = target;
  271. break;
  272. }
  273. case OpCode.Open: {
  274. Open (program[pc + 1], ptr);
  275. pc += 2;
  276. break;
  277. }
  278. case OpCode.Close: {
  279. Close (program[pc + 1], ptr);
  280. pc += 2;
  281. break;
  282. }
  283. case OpCode.BalanceStart: {
  284. int start = ptr; //point before the balancing group
  285. if (!Eval (Mode.Match, ref ptr, pc + 5))
  286. goto Fail;
  287. if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
  288. goto Fail;
  289. }
  290. pc += program[pc + 4];
  291. break;
  292. }
  293. case OpCode.Balance: {
  294. goto Pass;
  295. }
  296. case OpCode.IfDefined: {
  297. int m = GetLastDefined (program [pc + 2]);
  298. if (m < 0)
  299. pc += program[pc + 1];
  300. else
  301. pc += 3;
  302. break;
  303. }
  304. case OpCode.Sub: {
  305. if (!Eval (Mode.Match, ref ptr, pc + 2))
  306. goto Fail;
  307. pc += program[pc + 1];
  308. break;
  309. }
  310. case OpCode.Test: {
  311. int cp = Checkpoint ();
  312. int test_ptr = ptr;
  313. if (Eval (Mode.Match, ref test_ptr, pc + 3))
  314. pc += program[pc + 1];
  315. else {
  316. Backtrack (cp);
  317. pc += program[pc + 2];
  318. }
  319. break;
  320. }
  321. case OpCode.Branch: {
  322. OpCode branch_op;
  323. do {
  324. int cp = Checkpoint ();
  325. if (Eval (Mode.Match, ref ptr, pc + 2))
  326. goto Pass;
  327. Backtrack (cp);
  328. pc += program[pc + 1];
  329. branch_op = (OpCode)(program[pc] & 0xff);
  330. } while (branch_op != OpCode.False);
  331. goto Fail;
  332. }
  333. case OpCode.Jump: {
  334. pc += program[pc + 1];
  335. break;
  336. }
  337. case OpCode.Repeat: {
  338. this.repeat = new RepeatContext (
  339. this.repeat, // previous context
  340. program[pc + 2], // minimum
  341. program[pc + 3], // maximum
  342. (flags & OpFlags.Lazy) != 0, // lazy
  343. pc + 4 // subexpression
  344. );
  345. if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
  346. goto Pass;
  347. else {
  348. this.repeat = this.repeat.Previous;
  349. goto Fail;
  350. }
  351. }
  352. case OpCode.Until: {
  353. RepeatContext current = this.repeat;
  354. int start = current.Start;
  355. if (!current.IsMinimum) {
  356. ++ current.Count;
  357. current.Start = ptr;
  358. if (Eval (Mode.Match, ref ptr, repeat.Expression))
  359. goto Pass;
  360. current.Start = start;
  361. -- current.Count;
  362. goto Fail;
  363. }
  364. if (ptr == current.Start) {
  365. // degenerate match ... match tail or fail
  366. this.repeat = current.Previous;
  367. if (Eval (Mode.Match, ref ptr, pc + 1))
  368. goto Pass;
  369. this.repeat = current;
  370. goto Fail;
  371. }
  372. if (current.IsLazy) {
  373. // match tail first ...
  374. this.repeat = current.Previous;
  375. int cp = Checkpoint ();
  376. if (Eval (Mode.Match, ref ptr, pc + 1))
  377. goto Pass;
  378. Backtrack (cp);
  379. // ... then match more
  380. this.repeat = current;
  381. if (!current.IsMaximum) {
  382. ++ current.Count;
  383. current.Start = ptr;
  384. if (Eval (Mode.Match, ref ptr, current.Expression))
  385. goto Pass;
  386. current.Start = start;
  387. -- current.Count;
  388. goto Fail;
  389. }
  390. return false;
  391. }
  392. else {
  393. // match more first ...
  394. if (!current.IsMaximum) {
  395. int cp = Checkpoint ();
  396. ++ current.Count;
  397. current.Start = ptr;
  398. if (Eval (Mode.Match, ref ptr, current.Expression))
  399. goto Pass;
  400. current.Start = start;
  401. -- current.Count;
  402. Backtrack (cp);
  403. }
  404. // ... then match tail
  405. this.repeat = current.Previous;
  406. if (Eval (Mode.Match, ref ptr, pc + 1))
  407. goto Pass;
  408. this.repeat = current;
  409. goto Fail;
  410. }
  411. }
  412. case OpCode.FastRepeat: {
  413. this.fast = new RepeatContext (
  414. fast,
  415. program[pc + 2], // minimum
  416. program[pc + 3], // maximum
  417. (flags & OpFlags.Lazy) != 0, // lazy
  418. pc + 4 // subexpression
  419. );
  420. fast.Start = ptr;
  421. int cp = Checkpoint ();
  422. pc += program[pc + 1]; // tail expression
  423. ushort tail_word = program[pc];
  424. int c1, c2; // first character of tail operator
  425. int coff; // 0 or -1 depending on direction
  426. OpCode tail_op = (OpCode)(tail_word & 0xff);
  427. if (tail_op == OpCode.Character || tail_op == OpCode.String) {
  428. OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
  429. if (tail_op == OpCode.String)
  430. {
  431. int offset = 0;
  432. if ((tail_flags & OpFlags.RightToLeft) != 0)
  433. {
  434. offset = program[pc + 1] - 1 ;
  435. }
  436. c1 = program[pc + 2 + offset]; // first char of string
  437. }
  438. else
  439. c1 = program[pc + 1]; // character
  440. if ((tail_flags & OpFlags.IgnoreCase) != 0)
  441. c2 = Char.ToUpper ((char)c1); // ignore case
  442. else
  443. c2 = c1;
  444. if ((tail_flags & OpFlags.RightToLeft) != 0)
  445. coff = -1; // reverse
  446. else
  447. coff = 0;
  448. }
  449. else {
  450. c1 = c2 = -1;
  451. coff = 0;
  452. }
  453. if (fast.IsLazy) {
  454. if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
  455. //Console.WriteLine ("lazy fast: failed mininum.");
  456. fast = fast.Previous;
  457. goto Fail;
  458. }
  459. while (true) {
  460. int p = ptr + coff;
  461. if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
  462. Eval (Mode.Match, ref ptr, pc))
  463. break;
  464. if (fast.IsMaximum) {
  465. //Console.WriteLine ("lazy fast: failed with maximum.");
  466. fast = fast.Previous;
  467. goto Fail;
  468. }
  469. Backtrack (cp);
  470. if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
  471. //Console.WriteLine ("lazy fast: no more.");
  472. fast = fast.Previous;
  473. goto Fail;
  474. }
  475. }
  476. fast = fast.Previous;
  477. goto Pass;
  478. }
  479. else {
  480. if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
  481. fast = fast.Previous;
  482. goto Fail;
  483. }
  484. int width;
  485. if (fast.Count > 0)
  486. width = (ptr - fast.Start) / fast.Count;
  487. else
  488. width = 0;
  489. while (true) {
  490. int p = ptr + coff;
  491. if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
  492. Eval (Mode.Match, ref ptr, pc))
  493. break;
  494. -- fast.Count;
  495. if (!fast.IsMinimum) {
  496. fast = fast.Previous;
  497. goto Fail;
  498. }
  499. ptr -= width;
  500. Backtrack (cp);
  501. }
  502. fast = fast.Previous;
  503. goto Pass;
  504. }
  505. }
  506. case OpCode.Info: {
  507. Debug.Assert (false, "Regex", "Info block found in pattern");
  508. goto Fail;
  509. }
  510. }
  511. }
  512. Pass:
  513. ref_ptr = ptr;
  514. switch (mode) {
  515. case Mode.Match:
  516. return true;
  517. case Mode.Count: {
  518. ++ fast.Count;
  519. if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
  520. return true;
  521. pc = fast.Expression;
  522. goto Begin;
  523. }
  524. }
  525. Fail:
  526. switch (mode) {
  527. case Mode.Match:
  528. return false;
  529. case Mode.Count: {
  530. if (!fast.IsLazy && fast.IsMinimum)
  531. return true;
  532. ref_ptr = fast.Start;
  533. return false;
  534. }
  535. }
  536. return false;
  537. }
  538. private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
  539. bool consumed = false;
  540. char c = '\0';
  541. bool negate;
  542. bool ignore;
  543. do {
  544. ushort word = program[pc];
  545. OpCode op = (OpCode)(word & 0x00ff);
  546. OpFlags flags = (OpFlags)(word & 0xff00);
  547. ++ pc;
  548. ignore = (flags & OpFlags.IgnoreCase) != 0;
  549. // consume character: the direction of an In construct is
  550. // determined by the direction of its first op
  551. if (!consumed) {
  552. if ((flags & OpFlags.RightToLeft) != 0) {
  553. if (ptr <= 0)
  554. return false;
  555. c = text[-- ptr];
  556. }
  557. else {
  558. if (ptr >= text_end)
  559. return false;
  560. c = text[ptr ++];
  561. }
  562. if (ignore)
  563. c = Char.ToLower (c);
  564. consumed = true;
  565. }
  566. // negate flag
  567. negate = (flags & OpFlags.Negate) != 0;
  568. // execute op
  569. switch (op) {
  570. case OpCode.True:
  571. return true;
  572. case OpCode.False:
  573. return false;
  574. case OpCode.Character: {
  575. if (c == (char)program[pc ++])
  576. return !negate;
  577. break;
  578. }
  579. case OpCode.Category: {
  580. if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
  581. return !negate;
  582. break;
  583. }
  584. case OpCode.Range: {
  585. int lo = (char)program[pc ++];
  586. int hi = (char)program[pc ++];
  587. if (lo <= c && c <= hi)
  588. return !negate;
  589. break;
  590. }
  591. case OpCode.Set: {
  592. int lo = (char)program[pc ++];
  593. int len = (char)program[pc ++];
  594. int bits = pc;
  595. pc += len;
  596. int i = (int)c - lo;
  597. if (i < 0 || i >= len << 4)
  598. break;
  599. if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
  600. return !negate;
  601. break;
  602. }
  603. }
  604. } while (multi);
  605. return negate;
  606. }
  607. private bool TryMatch (ref int ref_ptr, int pc) {
  608. Reset ();
  609. int ptr = ref_ptr;
  610. marks [groups [0]].Start = ptr;
  611. if (Eval (Mode.Match, ref ptr, pc)) {
  612. marks [groups [0]].End = ptr;
  613. ref_ptr = ptr;
  614. return true;
  615. }
  616. return false;
  617. }
  618. private bool IsPosition (Position pos, int ptr) {
  619. switch (pos) {
  620. case Position.Start: case Position.StartOfString:
  621. return ptr == 0;
  622. case Position.StartOfLine:
  623. return ptr == 0 || text[ptr - 1] == '\n';
  624. case Position.StartOfScan:
  625. return ptr == scan_ptr;
  626. case Position.End:
  627. return ptr == text_end ||
  628. (ptr == text_end - 1 && text[ptr] == '\n');
  629. case Position.EndOfLine:
  630. return ptr == text_end || text[ptr] == '\n';
  631. case Position.EndOfString:
  632. return ptr == text_end;
  633. case Position.Boundary:
  634. if (text_end == 0)
  635. return false;
  636. if (ptr == 0)
  637. return IsWordChar (text[ptr]);
  638. else if (ptr == text_end)
  639. return IsWordChar (text[ptr - 1]);
  640. else
  641. return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
  642. case Position.NonBoundary:
  643. if (text_end == 0)
  644. return false;
  645. if (ptr == 0)
  646. return !IsWordChar (text[ptr]);
  647. else if (ptr == text_end)
  648. return !IsWordChar (text[ptr - 1]);
  649. else
  650. return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
  651. default:
  652. return false;
  653. }
  654. }
  655. private bool IsWordChar (char c) {
  656. return CategoryUtils.IsCategory (Category.Word, c);
  657. }
  658. private string GetString (int pc) {
  659. int len = program[pc + 1];
  660. int str = pc + 2;
  661. char[] cs = new char[len];
  662. for (int i = 0; i < len; ++ i)
  663. cs[i] = (char)program[str ++];
  664. return new string (cs);
  665. }
  666. // capture management
  667. private void Open (int gid, int ptr) {
  668. int m = groups [gid];
  669. if (m < mark_start || marks [m].IsDefined) {
  670. m = CreateMark (m);
  671. groups [gid] = m;
  672. }
  673. marks [m].Start = ptr;
  674. }
  675. private void Close (int gid, int ptr) {
  676. marks [groups [gid]].End = ptr;
  677. }
  678. private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
  679. int b = groups [balance_gid];
  680. if(b == -1 || marks[b].Index < 0) {
  681. //Group not previously matched
  682. return false;
  683. }
  684. Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
  685. if (gid > 0 && capture){
  686. Open (gid, marks [b].Index + marks [b].Length);
  687. Close (gid, ptr);
  688. }
  689. groups [balance_gid] = marks[b].Previous;
  690. return true;
  691. }
  692. private int Checkpoint () {
  693. mark_start = mark_end;
  694. return mark_start;
  695. }
  696. private void Backtrack (int cp) {
  697. Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
  698. for (int i = 0; i < groups.Length; ++ i) {
  699. int m = groups [i];
  700. while (cp <= m)
  701. m = marks [m].Previous;
  702. groups [i] = m;
  703. }
  704. }
  705. private void ResetGroups () {
  706. int n = groups.Length;
  707. if (marks == null)
  708. marks = new Mark [n * 10];
  709. for (int i = 0; i < n; ++ i) {
  710. groups [i] = i;
  711. marks [i].Start = -1;
  712. marks [i].End = -1;
  713. marks [i].Previous = -1;
  714. }
  715. mark_start = 0;
  716. mark_end = n;
  717. }
  718. private int GetLastDefined (int gid) {
  719. int m = groups [gid];
  720. while (m >= 0 && !marks [m].IsDefined)
  721. m = marks [m].Previous;
  722. return m;
  723. }
  724. private int CreateMark (int previous) {
  725. if (mark_end == marks.Length) {
  726. Mark [] dest = new Mark [marks.Length * 2];
  727. marks.CopyTo (dest, 0);
  728. marks = dest;
  729. }
  730. int m = mark_end ++;
  731. marks [m].Start = marks [m].End = -1;
  732. marks [m].Previous = previous;
  733. return m;
  734. }
  735. private Match GenerateMatch (Regex regex) {
  736. int[][] grps = new int[groups.Length][];
  737. ArrayList caps = new ArrayList ();
  738. for (int gid = 0; gid < groups.Length; ++ gid) {
  739. caps.Clear ();
  740. for (int m = groups[gid]; m >= 0; m = marks[m].Previous) {
  741. if (!marks[m].IsDefined)
  742. continue;
  743. caps.Add (marks[m].Index);
  744. caps.Add (marks[m].Length);
  745. }
  746. grps[gid] = (int[])caps.ToArray (typeof (int));
  747. }
  748. return new Match (regex, this, text, text_end, grps);
  749. }
  750. // interpreter attributes
  751. private ushort[] program; // regex program
  752. private int program_start; // first instruction after info block
  753. private string text; // input text
  754. private int text_end; // end of input text (last character + 1)
  755. private int group_count; // number of capturing groups
  756. private int match_min, match_max; // match width information
  757. private QuickSearch qs; // fast substring matcher
  758. // match state
  759. private int scan_ptr; // start of scan
  760. private RepeatContext repeat; // current repeat context
  761. private RepeatContext fast; // fast repeat context
  762. private Mark[] marks = null; // mark stack
  763. private int mark_start; // start of current checkpoint
  764. private int mark_end; // end of checkpoint/next free mark
  765. private int[] groups; // current group definitions
  766. // private classes
  767. /*
  768. private struct Mark {
  769. public int Start, End;
  770. public int Previous;
  771. public bool IsDefined {
  772. get { return Start >= 0 && End >= 0; }
  773. }
  774. public int Index {
  775. get { return Start < End ? Start : End; }
  776. }
  777. public int Length {
  778. get { return Start < End ? End - Start : Start - End; }
  779. }
  780. }
  781. */
  782. private class RepeatContext {
  783. public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
  784. this.previous = previous;
  785. this.min = min;
  786. this.max = max;
  787. this.lazy = lazy;
  788. this.expr_pc = expr_pc;
  789. this.start = -1;
  790. this.count = 0;
  791. }
  792. public int Count {
  793. get { return count; }
  794. set { count = value; }
  795. }
  796. public int Start {
  797. get { return start; }
  798. set { start = value; }
  799. }
  800. public bool IsMinimum {
  801. get { return min <= count; }
  802. }
  803. public bool IsMaximum {
  804. get { return max <= count; }
  805. }
  806. public bool IsLazy {
  807. get { return lazy; }
  808. }
  809. public int Expression {
  810. get { return expr_pc; }
  811. }
  812. public RepeatContext Previous {
  813. get { return previous; }
  814. }
  815. private int start;
  816. private int min, max;
  817. private bool lazy;
  818. private int expr_pc;
  819. private RepeatContext previous;
  820. private int count;
  821. }
  822. private enum Mode {
  823. Search,
  824. Match,
  825. Count
  826. }
  827. }
  828. }