| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048 |
- //
- // assembly: System
- // namespace: System.Text.RegularExpressions
- // file: interpreter.cs
- //
- // author: Dan Lewis ([email protected])
- // (c) 2002
- //
- // Permission is hereby granted, free of charge, to any person obtaining
- // a copy of this software and associated documentation files (the
- // "Software"), to deal in the Software without restriction, including
- // without limitation the rights to use, copy, modify, merge, publish,
- // distribute, sublicense, and/or sell copies of the Software, and to
- // permit persons to whom the Software is furnished to do so, subject to
- // the following conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- //
- using System;
- using System.Collections;
- using System.Diagnostics;
- using System.Globalization;
- namespace System.Text.RegularExpressions {
- class Interpreter : IMachine {
- public Interpreter (ushort[] program) {
- this.program = program;
- this.qs = null;
- // process info block
- Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
- this.group_count = program[1] + 1;
- this.match_min = program[2];
- this.match_max = program[3];
- // setup
- this.program_start = 4;
- this.groups = new int [group_count];
- }
- // IMachine implementation
- public Match Scan (Regex regex, string text, int start, int end) {
- this.text = text;
- this.text_end = end;
- this.scan_ptr = start;
- if (Eval (Mode.Match, ref scan_ptr, program_start))
- return GenerateMatch (regex);
- return Match.Empty;
- }
- // private methods
- private void Reset () {
- ResetGroups ();
- fast = repeat = null;
- }
- private bool Eval (Mode mode, ref int ref_ptr, int pc) {
- int ptr = ref_ptr;
- Begin:
- for (;;) {
- ushort word = program[pc];
- OpCode op = (OpCode)(word & 0x00ff);
- OpFlags flags = (OpFlags)(word & 0xff00);
- switch (op) {
- case OpCode.Anchor: {
- int skip = program[pc + 1];
-
- int anch_offset = program[pc + 2];
- bool anch_reverse = (flags & OpFlags.RightToLeft) != 0;
- int anch_ptr = anch_reverse ? ptr - anch_offset : ptr + anch_offset;
- int anch_end = text_end - match_min + anch_offset; // maximum anchor position
-
-
- int anch_begin = 0;
- // the general case for an anchoring expression is at the bottom, however we
- // do some checks for the common cases before to save processing time. the current
- // optimizer only outputs three types of anchoring expressions: fixed position,
- // fixed substring, and no anchor.
- OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);
- if (anch_op == OpCode.Position && skip == 6) { // position anchor
- // Anchor
- // Position
- // True
- switch ((Position)program[pc + 4]) {
- case Position.StartOfString:
- if (anch_reverse || anch_offset == 0) {
- ptr = anch_offset;
- if (TryMatch (ref ptr, pc + skip))
- goto Pass;
- }
- break;
-
- case Position.StartOfLine:
-
- if (anch_ptr == 0) {
- ptr = 0;
- if (TryMatch (ref ptr, pc + skip))
- goto Pass;
-
- ++ anch_ptr;
- }
- while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {
- if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
- if (anch_reverse)
- ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
- else
- ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
- if (TryMatch (ref ptr, pc + skip))
- goto Pass;
- }
-
- if (anch_reverse)
- -- anch_ptr;
- else
- ++ anch_ptr;
- }
- break;
-
- case Position.StartOfScan:
- if (anch_ptr == scan_ptr) {
- ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
- if (TryMatch (ref ptr, pc + skip))
- goto Pass;
- }
- break;
- default:
- // FIXME
- break;
- }
- }
- else if (qs != null ||
- (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor
- // Anchor
- // String
- // True
-
- bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
- string substring = GetString (pc + 3);
- if (qs == null) {
- bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
- qs = new QuickSearch (substring, ignore, reverse);
- }
- while ((anch_reverse && anch_ptr >= anch_begin)
- || (!anch_reverse && anch_ptr <= anch_end)) {
- if (reverse)
- {
- anch_ptr = qs.Search (text, anch_ptr, anch_begin);
- if (anch_ptr != -1)
- anch_ptr += substring.Length ;
-
- }
- else
- anch_ptr = qs.Search (text, anch_ptr, anch_end);
- if (anch_ptr < 0)
- break;
- ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
- if (TryMatch (ref ptr, pc + skip))
- goto Pass;
- if (reverse)
- anch_ptr -= 2;
- else
- ++ anch_ptr;
- }
- }
- else if (anch_op == OpCode.True) { // no anchor
- // Anchor
- // True
-
- while ((anch_reverse && anch_ptr >= anch_begin)
- || (!anch_reverse && anch_ptr <= anch_end)) {
- ptr = anch_ptr;
- if (TryMatch (ref ptr, pc + skip))
- goto Pass;
- if (anch_reverse)
- -- anch_ptr;
- else
- ++ anch_ptr;
- }
- }
- else { // general case
- // Anchor
- // <expr>
- // True
- while ((anch_reverse && anch_ptr >= anch_begin)
- || (!anch_reverse && anch_ptr <= anch_end)) {
- ptr = anch_ptr;
- if (Eval (Mode.Match, ref ptr, pc + 3)) {
- // anchor expression passed: try real expression at the correct offset
- ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
- if (TryMatch (ref ptr, pc + skip))
- goto Pass;
- }
- if (anch_reverse)
- -- anch_ptr;
- else
- ++ anch_ptr;
- }
- }
- goto Fail;
- }
-
- case OpCode.False: {
- goto Fail;
- }
- case OpCode.True: {
- goto Pass;
- }
- case OpCode.Position: {
- if (!IsPosition ((Position)program[pc + 1], ptr))
- goto Fail;
- pc += 2;
- break;
- }
- case OpCode.String: {
- bool reverse = (flags & OpFlags.RightToLeft) != 0;
- bool ignore = (flags & OpFlags.IgnoreCase) != 0;
- int len = program[pc + 1];
- if (reverse) {
- ptr -= len;
- if (ptr < 0)
- goto Fail;
- }
- else
- if (ptr + len > text_end)
- goto Fail;
- pc += 2;
- for (int i = 0; i < len; ++ i) {
- char c = text[ptr + i];
- if (ignore)
- c = Char.ToLower (c);
- if (c != (char)program[pc ++])
- goto Fail;
- }
- if (!reverse)
- ptr += len;
- break;
- }
- case OpCode.Reference: {
- bool reverse = (flags & OpFlags.RightToLeft) != 0;
- bool ignore = (flags & OpFlags.IgnoreCase) != 0;
- int m = GetLastDefined (program [pc + 1]);
- if (m < 0)
- goto Fail;
- int str = marks [m].Index;
- int len = marks [m].Length;
- if (reverse) {
- ptr -= len;
- if (ptr < 0)
- goto Fail;
- }
- else if (ptr + len > text_end)
- goto Fail;
- pc += 2;
- for (int i = 0; i < len; ++ i) {
- if (ignore) {
- if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
- goto Fail;
- }
- else {
- if (text[ptr + i] != text[str + i])
- goto Fail;
- }
- }
- if (!reverse)
- ptr += len;
- break;
- }
- case OpCode.Character: case OpCode.Category:
- case OpCode.Range: case OpCode.Set: {
- if (!EvalChar (mode, ref ptr, ref pc, false))
- goto Fail;
- break;
- }
- case OpCode.In: {
- int target = pc + program[pc + 1];
- pc += 2;
- if (!EvalChar (mode, ref ptr, ref pc, true))
- goto Fail;
- pc = target;
- break;
- }
- case OpCode.Open: {
- Open (program[pc + 1], ptr);
- pc += 2;
- break;
- }
- case OpCode.Close: {
- Close (program[pc + 1], ptr);
- pc += 2;
- break;
- }
- case OpCode.BalanceStart: {
- int start = ptr; //point before the balancing group
-
- if (!Eval (Mode.Match, ref ptr, pc + 5))
- goto Fail;
-
-
-
- if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
- goto Fail;
- }
-
- pc += program[pc + 4];
- break;
- }
- case OpCode.Balance: {
- goto Pass;
- }
- case OpCode.IfDefined: {
- int m = GetLastDefined (program [pc + 2]);
- if (m < 0)
- pc += program[pc + 1];
- else
- pc += 3;
- break;
- }
- case OpCode.Sub: {
- if (!Eval (Mode.Match, ref ptr, pc + 2))
- goto Fail;
- pc += program[pc + 1];
- break;
- }
- case OpCode.Test: {
- int cp = Checkpoint ();
- int test_ptr = ptr;
- if (Eval (Mode.Match, ref test_ptr, pc + 3))
- pc += program[pc + 1];
- else {
- Backtrack (cp);
- pc += program[pc + 2];
- }
- break;
- }
- case OpCode.Branch: {
- OpCode branch_op;
- do {
- int cp = Checkpoint ();
- if (Eval (Mode.Match, ref ptr, pc + 2))
- goto Pass;
-
- Backtrack (cp);
-
- pc += program[pc + 1];
- branch_op = (OpCode)(program[pc] & 0xff);
- } while (branch_op != OpCode.False);
- goto Fail;
- }
- case OpCode.Jump: {
- pc += program[pc + 1];
- break;
- }
- case OpCode.Repeat: {
- this.repeat = new RepeatContext (
- this.repeat, // previous context
- program[pc + 2], // minimum
- program[pc + 3], // maximum
- (flags & OpFlags.Lazy) != 0, // lazy
- pc + 4 // subexpression
- );
- if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
- goto Pass;
- else {
- this.repeat = this.repeat.Previous;
- goto Fail;
- }
- }
- case OpCode.Until: {
- RepeatContext current = this.repeat;
- int start = current.Start;
- if (!current.IsMinimum) {
- ++ current.Count;
- current.Start = ptr;
- if (Eval (Mode.Match, ref ptr, repeat.Expression))
- goto Pass;
- current.Start = start;
- -- current.Count;
- goto Fail;
- }
- if (ptr == current.Start) {
- // degenerate match ... match tail or fail
- this.repeat = current.Previous;
- if (Eval (Mode.Match, ref ptr, pc + 1))
- goto Pass;
-
- this.repeat = current;
- goto Fail;
- }
- if (current.IsLazy) {
- // match tail first ...
- this.repeat = current.Previous;
- int cp = Checkpoint ();
- if (Eval (Mode.Match, ref ptr, pc + 1))
- goto Pass;
- Backtrack (cp);
- // ... then match more
- this.repeat = current;
- if (!current.IsMaximum) {
- ++ current.Count;
- current.Start = ptr;
- if (Eval (Mode.Match, ref ptr, current.Expression))
- goto Pass;
- current.Start = start;
- -- current.Count;
- goto Fail;
- }
- return false;
- }
- else {
- // match more first ...
- if (!current.IsMaximum) {
- int cp = Checkpoint ();
- ++ current.Count;
- current.Start = ptr;
- if (Eval (Mode.Match, ref ptr, current.Expression))
- goto Pass;
- current.Start = start;
- -- current.Count;
- Backtrack (cp);
- }
- // ... then match tail
- this.repeat = current.Previous;
- if (Eval (Mode.Match, ref ptr, pc + 1))
- goto Pass;
- this.repeat = current;
- goto Fail;
- }
- }
- case OpCode.FastRepeat: {
- this.fast = new RepeatContext (
- fast,
- program[pc + 2], // minimum
- program[pc + 3], // maximum
- (flags & OpFlags.Lazy) != 0, // lazy
- pc + 4 // subexpression
- );
- fast.Start = ptr;
- int cp = Checkpoint ();
- pc += program[pc + 1]; // tail expression
- ushort tail_word = program[pc];
- int c1, c2; // first character of tail operator
- int coff; // 0 or -1 depending on direction
- OpCode tail_op = (OpCode)(tail_word & 0xff);
- if (tail_op == OpCode.Character || tail_op == OpCode.String) {
- OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
- if (tail_op == OpCode.String)
- {
- int offset = 0;
-
- if ((tail_flags & OpFlags.RightToLeft) != 0)
- {
- offset = program[pc + 1] - 1 ;
- }
-
- c1 = program[pc + 2 + offset]; // first char of string
- }
- else
- c1 = program[pc + 1]; // character
-
- if ((tail_flags & OpFlags.IgnoreCase) != 0)
- c2 = Char.ToUpper ((char)c1); // ignore case
- else
- c2 = c1;
- if ((tail_flags & OpFlags.RightToLeft) != 0)
- coff = -1; // reverse
- else
- coff = 0;
- }
- else {
- c1 = c2 = -1;
- coff = 0;
- }
- if (fast.IsLazy) {
- if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
- //Console.WriteLine ("lazy fast: failed mininum.");
- fast = fast.Previous;
- goto Fail;
- }
-
- while (true) {
- int p = ptr + coff;
- if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
- Eval (Mode.Match, ref ptr, pc))
- break;
- if (fast.IsMaximum) {
- //Console.WriteLine ("lazy fast: failed with maximum.");
- fast = fast.Previous;
- goto Fail;
- }
- Backtrack (cp);
- if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
- //Console.WriteLine ("lazy fast: no more.");
- fast = fast.Previous;
- goto Fail;
- }
- }
- fast = fast.Previous;
- goto Pass;
- }
- else {
- if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
- fast = fast.Previous;
- goto Fail;
- }
-
- int width;
- if (fast.Count > 0)
- width = (ptr - fast.Start) / fast.Count;
- else
- width = 0;
- while (true) {
- int p = ptr + coff;
- if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
- Eval (Mode.Match, ref ptr, pc))
- break;
- -- fast.Count;
- if (!fast.IsMinimum) {
- fast = fast.Previous;
- goto Fail;
- }
- ptr -= width;
- Backtrack (cp);
- }
- fast = fast.Previous;
- goto Pass;
- }
- }
- case OpCode.Info: {
- Debug.Assert (false, "Regex", "Info block found in pattern");
- goto Fail;
- }
- }
- }
- Pass:
- ref_ptr = ptr;
- switch (mode) {
- case Mode.Match:
- return true;
- case Mode.Count: {
- ++ fast.Count;
- if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
- return true;
- pc = fast.Expression;
- goto Begin;
- }
- }
- Fail:
- switch (mode) {
- case Mode.Match:
- return false;
- case Mode.Count: {
- if (!fast.IsLazy && fast.IsMinimum)
- return true;
- ref_ptr = fast.Start;
- return false;
- }
- }
- return false;
- }
- private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
- bool consumed = false;
- char c = '\0';
- bool negate;
- bool ignore;
-
- do {
- ushort word = program[pc];
- OpCode op = (OpCode)(word & 0x00ff);
- OpFlags flags = (OpFlags)(word & 0xff00);
- ++ pc;
- ignore = (flags & OpFlags.IgnoreCase) != 0;
-
- // consume character: the direction of an In construct is
- // determined by the direction of its first op
- if (!consumed) {
- if ((flags & OpFlags.RightToLeft) != 0) {
- if (ptr <= 0)
- return false;
- c = text[-- ptr];
- }
- else {
- if (ptr >= text_end)
- return false;
- c = text[ptr ++];
- }
- if (ignore)
- c = Char.ToLower (c);
- consumed = true;
- }
- // negate flag
- negate = (flags & OpFlags.Negate) != 0;
- // execute op
-
- switch (op) {
- case OpCode.True:
- return true;
- case OpCode.False:
- return false;
-
- case OpCode.Character: {
- if (c == (char)program[pc ++])
- return !negate;
- break;
- }
- case OpCode.Category: {
- if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
- return !negate;
- break;
- }
-
- case OpCode.Range: {
- int lo = (char)program[pc ++];
- int hi = (char)program[pc ++];
- if (lo <= c && c <= hi)
- return !negate;
- break;
- }
- case OpCode.Set: {
- int lo = (char)program[pc ++];
- int len = (char)program[pc ++];
- int bits = pc;
- pc += len;
- int i = (int)c - lo;
- if (i < 0 || i >= len << 4)
- break;
-
- if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
- return !negate;
- break;
- }
- }
- } while (multi);
- return negate;
- }
- private bool TryMatch (ref int ref_ptr, int pc) {
- Reset ();
-
- int ptr = ref_ptr;
- marks [groups [0]].Start = ptr;
- if (Eval (Mode.Match, ref ptr, pc)) {
- marks [groups [0]].End = ptr;
- ref_ptr = ptr;
- return true;
- }
- return false;
- }
-
- private bool IsPosition (Position pos, int ptr) {
- switch (pos) {
- case Position.Start: case Position.StartOfString:
- return ptr == 0;
- case Position.StartOfLine:
- return ptr == 0 || text[ptr - 1] == '\n';
-
- case Position.StartOfScan:
- return ptr == scan_ptr;
-
- case Position.End:
- return ptr == text_end ||
- (ptr == text_end - 1 && text[ptr] == '\n');
- case Position.EndOfLine:
- return ptr == text_end || text[ptr] == '\n';
-
- case Position.EndOfString:
- return ptr == text_end;
-
- case Position.Boundary:
- if (text_end == 0)
- return false;
- if (ptr == 0)
- return IsWordChar (text[ptr]);
- else if (ptr == text_end)
- return IsWordChar (text[ptr - 1]);
- else
- return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
- case Position.NonBoundary:
- if (text_end == 0)
- return false;
- if (ptr == 0)
- return !IsWordChar (text[ptr]);
- else if (ptr == text_end)
- return !IsWordChar (text[ptr - 1]);
- else
- return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
-
- default:
- return false;
- }
- }
- private bool IsWordChar (char c) {
- return CategoryUtils.IsCategory (Category.Word, c);
- }
- private string GetString (int pc) {
- int len = program[pc + 1];
- int str = pc + 2;
- char[] cs = new char[len];
- for (int i = 0; i < len; ++ i)
- cs[i] = (char)program[str ++];
- return new string (cs);
- }
- // capture management
- private void Open (int gid, int ptr) {
- int m = groups [gid];
- if (m < mark_start || marks [m].IsDefined) {
- m = CreateMark (m);
- groups [gid] = m;
- }
- marks [m].Start = ptr;
- }
- private void Close (int gid, int ptr) {
- marks [groups [gid]].End = ptr;
- }
- private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
- int b = groups [balance_gid];
- if(b == -1 || marks[b].Index < 0) {
- //Group not previously matched
- return false;
- }
- Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
- if (gid > 0 && capture){
- Open (gid, marks [b].Index + marks [b].Length);
- Close (gid, ptr);
- }
- groups [balance_gid] = marks[b].Previous;
- return true;
- }
- private int Checkpoint () {
- mark_start = mark_end;
- return mark_start;
- }
- private void Backtrack (int cp) {
- Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
- for (int i = 0; i < groups.Length; ++ i) {
- int m = groups [i];
- while (cp <= m)
- m = marks [m].Previous;
- groups [i] = m;
- }
- }
- private void ResetGroups () {
- int n = groups.Length;
- if (marks == null)
- marks = new Mark [n * 10];
- for (int i = 0; i < n; ++ i) {
- groups [i] = i;
- marks [i].Start = -1;
- marks [i].End = -1;
- marks [i].Previous = -1;
- }
-
- mark_start = 0;
- mark_end = n;
- }
- private int GetLastDefined (int gid) {
- int m = groups [gid];
- while (m >= 0 && !marks [m].IsDefined)
- m = marks [m].Previous;
- return m;
- }
- private int CreateMark (int previous) {
- if (mark_end == marks.Length) {
- Mark [] dest = new Mark [marks.Length * 2];
- marks.CopyTo (dest, 0);
- marks = dest;
- }
- int m = mark_end ++;
- marks [m].Start = marks [m].End = -1;
- marks [m].Previous = previous;
- return m;
- }
- private Match GenerateMatch (Regex regex) {
- int[][] grps = new int[groups.Length][];
- ArrayList caps = new ArrayList ();
- for (int gid = 0; gid < groups.Length; ++ gid) {
- caps.Clear ();
- for (int m = groups[gid]; m >= 0; m = marks[m].Previous) {
- if (!marks[m].IsDefined)
- continue;
-
- caps.Add (marks[m].Index);
- caps.Add (marks[m].Length);
- }
- grps[gid] = (int[])caps.ToArray (typeof (int));
- }
- return new Match (regex, this, text, text_end, grps);
- }
- // interpreter attributes
- private ushort[] program; // regex program
- private int program_start; // first instruction after info block
- private string text; // input text
- private int text_end; // end of input text (last character + 1)
- private int group_count; // number of capturing groups
- private int match_min, match_max; // match width information
- private QuickSearch qs; // fast substring matcher
- // match state
-
- private int scan_ptr; // start of scan
- private RepeatContext repeat; // current repeat context
- private RepeatContext fast; // fast repeat context
- private Mark[] marks = null; // mark stack
- private int mark_start; // start of current checkpoint
- private int mark_end; // end of checkpoint/next free mark
- private int[] groups; // current group definitions
- // private classes
- /*
- private struct Mark {
- public int Start, End;
- public int Previous;
- public bool IsDefined {
- get { return Start >= 0 && End >= 0; }
- }
- public int Index {
- get { return Start < End ? Start : End; }
- }
- public int Length {
- get { return Start < End ? End - Start : Start - End; }
- }
- }
- */
- private class RepeatContext {
- public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
- this.previous = previous;
- this.min = min;
- this.max = max;
- this.lazy = lazy;
- this.expr_pc = expr_pc;
-
- this.start = -1;
- this.count = 0;
- }
- public int Count {
- get { return count; }
- set { count = value; }
- }
- public int Start {
- get { return start; }
- set { start = value; }
- }
- public bool IsMinimum {
- get { return min <= count; }
- }
- public bool IsMaximum {
- get { return max <= count; }
- }
- public bool IsLazy {
- get { return lazy; }
- }
- public int Expression {
- get { return expr_pc; }
- }
- public RepeatContext Previous {
- get { return previous; }
- }
-
- private int start;
- private int min, max;
- private bool lazy;
- private int expr_pc;
- private RepeatContext previous;
- private int count;
- }
- private enum Mode {
- Search,
- Match,
- Count
- }
- }
- }
|