| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140 |
- //
- // assembly: System
- // namespace: System.Text.RegularExpressions
- // file: parser.cs
- //
- // author: Dan Lewis ([email protected])
- // (c) 2002
- using System;
- using System.Collections;
- using System.Globalization;
- namespace System.Text.RegularExpressions.Syntax {
- class Parser {
// Parses one or more decimal digits from str starting at ptr.
// Delegates to ParseNumber with base 10 and no upper limit on the
// number of digits; ptr is passed through by reference.
public static int ParseDecimal (string str, ref int ptr) {
    const int radix = 10;
    const int min_digits = 1;
    return ParseNumber (str, ref ptr, radix, min_digits, Int32.MaxValue);
}
// Parses between one and three octal digits from str starting at ptr
// by delegating to ParseNumber with base 8.
public static int ParseOctal (string str, ref int ptr) {
    const int radix = 8;
    const int min_digits = 1;
    const int max_digits = 3;
    return ParseNumber (str, ref ptr, radix, min_digits, max_digits);
}
// Parses exactly `digits` hexadecimal digits from str starting at ptr
// by delegating to ParseNumber with base 16 and min == max == digits.
public static int ParseHex (string str, ref int ptr, int digits) {
    const int radix = 16;
    return ParseNumber (str, ref ptr, radix, digits, digits);
}
// Parses an unsigned number in base b from str, starting at ptr.
//
//   min - minimum number of digits that must be present
//   max - maximum number of digits to consume; a value smaller than
//         min means "no limit"
//
// Returns the parsed value and advances ptr past the digits consumed.
// If fewer than min digits were found, returns -1 and leaves ptr
// untouched. The accumulation is not overflow-checked.
public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {
    int limit = (max < min) ? Int32.MaxValue : max;
    int pos = ptr;
    int value = 0;
    int count = 0;

    for (; count < limit && pos < str.Length; ++ pos, ++ count) {
        int digit = ParseDigit (str[pos], b, count);
        if (digit < 0)
            break;      // first non-digit: stop without consuming it

        value = value * b + digit;
    }

    if (count < min)
        return -1;

    ptr = pos;
    return value;
}
// Parses a group name from str starting at ptr.
//
// If the first character is a digit the name is a group number: it is
// parsed as a decimal number and returned in canonical string form, or
// null when the number is not positive. Otherwise the longest run of
// name characters (see IsNameChar) is returned, or null when that run
// is empty.
//
// No bounds check is performed: reading past the end of str raises
// IndexOutOfRangeException, exactly as indexing the string does.
public static string ParseName (string str, ref int ptr) {
    if (Char.IsDigit (str[ptr])) {
        int number = ParseNumber (str, ref ptr, 10, 1, 0);
        return (number > 0) ? number.ToString () : null;
    }

    int begin = ptr;
    while (IsNameChar (str[ptr]))
        ++ ptr;

    int length = ptr - begin;
    return (length > 0) ? str.Substring (begin, length) : null;
}
// Returns str with every regex metacharacter prefixed by a backslash
// and tab / newline / carriage return / form feed replaced by their
// two-character escapes (\t, \n, \r, \f). All other characters are
// copied unchanged.
//
// Output is accumulated in a StringBuilder so that the cost is linear
// in the input length instead of quadratic from repeated string
// concatenation.
public static string Escape (string str) {
    System.Text.StringBuilder result = new System.Text.StringBuilder (str.Length);

    foreach (char c in str) {
        switch (c) {
        case '\\': case '*': case '+': case '?': case '|':
        case '{': case '[': case '(': case ')': case '^':
        case '$': case '.': case '#': case ' ':
            result.Append ('\\').Append (c);
            break;

        case '\t': result.Append ("\\t"); break;
        case '\n': result.Append ("\\n"); break;
        case '\r': result.Append ("\\r"); break;
        case '\f': result.Append ("\\f"); break;

        default: result.Append (c); break;
        }
    }

    return result.ToString ();
}
// Converts escape sequences in str back to the characters they stand
// for, using a throw-away Parser instance and its ParseString method.
public static string Unescape (string str) {
    Parser parser = new Parser ();
    return parser.ParseString (str);
}
- // public instance
// Creates a parser with an empty capture-group list (caps) and an
// empty reference table (refs).
public Parser () {
    caps = new ArrayList ();
    refs = new Hashtable ();
}
// Parses pattern into a RegularExpression tree.
//
// Resets all per-parse state, parses the pattern as the top-level
// group, resolves group references and stores the resulting group
// count on the returned expression. An IndexOutOfRangeException raised
// while parsing (the parser indexes the pattern without bounds checks)
// is reported as an "Unexpected end of pattern." parse exception.
public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {
    // reset per-parse state
    this.pattern = pattern;
    ptr = 0;
    num_groups = 0;
    caps.Clear ();
    refs.Clear ();

    try {
        RegularExpression root = new RegularExpression ();
        ParseGroup (root, options, null);
        ResolveReferences ();

        root.GroupCount = num_groups;
        return root;
    }
    catch (IndexOutOfRangeException) {
        throw NewParseException ("Unexpected end of pattern.");
    }
}
// Builds the name -> number table for the groups of the most recently
// parsed pattern.
//
// "0" always maps to 0. Each named group maps its name to its number
// (the first occurrence of a name wins). Every remaining group number
// maps from its decimal string form.
//
// The numeric pass runs over 1..num_groups inclusive. The previous
// bound (i < caps.Count) skipped the highest-numbered unnamed group,
// e.g. "(a)(b)" produced no entry for "2". caps.Count is also the
// wrong limit when several groups share one name, because those share
// a single number.
public IDictionary GetMapping () {
    Hashtable mapping = new Hashtable ();
    Hashtable named_numbers = new Hashtable ();

    mapping.Add ("0", 0);

    foreach (CapturingGroup group in caps) {
        if (group.Name != null && !mapping.Contains (group.Name)) {
            mapping.Add (group.Name, group.Number);
            named_numbers [group.Number] = group.Number;
        }
    }

    for (int i = 1; i <= num_groups; i++) {
        // numbers already reachable through a name get no numeric key
        if (!named_numbers.Contains (i))
            mapping.Add (i.ToString (), i);
    }

    return mapping;
}
- // private methods
// Parses the body of a group into `group`, starting at ptr.
//
//   group     - receives the parsed content; a RegularExpression here
//               marks the top level, which ends at the end of the
//               pattern instead of at ')'
//   options   - options in effect; inline option constructs such as
//               "(?i)" change the local copy for the rest of the group
//   assertion - when non-null the group is the body of a conditional:
//               '|' separates its true and false branches and the
//               assertion itself is what gets appended to `group`
//
// Consecutive literal characters are gathered into `literal` and
// emitted as a single Literal expression whenever something else has
// to be appended.
//
// Throws a parse exception for a dangling quantifier, unbalanced
// parentheses or more than two branches in a conditional.
private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {
    bool is_top_level = group is RegularExpression;

    Alternation alternation = null;
    string literal = null;

    Group current = new Group ();
    Expression expr = null;
    bool closed = false;

    while (true) {
        ConsumeWhitespace (IsIgnorePatternWhitespace (options));
        if (ptr >= pattern.Length)
            break;

        // (1) Parse for Expressions

        char ch = pattern[ptr ++];

        switch (ch) {
        case '^': {
            Position pos =
                IsMultiline (options) ? Position.StartOfLine : Position.Start;

            expr = new PositionAssertion (pos);
            break;
        }

        case '$': {
            Position pos =
                IsMultiline (options) ? Position.EndOfLine : Position.End;

            expr = new PositionAssertion (pos);
            break;
        }

        case '.': {
            Category cat =
                IsSingleline (options) ? Category.AnySingleline : Category.Any;

            expr = new CharacterClass (cat, false);
            break;
        }

        case '\\': {
            int c = ParseEscape ();
            if (c >= 0)
                ch = (char)c;
            else {
                expr = ParseSpecial (options);

                if (expr == null)
                    ch = pattern[ptr ++];       // default escape
            }
            break;
        }

        case '[': {
            expr = ParseCharacterClass (options);
            break;
        }

        case '(': {
            bool ignore = IsIgnoreCase (options);
            expr = ParseGroupingConstruct (ref options);
            if (expr == null) {
                // The construct produced no expression (comment or
                // inline option change). If case sensitivity changed,
                // the pending literal was read under the OLD setting
                // and must be emitted with that flag, not the new one.
                if (literal != null && IsIgnoreCase (options) != ignore) {
                    current.AppendExpression (new Literal (literal, ignore));
                    literal = null;
                }

                continue;
            }
            break;
        }

        case ')': {
            closed = true;
            goto EndOfGroup;
        }

        case '|': {
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            if (assertion != null) {
                if (assertion.TrueExpression == null)
                    assertion.TrueExpression = current;
                else if (assertion.FalseExpression == null)
                    assertion.FalseExpression = current;
                else
                    throw NewParseException ("Too many | in (?()|).");
            }
            else {
                if (alternation == null)
                    alternation = new Alternation ();

                alternation.AddAlternative (current);
            }

            current = new Group ();
            continue;
        }

        case '*': case '+': case '?': {
            // a quantifier with nothing in front of it
            throw NewParseException ("Bad quantifier.");
        }

        default:
            break;      // literal character
        }

        ConsumeWhitespace (IsIgnorePatternWhitespace (options));

        // (2) Check for Repetitions

        if (ptr < pattern.Length) {
            char k = pattern[ptr];

            if (k == '?' || k == '*' || k == '+' || k == '{') {
                ++ ptr;

                int min = 0, max = 0;
                bool lazy = false;

                switch (k) {
                case '?': min = 0; max = 1; break;
                case '*': min = 0; max = 0xffff; break;
                case '+': min = 1; max = 0xffff; break;
                case '{': ParseRepetitionBounds (out min, out max, options); break;
                }

                ConsumeWhitespace (IsIgnorePatternWhitespace (options));
                if (ptr < pattern.Length && pattern[ptr] == '?') {
                    ++ ptr;
                    lazy = true;
                }

                Repetition repetition = new Repetition (min, max, lazy);

                // a quantifier after a plain character applies to that
                // character only, not to the literal gathered so far
                if (expr == null)
                    repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
                else
                    repetition.Expression = expr;

                expr = repetition;
            }
        }

        // (3) Append Expression and/or Literal

        if (expr == null) {
            if (literal == null)
                literal = "";
            literal += ch;
        }
        else {
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            current.AppendExpression (expr);
            expr = null;
        }

        if (is_top_level && ptr >= pattern.Length)
            goto EndOfGroup;
    }

EndOfGroup:
    if (is_top_level && closed)
        throw NewParseException ("Too many )'s.");
    if (!is_top_level && !closed)
        throw NewParseException ("Not enough )'s.");

    // clean up literals and alternations

    if (literal != null)
        current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));

    if (assertion != null) {
        if (assertion.TrueExpression == null)
            assertion.TrueExpression = current;
        else
            assertion.FalseExpression = current;

        group.AppendExpression (assertion);
    }
    else if (alternation != null) {
        alternation.AddAlternative (current);
        group.AppendExpression (alternation);
    }
    else
        group.AppendExpression (current);
}
// Parses what follows an opening '(' (ptr is just past it) and returns
// the resulting expression, or null when the construct contributes no
// expression of its own: an inline option change "(?imnsx-imnsx)",
// which updates `options` for the caller, or a "(?#...)" comment.
//
// Handles plain groups, (?:...), (?>...), option groups, lookahead /
// lookbehind assertions, named and balancing groups, conditionals and
// comments. Capturing groups are registered in caps before their body
// is parsed; name-based references are recorded in refs.
private Expression ParseGroupingConstruct (ref RegexOptions options) {
    // plain "(...)": capturing unless ExplicitCapture is in effect
    if (pattern[ptr] != '?') {
        Group plain;
        if (IsExplicitCapture (options)) {
            plain = new Group ();
        }
        else {
            plain = new CapturingGroup ();
            caps.Add (plain);
        }

        ParseGroup (plain, options, null);
        return plain;
    }

    ++ ptr;     // skip the '?'

    switch (pattern[ptr]) {
    case ':': {                                             // non-capturing group
        ++ ptr;
        Group inner = new Group ();
        ParseGroup (inner, options, null);
        return inner;
    }

    case '>': {                                             // non-backtracking group
        ++ ptr;
        Group atomic = new NonBacktrackingGroup ();
        ParseGroup (atomic, options, null);
        return atomic;
    }

    case 'i': case 'm': case 'n':
    case 's': case 'x': case '-': {                         // options
        RegexOptions changed = options;
        ParseOptions (ref changed, false);
        if (pattern[ptr] == '-') {
            ++ ptr;
            ParseOptions (ref changed, true);
        }

        char terminator = pattern[ptr];
        if (terminator == ':') {                            // options apply to the child group only
            ++ ptr;
            Group scoped = new Group ();
            ParseGroup (scoped, changed, null);
            return scoped;
        }
        if (terminator == ')') {                            // options apply to the rest of the enclosing group
            ++ ptr;
            options = changed;
            return null;
        }

        throw NewParseException ("Bad options");
    }

    case '<': case '=': case '!': {                         // lookahead/lookbehind
        ExpressionAssertion look = new ExpressionAssertion ();
        if (!ParseAssertionType (look))
            goto case '\'';                                 // it's a (?<name> ) construct

        Group body = new Group ();
        ParseGroup (body, options, null);
        look.TestExpression = body;
        return look;
    }

    case '\'': {                                            // named/balancing group
        char delim = (pattern[ptr] == '<') ? '>' : '\'';
        ++ ptr;

        string name = ParseName ();
        char next = pattern[ptr];

        if (next == delim) {
            // capturing group
            if (name == null)
                throw NewParseException ("Bad group name.");

            ++ ptr;
            CapturingGroup cap = new CapturingGroup ();
            cap.Name = name;
            caps.Add (cap);

            ParseGroup (cap, options, null);
            return cap;
        }

        if (next != '-')
            throw NewParseException ("Bad group name.");

        // balancing group
        ++ ptr;
        string balance_name = ParseName ();
        if (balance_name == null || pattern[ptr] != delim)
            throw NewParseException ("Bad balancing group name.");

        ++ ptr;
        BalancingGroup bal = new BalancingGroup ();
        bal.Name = name;

        if (bal.IsNamed)
            caps.Add (bal);

        refs.Add (bal, balance_name);

        ParseGroup (bal, options, null);
        return bal;
    }

    case '(': {                                             // expression/capture test
        ++ ptr;
        int mark = ptr;
        string name = ParseName ();

        Assertion condition;
        if (name != null && pattern[ptr] == ')') {          // capture test
            ++ ptr;
            condition = new CaptureAssertion ();
            refs.Add (condition, name);
        }
        else {                                              // expression test
            // FIXME MS implementation doesn't seem to
            // implement this version of (?(x) ...)

            ptr = mark;
            ExpressionAssertion test_asn = new ExpressionAssertion ();
            if (pattern[ptr] == '?') {
                ++ ptr;
                if (!ParseAssertionType (test_asn))
                    throw NewParseException ("Bad conditional.");
            }
            else {
                test_asn.Negate = false;
                test_asn.Reverse = false;
            }

            Group test = new Group ();
            ParseGroup (test, options, null);
            test_asn.TestExpression = test;
            condition = test_asn;
        }

        Group branches = new Group ();
        ParseGroup (branches, options, condition);
        return branches;
    }

    case '#': {                                             // comment
        ++ ptr;
        for (;;) {
            char c = pattern[ptr ++];
            if (c == ')')
                break;
            if (ptr >= pattern.Length)
                throw NewParseException ("Unterminated (?#...) comment.");
        }
        return null;
    }

    default:                                                // error
        throw NewParseException ("Bad grouping construct.");
    }
}
// Recognizes the lookaround marker at ptr: "=" / "!" (lookahead) or
// "<=" / "<!" (lookbehind). On success sets assertion.Negate (true for
// '!') and assertion.Reverse (true for lookbehind), advances ptr past
// the marker and returns true. Otherwise returns false without
// touching the assertion or ptr.
private bool ParseAssertionType (ExpressionAssertion assertion) {
    bool behind = pattern[ptr] == '<';
    char kind = behind ? pattern[ptr + 1] : pattern[ptr];

    if (kind != '=' && kind != '!')
        return false;

    assertion.Negate = (kind == '!');
    assertion.Reverse = behind;
    ptr += behind ? 2 : 1;
    return true;
}
// Consumes a run of inline option letters (i, m, n, s, x) at ptr and
// applies each to `options`: the corresponding flag is cleared when
// negate is true and set otherwise. Stops, without consuming it, at
// the first character that is not an option letter.
private void ParseOptions (ref RegexOptions options, bool negate) {
    while (true) {
        RegexOptions flag;

        switch (pattern[ptr]) {
        case 'i': flag = RegexOptions.IgnoreCase; break;
        case 'm': flag = RegexOptions.Multiline; break;
        case 'n': flag = RegexOptions.ExplicitCapture; break;
        case 's': flag = RegexOptions.Singleline; break;
        case 'x': flag = RegexOptions.IgnorePatternWhitespace; break;
        default:
            return;
        }

        if (negate)
            options &= ~flag;
        else
            options |= flag;

        ++ ptr;
    }
}
// Parses a bracketed character class; ptr is just past the opening '['.
//
// Supports a leading '^' for negation, a leading ']' as a literal,
// single characters, escapes, x-y ranges and the category escapes
// \d \w \s \p{..} and their upper-case negations.
//
// A '-' is a range operator only when it directly follows a single
// character that can start a range (last >= 0). In every other
// position (first in the class, after a completed range or after a
// category escape) it is a literal '-'. Previously any '-' armed the
// range flag, so "[-a-z]" was read as the four characters a, -, z, -
// instead of '-' plus the range a-z.
//
// Throws a parse exception for a reversed range or a missing ']'.
private Expression ParseCharacterClass (RegexOptions options) {
    bool negate = false;
    if (pattern[ptr] == '^') {
        negate = true;
        ++ ptr;
    }

    bool ecma = IsECMAScript (options);
    CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));

    // a ']' directly after the opening is a literal
    if (pattern[ptr] == ']') {
        cls.AddCharacter (']');
        ++ ptr;
    }

    int c = -1;
    int last = -1;          // last single character added, or -1 if it cannot start a range
    bool range = false;     // true after "x-" while waiting for the upper bound
    bool closed = false;

    while (ptr < pattern.Length) {
        c = pattern[ptr ++];

        if (c == ']') {
            closed = true;
            break;
        }

        if (c == '-' && last >= 0 && !range) {
            range = true;
            continue;
        }

        if (c == '\\') {
            c = ParseEscape ();
            if (c < 0) {
                // didn't recognize escape
                c = pattern[ptr ++];

                bool is_category = true;
                switch (c) {
                case 'b':
                    c = '\b';
                    is_category = false;
                    break;

                case 'd': case 'D':
                    cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
                    break;

                case 'w': case 'W':
                    cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
                    break;

                case 's': case 'S':
                    cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
                    break;

                case 'p': case 'P':
                    cls.AddCategory (ParseUnicodeCategory (), c == 'P');   // ignore ecma
                    break;

                default:        // add escaped character
                    is_category = false;
                    break;
                }

                if (is_category) {
                    // a category cannot end a range: the pending '-'
                    // is taken literally
                    if (range) {
                        cls.AddCharacter ('-');
                        range = false;
                    }
                    last = -1;
                    continue;
                }
            }
        }

        if (range) {
            // range is only ever set while last >= 0
            if (c < last)
                throw NewParseException ("[x-y] range in reverse order.");

            cls.AddRange ((char)last, (char)c);
            range = false;
            last = -1;
        }
        else {
            cls.AddCharacter ((char)c);
            last = c;
        }
    }

    if (!closed)
        throw NewParseException ("Unterminated [] set.");

    // trailing "x-]": the '-' is a literal
    if (range)
        cls.AddCharacter ('-');

    return cls;
}
// Parses the inside of a {n}, {n,}, {n,m} or {,m} quantifier; ptr is
// just past the '{'. On return ptr is just past the '}'.
//
//   min - lower bound; 0 when the first number is omitted
//   max - upper bound; 0xffff (the parser's "unbounded" marker) when
//         the second number is omitted
//
// Fixes over the previous version:
//   * an explicit upper bound of 0 ("{0}", "{0,0}") is honored; the
//     old test `m > 0` turned it into 0xffff, so "a{0}" behaved like
//     "a*"
//   * an omitted lower bound no longer leaks the internal -1 sentinel
//     into min
//
// Throws a parse exception for malformed syntax, a bound >= 0xffff or
// a lower bound greater than the upper bound.
private void ParseRepetitionBounds (out int min, out int max, RegexOptions options) {
    bool skip_ws = IsIgnorePatternWhitespace (options);
    int n, m;       // -1 means "not given"

    /* check syntax */

    ConsumeWhitespace (skip_ws);

    if (pattern[ptr] == ',') {
        n = -1;
    } else {
        n = ParseNumber (10, 1, 0);
        ConsumeWhitespace (skip_ws);
    }

    switch (pattern[ptr ++]) {
    case '}':
        m = n;
        break;
    case ',':
        ConsumeWhitespace (skip_ws);
        m = ParseNumber (10, 1, 0);
        ConsumeWhitespace (skip_ws);
        if (pattern[ptr ++] != '}')
            throw NewParseException ("Illegal {x,y} - bad value of y.");
        break;
    default:
        throw NewParseException ("Illegal {x,y}");
    }

    /* check bounds and ordering */

    if (n >= 0xffff || m >= 0xffff)
        throw NewParseException ("Illegal {x, y} - maximum of 65535.");
    if (m >= 0 && m < n)
        throw NewParseException ("Illegal {x, y} with x > y.");

    /* assign min and max */

    min = (n < 0) ? 0 : n;
    max = (m >= 0) ? m : 0xffff;
}
// Parses the "{Name}" part of a \p{Name} or \P{Name} escape at ptr and
// returns the category that CategoryUtils.CategoryFromName reports for
// the name. Throws a parse exception when a brace or the name is
// missing, or when the name maps to Category.None.
private Category ParseUnicodeCategory () {
    const string incomplete = "Incomplete \\p{X} character escape.";

    if (pattern[ptr ++] != '{')
        throw NewParseException (incomplete);

    string name = ParseName (pattern, ref ptr);
    if (name == null)
        throw NewParseException (incomplete);

    Category category = CategoryUtils.CategoryFromName (name);
    if (category == Category.None)
        throw NewParseException ("Unknown property '" + name + "'.");

    if (pattern[ptr ++] != '}')
        throw NewParseException (incomplete);

    return category;
}
// Parses an escape that stands for an expression rather than a single
// character; ptr is just past the backslash. Handles the category
// escapes (\d \w \s \p and their negations), the position escapes
// (\A \Z \z \G \b \B), numbered backreferences (\1 ...) and named
// backreferences (\k<name>, \k'name').
//
// Returns the expression, or null with ptr restored to where it was on
// entry when the escape is not one of these.
private Expression ParseSpecial (RegexOptions options) {
    int start = ptr;
    bool ecma = IsECMAScript (options);
    Expression result = null;

    char code = pattern[ptr ++];
    switch (code) {

    // categories (upper case is the negated form)

    case 'd': case 'D':
        result = new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, code == 'D');
        break;

    case 'w': case 'W':
        result = new CharacterClass (ecma ? Category.EcmaWord : Category.Word, code == 'W');
        break;

    case 's': case 'S':
        result = new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, code == 'S');
        break;

    case 'p': case 'P':
        // this is odd - ECMAScript isn't supposed to support Unicode,
        // yet \p{..} compiles and runs under the MS implementation
        // identically to canonical mode. That's why the value of ecma
        // is ignored here.
        result = new CharacterClass (ParseUnicodeCategory (), code == 'P');
        break;

    // positions

    case 'A': result = new PositionAssertion (Position.StartOfString); break;
    case 'Z': result = new PositionAssertion (Position.End); break;
    case 'z': result = new PositionAssertion (Position.EndOfString); break;
    case 'G': result = new PositionAssertion (Position.StartOfScan); break;
    case 'b': result = new PositionAssertion (Position.Boundary); break;
    case 'B': result = new PositionAssertion (Position.NonBoundary); break;

    // references

    case '1': case '2': case '3': case '4': case '5':
    case '6': case '7': case '8': case '9': {
        -- ptr;     // re-read the first digit as part of the number
        int number = ParseNumber (10, 1, 0);
        if (number < 0) {
            ptr = start;
            return null;
        }

        // FIXME test if number is within number of assigned groups
        // this may present a problem for right-to-left matching

        Reference numbered = new Reference (IsIgnoreCase (options));
        refs.Add (numbered, number.ToString ());
        result = numbered;
        break;
    }

    case 'k': {
        char delim = pattern[ptr ++];
        if (delim == '<')
            delim = '>';
        else if (delim != '\'')
            throw NewParseException ("Malformed \\k<...> named backreference.");

        string name = ParseName ();
        if (name == null || pattern[ptr] != delim)
            throw NewParseException ("Malformed \\k<...> named backreference.");

        ++ ptr;
        Reference named = new Reference (IsIgnoreCase (options));
        refs.Add (named, name);
        result = named;
        break;
    }

    default:
        break;
    }

    if (result == null)
        ptr = start;

    return result;
}
// Parses a single-character escape; ptr is just past the backslash.
//
// Returns the code of the character the escape stands for and leaves
// ptr after the escape, or returns -1 with ptr restored to its value
// on entry when the escape is not a character escape (the caller then
// tries ParseSpecial or takes the next character literally).
//
// Throws ArgumentException when the backslash is the last character of
// the pattern, when \x / \u lack hex digits or when \c is followed by
// an invalid control letter.
//
// Fix: \cX used to read pattern[p], which is the 'c' itself, so every
// control escape failed with "Unrecognized control character.". It
// now reads the character after the 'c' and maps '@'..'_' to 0..31, so
// \cA is 1 (the old `c - 'A'` offset would have given 0). Lower-case
// letters are accepted as their upper-case equivalents.
private int ParseEscape () {
    int p = ptr;
    int c;

    if (p >= pattern.Length)
        throw new ArgumentException (
                String.Format ("Parsing \"{0}\" - Illegal \\ at end of " +
                        "pattern.", pattern), pattern);

    switch (pattern[ptr ++]) {

    // standard escapes (except \b)

    case 'a': return '\u0007';
    case 't': return '\u0009';
    case 'r': return '\u000d';
    case 'v': return '\u000b';
    case 'f': return '\u000c';
    case 'n': return '\u000a';
    case 'e': return '\u001b';
    case '\\': return '\\';

    // character codes

    case '0':
        int prevptr = ptr;
        int result = ParseOctal (pattern, ref ptr);
        // a lone "\0" with no further octal digits is NUL
        if (result == -1 && prevptr == ptr)
            return 0;

        return result;

    case 'x':
        c = ParseHex (pattern, ref ptr, 2);
        if (c < 0)
            throw NewParseException ("Insufficient hex digits");

        return c;

    case 'u':
        c = ParseHex (pattern, ref ptr, 4);
        if (c < 0)
            throw NewParseException ("Insufficient hex digits");

        return c;

    // control characters

    case 'c':
        c = pattern[ptr ++];
        if (c >= 'a' && c <= 'z')
            c -= 'a' - 'A';
        if (c >= '@' && c <= '_')
            return c - '@';
        else
            throw NewParseException ("Unrecognized control character.");

    // unknown escape

    default:
        ptr = p;
        return -1;
    }
}
// Instance convenience: parses a group name from the current pattern
// at the current position, advancing ptr.
private string ParseName () {
    string name = Parser.ParseName (pattern, ref ptr);
    return name;
}
// Tells whether c may appear in a group name: connector punctuation
// (such as '_') and letters or digits are accepted, except that
// modifier letters are rejected.
private static bool IsNameChar (char c) {
    switch (Char.GetUnicodeCategory (c)) {
    case UnicodeCategory.ModifierLetter:
        return false;
    case UnicodeCategory.ConnectorPunctuation:
        return true;
    default:
        return Char.IsLetterOrDigit (c);
    }
}
-
// Instance convenience: parses a number in base b, with the given
// digit-count limits, from the current pattern at the current
// position, advancing ptr.
private int ParseNumber (int b, int min, int max) {
    int value = Parser.ParseNumber (pattern, ref ptr, b, min, max);
    return value;
}
// Instance convenience: parses a decimal number from the current
// pattern at the current position, advancing ptr.
private int ParseDecimal () {
    int value = Parser.ParseDecimal (pattern, ref ptr);
    return value;
}
// Returns the value of c as a digit in base b, or -1 if c is not a
// valid digit in that base. Only bases 8, 10 and 16 are supported;
// any other base yields -1. The parameter n is not used.
private static int ParseDigit (char c, int b, int n) {
    if (b != 8 && b != 10 && b != 16)
        return -1;

    int value;
    if (c >= '0' && c <= '9')
        value = c - '0';
    else if (c >= 'a' && c <= 'f')
        value = 10 + c - 'a';
    else if (c >= 'A' && c <= 'F')
        value = 10 + c - 'A';
    else
        return -1;

    // a hex-range value is only a digit if it fits the requested base
    return (value < b) ? value : -1;
}
// Advances ptr past text that carries no meaning in the pattern:
//   * "(?#...)" comments - always
//   * "#" comments running to the end of the line - only when ignore
//   * whitespace - only when ignore
// Stops at the first character that is none of these, or at the end of
// the pattern. The scan for the ')' that closes a "(?#" comment is not
// bounds-checked.
private void ConsumeWhitespace (bool ignore) {
    while (ptr < pattern.Length) {
        char c = pattern[ptr];

        if (c == '(') {
            // only a "(?#" followed by at least one more character
            // counts as a comment
            if (ptr + 3 >= pattern.Length)
                return;
            if (pattern[ptr + 1] != '?' || pattern[ptr + 2] != '#')
                return;

            ptr += 3;
            char skipped;
            do {
                skipped = pattern[ptr ++];
            } while (skipped != ')');
        }
        else if (!ignore) {
            return;
        }
        else if (c == '#') {
            // skip to just past the end of the line
            while (ptr < pattern.Length) {
                if (pattern[ptr ++] == '\n')
                    break;
            }
        }
        else if (Char.IsWhiteSpace (c)) {
            while (ptr < pattern.Length && Char.IsWhiteSpace (pattern[ptr]))
                ++ ptr;
        }
        else {
            return;
        }
    }
}
// Returns pattern with its backslash escapes replaced by the
// characters they denote. Resets the parser's pattern and position.
//
// Fix: ParseEscape expects ptr to be just past the backslash, but the
// old loop called it with ptr still ON the backslash. Every escape was
// therefore decoded as the "\\" escape and the character after the
// backslash was skipped, so "\\n" came back as "\" instead of a
// newline. The backslash is now consumed first, and an escape that
// ParseEscape does not recognize (return value -1) yields the escaped
// character itself, e.g. "\." becomes ".".
//
// The result is built in a StringBuilder to avoid quadratic string
// concatenation.
private string ParseString (string pattern) {
    this.pattern = pattern;
    this.ptr = 0;

    System.Text.StringBuilder result = new System.Text.StringBuilder (pattern.Length);
    while (ptr < pattern.Length) {
        int c = pattern[ptr ++];
        if (c == '\\') {
            c = ParseEscape ();
            if (c < 0) {
                // not a character escape: ParseEscape left ptr on the
                // escaped character, which is taken literally
                c = pattern[ptr ++];
            }
        }
        result.Append ((char)c);
    }

    return result.ToString ();
}
// Assigns numbers to all capturing groups and binds every recorded
// reference to its target group.
//
// Unnamed groups are numbered first, from 1, in the order they appear
// in caps; named groups follow. Groups sharing a name (or a name equal
// to an already used key) share one number. num_groups is incremented
// once per distinct number handed out. Each entry in refs is then
// looked up by name; an unknown name raises a parse exception.
private void ResolveReferences () {
    Hashtable groups_by_key = new Hashtable ();
    int next_number = 1;

    // number unnamed groups

    foreach (CapturingGroup unnamed in caps) {
        if (unnamed.Name != null)
            continue;

        groups_by_key.Add (next_number.ToString (), unnamed);
        unnamed.Number = next_number;
        ++ next_number;
        ++ num_groups;
    }

    // number named groups

    foreach (CapturingGroup named in caps) {
        if (named.Name == null)
            continue;

        if (groups_by_key.Contains (named.Name)) {
            // same key seen before: share its number
            CapturingGroup first = (CapturingGroup)groups_by_key[named.Name];
            named.Number = first.Number;
            continue;
        }

        groups_by_key.Add (named.Name, named);
        named.Number = next_number;
        ++ next_number;
        ++ num_groups;
    }

    // resolve references

    foreach (Expression expr in refs.Keys) {
        string name = (string)refs[expr];
        if (!groups_by_key.Contains (name)) {
            throw NewParseException ("Reference to undefined group " +
                (Char.IsDigit (name[0]) ? "number " : "name ") +
                name);
        }

        CapturingGroup target = (CapturingGroup)groups_by_key[name];
        if (expr is Reference) {
            ((Reference)expr).CapturingGroup = target;
        }
        else if (expr is CaptureAssertion) {
            ((CaptureAssertion)expr).CapturingGroup = target;
        }
        else if (expr is BalancingGroup) {
            ((BalancingGroup)expr).Balance = target;
        }
    }
}
- // flag helper functions
// True when the IgnoreCase flag is set in options.
private static bool IsIgnoreCase (RegexOptions options) {
    RegexOptions masked = options & RegexOptions.IgnoreCase;
    return masked != 0;
}
// True when the Multiline flag is set in options.
private static bool IsMultiline (RegexOptions options) {
    RegexOptions masked = options & RegexOptions.Multiline;
    return masked != 0;
}
// True when the ExplicitCapture flag is set in options.
private static bool IsExplicitCapture (RegexOptions options) {
    RegexOptions masked = options & RegexOptions.ExplicitCapture;
    return masked != 0;
}
-
// True when the Singleline flag is set in options.
private static bool IsSingleline (RegexOptions options) {
    RegexOptions masked = options & RegexOptions.Singleline;
    return masked != 0;
}
// True when the IgnorePatternWhitespace flag is set in options.
private static bool IsIgnorePatternWhitespace (RegexOptions options) {
    RegexOptions masked = options & RegexOptions.IgnorePatternWhitespace;
    return masked != 0;
}
// True when the RightToLeft flag is set in options.
private static bool IsRightToLeft (RegexOptions options) {
    RegexOptions masked = options & RegexOptions.RightToLeft;
    return masked != 0;
}
// True when the ECMAScript flag is set in options.
private static bool IsECMAScript (RegexOptions options) {
    RegexOptions masked = options & RegexOptions.ECMAScript;
    return masked != 0;
}
- // exception creation
// Builds (does not throw) the ArgumentException used for all parse
// errors. The message is prefixed with the pattern being parsed; the
// pattern text is also passed as the exception's second constructor
// argument.
private ArgumentException NewParseException (string msg) {
    string text = String.Concat ("parsing \"", pattern, "\" - ", msg);
    return new ArgumentException (text, pattern);
}
- private string pattern;
- private int ptr;
- private ArrayList caps;
- private Hashtable refs;
- private int num_groups;
- }
- }
|