syntax.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: syntax.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. using System;
  9. using System.Collections;
  10. namespace System.Text.RegularExpressions.Syntax {
  11. // collection classes
  12. class ExpressionCollection : CollectionBase {
  13. public void Add (Expression e) {
  14. List.Add (e);
  15. }
  16. public Expression this[int i] {
  17. get { return (Expression)List[i]; }
  18. set { List[i] = value; }
  19. }
  20. protected override void OnValidate (object o) {
  21. // allow null elements
  22. }
  23. }
  24. // abstract classes
  25. abstract class Expression {
  26. public abstract void Compile (ICompiler cmp, bool reverse);
  27. public abstract void GetWidth (out int min, out int max);
  28. public int GetFixedWidth () {
  29. int min, max;
  30. GetWidth (out min, out max);
  31. if (min == max)
  32. return min;
  33. return -1;
  34. }
  35. public virtual AnchorInfo GetAnchorInfo () {
  36. return new AnchorInfo (this, GetFixedWidth ());
  37. }
  38. public virtual bool IsComplex () {
  39. return true;
  40. }
  41. }
  42. // composite expressions
  43. abstract class CompositeExpression : Expression {
  44. public CompositeExpression () {
  45. expressions = new ExpressionCollection ();
  46. }
  47. protected ExpressionCollection Expressions {
  48. get { return expressions; }
  49. }
  50. protected void GetWidth (out int min, out int max, int count) {
  51. min = Int32.MaxValue;
  52. max = 0;
  53. bool empty = true;
  54. for (int i = 0; i < count; ++ i) {
  55. Expression e = Expressions[i];
  56. if (e == null)
  57. continue;
  58. empty = false;
  59. int a, b;
  60. e.GetWidth (out a, out b);
  61. if (a < min) min = a;
  62. if (b > max) max = b;
  63. }
  64. if (empty)
  65. min = max = 0;
  66. }
  67. private ExpressionCollection expressions;
  68. }
  69. // groups
  70. class Group : CompositeExpression {
  71. public Group () {
  72. }
  73. public Expression Expression {
  74. get { return Expressions[0]; }
  75. set { Expressions[0] = value; }
  76. }
  77. public void AppendExpression (Expression e) {
  78. Expressions.Add (e);
  79. }
  80. public override void Compile (ICompiler cmp, bool reverse) {
  81. int count = Expressions.Count;
  82. for (int i = 0; i < count; ++ i) {
  83. Expression e;
  84. if (reverse)
  85. e = Expressions[count - i - 1];
  86. else
  87. e = Expressions[i];
  88. e.Compile (cmp, reverse);
  89. }
  90. }
  91. public override void GetWidth (out int min, out int max) {
  92. min = 0;
  93. max = 0;
  94. foreach (Expression e in Expressions) {
  95. int a, b;
  96. e.GetWidth (out a, out b);
  97. min += a;
  98. if (max == Int32.MaxValue || b == Int32.MaxValue)
  99. max = Int32.MaxValue;
  100. else
  101. max += b;
  102. }
  103. }
  104. public override AnchorInfo GetAnchorInfo () {
  105. int ptr;
  106. int width = GetFixedWidth ();
  107. ArrayList infos = new ArrayList ();
  108. IntervalCollection segments = new IntervalCollection ();
  109. // accumulate segments
  110. ptr = 0;
  111. foreach (Expression e in Expressions) {
  112. AnchorInfo info = e.GetAnchorInfo ();
  113. infos.Add (info);
  114. if (info.IsPosition)
  115. return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
  116. if (info.IsSubstring)
  117. segments.Add (info.GetInterval (ptr));
  118. if (info.IsUnknownWidth)
  119. break;
  120. ptr += info.Width;
  121. }
  122. // normalize and find the longest segment
  123. segments.Normalize ();
  124. Interval longest = Interval.Empty;
  125. foreach (Interval segment in segments) {
  126. if (segment.Size > longest.Size)
  127. longest = segment;
  128. }
  129. // now chain the substrings that made this segment together
  130. if (!longest.IsEmpty) {
  131. string str = "";
  132. bool ignore = false;
  133. ptr = 0;
  134. foreach (AnchorInfo info in infos) {
  135. if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
  136. str += info.Substring; // TODO mark subexpressions
  137. ignore |= info.IgnoreCase;
  138. }
  139. if (info.IsUnknownWidth)
  140. break;
  141. ptr += info.Width;
  142. }
  143. return new AnchorInfo (this, longest.low, width, str, ignore);
  144. }
  145. return new AnchorInfo (this, width);
  146. }
  147. public override bool IsComplex () {
  148. bool comp = false;
  149. foreach (Expression e in Expressions) {
  150. comp |= e.IsComplex ();
  151. }
  152. return comp | GetFixedWidth () <= 0;
  153. }
  154. }
  155. class RegularExpression : Group {
  156. public RegularExpression () {
  157. group_count = 0;
  158. }
  159. public int GroupCount {
  160. get { return group_count; }
  161. set { group_count = value; }
  162. }
  163. public override void Compile (ICompiler cmp, bool reverse) {
  164. // info block
  165. int min, max;
  166. GetWidth (out min, out max);
  167. cmp.EmitInfo (group_count, min, max);
  168. // anchoring expression
  169. AnchorInfo info = GetAnchorInfo ();
  170. if (reverse)
  171. info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
  172. LinkRef pattern = cmp.NewLink ();
  173. cmp.EmitAnchor (info.Offset, pattern);
  174. if (info.IsPosition)
  175. cmp.EmitPosition (info.Position);
  176. else if (info.IsSubstring)
  177. cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
  178. cmp.EmitTrue ();
  179. // pattern
  180. cmp.ResolveLink (pattern);
  181. base.Compile (cmp, reverse);
  182. cmp.EmitTrue ();
  183. }
  184. private int group_count;
  185. }
  186. class CapturingGroup : Group {
  187. public CapturingGroup () {
  188. this.gid = 0;
  189. this.name = null;
  190. }
  191. public int Number {
  192. get { return gid; }
  193. set { gid = value; }
  194. }
  195. public string Name {
  196. get { return name; }
  197. set { name = value; }
  198. }
  199. public bool IsNamed {
  200. get { return name != null; }
  201. }
  202. public override void Compile (ICompiler cmp, bool reverse) {
  203. cmp.EmitOpen (gid);
  204. base.Compile (cmp, reverse);
  205. cmp.EmitClose (gid);
  206. }
  207. public override bool IsComplex () {
  208. return true;
  209. }
  210. private int gid;
  211. private string name;
  212. }
  213. class BalancingGroup : CapturingGroup {
  214. public BalancingGroup () {
  215. this.balance = null;
  216. }
  217. public CapturingGroup Balance {
  218. get { return balance; }
  219. set { balance = value; }
  220. }
  221. public override void Compile (ICompiler cmp, bool reverse) {
  222. // can't invoke Group.Compile from here :(
  223. // so I'll just repeat the code
  224. int count = Expressions.Count;
  225. for (int i = 0; i < count; ++ i) {
  226. Expression e;
  227. if (reverse)
  228. e = Expressions[count - i - 1];
  229. else
  230. e = Expressions[i];
  231. e.Compile (cmp, reverse);
  232. }
  233. cmp.EmitBalance (this.Number, balance.Number);
  234. }
  235. private CapturingGroup balance;
  236. }
  237. class NonBacktrackingGroup : Group {
  238. public NonBacktrackingGroup () {
  239. }
  240. public override void Compile (ICompiler cmp, bool reverse) {
  241. LinkRef tail = cmp.NewLink ();
  242. cmp.EmitSub (tail);
  243. base.Compile (cmp, reverse);
  244. cmp.EmitTrue ();
  245. cmp.ResolveLink (tail);
  246. }
  247. public override bool IsComplex () {
  248. return true;
  249. }
  250. }
  251. // repetition
  252. class Repetition : CompositeExpression {
  253. public Repetition (int min, int max, bool lazy) {
  254. Expressions.Add (null);
  255. this.min = min;
  256. this.max = max;
  257. this.lazy = lazy;
  258. }
  259. public Expression Expression {
  260. get { return Expressions[0]; }
  261. set { Expressions[0] = value; }
  262. }
  263. public int Minimum {
  264. get { return min; }
  265. set { min = value; }
  266. }
  267. public int Maximum {
  268. get { return max; }
  269. set { max = value; }
  270. }
  271. public bool Lazy {
  272. get { return lazy; }
  273. set { lazy = value; }
  274. }
  275. public override void Compile (ICompiler cmp, bool reverse) {
  276. if (Expression.IsComplex ()) {
  277. LinkRef until = cmp.NewLink ();
  278. cmp.EmitRepeat (min, max, lazy, until);
  279. Expression.Compile (cmp, reverse);
  280. cmp.EmitUntil (until);
  281. }
  282. else {
  283. LinkRef tail = cmp.NewLink ();
  284. cmp.EmitFastRepeat (min, max, lazy, tail);
  285. Expression.Compile (cmp, reverse);
  286. cmp.EmitTrue ();
  287. cmp.ResolveLink (tail);
  288. }
  289. }
  290. public override void GetWidth (out int min, out int max) {
  291. Expression.GetWidth (out min, out max);
  292. min = min * this.min;
  293. if (max == Int32.MaxValue || this.max == 0xffff)
  294. max = Int32.MaxValue;
  295. else
  296. max = max * this.max;
  297. }
  298. public override AnchorInfo GetAnchorInfo () {
  299. int width = GetFixedWidth ();
  300. if (Minimum == 0)
  301. return new AnchorInfo (this, width);
  302. AnchorInfo info = Expression.GetAnchorInfo ();
  303. if (info.IsPosition)
  304. return new AnchorInfo (this, info.Offset, width, info.Position);
  305. if (info.IsSubstring) {
  306. if (info.IsComplete) {
  307. string str = "";
  308. for (int i = 0; i < Minimum; ++ i)
  309. str += info.Substring;
  310. return new AnchorInfo (this, 0, width, str, info.IgnoreCase);
  311. }
  312. return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
  313. }
  314. return new AnchorInfo (this, width);
  315. }
  316. private int min, max;
  317. private bool lazy;
  318. }
  319. // assertions
  320. abstract class Assertion : CompositeExpression {
  321. public Assertion () {
  322. Expressions.Add (null); // true expression
  323. Expressions.Add (null); // false expression
  324. }
  325. public Expression TrueExpression {
  326. get { return Expressions[0]; }
  327. set { Expressions[0] = value; }
  328. }
  329. public Expression FalseExpression {
  330. get { return Expressions[1]; }
  331. set { Expressions[1] = value; }
  332. }
  333. public override void GetWidth (out int min, out int max) {
  334. GetWidth (out min, out max, 2);
  335. if (TrueExpression == null || FalseExpression == null)
  336. min = 0;
  337. }
  338. }
  339. class CaptureAssertion : Assertion {
  340. public CaptureAssertion () {
  341. }
  342. public CapturingGroup CapturingGroup {
  343. get { return group; }
  344. set { group = value; }
  345. }
  346. public override void Compile (ICompiler cmp, bool reverse) {
  347. int gid = group.Number;
  348. LinkRef tail = cmp.NewLink ();
  349. if (FalseExpression == null) {
  350. // IfDefined :1
  351. // <yes_exp>
  352. // 1: <tail>
  353. cmp.EmitIfDefined (gid, tail);
  354. TrueExpression.Compile (cmp, reverse);
  355. }
  356. else {
  357. // IfDefined :1
  358. // <yes_expr>
  359. // Jump :2
  360. // 1: <no_expr>
  361. // 2: <tail>
  362. LinkRef false_expr = cmp.NewLink ();
  363. cmp.EmitIfDefined (gid, false_expr);
  364. TrueExpression.Compile (cmp, reverse);
  365. cmp.EmitJump (tail);
  366. cmp.ResolveLink (false_expr);
  367. FalseExpression.Compile (cmp, reverse);
  368. }
  369. cmp.ResolveLink (tail);
  370. }
  371. public override bool IsComplex () {
  372. bool comp = false;
  373. if (TrueExpression != null)
  374. comp |= TrueExpression.IsComplex ();
  375. if (FalseExpression != null)
  376. comp |= FalseExpression.IsComplex ();
  377. return comp | GetFixedWidth () <= 0;
  378. }
  379. private CapturingGroup group;
  380. }
  381. class ExpressionAssertion : Assertion {
  382. public ExpressionAssertion () {
  383. Expressions.Add (null); // test expression
  384. }
  385. public bool Reverse {
  386. get { return reverse; }
  387. set { reverse = value; }
  388. }
  389. public bool Negate {
  390. get { return negate; }
  391. set { negate = value; }
  392. }
  393. public Expression TestExpression {
  394. get { return Expressions[2]; }
  395. set { Expressions[2] = value; }
  396. }
  397. public override void Compile (ICompiler cmp, bool reverse) {
  398. LinkRef true_expr = cmp.NewLink ();
  399. LinkRef false_expr = cmp.NewLink ();
  400. // test op: positive / negative
  401. if (!negate)
  402. cmp.EmitTest (true_expr, false_expr);
  403. else
  404. cmp.EmitTest (false_expr, true_expr);
  405. // test expression: lookahead / lookbehind
  406. TestExpression.Compile (cmp, reverse ^ this.reverse);
  407. cmp.EmitTrue ();
  408. // target expressions
  409. if (TrueExpression == null) { // (?= ...)
  410. // Test :1, :2
  411. // <test_expr>
  412. // :2 False
  413. // :1 <tail>
  414. cmp.ResolveLink (false_expr);
  415. cmp.EmitFalse ();
  416. cmp.ResolveLink (true_expr);
  417. }
  418. else {
  419. cmp.ResolveLink (true_expr);
  420. TrueExpression.Compile (cmp, reverse);
  421. if (FalseExpression == null) { // (?(...) ...)
  422. // Test :1, :2
  423. // <test_expr>
  424. // :1 <yes_expr>
  425. // :2 <tail>
  426. cmp.ResolveLink (false_expr);
  427. }
  428. else { // (?(...) ... | ...)
  429. // Test :1, :2
  430. // <test_expr>
  431. // :1 <yes_expr>
  432. // Jump :3
  433. // :2 <no_expr>
  434. // :3 <tail>
  435. LinkRef tail = cmp.NewLink ();
  436. cmp.EmitJump (tail);
  437. cmp.ResolveLink (false_expr);
  438. FalseExpression.Compile (cmp, reverse);
  439. cmp.ResolveLink (tail);
  440. }
  441. }
  442. }
  443. private bool reverse, negate;
  444. }
  445. // alternation
  446. class Alternation : CompositeExpression {
  447. public Alternation () {
  448. }
  449. public ExpressionCollection Alternatives {
  450. get { return Expressions; }
  451. }
  452. public void AddAlternative (Expression e) {
  453. Alternatives.Add (e);
  454. }
  455. public override void Compile (ICompiler cmp, bool reverse) {
  456. LinkRef next = cmp.NewLink ();
  457. LinkRef tail = cmp.NewLink ();
  458. foreach (Expression e in Alternatives) {
  459. cmp.EmitBranch (next);
  460. e.Compile (cmp, reverse);
  461. cmp.EmitJump (tail);
  462. cmp.ResolveLink (next);
  463. }
  464. cmp.EmitFalse ();
  465. cmp.ResolveLink (tail);
  466. }
  467. public override void GetWidth (out int min, out int max) {
  468. GetWidth (out min, out max, Alternatives.Count);
  469. }
  470. public override bool IsComplex () {
  471. bool comp = false;
  472. foreach (Expression e in Alternatives) {
  473. comp |= e.IsComplex ();
  474. }
  475. return comp | GetFixedWidth () <= 0;
  476. }
  477. }
  478. // terminal expressions
  479. class Literal : Expression {
  480. public Literal (string str, bool ignore) {
  481. this.str = str;
  482. this.ignore = ignore;
  483. }
  484. public string String {
  485. get { return str; }
  486. set { str = value; }
  487. }
  488. public bool IgnoreCase {
  489. get { return ignore; }
  490. set { ignore = value; }
  491. }
  492. public override void Compile (ICompiler cmp, bool reverse) {
  493. if (str.Length == 0)
  494. return;
  495. if (str.Length == 1)
  496. cmp.EmitCharacter (str[0], false, ignore, reverse);
  497. else
  498. cmp.EmitString (str, ignore, reverse);
  499. }
  500. public override void GetWidth (out int min, out int max) {
  501. min = max = str.Length;
  502. }
  503. public override AnchorInfo GetAnchorInfo () {
  504. return new AnchorInfo (this, 0, str.Length, str, ignore);
  505. }
  506. public override bool IsComplex () {
  507. return false;
  508. }
  509. private string str;
  510. private bool ignore;
  511. }
  512. class PositionAssertion : Expression {
  513. public PositionAssertion (Position pos) {
  514. this.pos = pos;
  515. }
  516. public Position Position {
  517. get { return pos; }
  518. set { pos = value; }
  519. }
  520. public override void Compile (ICompiler cmp, bool reverse) {
  521. cmp.EmitPosition (pos);
  522. }
  523. public override void GetWidth (out int min, out int max) {
  524. min = max = 0;
  525. }
  526. public override bool IsComplex () {
  527. return false;
  528. }
  529. public override AnchorInfo GetAnchorInfo () {
  530. switch (pos) {
  531. case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
  532. return new AnchorInfo (this, 0, 0, pos);
  533. default:
  534. return new AnchorInfo (this, 0);
  535. }
  536. }
  537. private Position pos;
  538. }
  539. class Reference : Expression {
  540. public Reference (bool ignore) {
  541. this.ignore = ignore;
  542. }
  543. public CapturingGroup CapturingGroup {
  544. get { return group; }
  545. set { group = value; }
  546. }
  547. public bool IgnoreCase {
  548. get { return ignore; }
  549. set { ignore = value; }
  550. }
  551. public override void Compile (ICompiler cmp, bool reverse) {
  552. cmp.EmitReference (group.Number, ignore, reverse);
  553. }
  554. public override void GetWidth (out int min, out int max) {
  555. //group.GetWidth (out min, out max);
  556. // TODO set width to referenced group for non-cyclical references
  557. min = 0;
  558. max = Int32.MaxValue;
  559. }
  560. public override bool IsComplex () {
  561. return true; // FIXME incorporate cyclic check
  562. }
  563. private CapturingGroup group;
  564. private bool ignore;
  565. }
  566. class CharacterClass : Expression {
  567. public CharacterClass (bool negate, bool ignore) {
  568. this.negate = negate;
  569. this.ignore = ignore;
  570. intervals = new IntervalCollection ();
  571. // initialize pos/neg category arrays
  572. Array cat_values = Enum.GetValues (typeof (Category));
  573. int cat_size = (int)(Category)cat_values.GetValue (cat_values.Length - 1) + 1;
  574. pos_cats = new bool[cat_size];
  575. neg_cats = new bool[cat_size];
  576. for (int i = 0; i < cat_size; ++ i) {
  577. pos_cats[i] = false;
  578. neg_cats[i] = false;
  579. }
  580. }
  581. public CharacterClass (Category cat, bool negate) : this (false, false) {
  582. this.AddCategory (cat, negate);
  583. }
  584. public bool Negate {
  585. get { return negate; }
  586. set { negate = value; }
  587. }
  588. public bool IgnoreCase {
  589. get { return ignore; }
  590. set { ignore = value; }
  591. }
  592. public void AddCategory (Category cat, bool negate) {
  593. int n = (int)cat;
  594. if (negate) {
  595. if (pos_cats[n])
  596. pos_cats[n] = false;
  597. neg_cats[n] = true;
  598. }
  599. else {
  600. if (neg_cats[n])
  601. neg_cats[n] = false;
  602. pos_cats[n] = true;
  603. }
  604. }
  605. public void AddCharacter (char c) {
  606. intervals.Add (new Interval (c, c));
  607. }
  608. public void AddRange (char lo, char hi) {
  609. intervals.Add (new Interval (lo, hi));
  610. }
  611. public override void Compile (ICompiler cmp, bool reverse) {
  612. // create the meta-collection
  613. IntervalCollection meta =
  614. intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
  615. // count ops
  616. int count = meta.Count;
  617. for (int i = 0; i < pos_cats.Length; ++ i) {
  618. if (pos_cats[i]) ++ count;
  619. if (neg_cats[i]) ++ count;
  620. }
  621. if (count == 0)
  622. return;
  623. // emit in op for |meta| > 1
  624. LinkRef tail = cmp.NewLink ();
  625. if (count > 1)
  626. cmp.EmitIn (tail);
  627. // emit categories
  628. for (int i = 0; i < pos_cats.Length; ++ i) {
  629. if (pos_cats[i])
  630. cmp.EmitCategory ((Category)i, negate, reverse);
  631. else if (neg_cats[i])
  632. cmp.EmitCategory ((Category)i, !negate, reverse);
  633. }
  634. // emit character/range/sets from meta-collection
  635. foreach (Interval a in meta) {
  636. if (a.IsDiscontiguous) { // Set
  637. BitArray bits = new BitArray (a.Size);
  638. foreach (Interval b in intervals) {
  639. if (a.Contains (b)) {
  640. for (int i = b.low; i <= b.high; ++ i)
  641. bits[i - a.low] = true;
  642. }
  643. }
  644. cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
  645. }
  646. else if (a.IsSingleton) // Character
  647. cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
  648. else // Range
  649. cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
  650. }
  651. // finish up
  652. if (count > 1) {
  653. if (negate)
  654. cmp.EmitTrue ();
  655. else
  656. cmp.EmitFalse ();
  657. cmp.ResolveLink (tail);
  658. }
  659. }
  660. public override void GetWidth (out int min, out int max) {
  661. min = max = 1;
  662. }
  663. public override bool IsComplex () {
  664. return false;
  665. }
  666. // private
  667. private static double GetIntervalCost (Interval i) {
  668. // use op length as cost metric (=> optimize for space)
  669. if (i.IsDiscontiguous)
  670. return 3 + ((i.Size + 0xf) >> 4); // Set
  671. else if (i.IsSingleton)
  672. return 2; // Character
  673. else
  674. return 3; // Range
  675. }
  676. private bool negate, ignore;
  677. private bool[] pos_cats, neg_cats;
  678. private IntervalCollection intervals;
  679. }
  680. class AnchorInfo {
  681. private Expression expr;
  682. private Position pos;
  683. private int offset;
  684. private string str;
  685. private int width;
  686. private bool ignore;
  687. public AnchorInfo (Expression expr, int width) {
  688. this.expr = expr;
  689. this.offset = 0;
  690. this.width = width;
  691. this.str = null;
  692. this.ignore = false;
  693. this.pos = Position.Any;
  694. }
  695. public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
  696. this.expr = expr;
  697. this.offset = offset;
  698. this.width = width;
  699. this.str = ignore ? str.ToLower () : str;
  700. this.ignore = ignore;
  701. this.pos = Position.Any;
  702. }
  703. public AnchorInfo (Expression expr, int offset, int width, Position pos) {
  704. this.expr = expr;
  705. this.offset = offset;
  706. this.width = width;
  707. this.pos = pos;
  708. this.str = null;
  709. this.ignore = false;
  710. }
  711. public Expression Expression {
  712. get { return expr; }
  713. }
  714. public int Offset {
  715. get { return offset; }
  716. }
  717. public int Width {
  718. get { return width; }
  719. }
  720. public int Length {
  721. get { return (str != null) ? str.Length : 0; }
  722. }
  723. public bool IsUnknownWidth {
  724. get { return width < 0; }
  725. }
  726. public bool IsComplete {
  727. get { return Length == Width; }
  728. }
  729. public string Substring {
  730. get { return str; }
  731. }
  732. public bool IgnoreCase {
  733. get { return ignore; }
  734. }
  735. public Position Position {
  736. get { return pos; }
  737. }
  738. public bool IsSubstring {
  739. get { return str != null; }
  740. }
  741. public bool IsPosition {
  742. get { return pos != Position.Any; }
  743. }
  744. public Interval GetInterval () {
  745. return GetInterval (0);
  746. }
  747. public Interval GetInterval (int start) {
  748. if (!IsSubstring)
  749. return Interval.Empty;
  750. return new Interval (start + Offset, start + Offset + Length - 1);
  751. }
  752. }
  753. }