syntax.cs 22 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: syntax.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. using System;
  9. using System.Collections;
  10. namespace System.Text.RegularExpressions.Syntax {
  11. // collection classes
  12. class ExpressionCollection : CollectionBase {
  13. public void Add (Expression e) {
  14. List.Add (e);
  15. }
  16. public Expression this[int i] {
  17. get { return (Expression)List[i]; }
  18. set { List[i] = value; }
  19. }
  20. protected override void OnValidate (object o) {
  21. // allow null elements
  22. }
  23. }
  24. // abstract classes
  25. abstract class Expression {
  26. public abstract void Compile (ICompiler cmp, bool reverse);
  27. public abstract void GetWidth (out int min, out int max);
  28. public int GetFixedWidth () {
  29. int min, max;
  30. GetWidth (out min, out max);
  31. if (min == max)
  32. return min;
  33. return -1;
  34. }
  35. public virtual AnchorInfo GetAnchorInfo () {
  36. return new AnchorInfo (this, GetFixedWidth ());
  37. }
  38. public virtual bool IsComplex () {
  39. return true;
  40. }
  41. }
  42. // composite expressions
  43. abstract class CompositeExpression : Expression {
  44. public CompositeExpression () {
  45. expressions = new ExpressionCollection ();
  46. }
  47. protected ExpressionCollection Expressions {
  48. get { return expressions; }
  49. }
  50. protected void GetWidth (out int min, out int max, int count) {
  51. min = Int32.MaxValue;
  52. max = 0;
  53. bool empty = true;
  54. for (int i = 0; i < count; ++ i) {
  55. Expression e = Expressions[i];
  56. if (e == null)
  57. continue;
  58. empty = false;
  59. int a, b;
  60. e.GetWidth (out a, out b);
  61. if (a < min) min = a;
  62. if (b > max) max = b;
  63. }
  64. if (empty)
  65. min = max = 0;
  66. }
  67. private ExpressionCollection expressions;
  68. }
  69. // groups
  70. class Group : CompositeExpression {
  71. public Group () {
  72. }
  73. public Expression Expression {
  74. get { return Expressions[0]; }
  75. set { Expressions[0] = value; }
  76. }
  77. public void AppendExpression (Expression e) {
  78. Expressions.Add (e);
  79. }
  80. public override void Compile (ICompiler cmp, bool reverse) {
  81. int count = Expressions.Count;
  82. for (int i = 0; i < count; ++ i) {
  83. Expression e;
  84. if (reverse)
  85. e = Expressions[count - i - 1];
  86. else
  87. e = Expressions[i];
  88. e.Compile (cmp, reverse);
  89. }
  90. }
  91. public override void GetWidth (out int min, out int max) {
  92. min = 0;
  93. max = 0;
  94. foreach (Expression e in Expressions) {
  95. int a, b;
  96. e.GetWidth (out a, out b);
  97. min += a;
  98. if (max == Int32.MaxValue || b == Int32.MaxValue)
  99. max = Int32.MaxValue;
  100. else
  101. max += b;
  102. }
  103. }
  104. public override AnchorInfo GetAnchorInfo () {
  105. int ptr;
  106. int width = GetFixedWidth ();
  107. ArrayList infos = new ArrayList ();
  108. IntervalCollection segments = new IntervalCollection ();
  109. // accumulate segments
  110. ptr = 0;
  111. foreach (Expression e in Expressions) {
  112. AnchorInfo info = e.GetAnchorInfo ();
  113. infos.Add (info);
  114. if (info.IsPosition)
  115. return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
  116. if (info.IsSubstring)
  117. segments.Add (info.GetInterval (ptr));
  118. if (info.IsUnknownWidth)
  119. break;
  120. ptr += info.Width;
  121. }
  122. // normalize and find the longest segment
  123. segments.Normalize ();
  124. Interval longest = Interval.Empty;
  125. foreach (Interval segment in segments) {
  126. if (segment.Size > longest.Size)
  127. longest = segment;
  128. }
  129. // now chain the substrings that made this segment together
  130. if (!longest.IsEmpty) {
  131. string str = "";
  132. bool ignore = false;
  133. ptr = 0;
  134. foreach (AnchorInfo info in infos) {
  135. if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
  136. str += info.Substring; // TODO mark subexpressions
  137. ignore |= info.IgnoreCase;
  138. }
  139. if (info.IsUnknownWidth)
  140. break;
  141. ptr += info.Width;
  142. }
  143. return new AnchorInfo (this, longest.low, width, str, ignore);
  144. }
  145. return new AnchorInfo (this, width);
  146. }
  147. public override bool IsComplex () {
  148. bool comp = false;
  149. foreach (Expression e in Expressions) {
  150. comp |= e.IsComplex ();
  151. }
  152. return comp | GetFixedWidth () <= 0;
  153. }
  154. }
  155. class RegularExpression : Group {
  156. public RegularExpression () {
  157. group_count = 0;
  158. }
  159. public int GroupCount {
  160. get { return group_count; }
  161. set { group_count = value; }
  162. }
  163. public override void Compile (ICompiler cmp, bool reverse) {
  164. // info block
  165. int min, max;
  166. GetWidth (out min, out max);
  167. cmp.EmitInfo (group_count, min, max);
  168. // anchoring expression
  169. AnchorInfo info = GetAnchorInfo ();
  170. if (reverse)
  171. info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
  172. LinkRef pattern = cmp.NewLink ();
  173. cmp.EmitAnchor (info.Offset, pattern);
  174. if (info.IsPosition)
  175. cmp.EmitPosition (info.Position);
  176. else if (info.IsSubstring)
  177. cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
  178. cmp.EmitTrue ();
  179. // pattern
  180. cmp.ResolveLink (pattern);
  181. base.Compile (cmp, reverse);
  182. cmp.EmitTrue ();
  183. }
  184. private int group_count;
  185. }
  186. class CapturingGroup : Group {
  187. public CapturingGroup () {
  188. this.gid = 0;
  189. this.name = null;
  190. }
  191. public int Number {
  192. get { return gid; }
  193. set { gid = value; }
  194. }
  195. public string Name {
  196. get { return name; }
  197. set { name = value; }
  198. }
  199. public bool IsNamed {
  200. get { return name != null; }
  201. }
  202. public override void Compile (ICompiler cmp, bool reverse) {
  203. cmp.EmitOpen (gid);
  204. base.Compile (cmp, reverse);
  205. cmp.EmitClose (gid);
  206. }
  207. public override bool IsComplex () {
  208. return true;
  209. }
  210. private int gid;
  211. private string name;
  212. }
  213. class BalancingGroup : CapturingGroup {
  214. public BalancingGroup () {
  215. this.balance = null;
  216. }
  217. public CapturingGroup Balance {
  218. get { return balance; }
  219. set { balance = value; }
  220. }
  221. public override void Compile (ICompiler cmp, bool reverse) {
  222. // can't invoke Group.Compile from here :(
  223. // so I'll just repeat the code
  224. int count = Expressions.Count;
  225. for (int i = 0; i < count; ++ i) {
  226. Expression e;
  227. if (reverse)
  228. e = Expressions[count - i - 1];
  229. else
  230. e = Expressions[i];
  231. e.Compile (cmp, reverse);
  232. }
  233. cmp.EmitBalance (this.Number, balance.Number);
  234. }
  235. private CapturingGroup balance;
  236. }
  237. class NonBacktrackingGroup : Group {
  238. public NonBacktrackingGroup () {
  239. }
  240. public override void Compile (ICompiler cmp, bool reverse) {
  241. LinkRef tail = cmp.NewLink ();
  242. cmp.EmitSub (tail);
  243. base.Compile (cmp, reverse);
  244. cmp.EmitTrue ();
  245. cmp.ResolveLink (tail);
  246. }
  247. public override bool IsComplex () {
  248. return true;
  249. }
  250. }
  251. // repetition
  252. class Repetition : CompositeExpression {
  253. public Repetition (int min, int max, bool lazy) {
  254. Expressions.Add (null);
  255. this.min = min;
  256. this.max = max;
  257. this.lazy = lazy;
  258. }
  259. public Expression Expression {
  260. get { return Expressions[0]; }
  261. set { Expressions[0] = value; }
  262. }
  263. public int Minimum {
  264. get { return min; }
  265. set { min = value; }
  266. }
  267. public int Maximum {
  268. get { return max; }
  269. set { max = value; }
  270. }
  271. public bool Lazy {
  272. get { return lazy; }
  273. set { lazy = value; }
  274. }
  275. public override void Compile (ICompiler cmp, bool reverse) {
  276. if (Expression.IsComplex ()) {
  277. LinkRef until = cmp.NewLink ();
  278. cmp.EmitRepeat (min, max, lazy, until);
  279. Expression.Compile (cmp, reverse);
  280. cmp.EmitUntil (until);
  281. }
  282. else {
  283. LinkRef tail = cmp.NewLink ();
  284. cmp.EmitFastRepeat (min, max, lazy, tail);
  285. Expression.Compile (cmp, reverse);
  286. cmp.EmitTrue ();
  287. cmp.ResolveLink (tail);
  288. }
  289. }
  290. public override void GetWidth (out int min, out int max) {
  291. Expression.GetWidth (out min, out max);
  292. min = min * this.min;
  293. if (max == Int32.MaxValue || this.max == 0xffff)
  294. max = Int32.MaxValue;
  295. else
  296. max = max * this.max;
  297. }
  298. public override AnchorInfo GetAnchorInfo () {
  299. int width = GetFixedWidth ();
  300. if (Minimum == 0)
  301. return new AnchorInfo (this, width);
  302. AnchorInfo info = Expression.GetAnchorInfo ();
  303. if (info.IsPosition)
  304. return new AnchorInfo (this, info.Offset, width, info.Position);
  305. if (info.IsSubstring) {
  306. if (info.IsComplete) {
  307. string str = "";
  308. for (int i = 0; i < Minimum; ++ i)
  309. str += info.Substring;
  310. return new AnchorInfo (this, 0, width, str, info.IgnoreCase);
  311. }
  312. return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
  313. }
  314. return new AnchorInfo (this, width);
  315. }
  316. private int min, max;
  317. private bool lazy;
  318. }
  319. // assertions
  320. abstract class Assertion : CompositeExpression {
  321. public Assertion () {
  322. Expressions.Add (null); // true expression
  323. Expressions.Add (null); // false expression
  324. }
  325. public Expression TrueExpression {
  326. get { return Expressions[0]; }
  327. set { Expressions[0] = value; }
  328. }
  329. public Expression FalseExpression {
  330. get { return Expressions[1]; }
  331. set { Expressions[1] = value; }
  332. }
  333. public override void GetWidth (out int min, out int max) {
  334. GetWidth (out min, out max, 2);
  335. if (TrueExpression == null || FalseExpression == null)
  336. min = 0;
  337. }
  338. }
  339. class CaptureAssertion : Assertion {
  340. public CaptureAssertion () {
  341. }
  342. public CapturingGroup CapturingGroup {
  343. get { return group; }
  344. set { group = value; }
  345. }
  346. public override void Compile (ICompiler cmp, bool reverse) {
  347. int gid = group.Number;
  348. LinkRef tail = cmp.NewLink ();
  349. if (FalseExpression == null) {
  350. // IfDefined :1
  351. // <yes_exp>
  352. // 1: <tail>
  353. cmp.EmitIfDefined (gid, tail);
  354. TrueExpression.Compile (cmp, reverse);
  355. }
  356. else {
  357. // IfDefined :1
  358. // <yes_expr>
  359. // Jump :2
  360. // 1: <no_expr>
  361. // 2: <tail>
  362. LinkRef false_expr = cmp.NewLink ();
  363. cmp.EmitIfDefined (gid, false_expr);
  364. TrueExpression.Compile (cmp, reverse);
  365. cmp.EmitJump (tail);
  366. cmp.ResolveLink (false_expr);
  367. FalseExpression.Compile (cmp, reverse);
  368. }
  369. cmp.ResolveLink (tail);
  370. }
  371. public override bool IsComplex () {
  372. bool comp = false;
  373. if (TrueExpression != null)
  374. comp |= TrueExpression.IsComplex ();
  375. if (FalseExpression != null)
  376. comp |= FalseExpression.IsComplex ();
  377. return comp | GetFixedWidth () <= 0;
  378. }
  379. private CapturingGroup group;
  380. }
  381. class ExpressionAssertion : Assertion {
  382. public ExpressionAssertion () {
  383. Expressions.Add (null); // test expression
  384. }
  385. public bool Reverse {
  386. get { return reverse; }
  387. set { reverse = value; }
  388. }
  389. public bool Negate {
  390. get { return negate; }
  391. set { negate = value; }
  392. }
  393. public Expression TestExpression {
  394. get { return Expressions[2]; }
  395. set { Expressions[2] = value; }
  396. }
  397. public override void Compile (ICompiler cmp, bool reverse) {
  398. LinkRef true_expr = cmp.NewLink ();
  399. LinkRef false_expr = cmp.NewLink ();
  400. // test op: positive / negative
  401. if (!negate)
  402. cmp.EmitTest (true_expr, false_expr);
  403. else
  404. cmp.EmitTest (false_expr, true_expr);
  405. // test expression: lookahead / lookbehind
  406. TestExpression.Compile (cmp, reverse ^ this.reverse);
  407. cmp.EmitTrue ();
  408. // target expressions
  409. if (TrueExpression == null) { // (?= ...)
  410. // Test :1, :2
  411. // <test_expr>
  412. // :2 False
  413. // :1 <tail>
  414. cmp.ResolveLink (false_expr);
  415. cmp.EmitFalse ();
  416. cmp.ResolveLink (true_expr);
  417. }
  418. else {
  419. cmp.ResolveLink (true_expr);
  420. TrueExpression.Compile (cmp, reverse);
  421. if (FalseExpression == null) { // (?(...) ...)
  422. // Test :1, :2
  423. // <test_expr>
  424. // :1 <yes_expr>
  425. // :2 <tail>
  426. cmp.ResolveLink (false_expr);
  427. }
  428. else { // (?(...) ... | ...)
  429. // Test :1, :2
  430. // <test_expr>
  431. // :1 <yes_expr>
  432. // Jump :3
  433. // :2 <no_expr>
  434. // :3 <tail>
  435. LinkRef tail = cmp.NewLink ();
  436. cmp.EmitJump (tail);
  437. cmp.ResolveLink (false_expr);
  438. FalseExpression.Compile (cmp, reverse);
  439. cmp.ResolveLink (tail);
  440. }
  441. }
  442. }
  443. private bool reverse, negate;
  444. }
  445. // alternation
  446. class Alternation : CompositeExpression {
  447. public Alternation () {
  448. }
  449. public ExpressionCollection Alternatives {
  450. get { return Expressions; }
  451. }
  452. public void AddAlternative (Expression e) {
  453. Alternatives.Add (e);
  454. }
  455. public override void Compile (ICompiler cmp, bool reverse) {
  456. LinkRef next = cmp.NewLink ();
  457. LinkRef tail = cmp.NewLink ();
  458. foreach (Expression e in Alternatives) {
  459. cmp.EmitBranch (next);
  460. e.Compile (cmp, reverse);
  461. cmp.EmitJump (tail);
  462. cmp.ResolveLink (next);
  463. }
  464. cmp.EmitFalse ();
  465. cmp.ResolveLink (tail);
  466. }
  467. public override void GetWidth (out int min, out int max) {
  468. GetWidth (out min, out max, Alternatives.Count);
  469. }
  470. public override bool IsComplex () {
  471. bool comp = false;
  472. foreach (Expression e in Alternatives) {
  473. comp |= e.IsComplex ();
  474. }
  475. return comp | GetFixedWidth () <= 0;
  476. }
  477. }
  478. // terminal expressions
  479. class Literal : Expression {
  480. public Literal (string str, bool ignore) {
  481. this.str = str;
  482. this.ignore = ignore;
  483. }
  484. public string String {
  485. get { return str; }
  486. set { str = value; }
  487. }
  488. public bool IgnoreCase {
  489. get { return ignore; }
  490. set { ignore = value; }
  491. }
  492. public override void Compile (ICompiler cmp, bool reverse) {
  493. if (str.Length == 0)
  494. return;
  495. if (str.Length == 1)
  496. cmp.EmitCharacter (str[0], false, ignore, reverse);
  497. else
  498. cmp.EmitString (str, ignore, reverse);
  499. }
  500. public override void GetWidth (out int min, out int max) {
  501. min = max = str.Length;
  502. }
  503. public override AnchorInfo GetAnchorInfo () {
  504. return new AnchorInfo (this, 0, str.Length, str, ignore);
  505. }
  506. public override bool IsComplex () {
  507. return false;
  508. }
  509. private string str;
  510. private bool ignore;
  511. }
  512. class PositionAssertion : Expression {
  513. public PositionAssertion (Position pos) {
  514. this.pos = pos;
  515. }
  516. public Position Position {
  517. get { return pos; }
  518. set { pos = value; }
  519. }
  520. public override void Compile (ICompiler cmp, bool reverse) {
  521. cmp.EmitPosition (pos);
  522. }
  523. public override void GetWidth (out int min, out int max) {
  524. min = max = 0;
  525. }
  526. public override bool IsComplex () {
  527. return false;
  528. }
  529. public override AnchorInfo GetAnchorInfo () {
  530. switch (pos) {
  531. case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
  532. return new AnchorInfo (this, 0, 0, pos);
  533. default:
  534. return new AnchorInfo (this, 0);
  535. }
  536. }
  537. private Position pos;
  538. }
  539. class Reference : Expression {
  540. public Reference (bool ignore) {
  541. this.ignore = ignore;
  542. }
  543. public CapturingGroup CapturingGroup {
  544. get { return group; }
  545. set { group = value; }
  546. }
  547. public bool IgnoreCase {
  548. get { return ignore; }
  549. set { ignore = value; }
  550. }
  551. public override void Compile (ICompiler cmp, bool reverse) {
  552. cmp.EmitReference (group.Number, ignore, reverse);
  553. }
  554. public override void GetWidth (out int min, out int max) {
  555. //group.GetWidth (out min, out max);
  556. // TODO set width to referenced group for non-cyclical references
  557. min = 0;
  558. max = Int32.MaxValue;
  559. }
  560. public override bool IsComplex () {
  561. return true; // FIXME incorporate cyclic check
  562. }
  563. private CapturingGroup group;
  564. private bool ignore;
  565. }
  566. class CharacterClass : Expression {
  567. public CharacterClass (bool negate, bool ignore) {
  568. this.negate = negate;
  569. this.ignore = ignore;
  570. intervals = new IntervalCollection ();
  571. // initialize pos/neg category arrays
  572. Array cat_values = Enum.GetValues (typeof (Category));
  573. int cat_size = (int)(Category)cat_values.GetValue (cat_values.Length - 1) + 1;
  574. pos_cats = new bool[cat_size];
  575. neg_cats = new bool[cat_size];
  576. for (int i = 0; i < cat_size; ++ i) {
  577. pos_cats[i] = false;
  578. neg_cats[i] = false;
  579. }
  580. }
  581. public CharacterClass (Category cat, bool negate) : this (false, false) {
  582. this.AddCategory (cat, negate);
  583. }
  584. public bool Negate {
  585. get { return negate; }
  586. set { negate = value; }
  587. }
  588. public bool IgnoreCase {
  589. get { return ignore; }
  590. set { ignore = value; }
  591. }
  592. public void AddCategory (Category cat, bool negate) {
  593. int n = (int)cat;
  594. if (negate) {
  595. if (pos_cats[n])
  596. pos_cats[n] = false;
  597. neg_cats[n] = true;
  598. }
  599. else {
  600. if (neg_cats[n])
  601. neg_cats[n] = false;
  602. pos_cats[n] = true;
  603. }
  604. }
  605. public void AddCharacter (char c) {
  606. // TODO: this is certainly not the most efficient way of doing things
  607. // TODO: but at least it produces correct results.
  608. AddRange (c, c);
  609. }
  610. public void AddRange (char lo, char hi) {
  611. Interval new_interval = new Interval (lo, hi);
  612. // ignore case is on. we must make sure our interval does not
  613. // use upper case. if it does, we must normalize the upper case
  614. // characters into lower case.
  615. if (ignore) {
  616. if (upper_case_characters.Intersects (new_interval)) {
  617. Interval partial_new_interval;
  618. if (new_interval.low < upper_case_characters.low) {
  619. partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case,
  620. new_interval.high + distance_between_upper_and_lower_case);
  621. new_interval.high = upper_case_characters.low - 1;
  622. }
  623. else {
  624. partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case,
  625. upper_case_characters.high + distance_between_upper_and_lower_case);
  626. new_interval.low = upper_case_characters.high + 1;
  627. }
  628. intervals.Add (partial_new_interval);
  629. }
  630. else if (upper_case_characters.Contains (new_interval)) {
  631. new_interval.high += distance_between_upper_and_lower_case;
  632. new_interval.low += distance_between_upper_and_lower_case;
  633. }
  634. }
  635. intervals.Add (new_interval);
  636. }
  637. public override void Compile (ICompiler cmp, bool reverse) {
  638. // create the meta-collection
  639. IntervalCollection meta =
  640. intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
  641. // count ops
  642. int count = meta.Count;
  643. for (int i = 0; i < pos_cats.Length; ++ i) {
  644. if (pos_cats[i]) ++ count;
  645. if (neg_cats[i]) ++ count;
  646. }
  647. if (count == 0)
  648. return;
  649. // emit in op for |meta| > 1
  650. LinkRef tail = cmp.NewLink ();
  651. if (count > 1)
  652. cmp.EmitIn (tail);
  653. // emit categories
  654. for (int i = 0; i < pos_cats.Length; ++ i) {
  655. if (pos_cats[i])
  656. cmp.EmitCategory ((Category)i, negate, reverse);
  657. else if (neg_cats[i])
  658. cmp.EmitCategory ((Category)i, !negate, reverse);
  659. }
  660. // emit character/range/sets from meta-collection
  661. foreach (Interval a in meta) {
  662. if (a.IsDiscontiguous) { // Set
  663. BitArray bits = new BitArray (a.Size);
  664. foreach (Interval b in intervals) {
  665. if (a.Contains (b)) {
  666. for (int i = b.low; i <= b.high; ++ i)
  667. bits[i - a.low] = true;
  668. }
  669. }
  670. cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
  671. }
  672. else if (a.IsSingleton) // Character
  673. cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
  674. else // Range
  675. cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
  676. }
  677. // finish up
  678. if (count > 1) {
  679. if (negate)
  680. cmp.EmitTrue ();
  681. else
  682. cmp.EmitFalse ();
  683. cmp.ResolveLink (tail);
  684. }
  685. }
  686. public override void GetWidth (out int min, out int max) {
  687. min = max = 1;
  688. }
  689. public override bool IsComplex () {
  690. return false;
  691. }
  692. // private
  693. private static double GetIntervalCost (Interval i) {
  694. // use op length as cost metric (=> optimize for space)
  695. if (i.IsDiscontiguous)
  696. return 3 + ((i.Size + 0xf) >> 4); // Set
  697. else if (i.IsSingleton)
  698. return 2; // Character
  699. else
  700. return 3; // Range
  701. }
  702. private static Interval upper_case_characters = new Interval ((char)65, (char)90);
  703. private const int distance_between_upper_and_lower_case = 32;
  704. private bool negate, ignore;
  705. private bool[] pos_cats, neg_cats;
  706. private IntervalCollection intervals;
  707. }
  708. class AnchorInfo {
  709. private Expression expr;
  710. private Position pos;
  711. private int offset;
  712. private string str;
  713. private int width;
  714. private bool ignore;
  715. public AnchorInfo (Expression expr, int width) {
  716. this.expr = expr;
  717. this.offset = 0;
  718. this.width = width;
  719. this.str = null;
  720. this.ignore = false;
  721. this.pos = Position.Any;
  722. }
  723. public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
  724. this.expr = expr;
  725. this.offset = offset;
  726. this.width = width;
  727. this.str = ignore ? str.ToLower () : str;
  728. this.ignore = ignore;
  729. this.pos = Position.Any;
  730. }
  731. public AnchorInfo (Expression expr, int offset, int width, Position pos) {
  732. this.expr = expr;
  733. this.offset = offset;
  734. this.width = width;
  735. this.pos = pos;
  736. this.str = null;
  737. this.ignore = false;
  738. }
  739. public Expression Expression {
  740. get { return expr; }
  741. }
  742. public int Offset {
  743. get { return offset; }
  744. }
  745. public int Width {
  746. get { return width; }
  747. }
  748. public int Length {
  749. get { return (str != null) ? str.Length : 0; }
  750. }
  751. public bool IsUnknownWidth {
  752. get { return width < 0; }
  753. }
  754. public bool IsComplete {
  755. get { return Length == Width; }
  756. }
  757. public string Substring {
  758. get { return str; }
  759. }
  760. public bool IgnoreCase {
  761. get { return ignore; }
  762. }
  763. public Position Position {
  764. get { return pos; }
  765. }
  766. public bool IsSubstring {
  767. get { return str != null; }
  768. }
  769. public bool IsPosition {
  770. get { return pos != Position.Any; }
  771. }
  772. public Interval GetInterval () {
  773. return GetInterval (0);
  774. }
  775. public Interval GetInterval (int start) {
  776. if (!IsSubstring)
  777. return Interval.Empty;
  778. return new Interval (start + Offset, start + Offset + Length - 1);
  779. }
  780. }
  781. }