syntax.cs 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: syntax.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. using System;
  9. using System.Collections;
  10. namespace System.Text.RegularExpressions.Syntax {
  11. // collection classes
  12. class ExpressionCollection : CollectionBase {
  13. public void Add (Expression e) {
  14. List.Add (e);
  15. }
  16. public Expression this[int i] {
  17. get { return (Expression)List[i]; }
  18. set { List[i] = value; }
  19. }
  20. protected override void OnValidate (object o) {
  21. // allow null elements
  22. }
  23. }
  24. // abstract classes
  25. abstract class Expression {
  26. public abstract void Compile (ICompiler cmp, bool reverse);
  27. public abstract void GetWidth (out int min, out int max);
  28. public int GetFixedWidth () {
  29. int min, max;
  30. GetWidth (out min, out max);
  31. if (min == max)
  32. return min;
  33. return -1;
  34. }
  35. public virtual AnchorInfo GetAnchorInfo (bool reverse) {
  36. return new AnchorInfo (this, GetFixedWidth ());
  37. }
  38. public virtual bool IsComplex () {
  39. return true;
  40. }
  41. }
  42. // composite expressions
  43. abstract class CompositeExpression : Expression {
  44. public CompositeExpression () {
  45. expressions = new ExpressionCollection ();
  46. }
  47. protected ExpressionCollection Expressions {
  48. get { return expressions; }
  49. }
  50. protected void GetWidth (out int min, out int max, int count) {
  51. min = Int32.MaxValue;
  52. max = 0;
  53. bool empty = true;
  54. for (int i = 0; i < count; ++ i) {
  55. Expression e = Expressions[i];
  56. if (e == null)
  57. continue;
  58. empty = false;
  59. int a, b;
  60. e.GetWidth (out a, out b);
  61. if (a < min) min = a;
  62. if (b > max) max = b;
  63. }
  64. if (empty)
  65. min = max = 0;
  66. }
  67. private ExpressionCollection expressions;
  68. }
  69. // groups
  70. class Group : CompositeExpression {
  71. public Group () {
  72. }
  73. public Expression Expression {
  74. get { return Expressions[0]; }
  75. set { Expressions[0] = value; }
  76. }
  77. public void AppendExpression (Expression e) {
  78. Expressions.Add (e);
  79. }
  80. public override void Compile (ICompiler cmp, bool reverse) {
  81. int count = Expressions.Count;
  82. for (int i = 0; i < count; ++ i) {
  83. Expression e;
  84. if (reverse)
  85. e = Expressions[count - i - 1];
  86. else
  87. e = Expressions[i];
  88. e.Compile (cmp, reverse);
  89. }
  90. }
  91. public override void GetWidth (out int min, out int max) {
  92. min = 0;
  93. max = 0;
  94. foreach (Expression e in Expressions) {
  95. int a, b;
  96. e.GetWidth (out a, out b);
  97. min += a;
  98. if (max == Int32.MaxValue || b == Int32.MaxValue)
  99. max = Int32.MaxValue;
  100. else
  101. max += b;
  102. }
  103. }
  104. public override AnchorInfo GetAnchorInfo (bool reverse) {
  105. int ptr;
  106. int width = GetFixedWidth ();
  107. ArrayList infos = new ArrayList ();
  108. IntervalCollection segments = new IntervalCollection ();
  109. // accumulate segments
  110. ptr = 0;
  111. //foreach (Expression e in Expressions) {
  112. int count = Expressions.Count;
  113. for (int i = 0; i < count; ++ i) {
  114. Expression e;
  115. if (reverse)
  116. e = Expressions [count - i - 1];
  117. else
  118. e = Expressions [i];
  119. AnchorInfo info = e.GetAnchorInfo (reverse);
  120. infos.Add (info);
  121. if (info.IsPosition)
  122. return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
  123. if (info.IsSubstring)
  124. segments.Add (info.GetInterval (ptr));
  125. if (info.IsUnknownWidth)
  126. break;
  127. ptr += info.Width;
  128. }
  129. // normalize and find the longest segment
  130. segments.Normalize ();
  131. Interval longest = Interval.Empty;
  132. foreach (Interval segment in segments) {
  133. if (segment.Size > longest.Size)
  134. longest = segment;
  135. }
  136. // now chain the substrings that made this segment together
  137. if (!longest.IsEmpty) {
  138. string str = "";
  139. ArrayList strs = new ArrayList();
  140. bool ignore = false;
  141. ptr = 0;
  142. //foreach (AnchorInfo info in infos) {
  143. for (int i = 0; i < infos.Count; ++ i) {
  144. AnchorInfo info;
  145. info = (AnchorInfo)infos[i];
  146. if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
  147. //str += info.Substring; // TODO mark subexpressions
  148. strs.Add(info.Substring);
  149. ignore |= info.IgnoreCase;
  150. }
  151. if (info.IsUnknownWidth)
  152. break;
  153. ptr += info.Width;
  154. }
  155. for (int i = 0; i< strs.Count; ++i)
  156. {
  157. if (reverse)
  158. str += strs [strs.Count - i - 1];
  159. else
  160. str += strs [i];
  161. }
  162. return new AnchorInfo (this, longest.low, width, str, ignore);
  163. }
  164. return new AnchorInfo (this, width);
  165. }
  166. public override bool IsComplex () {
  167. bool comp = false;
  168. foreach (Expression e in Expressions) {
  169. comp |= e.IsComplex ();
  170. }
  171. return comp | GetFixedWidth () <= 0;
  172. }
  173. }
  174. class RegularExpression : Group {
  175. public RegularExpression () {
  176. group_count = 0;
  177. }
  178. public int GroupCount {
  179. get { return group_count; }
  180. set { group_count = value; }
  181. }
  182. public override void Compile (ICompiler cmp, bool reverse) {
  183. // info block
  184. int min, max;
  185. GetWidth (out min, out max);
  186. cmp.EmitInfo (group_count, min, max);
  187. // anchoring expression
  188. AnchorInfo info = GetAnchorInfo (reverse);
  189. //if (reverse)
  190. // info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
  191. LinkRef pattern = cmp.NewLink ();
  192. cmp.EmitAnchor (reverse, info.Offset, pattern);
  193. if (info.IsPosition)
  194. cmp.EmitPosition (info.Position);
  195. else if (info.IsSubstring)
  196. cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
  197. cmp.EmitTrue ();
  198. // pattern
  199. cmp.ResolveLink (pattern);
  200. base.Compile (cmp, reverse);
  201. cmp.EmitTrue ();
  202. }
  203. private int group_count;
  204. }
  205. class CapturingGroup : Group {
  206. public CapturingGroup () {
  207. this.gid = 0;
  208. this.name = null;
  209. }
  210. public int Number {
  211. get { return gid; }
  212. set { gid = value; }
  213. }
  214. public string Name {
  215. get { return name; }
  216. set { name = value; }
  217. }
  218. public bool IsNamed {
  219. get { return name != null; }
  220. }
  221. public override void Compile (ICompiler cmp, bool reverse) {
  222. cmp.EmitOpen (gid);
  223. base.Compile (cmp, reverse);
  224. cmp.EmitClose (gid);
  225. }
  226. public override bool IsComplex () {
  227. return true;
  228. }
  229. private int gid;
  230. private string name;
  231. }
  232. class BalancingGroup : CapturingGroup {
  233. public BalancingGroup () {
  234. this.balance = null;
  235. }
  236. public CapturingGroup Balance {
  237. get { return balance; }
  238. set { balance = value; }
  239. }
  240. public override void Compile (ICompiler cmp, bool reverse) {
  241. // can't invoke Group.Compile from here :(
  242. // so I'll just repeat the code
  243. LinkRef tail = cmp.NewLink ();
  244. cmp.EmitBalanceStart (this.Number, balance.Number, this.IsNamed, tail);
  245. int count = Expressions.Count;
  246. for (int i = 0; i < count; ++ i) {
  247. Expression e;
  248. if (reverse)
  249. e = Expressions[count - i - 1];
  250. else
  251. e = Expressions[i];
  252. e.Compile (cmp, reverse);
  253. }
  254. cmp.EmitBalance ();
  255. cmp.ResolveLink(tail);
  256. }
  257. private CapturingGroup balance;
  258. }
  259. class NonBacktrackingGroup : Group {
  260. public NonBacktrackingGroup () {
  261. }
  262. public override void Compile (ICompiler cmp, bool reverse) {
  263. LinkRef tail = cmp.NewLink ();
  264. cmp.EmitSub (tail);
  265. base.Compile (cmp, reverse);
  266. cmp.EmitTrue ();
  267. cmp.ResolveLink (tail);
  268. }
  269. public override bool IsComplex () {
  270. return true;
  271. }
  272. }
  273. // repetition
  274. class Repetition : CompositeExpression {
  275. public Repetition (int min, int max, bool lazy) {
  276. Expressions.Add (null);
  277. this.min = min;
  278. this.max = max;
  279. this.lazy = lazy;
  280. }
  281. public Expression Expression {
  282. get { return Expressions[0]; }
  283. set { Expressions[0] = value; }
  284. }
  285. public int Minimum {
  286. get { return min; }
  287. set { min = value; }
  288. }
  289. public int Maximum {
  290. get { return max; }
  291. set { max = value; }
  292. }
  293. public bool Lazy {
  294. get { return lazy; }
  295. set { lazy = value; }
  296. }
  297. public override void Compile (ICompiler cmp, bool reverse) {
  298. if (Expression.IsComplex ()) {
  299. LinkRef until = cmp.NewLink ();
  300. cmp.EmitRepeat (min, max, lazy, until);
  301. Expression.Compile (cmp, reverse);
  302. cmp.EmitUntil (until);
  303. }
  304. else {
  305. LinkRef tail = cmp.NewLink ();
  306. cmp.EmitFastRepeat (min, max, lazy, tail);
  307. Expression.Compile (cmp, reverse);
  308. cmp.EmitTrue ();
  309. cmp.ResolveLink (tail);
  310. }
  311. }
  312. public override void GetWidth (out int min, out int max) {
  313. Expression.GetWidth (out min, out max);
  314. min = min * this.min;
  315. if (max == Int32.MaxValue || this.max == 0xffff)
  316. max = Int32.MaxValue;
  317. else
  318. max = max * this.max;
  319. }
  320. public override AnchorInfo GetAnchorInfo (bool reverse) {
  321. int width = GetFixedWidth ();
  322. if (Minimum == 0)
  323. return new AnchorInfo (this, width);
  324. AnchorInfo info = Expression.GetAnchorInfo (reverse);
  325. if (info.IsPosition)
  326. return new AnchorInfo (this, info.Offset, width, info.Position);
  327. if (info.IsSubstring) {
  328. if (info.IsComplete) {
  329. string str = "";
  330. for (int i = 0; i < Minimum; ++ i)
  331. str += info.Substring;
  332. return new AnchorInfo (this, 0, width, str, info.IgnoreCase);
  333. }
  334. return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
  335. }
  336. return new AnchorInfo (this, width);
  337. }
  338. private int min, max;
  339. private bool lazy;
  340. }
  341. // assertions
  342. abstract class Assertion : CompositeExpression {
  343. public Assertion () {
  344. Expressions.Add (null); // true expression
  345. Expressions.Add (null); // false expression
  346. }
  347. public Expression TrueExpression {
  348. get { return Expressions[0]; }
  349. set { Expressions[0] = value; }
  350. }
  351. public Expression FalseExpression {
  352. get { return Expressions[1]; }
  353. set { Expressions[1] = value; }
  354. }
  355. public override void GetWidth (out int min, out int max) {
  356. GetWidth (out min, out max, 2);
  357. if (TrueExpression == null || FalseExpression == null)
  358. min = 0;
  359. }
  360. }
  361. class CaptureAssertion : Assertion {
  362. public CaptureAssertion () {
  363. }
  364. public CapturingGroup CapturingGroup {
  365. get { return group; }
  366. set { group = value; }
  367. }
  368. public override void Compile (ICompiler cmp, bool reverse) {
  369. int gid = group.Number;
  370. LinkRef tail = cmp.NewLink ();
  371. if (FalseExpression == null) {
  372. // IfDefined :1
  373. // <yes_exp>
  374. // 1: <tail>
  375. cmp.EmitIfDefined (gid, tail);
  376. TrueExpression.Compile (cmp, reverse);
  377. }
  378. else {
  379. // IfDefined :1
  380. // <yes_expr>
  381. // Jump :2
  382. // 1: <no_expr>
  383. // 2: <tail>
  384. LinkRef false_expr = cmp.NewLink ();
  385. cmp.EmitIfDefined (gid, false_expr);
  386. TrueExpression.Compile (cmp, reverse);
  387. cmp.EmitJump (tail);
  388. cmp.ResolveLink (false_expr);
  389. FalseExpression.Compile (cmp, reverse);
  390. }
  391. cmp.ResolveLink (tail);
  392. }
  393. public override bool IsComplex () {
  394. bool comp = false;
  395. if (TrueExpression != null)
  396. comp |= TrueExpression.IsComplex ();
  397. if (FalseExpression != null)
  398. comp |= FalseExpression.IsComplex ();
  399. return comp | GetFixedWidth () <= 0;
  400. }
  401. private CapturingGroup group;
  402. }
  403. class ExpressionAssertion : Assertion {
  404. public ExpressionAssertion () {
  405. Expressions.Add (null); // test expression
  406. }
  407. public bool Reverse {
  408. get { return reverse; }
  409. set { reverse = value; }
  410. }
  411. public bool Negate {
  412. get { return negate; }
  413. set { negate = value; }
  414. }
  415. public Expression TestExpression {
  416. get { return Expressions[2]; }
  417. set { Expressions[2] = value; }
  418. }
  419. public override void Compile (ICompiler cmp, bool reverse) {
  420. LinkRef true_expr = cmp.NewLink ();
  421. LinkRef false_expr = cmp.NewLink ();
  422. // test op: positive / negative
  423. if (!negate)
  424. cmp.EmitTest (true_expr, false_expr);
  425. else
  426. cmp.EmitTest (false_expr, true_expr);
  427. // test expression: lookahead / lookbehind
  428. TestExpression.Compile (cmp, this.reverse);
  429. cmp.EmitTrue ();
  430. // target expressions
  431. if (TrueExpression == null) { // (?= ...)
  432. // Test :1, :2
  433. // <test_expr>
  434. // :2 False
  435. // :1 <tail>
  436. cmp.ResolveLink (false_expr);
  437. cmp.EmitFalse ();
  438. cmp.ResolveLink (true_expr);
  439. }
  440. else {
  441. cmp.ResolveLink (true_expr);
  442. TrueExpression.Compile (cmp, reverse);
  443. if (FalseExpression == null) { // (?(...) ...)
  444. // Test :1, :2
  445. // <test_expr>
  446. // :1 <yes_expr>
  447. // :2 <tail>
  448. cmp.ResolveLink (false_expr);
  449. }
  450. else { // (?(...) ... | ...)
  451. // Test :1, :2
  452. // <test_expr>
  453. // :1 <yes_expr>
  454. // Jump :3
  455. // :2 <no_expr>
  456. // :3 <tail>
  457. LinkRef tail = cmp.NewLink ();
  458. cmp.EmitJump (tail);
  459. cmp.ResolveLink (false_expr);
  460. FalseExpression.Compile (cmp, reverse);
  461. cmp.ResolveLink (tail);
  462. }
  463. }
  464. }
  465. private bool reverse, negate;
  466. }
  467. // alternation
  468. class Alternation : CompositeExpression {
  469. public Alternation () {
  470. }
  471. public ExpressionCollection Alternatives {
  472. get { return Expressions; }
  473. }
  474. public void AddAlternative (Expression e) {
  475. Alternatives.Add (e);
  476. }
  477. public override void Compile (ICompiler cmp, bool reverse) {
  478. // LinkRef next = cmp.NewLink ();
  479. LinkRef tail = cmp.NewLink ();
  480. foreach (Expression e in Alternatives) {
  481. LinkRef next = cmp.NewLink ();
  482. cmp.EmitBranch (next);
  483. e.Compile (cmp, reverse);
  484. cmp.EmitJump (tail);
  485. cmp.ResolveLink (next);
  486. cmp.EmitBranchEnd();
  487. }
  488. cmp.EmitFalse ();
  489. cmp.ResolveLink (tail);
  490. cmp.EmitAlternationEnd();
  491. }
  492. public override void GetWidth (out int min, out int max) {
  493. GetWidth (out min, out max, Alternatives.Count);
  494. }
  495. public override bool IsComplex () {
  496. bool comp = false;
  497. foreach (Expression e in Alternatives) {
  498. comp |= e.IsComplex ();
  499. }
  500. return comp | GetFixedWidth () <= 0;
  501. }
  502. }
  503. // terminal expressions
  504. class Literal : Expression {
  505. public Literal (string str, bool ignore) {
  506. this.str = str;
  507. this.ignore = ignore;
  508. }
  509. public string String {
  510. get { return str; }
  511. set { str = value; }
  512. }
  513. public bool IgnoreCase {
  514. get { return ignore; }
  515. set { ignore = value; }
  516. }
  517. public override void Compile (ICompiler cmp, bool reverse) {
  518. if (str.Length == 0)
  519. return;
  520. if (str.Length == 1)
  521. cmp.EmitCharacter (str[0], false, ignore, reverse);
  522. else
  523. cmp.EmitString (str, ignore, reverse);
  524. }
  525. public override void GetWidth (out int min, out int max) {
  526. min = max = str.Length;
  527. }
  528. public override AnchorInfo GetAnchorInfo (bool reverse) {
  529. return new AnchorInfo (this, 0, str.Length, str, ignore);
  530. }
  531. public override bool IsComplex () {
  532. return false;
  533. }
  534. private string str;
  535. private bool ignore;
  536. }
  537. class PositionAssertion : Expression {
  538. public PositionAssertion (Position pos) {
  539. this.pos = pos;
  540. }
  541. public Position Position {
  542. get { return pos; }
  543. set { pos = value; }
  544. }
  545. public override void Compile (ICompiler cmp, bool reverse) {
  546. cmp.EmitPosition (pos);
  547. }
  548. public override void GetWidth (out int min, out int max) {
  549. min = max = 0;
  550. }
  551. public override bool IsComplex () {
  552. return false;
  553. }
  554. public override AnchorInfo GetAnchorInfo (bool revers) {
  555. switch (pos) {
  556. case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
  557. return new AnchorInfo (this, 0, 0, pos);
  558. default:
  559. return new AnchorInfo (this, 0);
  560. }
  561. }
  562. private Position pos;
  563. }
  564. class Reference : Expression {
  565. public Reference (bool ignore) {
  566. this.ignore = ignore;
  567. }
  568. public CapturingGroup CapturingGroup {
  569. get { return group; }
  570. set { group = value; }
  571. }
  572. public bool IgnoreCase {
  573. get { return ignore; }
  574. set { ignore = value; }
  575. }
  576. public override void Compile (ICompiler cmp, bool reverse) {
  577. cmp.EmitReference (group.Number, ignore, reverse);
  578. }
  579. public override void GetWidth (out int min, out int max) {
  580. //group.GetWidth (out min, out max);
  581. // TODO set width to referenced group for non-cyclical references
  582. min = 0;
  583. max = Int32.MaxValue;
  584. }
  585. public override bool IsComplex () {
  586. return true; // FIXME incorporate cyclic check
  587. }
  588. private CapturingGroup group;
  589. private bool ignore;
  590. }
  591. class CharacterClass : Expression {
  592. public CharacterClass (bool negate, bool ignore) {
  593. this.negate = negate;
  594. this.ignore = ignore;
  595. intervals = new IntervalCollection ();
  596. // initialize pos/neg category arrays
  597. int cat_size = (int) Category.LastValue;
  598. pos_cats = new BitArray (cat_size);
  599. neg_cats = new BitArray (cat_size);
  600. }
  601. public CharacterClass (Category cat, bool negate) : this (false, false) {
  602. this.AddCategory (cat, negate);
  603. }
  604. public bool Negate {
  605. get { return negate; }
  606. set { negate = value; }
  607. }
  608. public bool IgnoreCase {
  609. get { return ignore; }
  610. set { ignore = value; }
  611. }
  612. public void AddCategory (Category cat, bool negate) {
  613. int n = (int)cat;
  614. if (negate) {
  615. neg_cats[n] = true;
  616. } else {
  617. pos_cats[n] = true;
  618. }
  619. }
  620. public void AddCharacter (char c) {
  621. // TODO: this is certainly not the most efficient way of doing things
  622. // TODO: but at least it produces correct results.
  623. AddRange (c, c);
  624. }
  625. public void AddRange (char lo, char hi) {
  626. Interval new_interval = new Interval (lo, hi);
  627. // ignore case is on. we must make sure our interval does not
  628. // use upper case. if it does, we must normalize the upper case
  629. // characters into lower case.
  630. if (ignore) {
  631. if (upper_case_characters.Intersects (new_interval)) {
  632. Interval partial_new_interval;
  633. if (new_interval.low < upper_case_characters.low) {
  634. partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case,
  635. new_interval.high + distance_between_upper_and_lower_case);
  636. new_interval.high = upper_case_characters.low - 1;
  637. }
  638. else {
  639. partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case,
  640. upper_case_characters.high + distance_between_upper_and_lower_case);
  641. new_interval.low = upper_case_characters.high + 1;
  642. }
  643. intervals.Add (partial_new_interval);
  644. }
  645. else if (upper_case_characters.Contains (new_interval)) {
  646. new_interval.high += distance_between_upper_and_lower_case;
  647. new_interval.low += distance_between_upper_and_lower_case;
  648. }
  649. }
  650. intervals.Add (new_interval);
  651. }
  652. public override void Compile (ICompiler cmp, bool reverse) {
  653. // create the meta-collection
  654. IntervalCollection meta =
  655. intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
  656. // count ops
  657. int count = meta.Count;
  658. for (int i = 0; i < pos_cats.Length; ++ i) {
  659. if (pos_cats[i]) ++ count;
  660. if (neg_cats[i]) ++ count;
  661. }
  662. if (count == 0)
  663. return;
  664. // emit in op for |meta| > 1
  665. LinkRef tail = cmp.NewLink ();
  666. if (count > 1)
  667. cmp.EmitIn (tail);
  668. // emit categories
  669. for (int i = 0; i < pos_cats.Length; ++ i) {
  670. if (pos_cats[i])
  671. cmp.EmitCategory ((Category)i, negate, reverse);
  672. else if (neg_cats[i])
  673. cmp.EmitCategory ((Category)i, !negate, reverse);
  674. }
  675. // emit character/range/sets from meta-collection
  676. foreach (Interval a in meta) {
  677. if (a.IsDiscontiguous) { // Set
  678. BitArray bits = new BitArray (a.Size);
  679. foreach (Interval b in intervals) {
  680. if (a.Contains (b)) {
  681. for (int i = b.low; i <= b.high; ++ i)
  682. bits[i - a.low] = true;
  683. }
  684. }
  685. cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
  686. }
  687. else if (a.IsSingleton) // Character
  688. cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
  689. else // Range
  690. cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
  691. }
  692. // finish up
  693. if (count > 1) {
  694. if (negate)
  695. cmp.EmitTrue ();
  696. else
  697. cmp.EmitFalse ();
  698. cmp.ResolveLink (tail);
  699. }
  700. }
  701. public override void GetWidth (out int min, out int max) {
  702. min = max = 1;
  703. }
  704. public override bool IsComplex () {
  705. return false;
  706. }
  707. // private
  708. private static double GetIntervalCost (Interval i) {
  709. // use op length as cost metric (=> optimize for space)
  710. if (i.IsDiscontiguous)
  711. return 3 + ((i.Size + 0xf) >> 4); // Set
  712. else if (i.IsSingleton)
  713. return 2; // Character
  714. else
  715. return 3; // Range
  716. }
  717. private static Interval upper_case_characters = new Interval ((char)65, (char)90);
  718. private const int distance_between_upper_and_lower_case = 32;
  719. private bool negate, ignore;
  720. private BitArray pos_cats, neg_cats;
  721. private IntervalCollection intervals;
  722. }
  723. class AnchorInfo {
  724. private Expression expr;
  725. private Position pos;
  726. private int offset;
  727. private string str;
  728. private int width;
  729. private bool ignore;
  730. public AnchorInfo (Expression expr, int width) {
  731. this.expr = expr;
  732. this.offset = 0;
  733. this.width = width;
  734. this.str = null;
  735. this.ignore = false;
  736. this.pos = Position.Any;
  737. }
  738. public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
  739. this.expr = expr;
  740. this.offset = offset;
  741. this.width = width;
  742. this.str = ignore ? str.ToLower () : str;
  743. this.ignore = ignore;
  744. this.pos = Position.Any;
  745. }
  746. public AnchorInfo (Expression expr, int offset, int width, Position pos) {
  747. this.expr = expr;
  748. this.offset = offset;
  749. this.width = width;
  750. this.pos = pos;
  751. this.str = null;
  752. this.ignore = false;
  753. }
  754. public Expression Expression {
  755. get { return expr; }
  756. }
  757. public int Offset {
  758. get { return offset; }
  759. }
  760. public int Width {
  761. get { return width; }
  762. }
  763. public int Length {
  764. get { return (str != null) ? str.Length : 0; }
  765. }
  766. public bool IsUnknownWidth {
  767. get { return width < 0; }
  768. }
  769. public bool IsComplete {
  770. get { return Length == Width; }
  771. }
  772. public string Substring {
  773. get { return str; }
  774. }
  775. public bool IgnoreCase {
  776. get { return ignore; }
  777. }
  778. public Position Position {
  779. get { return pos; }
  780. }
  781. public bool IsSubstring {
  782. get { return str != null; }
  783. }
  784. public bool IsPosition {
  785. get { return pos != Position.Any; }
  786. }
  787. public Interval GetInterval () {
  788. return GetInterval (0);
  789. }
  790. public Interval GetInterval (int start) {
  791. if (!IsSubstring)
  792. return Interval.Empty;
  793. return new Interval (start + Offset, start + Offset + Length - 1);
  794. }
  795. }
  796. }