syntax.cs 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: syntax.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. using System;
  29. using System.Collections;
  30. namespace System.Text.RegularExpressions.Syntax {
  31. // collection classes
  32. class ExpressionCollection : CollectionBase {
  33. public void Add (Expression e) {
  34. List.Add (e);
  35. }
  36. public Expression this[int i] {
  37. get { return (Expression)List[i]; }
  38. set { List[i] = value; }
  39. }
  40. protected override void OnValidate (object o) {
  41. // allow null elements
  42. }
  43. }
  44. // abstract classes
  45. abstract class Expression {
  46. public abstract void Compile (ICompiler cmp, bool reverse);
  47. public abstract void GetWidth (out int min, out int max);
  48. public int GetFixedWidth () {
  49. int min, max;
  50. GetWidth (out min, out max);
  51. if (min == max)
  52. return min;
  53. return -1;
  54. }
  55. public virtual AnchorInfo GetAnchorInfo (bool reverse) {
  56. return new AnchorInfo (this, GetFixedWidth ());
  57. }
  58. public virtual bool IsComplex () {
  59. return true;
  60. }
  61. }
  62. // composite expressions
  63. abstract class CompositeExpression : Expression {
  64. public CompositeExpression () {
  65. expressions = new ExpressionCollection ();
  66. }
  67. protected ExpressionCollection Expressions {
  68. get { return expressions; }
  69. }
  70. protected void GetWidth (out int min, out int max, int count) {
  71. min = Int32.MaxValue;
  72. max = 0;
  73. bool empty = true;
  74. for (int i = 0; i < count; ++ i) {
  75. Expression e = Expressions[i];
  76. if (e == null)
  77. continue;
  78. empty = false;
  79. int a, b;
  80. e.GetWidth (out a, out b);
  81. if (a < min) min = a;
  82. if (b > max) max = b;
  83. }
  84. if (empty)
  85. min = max = 0;
  86. }
  87. private ExpressionCollection expressions;
  88. }
  89. // groups
  90. class Group : CompositeExpression {
  91. public Group () {
  92. }
  93. public Expression Expression {
  94. get { return Expressions[0]; }
  95. set { Expressions[0] = value; }
  96. }
  97. public void AppendExpression (Expression e) {
  98. Expressions.Add (e);
  99. }
  100. public override void Compile (ICompiler cmp, bool reverse) {
  101. int count = Expressions.Count;
  102. for (int i = 0; i < count; ++ i) {
  103. Expression e;
  104. if (reverse)
  105. e = Expressions[count - i - 1];
  106. else
  107. e = Expressions[i];
  108. e.Compile (cmp, reverse);
  109. }
  110. }
  111. public override void GetWidth (out int min, out int max) {
  112. min = 0;
  113. max = 0;
  114. foreach (Expression e in Expressions) {
  115. int a, b;
  116. e.GetWidth (out a, out b);
  117. min += a;
  118. if (max == Int32.MaxValue || b == Int32.MaxValue)
  119. max = Int32.MaxValue;
  120. else
  121. max += b;
  122. }
  123. }
  124. public override AnchorInfo GetAnchorInfo (bool reverse) {
  125. int ptr;
  126. int width = GetFixedWidth ();
  127. ArrayList infos = new ArrayList ();
  128. IntervalCollection segments = new IntervalCollection ();
  129. // accumulate segments
  130. ptr = 0;
  131. //foreach (Expression e in Expressions) {
  132. int count = Expressions.Count;
  133. for (int i = 0; i < count; ++ i) {
  134. Expression e;
  135. if (reverse)
  136. e = Expressions [count - i - 1];
  137. else
  138. e = Expressions [i];
  139. AnchorInfo info = e.GetAnchorInfo (reverse);
  140. infos.Add (info);
  141. if (info.IsPosition)
  142. return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
  143. if (info.IsSubstring)
  144. segments.Add (info.GetInterval (ptr));
  145. if (info.IsUnknownWidth)
  146. break;
  147. ptr += info.Width;
  148. }
  149. // normalize and find the longest segment
  150. segments.Normalize ();
  151. Interval longest = Interval.Empty;
  152. foreach (Interval segment in segments) {
  153. if (segment.Size > longest.Size)
  154. longest = segment;
  155. }
  156. // now chain the substrings that made this segment together
  157. if (!longest.IsEmpty) {
  158. string str = "";
  159. ArrayList strs = new ArrayList();
  160. bool ignore = false;
  161. ptr = 0;
  162. //foreach (AnchorInfo info in infos) {
  163. for (int i = 0; i < infos.Count; ++ i) {
  164. AnchorInfo info;
  165. info = (AnchorInfo)infos[i];
  166. if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
  167. //str += info.Substring; // TODO mark subexpressions
  168. strs.Add(info.Substring);
  169. ignore |= info.IgnoreCase;
  170. }
  171. if (info.IsUnknownWidth)
  172. break;
  173. ptr += info.Width;
  174. }
  175. for (int i = 0; i< strs.Count; ++i)
  176. {
  177. if (reverse)
  178. str += strs [strs.Count - i - 1];
  179. else
  180. str += strs [i];
  181. }
  182. return new AnchorInfo (this, longest.low, width, str, ignore);
  183. }
  184. return new AnchorInfo (this, width);
  185. }
  186. public override bool IsComplex () {
  187. bool comp = false;
  188. foreach (Expression e in Expressions) {
  189. comp |= e.IsComplex ();
  190. }
  191. return comp | GetFixedWidth () <= 0;
  192. }
  193. }
  194. class RegularExpression : Group {
  195. public RegularExpression () {
  196. group_count = 0;
  197. }
  198. public int GroupCount {
  199. get { return group_count; }
  200. set { group_count = value; }
  201. }
  202. public override void Compile (ICompiler cmp, bool reverse) {
  203. // info block
  204. int min, max;
  205. GetWidth (out min, out max);
  206. cmp.EmitInfo (group_count, min, max);
  207. // anchoring expression
  208. AnchorInfo info = GetAnchorInfo (reverse);
  209. //if (reverse)
  210. // info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
  211. LinkRef pattern = cmp.NewLink ();
  212. cmp.EmitAnchor (reverse, info.Offset, pattern);
  213. if (info.IsPosition)
  214. cmp.EmitPosition (info.Position);
  215. else if (info.IsSubstring)
  216. cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
  217. cmp.EmitTrue ();
  218. // pattern
  219. cmp.ResolveLink (pattern);
  220. base.Compile (cmp, reverse);
  221. cmp.EmitTrue ();
  222. }
  223. private int group_count;
  224. }
  225. class CapturingGroup : Group {
  226. public CapturingGroup () {
  227. this.gid = 0;
  228. this.name = null;
  229. }
  230. public int Number {
  231. get { return gid; }
  232. set { gid = value; }
  233. }
  234. public string Name {
  235. get { return name; }
  236. set { name = value; }
  237. }
  238. public bool IsNamed {
  239. get { return name != null; }
  240. }
  241. public override void Compile (ICompiler cmp, bool reverse) {
  242. cmp.EmitOpen (gid);
  243. base.Compile (cmp, reverse);
  244. cmp.EmitClose (gid);
  245. }
  246. public override bool IsComplex () {
  247. return true;
  248. }
  249. private int gid;
  250. private string name;
  251. }
  252. class BalancingGroup : CapturingGroup {
  253. public BalancingGroup () {
  254. this.balance = null;
  255. }
  256. public CapturingGroup Balance {
  257. get { return balance; }
  258. set { balance = value; }
  259. }
  260. public override void Compile (ICompiler cmp, bool reverse) {
  261. // can't invoke Group.Compile from here :(
  262. // so I'll just repeat the code
  263. LinkRef tail = cmp.NewLink ();
  264. cmp.EmitBalanceStart (this.Number, balance.Number, this.IsNamed, tail);
  265. int count = Expressions.Count;
  266. for (int i = 0; i < count; ++ i) {
  267. Expression e;
  268. if (reverse)
  269. e = Expressions[count - i - 1];
  270. else
  271. e = Expressions[i];
  272. e.Compile (cmp, reverse);
  273. }
  274. cmp.EmitBalance ();
  275. cmp.ResolveLink(tail);
  276. }
  277. private CapturingGroup balance;
  278. }
  279. class NonBacktrackingGroup : Group {
  280. public NonBacktrackingGroup () {
  281. }
  282. public override void Compile (ICompiler cmp, bool reverse) {
  283. LinkRef tail = cmp.NewLink ();
  284. cmp.EmitSub (tail);
  285. base.Compile (cmp, reverse);
  286. cmp.EmitTrue ();
  287. cmp.ResolveLink (tail);
  288. }
  289. public override bool IsComplex () {
  290. return true;
  291. }
  292. }
  293. // repetition
  294. class Repetition : CompositeExpression {
  295. public Repetition (int min, int max, bool lazy) {
  296. Expressions.Add (null);
  297. this.min = min;
  298. this.max = max;
  299. this.lazy = lazy;
  300. }
  301. public Expression Expression {
  302. get { return Expressions[0]; }
  303. set { Expressions[0] = value; }
  304. }
  305. public int Minimum {
  306. get { return min; }
  307. set { min = value; }
  308. }
  309. public int Maximum {
  310. get { return max; }
  311. set { max = value; }
  312. }
  313. public bool Lazy {
  314. get { return lazy; }
  315. set { lazy = value; }
  316. }
  317. public override void Compile (ICompiler cmp, bool reverse) {
  318. if (Expression.IsComplex ()) {
  319. LinkRef until = cmp.NewLink ();
  320. cmp.EmitRepeat (min, max, lazy, until);
  321. Expression.Compile (cmp, reverse);
  322. cmp.EmitUntil (until);
  323. }
  324. else {
  325. LinkRef tail = cmp.NewLink ();
  326. cmp.EmitFastRepeat (min, max, lazy, tail);
  327. Expression.Compile (cmp, reverse);
  328. cmp.EmitTrue ();
  329. cmp.ResolveLink (tail);
  330. }
  331. }
  332. public override void GetWidth (out int min, out int max) {
  333. Expression.GetWidth (out min, out max);
  334. min = min * this.min;
  335. if (max == Int32.MaxValue || this.max == 0xffff)
  336. max = Int32.MaxValue;
  337. else
  338. max = max * this.max;
  339. }
  340. public override AnchorInfo GetAnchorInfo (bool reverse) {
  341. int width = GetFixedWidth ();
  342. if (Minimum == 0)
  343. return new AnchorInfo (this, width);
  344. AnchorInfo info = Expression.GetAnchorInfo (reverse);
  345. if (info.IsPosition)
  346. return new AnchorInfo (this, info.Offset, width, info.Position);
  347. if (info.IsSubstring) {
  348. if (info.IsComplete) {
  349. string str = "";
  350. for (int i = 0; i < Minimum; ++ i)
  351. str += info.Substring;
  352. return new AnchorInfo (this, 0, width, str, info.IgnoreCase);
  353. }
  354. return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
  355. }
  356. return new AnchorInfo (this, width);
  357. }
  358. private int min, max;
  359. private bool lazy;
  360. }
  361. // assertions
  362. abstract class Assertion : CompositeExpression {
  363. public Assertion () {
  364. Expressions.Add (null); // true expression
  365. Expressions.Add (null); // false expression
  366. }
  367. public Expression TrueExpression {
  368. get { return Expressions[0]; }
  369. set { Expressions[0] = value; }
  370. }
  371. public Expression FalseExpression {
  372. get { return Expressions[1]; }
  373. set { Expressions[1] = value; }
  374. }
  375. public override void GetWidth (out int min, out int max) {
  376. GetWidth (out min, out max, 2);
  377. if (TrueExpression == null || FalseExpression == null)
  378. min = 0;
  379. }
  380. }
  381. class CaptureAssertion : Assertion {
  382. public CaptureAssertion () {
  383. }
  384. public CapturingGroup CapturingGroup {
  385. get { return group; }
  386. set { group = value; }
  387. }
  388. public override void Compile (ICompiler cmp, bool reverse) {
  389. int gid = group.Number;
  390. LinkRef tail = cmp.NewLink ();
  391. if (FalseExpression == null) {
  392. // IfDefined :1
  393. // <yes_exp>
  394. // 1: <tail>
  395. cmp.EmitIfDefined (gid, tail);
  396. TrueExpression.Compile (cmp, reverse);
  397. }
  398. else {
  399. // IfDefined :1
  400. // <yes_expr>
  401. // Jump :2
  402. // 1: <no_expr>
  403. // 2: <tail>
  404. LinkRef false_expr = cmp.NewLink ();
  405. cmp.EmitIfDefined (gid, false_expr);
  406. TrueExpression.Compile (cmp, reverse);
  407. cmp.EmitJump (tail);
  408. cmp.ResolveLink (false_expr);
  409. FalseExpression.Compile (cmp, reverse);
  410. }
  411. cmp.ResolveLink (tail);
  412. }
  413. public override bool IsComplex () {
  414. bool comp = false;
  415. if (TrueExpression != null)
  416. comp |= TrueExpression.IsComplex ();
  417. if (FalseExpression != null)
  418. comp |= FalseExpression.IsComplex ();
  419. return comp | GetFixedWidth () <= 0;
  420. }
  421. private CapturingGroup group;
  422. }
  423. class ExpressionAssertion : Assertion {
  424. public ExpressionAssertion () {
  425. Expressions.Add (null); // test expression
  426. }
  427. public bool Reverse {
  428. get { return reverse; }
  429. set { reverse = value; }
  430. }
  431. public bool Negate {
  432. get { return negate; }
  433. set { negate = value; }
  434. }
  435. public Expression TestExpression {
  436. get { return Expressions[2]; }
  437. set { Expressions[2] = value; }
  438. }
  439. public override void Compile (ICompiler cmp, bool reverse) {
  440. LinkRef true_expr = cmp.NewLink ();
  441. LinkRef false_expr = cmp.NewLink ();
  442. // test op: positive / negative
  443. if (!negate)
  444. cmp.EmitTest (true_expr, false_expr);
  445. else
  446. cmp.EmitTest (false_expr, true_expr);
  447. // test expression: lookahead / lookbehind
  448. TestExpression.Compile (cmp, this.reverse);
  449. cmp.EmitTrue ();
  450. // target expressions
  451. if (TrueExpression == null) { // (?= ...)
  452. // Test :1, :2
  453. // <test_expr>
  454. // :2 False
  455. // :1 <tail>
  456. cmp.ResolveLink (false_expr);
  457. cmp.EmitFalse ();
  458. cmp.ResolveLink (true_expr);
  459. }
  460. else {
  461. cmp.ResolveLink (true_expr);
  462. TrueExpression.Compile (cmp, reverse);
  463. if (FalseExpression == null) { // (?(...) ...)
  464. // Test :1, :2
  465. // <test_expr>
  466. // :1 <yes_expr>
  467. // :2 <tail>
  468. cmp.ResolveLink (false_expr);
  469. }
  470. else { // (?(...) ... | ...)
  471. // Test :1, :2
  472. // <test_expr>
  473. // :1 <yes_expr>
  474. // Jump :3
  475. // :2 <no_expr>
  476. // :3 <tail>
  477. LinkRef tail = cmp.NewLink ();
  478. cmp.EmitJump (tail);
  479. cmp.ResolveLink (false_expr);
  480. FalseExpression.Compile (cmp, reverse);
  481. cmp.ResolveLink (tail);
  482. }
  483. }
  484. }
  485. private bool reverse, negate;
  486. }
  487. // alternation
  488. class Alternation : CompositeExpression {
  489. public Alternation () {
  490. }
  491. public ExpressionCollection Alternatives {
  492. get { return Expressions; }
  493. }
  494. public void AddAlternative (Expression e) {
  495. Alternatives.Add (e);
  496. }
  497. public override void Compile (ICompiler cmp, bool reverse) {
  498. // LinkRef next = cmp.NewLink ();
  499. LinkRef tail = cmp.NewLink ();
  500. foreach (Expression e in Alternatives) {
  501. LinkRef next = cmp.NewLink ();
  502. cmp.EmitBranch (next);
  503. e.Compile (cmp, reverse);
  504. cmp.EmitJump (tail);
  505. cmp.ResolveLink (next);
  506. cmp.EmitBranchEnd();
  507. }
  508. cmp.EmitFalse ();
  509. cmp.ResolveLink (tail);
  510. cmp.EmitAlternationEnd();
  511. }
  512. public override void GetWidth (out int min, out int max) {
  513. GetWidth (out min, out max, Alternatives.Count);
  514. }
  515. public override bool IsComplex () {
  516. bool comp = false;
  517. foreach (Expression e in Alternatives) {
  518. comp |= e.IsComplex ();
  519. }
  520. return comp | GetFixedWidth () <= 0;
  521. }
  522. }
  523. // terminal expressions
  524. class Literal : Expression {
  525. public Literal (string str, bool ignore) {
  526. this.str = str;
  527. this.ignore = ignore;
  528. }
  529. public string String {
  530. get { return str; }
  531. set { str = value; }
  532. }
  533. public bool IgnoreCase {
  534. get { return ignore; }
  535. set { ignore = value; }
  536. }
  537. public override void Compile (ICompiler cmp, bool reverse) {
  538. if (str.Length == 0)
  539. return;
  540. if (str.Length == 1)
  541. cmp.EmitCharacter (str[0], false, ignore, reverse);
  542. else
  543. cmp.EmitString (str, ignore, reverse);
  544. }
  545. public override void GetWidth (out int min, out int max) {
  546. min = max = str.Length;
  547. }
  548. public override AnchorInfo GetAnchorInfo (bool reverse) {
  549. return new AnchorInfo (this, 0, str.Length, str, ignore);
  550. }
  551. public override bool IsComplex () {
  552. return false;
  553. }
  554. private string str;
  555. private bool ignore;
  556. }
  557. class PositionAssertion : Expression {
  558. public PositionAssertion (Position pos) {
  559. this.pos = pos;
  560. }
  561. public Position Position {
  562. get { return pos; }
  563. set { pos = value; }
  564. }
  565. public override void Compile (ICompiler cmp, bool reverse) {
  566. cmp.EmitPosition (pos);
  567. }
  568. public override void GetWidth (out int min, out int max) {
  569. min = max = 0;
  570. }
  571. public override bool IsComplex () {
  572. return false;
  573. }
  574. public override AnchorInfo GetAnchorInfo (bool revers) {
  575. switch (pos) {
  576. case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
  577. return new AnchorInfo (this, 0, 0, pos);
  578. default:
  579. return new AnchorInfo (this, 0);
  580. }
  581. }
  582. private Position pos;
  583. }
  584. class Reference : Expression {
  585. public Reference (bool ignore) {
  586. this.ignore = ignore;
  587. }
  588. public CapturingGroup CapturingGroup {
  589. get { return group; }
  590. set { group = value; }
  591. }
  592. public bool IgnoreCase {
  593. get { return ignore; }
  594. set { ignore = value; }
  595. }
  596. public override void Compile (ICompiler cmp, bool reverse) {
  597. cmp.EmitReference (group.Number, ignore, reverse);
  598. }
  599. public override void GetWidth (out int min, out int max) {
  600. //group.GetWidth (out min, out max);
  601. // TODO set width to referenced group for non-cyclical references
  602. min = 0;
  603. max = Int32.MaxValue;
  604. }
  605. public override bool IsComplex () {
  606. return true; // FIXME incorporate cyclic check
  607. }
  608. private CapturingGroup group;
  609. private bool ignore;
  610. }
  611. class CharacterClass : Expression {
  612. public CharacterClass (bool negate, bool ignore) {
  613. this.negate = negate;
  614. this.ignore = ignore;
  615. intervals = new IntervalCollection ();
  616. // initialize pos/neg category arrays
  617. int cat_size = (int) Category.LastValue;
  618. pos_cats = new BitArray (cat_size);
  619. neg_cats = new BitArray (cat_size);
  620. }
  621. public CharacterClass (Category cat, bool negate) : this (false, false) {
  622. this.AddCategory (cat, negate);
  623. }
  624. public bool Negate {
  625. get { return negate; }
  626. set { negate = value; }
  627. }
  628. public bool IgnoreCase {
  629. get { return ignore; }
  630. set { ignore = value; }
  631. }
  632. public void AddCategory (Category cat, bool negate) {
  633. int n = (int)cat;
  634. if (negate) {
  635. neg_cats[n] = true;
  636. } else {
  637. pos_cats[n] = true;
  638. }
  639. }
  640. public void AddCharacter (char c) {
  641. // TODO: this is certainly not the most efficient way of doing things
  642. // TODO: but at least it produces correct results.
  643. AddRange (c, c);
  644. }
  645. public void AddRange (char lo, char hi) {
  646. Interval new_interval = new Interval (lo, hi);
  647. // ignore case is on. we must make sure our interval does not
  648. // use upper case. if it does, we must normalize the upper case
  649. // characters into lower case.
  650. if (ignore) {
  651. if (upper_case_characters.Intersects (new_interval)) {
  652. Interval partial_new_interval;
  653. if (new_interval.low < upper_case_characters.low) {
  654. partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case,
  655. new_interval.high + distance_between_upper_and_lower_case);
  656. new_interval.high = upper_case_characters.low - 1;
  657. }
  658. else {
  659. partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case,
  660. upper_case_characters.high + distance_between_upper_and_lower_case);
  661. new_interval.low = upper_case_characters.high + 1;
  662. }
  663. intervals.Add (partial_new_interval);
  664. }
  665. else if (upper_case_characters.Contains (new_interval)) {
  666. new_interval.high += distance_between_upper_and_lower_case;
  667. new_interval.low += distance_between_upper_and_lower_case;
  668. }
  669. }
  670. intervals.Add (new_interval);
  671. }
  672. public override void Compile (ICompiler cmp, bool reverse) {
  673. // create the meta-collection
  674. IntervalCollection meta =
  675. intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
  676. // count ops
  677. int count = meta.Count;
  678. for (int i = 0; i < pos_cats.Length; ++ i) {
  679. if (pos_cats[i]) ++ count;
  680. if (neg_cats[i]) ++ count;
  681. }
  682. if (count == 0)
  683. return;
  684. // emit in op for |meta| > 1
  685. LinkRef tail = cmp.NewLink ();
  686. if (count > 1)
  687. cmp.EmitIn (tail);
  688. // emit categories
  689. for (int i = 0; i < pos_cats.Length; ++ i) {
  690. if (pos_cats[i]) {
  691. if (neg_cats [i])
  692. cmp.EmitCategory (Category.AnySingleline, negate, reverse);
  693. else
  694. cmp.EmitCategory ((Category)i, negate, reverse);
  695. } else if (neg_cats[i]) {
  696. cmp.EmitCategory ((Category)i, !negate, reverse);
  697. }
  698. }
  699. // emit character/range/sets from meta-collection
  700. foreach (Interval a in meta) {
  701. if (a.IsDiscontiguous) { // Set
  702. BitArray bits = new BitArray (a.Size);
  703. foreach (Interval b in intervals) {
  704. if (a.Contains (b)) {
  705. for (int i = b.low; i <= b.high; ++ i)
  706. bits[i - a.low] = true;
  707. }
  708. }
  709. cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
  710. }
  711. else if (a.IsSingleton) // Character
  712. cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
  713. else // Range
  714. cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
  715. }
  716. // finish up
  717. if (count > 1) {
  718. if (negate)
  719. cmp.EmitTrue ();
  720. else
  721. cmp.EmitFalse ();
  722. cmp.ResolveLink (tail);
  723. }
  724. }
  725. public override void GetWidth (out int min, out int max) {
  726. min = max = 1;
  727. }
  728. public override bool IsComplex () {
  729. return false;
  730. }
  731. // private
  732. private static double GetIntervalCost (Interval i) {
  733. // use op length as cost metric (=> optimize for space)
  734. if (i.IsDiscontiguous)
  735. return 3 + ((i.Size + 0xf) >> 4); // Set
  736. else if (i.IsSingleton)
  737. return 2; // Character
  738. else
  739. return 3; // Range
  740. }
  741. private static Interval upper_case_characters = new Interval ((char)65, (char)90);
  742. private const int distance_between_upper_and_lower_case = 32;
  743. private bool negate, ignore;
  744. private BitArray pos_cats, neg_cats;
  745. private IntervalCollection intervals;
  746. }
  747. class AnchorInfo {
  748. private Expression expr;
  749. private Position pos;
  750. private int offset;
  751. private string str;
  752. private int width;
  753. private bool ignore;
  754. public AnchorInfo (Expression expr, int width) {
  755. this.expr = expr;
  756. this.offset = 0;
  757. this.width = width;
  758. this.str = null;
  759. this.ignore = false;
  760. this.pos = Position.Any;
  761. }
  762. public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
  763. this.expr = expr;
  764. this.offset = offset;
  765. this.width = width;
  766. this.str = ignore ? str.ToLower () : str;
  767. this.ignore = ignore;
  768. this.pos = Position.Any;
  769. }
  770. public AnchorInfo (Expression expr, int offset, int width, Position pos) {
  771. this.expr = expr;
  772. this.offset = offset;
  773. this.width = width;
  774. this.pos = pos;
  775. this.str = null;
  776. this.ignore = false;
  777. }
  778. public Expression Expression {
  779. get { return expr; }
  780. }
  781. public int Offset {
  782. get { return offset; }
  783. }
  784. public int Width {
  785. get { return width; }
  786. }
  787. public int Length {
  788. get { return (str != null) ? str.Length : 0; }
  789. }
  790. public bool IsUnknownWidth {
  791. get { return width < 0; }
  792. }
  793. public bool IsComplete {
  794. get { return Length == Width; }
  795. }
  796. public string Substring {
  797. get { return str; }
  798. }
  799. public bool IgnoreCase {
  800. get { return ignore; }
  801. }
  802. public Position Position {
  803. get { return pos; }
  804. }
  805. public bool IsSubstring {
  806. get { return str != null; }
  807. }
  808. public bool IsPosition {
  809. get { return pos != Position.Any; }
  810. }
  811. public Interval GetInterval () {
  812. return GetInterval (0);
  813. }
  814. public Interval GetInterval (int start) {
  815. if (!IsSubstring)
  816. return Interval.Empty;
  817. return new Interval (start + Offset, start + Offset + Length - 1);
  818. }
  819. }
  820. }