quicksearch.cs 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. //
  2. // assembly: System
  3. // namespace: System.Text.RegularExpressions
  4. // file: quicksearch.cs
  5. //
  6. // author: Dan Lewis ([email protected])
  7. // (c) 2002
  8. using System;
  9. namespace System.Text.RegularExpressions {
  10. // TODO use simple test for single character strings
  11. class QuickSearch {
  12. // simplified boyer-moore for fast substring matching
  13. public QuickSearch (string str, bool ignore) {
  14. this.str = str;
  15. this.len = str.Length;
  16. this.ignore = ignore;
  17. Setup ();
  18. }
  19. public string String {
  20. get { return str; }
  21. }
  22. public int Length {
  23. get { return len; }
  24. }
  25. public bool IgnoreCase {
  26. get { return ignore; }
  27. }
  28. public int Search (string text, int start, int end) {
  29. if (end > text.Length - len)
  30. end = text.Length - len;
  31. int ptr = start;
  32. if (!ignore) {
  33. while (ptr <= end) {
  34. int i = len - 1;
  35. while (str[i] == text[ptr + i]) {
  36. if (-- i < 0)
  37. return ptr;
  38. }
  39. if (ptr < end)
  40. ptr += shift[text[ptr + len]];
  41. else
  42. break;
  43. }
  44. }
  45. else {
  46. // ignore case: same as above, but we convert text
  47. // to lower case before doing the string compare
  48. while (ptr <= end) {
  49. int i = len - 1;
  50. while (str[i] == Char.ToLower (text[ptr + i])) {
  51. if (-- i < 0)
  52. return ptr;
  53. }
  54. if (ptr < end)
  55. ptr += shift[text[ptr + len]];
  56. else
  57. break;
  58. }
  59. }
  60. return -1;
  61. }
  62. // private
  63. private void Setup () {
  64. if (ignore)
  65. str = str.ToLower ();
  66. // this is a 64k entry shift table. that's 128kb per pattern!
  67. // is it worth compressing this by only storing shifts within
  68. // a (lo, hi) character range? for most substrings this would
  69. // be around 50 bytes...
  70. shift = new int[0x1000];
  71. for (int i = 0; i < 0x1000; ++ i)
  72. shift[i] = len + 1;
  73. for (int i = 0; i < len; ++ i) {
  74. char c = str[i];
  75. shift[c] = len - i;
  76. if (ignore)
  77. shift[Char.ToUpper (c)] = len - i;
  78. }
  79. }
  80. private string str;
  81. private int len;
  82. private bool ignore;
  83. private int[] shift;
  84. }
  85. }