algollike.c 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. /* This file is part of the software similarity tester SIM.
  2. Written by Dick Grune, Vrije Universiteit, Amsterdam.
  3. $Id: algollike.c,v 2.4 2005/02/20 17:02:59 dick Exp $
  4. */
  5. /* This module implements the routines InitLanguage, MayBeStartOfRun
  6. and CheckRun for ALGOL-like languages, in which it is meaningful
  7. and useful to isolate function bodies.
  8. It requires the user to define, preferably in Xlang.l, four token
  9. sets, represented as TOKEN[] and terminated by NOTOKEN:
  10. TOKEN NonFinals[] tokens that may not end a chunk
  11. TOKEN NonInitials[] tokens that may not start a chunk
  12. TOKEN Openers[] openers of parentheses that must balance
  13. in functions
  14. TOKEN Closers[] the corresponding closers, in the same order
  15. */
  16. #include "options.h"
  17. #include "token.h"
  18. #include "algollike.h"
  19. /* Arrays for fast identification tests for tokens. Each token is
  20. identified by its position in the set + 1. For example, if T is
  21. the n-th Opener, openers[TOKEN2int(tk)] == n+1.
  22. */
  23. static char non_finals[256];
  24. static char non_initials[256];
  25. static char openers[256];
  26. static char closers[256];
  27. static void cvt2bittable(const TOKEN *tl, char bt[256]);
  28. static unsigned int largest_function(const TOKEN *str, unsigned int size);
  29. void
  30. InitLanguage(void) {
  31. /* convert the token sets to bitmaps */
  32. cvt2bittable(NonFinals, non_finals);
  33. cvt2bittable(NonInitials, non_initials);
  34. cvt2bittable(Openers, openers);
  35. cvt2bittable(Closers, closers);
  36. }
  37. static void
  38. cvt2bittable(const TOKEN *tl, char bt[256]) {
  39. int i;
  40. int cnt = 1;
  41. for (i = 0; !TOKEN_EQ(tl[i], NOTOKEN); i++) {
  42. bt[TOKEN2int(tl[i])] = cnt++;
  43. }
  44. }
  45. int
  46. MayBeStartOfRun(TOKEN tk) {
  47. return !non_initials[TOKEN2int(tk)];
  48. }
  49. unsigned int
  50. CheckRun(const TOKEN *str, unsigned int size) {
  51. /* Checks the run starting at str with length size for
  52. acceptability in the language. Cuts from the end if
  53. necessary and returns the accepted length, which may
  54. be zero.
  55. */
  56. if (option_set('f')) {
  57. /* reduce to a function-like form first */
  58. size = largest_function(str, size);
  59. }
  60. while ( /* there is trailing garbage */
  61. size != 0 && non_finals[TOKEN2int(str[size-1])]
  62. ) {
  63. /* remove it */
  64. size--;
  65. }
  66. return size;
  67. }
  68. static unsigned int
  69. largest_function(const TOKEN *str, unsigned int size) {
  70. /* Returns the size of the longest sequence starting at
  71. str[0] and not containing unbalanced parentheses.
  72. Does not check the nesting of the parentheses, but then,
  73. sim is syntax-free anyway.
  74. */
  75. register unsigned int mrb_size = 0; /* most recent balancing size */
  76. register unsigned int pos;
  77. register int i;
  78. int balance_count[256];
  79. int n_imbalances;
  80. /* clear administration */
  81. n_imbalances = 0;
  82. for (i = 0; i < 255; i++) {
  83. balance_count[i] = 0;
  84. }
  85. /* scan str[] and see how far we get */
  86. for (pos = 0; pos < size; pos++) {
  87. register int tkval = TOKEN2int(str[pos]);
  88. register int pp; /* parenthesis position */
  89. /* account for openers */
  90. if ((pp = openers[tkval])) {
  91. if (balance_count[pp] == 0) {
  92. /* about to create an imbalance */
  93. n_imbalances++;
  94. }
  95. balance_count[pp]++;
  96. }
  97. /* account for closers */
  98. if ((pp = closers[tkval])) {
  99. if (balance_count[pp] == 0) {
  100. /* this is one Closer too many */
  101. return mrb_size;
  102. }
  103. balance_count[pp]--;
  104. if (balance_count[pp] == 0) {
  105. /* we just cleared an imbalance */
  106. n_imbalances--;
  107. }
  108. }
  109. if (n_imbalances == 0) {
  110. /* register balance point */
  111. mrb_size = pos + 1;
  112. }
  113. }
  114. return mrb_size;
  115. }