idf.c 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. /* This file is part of the software similarity tester SIM.
  2. Written by Dick Grune, Vrije Universiteit, Amsterdam.
  3. $Id: idf.c,v 2.8 2005/02/20 17:03:00 dick Exp $
  4. */
  5. #include <string.h>
  6. #include "system.par"
  7. #include "token.h"
  8. #include "idf.h"
  9. TOKEN
  10. idf_in_list(
  11. const char *str,
  12. const struct idf list[],
  13. unsigned int listsize,
  14. TOKEN dflt
  15. ) {
  16. register int first = 0;
  17. register int last = (listsize / sizeof (struct idf)) - 1;
  18. while (first < last) {
  19. register int middle = (first + last) / 2;
  20. if (strcmp(str, list[middle].id_tag) > 0) {
  21. first = middle + 1;
  22. }
  23. else {
  24. last = middle;
  25. }
  26. }
  27. return (strcmp(str, list[first].id_tag) == 0
  28. ? list[first].id_tr
  29. : dflt
  30. );
  31. }
  32. TOKEN
  33. idf_hashed(const char *str) {
  34. register int32 h = 0;
  35. /* let's be careful about ranges; if done wrong it's hard to debug */
  36. while (*str) {
  37. /* -1 <= h <= 2^31-1 */
  38. h = (h << 1) + (*str++&0377);
  39. /* -2^31 <= h <= 2^31-1 */
  40. if (h < 0) {
  41. /* -2^31 <= h <= -1 */
  42. h += 2147483647; /* 2^31-1 */
  43. /* -1 <= h <= 2^31-2 */
  44. }
  45. else {
  46. /* 0 <= h <= 2^31-1 */
  47. }
  48. /* -1 <= h <= 2^31-1 */
  49. }
  50. /* -1 <= h <= 2^31-1 */
  51. if (h < 0) {
  52. /* h = -1 */
  53. /* a very small chance, but all the same */
  54. h = 0;
  55. }
  56. /* 0 <= h <= 2^31-1 */
  57. h %= 253; /* 0 <= h < 253 */
  58. return NORM(h + 1); /* 1 <= h < 254 */
  59. /* this avoids SKIP (0) and EOL (255) */
  60. }