pass1.c 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. /* This file is part of the software similarity tester SIM.
  2. Written by Dick Grune, Vrije Universiteit, Amsterdam.
  3. $Id: pass1.c,v 2.8 2007/08/27 09:57:32 dick Exp $
  4. */
  5. #include <stdio.h>
  6. #include <string.h>
  7. #include "debug.par"
  8. #include "sim.h"
  9. #include "text.h"
  10. #include "tokenarray.h"
  11. #include "lex.h"
  12. #include "error.h"
  13. #include "pass1.h"
  /* Forward declarations of the file-local helpers defined further down. */
  #ifdef DB_TEXT
  /* debug dump of one text's tokens; only compiled when DB_TEXT is defined */
  static void db_print_text(const struct text *txt);
  #endif
  /* prints a count followed by its unit name, with an -s plural when needed */
  static void print_count(unsigned int cnt, const char *unit);
  18. void
  19. Pass1(int argc, char *argv[]) {
  20. register int n;
  21. InitText(argc);
  22. InitTokenArray();
  23. /* assume all texts to be new */
  24. NumberOfNewTexts = NumberOfTexts;
  25. /* read the files */
  26. for (n = 0; n < NumberOfTexts; n++) {
  27. register char *fname = argv[n];
  28. register struct text *txt = &Text[n];
  29. fprintf(OutputFile, "File %s: ", fname);
  30. txt->tx_fname = fname;
  31. txt->tx_pos = 0;
  32. txt->tx_start =
  33. txt->tx_limit = TextLength();
  34. if (strcmp(fname, "/") == 0) {
  35. fprintf(OutputFile, "separator\n");
  36. NumberOfNewTexts = n;
  37. }
  38. else {
  39. if (!OpenText(First, txt)) {
  40. fprintf(OutputFile, ">>>> cannot open <<<< ");
  41. /* the file has still been opened
  42. with a null file for uniformity
  43. */
  44. }
  45. while (NextTextTokenObtained(First)) {
  46. if (!TOKEN_EQ(lex_token, EOL)) {
  47. StoreToken();
  48. }
  49. }
  50. CloseText(First, txt);
  51. txt->tx_limit = TextLength();
  52. /* report */
  53. print_count(txt->tx_limit - txt->tx_start, "token");
  54. if (lex_non_ascii_cnt) {
  55. fprintf(DebugFile, ", ");
  56. print_count(lex_non_ascii_cnt,
  57. "non-ASCII character"
  58. );
  59. }
  60. fprintf(OutputFile, "\n");
  61. #ifdef DB_TEXT
  62. db_print_text(txt);
  63. #endif /* DB_TEXT */
  64. }
  65. fflush(OutputFile);
  66. }
  67. /* report total */
  68. fprintf(OutputFile, "Total: ");
  69. print_count(TextLength() - 1, "token");
  70. fprintf(OutputFile, "\n\n");
  71. fflush(OutputFile);
  72. }
  73. static void
  74. print_count(unsigned int cnt, const char *unit) {
  75. /* Prints a grammatically correct string "%u %s[s]"
  76. for units that form their plural by suffixing -s.
  77. */
  78. fprintf(OutputFile, "%u %s%s", cnt, unit, (cnt == 1 ? "" : "s"));
  79. }
  #ifdef DB_TEXT

  /*  Debugging aid: dumps one text to DebugFile.

      Prints a banner, the file name, the token count and the
      tx_start / tx_limit bounds, followed by the tokens
      TokenArray[tx_start .. tx_limit-1] in the compressed form
      produced by print_token(), with a line break at every 32nd
      token.
  */
  static void
  db_print_text(const struct text *txt) {
      int i;

      fprintf(DebugFile, "\n\n**** DB_PRINT_TEXT ****\n");

      fprintf(DebugFile, "File \"%s\", %u tokens, ",
          txt->tx_fname, txt->tx_limit - txt->tx_start
      );
      fprintf(DebugFile, "txt->tx_start = %u, txt->tx_limit = %u\n",
          txt->tx_start, txt->tx_limit
      );

      for (i = txt->tx_start; i < txt->tx_limit; i++) {
          /* the break is emitted just before each 32nd token */
          if ((i - txt->tx_start + 1) % 32 == 0) {
              fprintf(DebugFile, "\n");
          }
          /*  The tokens go to DebugFile like the rest of the dump;
              writing them to stdout would separate them from their
              header and line breaks when DebugFile is another stream.
          */
          print_token(DebugFile, TokenArray[i]);
      }
      fprintf(DebugFile, "\n");
  }

  #endif /* DB_TEXT */
  100. #endif /* DB_TEXT */