/* mklang.y -- yacc grammar that renders grammar productions as HTML table rows. */

  1. %{
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4. #include <string.h>
  5. #include <ctype.h>
  6. typedef struct _term term_t;
  7. struct _term {
  8. int kind;
  9. union {
  10. char c;
  11. char* s;
  12. term_t* t;
  13. } u;
  14. term_t* next;
  15. };
  16. static FILE* outf;
  17. static int yylex ();
  18. static void yyerror (char*);
  19. static void xlate (term_t* ts);
  20. static void
  21. nonTerm (char* s)
  22. {
  23. fprintf (outf, "<I>%s</I>", s);
  24. }
  25. static void
  26. gen (char* name, term_t* ts)
  27. {
  28. fprintf (outf, "<TR>\n <TD STYLE=\"text-align: right;\">");
  29. nonTerm (name);
  30. fprintf (outf, "</TD>\n <TD>:</TD>\n <TD>");
  31. xlate (ts);
  32. fprintf (outf, "</TD>\n</TR>\n");
  33. for (ts = ts->next; ts; ts = ts->next) {
  34. fprintf (outf, "<TR>\n <TD></TD>\n <TD>|</TD>\n <TD>");
  35. xlate (ts);
  36. fprintf (outf, "</TD>\n</TR>\n");
  37. }
  38. }
  39. static term_t*
  40. mkLiteral (char c, int kind)
  41. {
  42. term_t* nt = malloc(sizeof(term_t));
  43. nt->kind = kind;
  44. nt->u.c = c;
  45. nt->next = 0;
  46. return nt;
  47. }
  48. static term_t*
  49. mkID (char* s, int kind)
  50. {
  51. term_t* nt = malloc(sizeof(term_t));
  52. nt->kind = kind;
  53. nt->u.s = s;
  54. nt->next = 0;
  55. return nt;
  56. }
  57. static term_t*
  58. mkSeq (term_t* t1, term_t* t2, int kind)
  59. {
  60. term_t* nt = malloc(sizeof(term_t));
  61. nt->kind = kind;
  62. nt->u.t = t1;
  63. t1->next = t2;
  64. nt->next = 0;
  65. return nt;
  66. }
  67. static term_t*
  68. mkTerm (term_t* t, int kind)
  69. {
  70. term_t* nt = malloc(sizeof(term_t));
  71. nt->kind = kind;
  72. nt->u.t = t;
  73. nt->next = 0;
  74. return nt;
  75. }
  76. %}
  77. %union {
  78. char c;
  79. char* str;
  80. term_t* term;
  81. }
  82. %token <c> T_literal
  83. %token <str> T_name T_token T_opt T_seq T_choice
  84. %type <term> atomterm term rule rulelist
  85. %start prods
  86. %%
  87. prods : prods prod
  88. | prod
  89. ;
  90. prod : T_name '=' rulelist '\n'
  91. { gen ($1, $3); }
  92. rulelist : rule '|' rulelist { $1->next = $3; $$ = $1; }
  93. | rule { $$ = $1; }
  94. ;
  95. rule : term rule
  96. { if ($2->kind == T_seq) { $1->next = $2->u.t; $2->u.t = $1; $$ = $2;}
  97. else $$ = mkSeq ($1, $2, T_seq); }
  98. | term { $$ = $1; }
  99. ;
  100. term : '[' rule ']' { $$ = mkTerm ($2, T_opt); }
  101. | '(' rulelist ')' { $$ = mkTerm ($2, T_choice); }
  102. | atomterm { $$ = $1; }
  103. ;
  104. atomterm : T_literal { $$ = mkLiteral($1, T_literal); }
  105. | T_token { $$ = mkID($1, T_token); }
  106. | T_name { $$ = mkID($1, T_name); }
  107. ;
  108. %%
  109. static void
  110. xlate (term_t* ts)
  111. {
  112. term_t* t;
  113. switch (ts->kind) {
  114. case T_name :
  115. nonTerm (ts->u.s);
  116. break;
  117. case T_literal :
  118. fprintf (outf, "<B>'%c'</B>", ts->u.c);
  119. break;
  120. case T_token :
  121. fprintf (outf, "<B>%s</B>", ts->u.s);
  122. break;
  123. case T_opt :
  124. fprintf (outf, "[ ");
  125. xlate (ts->u.t);
  126. fprintf (outf, " ]");
  127. break;
  128. case T_seq :
  129. t = ts->u.t;
  130. xlate (t);
  131. for (t = t->next; t; t = t->next) {
  132. fprintf (outf, " ");
  133. xlate (t);
  134. }
  135. break;
  136. case T_choice :
  137. fprintf (outf, "(");
  138. t = ts->u.t;
  139. xlate (t);
  140. for (t = t->next; t; t = t->next) {
  141. fprintf (outf, " | ");
  142. xlate (t);
  143. }
  144. fprintf (outf, ")");
  145. break;
  146. }
  147. }
  148. #define BSIZE 2048
  149. static FILE* inf;
  150. static char buf[BSIZE];
  151. static char* lexptr;
  152. static int lineno;
  153. static char*
  154. skipSpace (char* p)
  155. {
  156. int c;
  157. while (isspace ((c = *p)) && (c != '\n')) p++;
  158. return p;
  159. }
  160. static char*
  161. mystrndup (char* p, int sz)
  162. {
  163. char* s = malloc (sz+1);
  164. memcpy (s, p, sz);
  165. s[sz] = '\0';
  166. return s;
  167. }
  168. static char*
  169. readLiteral (char* p)
  170. {
  171. int c;
  172. char* s = p;
  173. while (((c = *p) != '\'') && (c != '\0')) p++;
  174. if (c == '\0') {
  175. fprintf (stderr, "Unclosed literal '%s, line %d\n", s, lineno);
  176. exit (1);
  177. }
  178. yylval.c = *s;
  179. return (p+1);
  180. }
  181. static char*
  182. readName (char* p)
  183. {
  184. int c;
  185. char* s = p;
  186. while (!isspace ((c = *p)) && (c != '\0')) p++;
  187. yylval.str = mystrndup (s, p-s);
  188. return p;
  189. }
  190. static void
  191. yyerror (char* msg)
  192. {
  193. fprintf (stderr, "%s, line %d\n", msg, lineno);
  194. }
  195. static void
  196. lexinit ()
  197. {
  198. lexptr = buf;
  199. }
  200. #ifdef DEBUG
  201. static int _yylex ();
  202. int yylex()
  203. { /* for debugging */
  204. int rv = _yylex();
  205. fprintf(stderr, "returning %d\n", rv);
  206. switch (rv) {
  207. case T_name :
  208. case T_token :
  209. fprintf(stderr, "string val is '%s'\n", yylval.str);
  210. break;
  211. case T_literal :
  212. fprintf(stderr, "string val is '%c'\n", yylval.c);
  213. break;
  214. }
  215. return rv;
  216. }
  217. #define yylex _yylex
  218. #endif
  219. static int
  220. yylex ()
  221. {
  222. int c;
  223. do {
  224. if (*lexptr == '\0') {
  225. if (!fgets (buf, BSIZE, inf)) return EOF;
  226. lineno++;
  227. lexptr = buf;
  228. }
  229. lexptr = skipSpace (lexptr);
  230. } while (*lexptr == '\0');
  231. switch (c = *lexptr++) {
  232. case '\n' :
  233. case '|' :
  234. case '=' :
  235. case '(' :
  236. case ')' :
  237. case '[' :
  238. case ']' :
  239. return c;
  240. break;
  241. case '\'' :
  242. lexptr = readLiteral (lexptr);
  243. return T_literal;
  244. break;
  245. case 'T' :
  246. if (*lexptr == '_') {
  247. lexptr = readName (lexptr+1);
  248. return T_token;
  249. }
  250. else {
  251. lexptr = readName (lexptr-1);
  252. return T_name;
  253. }
  254. break;
  255. default :
  256. lexptr = readName (lexptr-1);
  257. return T_name;
  258. break;
  259. }
  260. }
  261. #ifdef DEBUG
  262. #undef yylex
  263. #endif
  264. static FILE*
  265. openF (char* fname, char* mode)
  266. {
  267. FILE* f = fopen (fname, mode);
  268. if (!f) {
  269. fprintf (stderr, "Could not open %s for %s\n", fname, mode);
  270. exit(1);
  271. }
  272. return f;
  273. }
  274. int
  275. main (int argc, char* argv[])
  276. {
  277. if (argc != 3) {
  278. fprintf (stderr, "mklang: 2 arguments required\n");
  279. exit(1);
  280. }
  281. inf = openF (argv[1], "r");
  282. outf = openF (argv[2], "w");
  283. lexinit();
  284. yyparse ();
  285. return (0);
  286. }