yacc.c 23 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378
  1. /*% clang -g -Wall -Wextra % -o #
  2. * miniyacc - LALR(1) grammars for C
  3. * See LICENSE for copyright and license details.
  4. */
  5. #include <assert.h>
  6. #include <ctype.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. typedef int Sym;
  11. typedef struct Rule Rule;
  12. typedef struct TSet TSet;
  13. typedef struct Info Info;
  14. typedef struct Term Term;
  15. typedef struct Item Item;
  16. typedef struct Row Row;
  17. #define S ((Sym) -1)
  18. #define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */
  19. #define GetBit(s,n) (s[n/32] & (1<<(n%32)))
  20. #define SetBit(s,n) (s[n/32] |= 1<<(n%32))
  21. enum {
  22. IdntSz = 64,
  23. MaxRhs = 32,
  24. MaxTk = 500,
  25. MaxNt = 500,
  26. MaxRl = 800,
  27. MaxTm = 1000,
  28. TSetSz = (MaxTk+31)/32,
  29. Sym0 = MaxTk
  30. };
  31. struct Rule {
  32. Sym lhs;
  33. Sym rhs[MaxRhs];
  34. char *act;
  35. int actln;
  36. int prec;
  37. };
  38. struct TSet {
  39. unsigned t[TSetSz];
  40. };
  41. struct Info {
  42. int nul;
  43. TSet fst;
  44. int prec;
  45. enum {
  46. ANone,
  47. ALeft,
  48. ARight,
  49. ANonassoc
  50. } assoc;
  51. char name[IdntSz];
  52. char type[IdntSz];
  53. };
  54. struct Term {
  55. Rule *rule;
  56. int dot;
  57. TSet lk;
  58. };
  59. struct Item {
  60. int id;
  61. int nt;
  62. Term ts[MaxTm];
  63. Item **gtbl;
  64. int dirty;
  65. };
  66. struct Row {
  67. int def;
  68. int ndef;
  69. int *t;
  70. };
  71. char srs[] = "shift/reduce conflict state %d token %s\n";
  72. char rrs[] = "reduce/reduce conflict state %d token %s\n";
  73. Item i0; /* temporary item */
  74. int nrl, nsy, nst, ntk;
  75. Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */
  76. Info is[MaxTk+MaxNt]; /* symbol information */
  77. Item **st; /* LALR(1) states (ordered, icmp) */
  78. Row *as; /* action table [state][tok] */
  79. Row *gs; /* goto table [sym][state] */
  80. Sym sstart;/* start symbol */
  81. Item *ini; /* initial state */
  82. int doty; /* type-checking enabled */
  83. int srconf, rrconf;
  84. int actsz;
  85. int *act;
  86. int *chk;
  87. int *adsp;
  88. int *gdsp;
  89. int lineno = 1;
  90. char *srca;
  91. FILE *fin;
  92. FILE *fout;
  93. FILE *fgrm;
  94. FILE *fhdr;
  95. void
  96. die(char *s)
  97. {
  98. fprintf(stderr, "%s (on line %d)\n", s, lineno);
  99. exit(1);
  100. }
  101. void *
  102. yalloc(size_t n, size_t o)
  103. {
  104. void *p;
  105. p = calloc(n, o);
  106. if (!p)
  107. die("out of memory");
  108. return p;
  109. }
  110. int
  111. rcmp(const void *a, const void *b)
  112. {
  113. return ((Rule *)a)->lhs - ((Rule *)b)->lhs;
  114. }
  115. Rule *
  116. rfind(Sym lhs)
  117. {
  118. Rule *r;
  119. Rule k;
  120. k.lhs = lhs;
  121. r = bsearch(&k, rs, nrl, sizeof *r, rcmp);
  122. if (r != 0)
  123. while (r > rs && r[-1].lhs == lhs)
  124. r--;
  125. return r;
  126. }
  127. int
  128. slen(Sym *l)
  129. {
  130. int n;
  131. for (n=0; *l!=S; n++, l++);
  132. return n;
  133. }
  134. void
  135. tszero(TSet *ts)
  136. {
  137. memset(ts, 0, sizeof *ts);
  138. }
  139. int
  140. tsunion(TSet *tsa, TSet *tsb)
  141. {
  142. int n;
  143. unsigned *a, *b, c, t;
  144. c = 0;
  145. a = tsa->t;
  146. b = tsb->t;
  147. n = (31+ntk)/32;
  148. while (n-- > 0) {
  149. t = *a;
  150. *a |= *b++;
  151. c |= t ^ *a++;
  152. }
  153. return !!c;
  154. }
  155. void
  156. first(TSet *ts, Sym *stnc, TSet *last)
  157. {
  158. Sym f;
  159. f = stnc[0];
  160. if (f == S) {
  161. if (last)
  162. tsunion(ts, last);
  163. return;
  164. }
  165. if (f < ntk) {
  166. SetBit(ts->t, f);
  167. return;
  168. }
  169. if (is[f].nul)
  170. first(ts, stnc+1, last);
  171. tsunion(ts, &is[f].fst);
  172. }
  173. void
  174. ginit()
  175. {
  176. int chg;
  177. Rule *r;
  178. Info *i;
  179. Sym *s;
  180. TSet ts;
  181. do {
  182. chg = 0;
  183. for (r=rs; r-rs<nrl; r++) {
  184. i = &is[r->lhs];
  185. for (s=r->rhs; *s!=S; s++)
  186. if (!is[*s].nul)
  187. goto nonul;
  188. chg |= i->nul == 0;
  189. i->nul = 1;
  190. nonul:
  191. tszero(&ts);
  192. first(&ts, r->rhs, 0);
  193. chg |= tsunion(&i->fst, &ts);
  194. }
  195. } while (chg);
  196. }
  197. int
  198. tcmp(Term *a, Term *b)
  199. {
  200. int c;
  201. c = a->rule - b->rule;
  202. if (c==0)
  203. c = a->dot - b->dot;
  204. return c;
  205. }
  206. int
  207. tcmpv(const void *a, const void *b)
  208. {
  209. return tcmp((Term *)a, (Term *)b);
  210. }
  211. void
  212. iclose(Item *i)
  213. {
  214. int smap[MaxNt];
  215. Rule *r;
  216. Term *t, t1;
  217. Sym s, *rem;
  218. int chg, n, m;
  219. t1.dot = 0;
  220. memset(smap, 0, sizeof smap);
  221. for (n=0; n<i->nt; n++) {
  222. t = &i->ts[n];
  223. s = t->rule->lhs-Sym0;
  224. if (t->dot==0)
  225. if (smap[s]==0)
  226. smap[s] = n;
  227. }
  228. do {
  229. chg = 0;
  230. for (n=0; n<i->nt; n++) {
  231. t = &i->ts[n];
  232. rem = &t->rule->rhs[t->dot];
  233. s = *rem++;
  234. if (s < Sym0 || s == S)
  235. continue;
  236. r = rfind(s);
  237. if (!r)
  238. die("some non-terminals are not defined");
  239. tszero(&t1.lk);
  240. first(&t1.lk, rem, &t->lk);
  241. m = smap[s-Sym0];
  242. if (m)
  243. for (; r-rs<nrl && r->lhs==s; r++, m++)
  244. chg |= tsunion(&i->ts[m].lk, &t1.lk);
  245. else {
  246. m = i->nt;
  247. smap[s-Sym0] = m;
  248. for (; r-rs<nrl && r->lhs==s; r++, m++) {
  249. if (m>=MaxTm)
  250. die("too many terms in item");
  251. t1.rule = r;
  252. i->ts[m] = t1;
  253. }
  254. i->nt = m;
  255. chg = 1;
  256. }
  257. }
  258. } while (chg);
  259. }
  260. void
  261. igoto(Item *i, Sym s)
  262. {
  263. Term *t, *t1;
  264. int n;
  265. i0.nt = 0;
  266. for (n=0, t=i->ts; n<i->nt; n++, t++) {
  267. if (t->rule->rhs[t->dot] != s)
  268. continue;
  269. t1 = &i0.ts[i0.nt++];
  270. *t1 = *t;
  271. t1->dot++;
  272. }
  273. qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv);
  274. }
  275. int
  276. icmp(Item *a, Item *b)
  277. {
  278. Term *ta, *tb, *ma, *mb;
  279. int c;
  280. ta = a->ts;
  281. tb = b->ts;
  282. ma = ta+a->nt;
  283. mb = tb+b->nt;
  284. for (;;) {
  285. if (ta==ma || ta->dot==0)
  286. return -(tb<mb && tb->dot);
  287. if (tb==mb || tb->dot==0)
  288. return +(ta<ma && ta->dot);
  289. if ((c=tcmp(ta++, tb++)))
  290. return c;
  291. }
  292. }
  293. int
  294. stadd(Item **pi)
  295. {
  296. Item *i, *i1;
  297. int lo, hi, mid, n, chg;
  298. /* http://www.iq0.com/duffgram/bsearch.c */
  299. i = *pi;
  300. lo = 0;
  301. hi = nst - 1;
  302. if (hi<0 || icmp(i, st[hi])>0)
  303. hi++;
  304. else if (icmp(i, st[lo])<=0)
  305. hi = lo;
  306. else
  307. while (hi-lo!=1) {
  308. mid = (lo+hi)/2;
  309. if (icmp(st[mid], i)<0)
  310. lo = mid;
  311. else
  312. hi = mid;
  313. }
  314. if (hi<nst && icmp(st[hi], i)==0) {
  315. chg = 0;
  316. i1 = st[hi];
  317. for (n=0; n<i->nt; n++)
  318. chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk);
  319. i1->dirty |= chg;
  320. *pi = i1;
  321. return chg;
  322. } else {
  323. st = realloc(st, ++nst * sizeof st[0]);
  324. if (!st)
  325. die("out of memory");
  326. memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]);
  327. i->gtbl = yalloc(nsy, sizeof i->gtbl[0]);
  328. i->dirty = 1;
  329. i1 = yalloc(1, sizeof *i1);
  330. *i1 = *i;
  331. *pi = st[hi] = i1;
  332. return 1;
  333. }
  334. }
  335. void
  336. stgen()
  337. {
  338. Sym s;
  339. Rule *r;
  340. Item *i, *i1;
  341. Term tini;
  342. int n, chg;
  343. ini = &i0;
  344. r = rfind(Sym0);
  345. tini.rule = r;
  346. tini.dot = 0;
  347. tszero(&tini.lk);
  348. SetBit(tini.lk.t, 0);
  349. i0.nt = 0;
  350. i0.ts[i0.nt++] = tini;
  351. stadd(&ini);
  352. do {
  353. chg = 0;
  354. for (n=0; n<nst; n++) {
  355. i = st[n];
  356. if (!i->dirty)
  357. continue;
  358. i->dirty = 0;
  359. iclose(i);
  360. for (s=0; s<nsy; s++) {
  361. igoto(i, s);
  362. i1 = &i0;
  363. if (!i1->nt) {
  364. i->gtbl[s] = 0;
  365. continue;
  366. }
  367. chg |= stadd(&i1);
  368. i->gtbl[s] = i1;
  369. }
  370. }
  371. } while (chg);
  372. }
  373. int
  374. resolve(Rule *r, Sym s, int st)
  375. {
  376. if (!r->prec || !is[s].prec) {
  377. conflict:
  378. if (fgrm)
  379. fprintf(fgrm, srs, st, is[s].name);
  380. srconf++;
  381. return ARight;
  382. }
  383. if (r->prec==is[s].prec) {
  384. if (is[s].assoc == ANone)
  385. goto conflict;
  386. return is[s].assoc;
  387. } else
  388. if (r->prec<is[s].prec)
  389. return ARight;
  390. else
  391. return ALeft;
  392. }
  393. void
  394. tblset(int *tbl, Item *i, Term *t)
  395. {
  396. int act;
  397. Sym s;
  398. s = t->rule->rhs[t->dot];
  399. if (s!=S) {
  400. /* shift */
  401. if (s>=ntk)
  402. return;
  403. assert(i->gtbl[s]);
  404. act = ARight;
  405. if (tbl[s] && tbl[s] != i->gtbl[s]->id) {
  406. assert(tbl[s]<=0);
  407. act = resolve(&rs[Red(tbl[s])], s, i->id-1);
  408. }
  409. switch (act) {
  410. case ARight:
  411. tbl[s] = i->gtbl[s]->id;
  412. break;
  413. case ANonassoc:
  414. tbl[s] = -1;
  415. break;
  416. }
  417. } else
  418. /* reduce */
  419. for (s=0; s<ntk; s++) {
  420. if (!GetBit(t->lk.t, s))
  421. continue;
  422. /* default to shift if conflict occurs */
  423. if (!tbl[s])
  424. act = ALeft;
  425. else if (tbl[s]<0) {
  426. if (fgrm)
  427. fprintf(fgrm, rrs, i->id-1, is[s].name);
  428. rrconf++;
  429. act = ARight;
  430. } else
  431. act = resolve(t->rule, s, i->id-1);
  432. switch (act) {
  433. case ALeft:
  434. tbl[s] = Red(t->rule-rs);
  435. break;
  436. case ANonassoc:
  437. tbl[s] = -1;
  438. break;
  439. }
  440. }
  441. }
  442. void
  443. setdef(Row *r, int w, int top)
  444. {
  445. int n, m, x, cnt, def, max;
  446. max = 0;
  447. def = -1;
  448. r->ndef = 0;
  449. for (n=0; n<w; n++) {
  450. x = r->t[n];
  451. if (x==0)
  452. r->ndef++;
  453. if (x>=top || x==0)
  454. continue;
  455. cnt = 1;
  456. for (m=n+1; m<w; m++)
  457. if (r->t[m]==x)
  458. cnt++;
  459. if (cnt>max) {
  460. def = x;
  461. max = cnt;
  462. }
  463. }
  464. r->def = def;
  465. if (max!=0)
  466. /* zero out the most frequent entry */
  467. for (n=0; n<w; n++)
  468. if (r->t[n]==def) {
  469. r->t[n] = 0;
  470. r->ndef++;
  471. }
  472. }
  473. void
  474. tblgen()
  475. {
  476. Row *r;
  477. Item *i;
  478. int n, m;
  479. for (n=0; n<nst; n++)
  480. st[n]->id = n+1;
  481. as = yalloc(nst, sizeof as[0]);
  482. gs = yalloc(nsy-MaxTk, sizeof gs[0]);
  483. /* fill action table */
  484. for (n=0; n<nst; n++) {
  485. r = &as[n];
  486. r->t = yalloc(ntk, sizeof r->t[0]);
  487. for (i=st[n], m=0; m<i->nt; m++)
  488. tblset(r->t, i, &i->ts[m]);
  489. setdef(r, ntk, -1);
  490. r->def = Red(r->def); /* Red(-1) == -1 */
  491. }
  492. /* fill goto table */
  493. for (n=MaxTk; n<nsy; n++) {
  494. r = &gs[n-MaxTk];
  495. r->t = yalloc(nst, sizeof r->t[0]);
  496. for (m=0; m<nst; m++)
  497. if (st[m]->gtbl[n])
  498. r->t[m] = st[m]->gtbl[n]->id;
  499. setdef(r, nst, nst+1);
  500. }
  501. }
  502. int
  503. prcmp(const void *a, const void *b)
  504. {
  505. return (*(Row **)a)->ndef - (*(Row **)b)->ndef;
  506. }
  507. void
  508. actgen()
  509. {
  510. Row **o, *r;
  511. int n, m, t, dsp, nnt;
  512. actsz = 0;
  513. o = yalloc(nst+nsy, sizeof o[0]);
  514. act = yalloc(nst*nsy, sizeof act[0]);
  515. chk = yalloc(nst*nsy, sizeof chk[0]);
  516. adsp = yalloc(nst, sizeof adsp[0]);
  517. for (n=0; n<nst*nsy; n++)
  518. chk[n] = -1;
  519. /* fill in actions */
  520. for (n=0; n<nst; n++)
  521. o[n] = &as[n];
  522. qsort(o, nst, sizeof o[0], prcmp);
  523. for (n=0; n<nst; n++) {
  524. r = o[n];
  525. dsp = 0;
  526. for (m=0; m<ntk && r->t[m]==0; m++)
  527. dsp--;
  528. retrya:
  529. /* The invariant here is even
  530. * trickier than it looks.
  531. */
  532. for (t=0; t<ntk; t++)
  533. if ((m=dsp+t)>=0 && chk[m]>=0)
  534. if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t]))
  535. || (!r->t[t] && chk[m]==t)) {
  536. dsp++;
  537. goto retrya;
  538. }
  539. adsp[r-as] = dsp;
  540. for (t=0; t<ntk; t++)
  541. if (r->t[t]) {
  542. chk[dsp+t] = t;
  543. act[dsp+t] = r->t[t];
  544. if (dsp+t>=actsz)
  545. actsz = dsp+t+1;
  546. }
  547. }
  548. /* fill in gotos */
  549. nnt = nsy-MaxTk;
  550. gdsp = yalloc(nnt, sizeof gdsp[0]);
  551. for (n=0; n<nnt; n++)
  552. o[n] = &gs[n];
  553. qsort(o, nnt, sizeof o[0], prcmp);
  554. for (n=0; n<nnt; n++) {
  555. r = o[n];
  556. dsp = 0;
  557. for (m=0; m<nst && r->t[m]==0; m++)
  558. dsp--;
  559. retryg:
  560. for (t=m; t<nst; t++)
  561. if (chk[dsp+t]>=0 && r->t[t]) {
  562. dsp++;
  563. goto retryg;
  564. }
  565. gdsp[r-gs] = dsp;
  566. for (t=m; t<nst; t++)
  567. if (r->t[t]) {
  568. chk[dsp+t] = ntk+(r-gs);
  569. act[dsp+t] = r->t[t];
  570. if (dsp+t>=actsz)
  571. actsz = dsp+t+1;
  572. }
  573. }
  574. free(o);
  575. }
  576. void
  577. aout(char *name, int *t, int n)
  578. {
  579. int i;
  580. fprintf(fout, "short %s[] = {", name);
  581. for (i=0; i<n; i++) {
  582. if (i % 10 == 0)
  583. fprintf(fout, "\n");
  584. fprintf(fout, "%4d", t[i]);
  585. if (i != n-1)
  586. fprintf(fout, ",");
  587. }
  588. fprintf(fout, "\n};\n");
  589. }
  590. void
  591. tblout()
  592. {
  593. int *o, n, m;
  594. fprintf(fout, "short yyini = %d;\n", ini->id-1);
  595. fprintf(fout, "short yyntoks = %d;\n", ntk);
  596. o = yalloc(nrl+nst+nsy, sizeof o[0]);
  597. for (n=0; n<nrl; n++)
  598. o[n] = slen(rs[n].rhs);
  599. aout("yyr1", o, nrl);
  600. for (n=0; n<nrl; n++)
  601. o[n] = rs[n].lhs-MaxTk;
  602. aout("yyr2", o, nrl);
  603. for (n=0; n<nst; n++)
  604. o[n] = as[n].def;
  605. aout("yyadef", o, nst);
  606. for (n=0; n<nsy-MaxTk; n++) {
  607. o[n] = gs[n].def;
  608. assert(o[n]>0 || o[n]==-1);
  609. if (o[n]>0)
  610. o[n]--;
  611. }
  612. aout("yygdef", o, nsy-MaxTk);
  613. aout("yyadsp", adsp, nst);
  614. aout("yygdsp", gdsp, nsy-MaxTk);
  615. for (n=0; n<actsz; n++)
  616. if (act[n]>=0)
  617. act[n]--;
  618. aout("yyact", act, actsz);
  619. aout("yychk", chk, actsz);
  620. for (n=0; n<128; n++) {
  621. o[n] = 0;
  622. for (m=0; m<ntk; m++)
  623. if (is[m].name[0]=='\'')
  624. if (is[m].name[1]==n)
  625. assert(!o[n]), o[n] = m;
  626. }
  627. m = 128;
  628. for (n=1; n<ntk; n++) {
  629. if (is[n].name[0]=='\'')
  630. continue;
  631. fprintf(fout, "#define %s %d\n", is[n].name, m);
  632. if (fhdr)
  633. fprintf(fhdr, "#define %s %d\n", is[n].name, m);
  634. o[m++] = n;
  635. }
  636. aout("yytrns", o, m);
  637. if (fhdr) {
  638. fputs("int yyparse(void);\n", fhdr);
  639. fputs("#ifndef YYSTYPE\n", fhdr);
  640. fputs("#define YYSTYPE int\n", fhdr);
  641. fputs("#endif\n", fhdr);
  642. fputs("extern YYSTYPE yylval;\n", fhdr);
  643. fputs("#endif\n", fhdr);
  644. }
  645. free(o);
  646. }
  647. void
  648. stdump()
  649. {
  650. Term *t;
  651. Sym *s1;
  652. int n, m, d, act;
  653. Rule *r;
  654. Row *ar;
  655. if (!fgrm)
  656. return;
  657. for (r=rs; r-rs<nrl; r++) {
  658. fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name);
  659. for (s1=r->rhs; *s1!=S; s1++)
  660. fprintf(fgrm, " %s", is[*s1].name);
  661. }
  662. fprintf(fgrm, "\n");
  663. for (m=0; m<nst; m++) {
  664. fprintf(fgrm, "\nState %d:\n", m);
  665. for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) {
  666. r = t->rule;
  667. d = t->dot;
  668. if (d==0 && t!=st[m]->ts)
  669. continue;
  670. fprintf(fgrm, " %s ->", is[r->lhs].name);
  671. for (s1=r->rhs; *s1!=S; s1++, d--)
  672. fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name);
  673. if (!d)
  674. fprintf(fgrm, " .");
  675. fprintf(fgrm, "\n");
  676. }
  677. fprintf(fgrm, "\n");
  678. ar = &as[m];
  679. for (n=0; n<ntk; n++) {
  680. act = ar->t[n];
  681. if (!act)
  682. continue;
  683. if (act==-1)
  684. fprintf(fgrm, " %s error (nonassoc)\n", is[n].name);
  685. else if (act<0)
  686. fprintf(fgrm, " %s reduce with rule %d\n", is[n].name, Red(act));
  687. else
  688. fprintf(fgrm, " %s shift and go to %d\n", is[n].name, act-1);
  689. }
  690. if (ar->def != -1)
  691. fprintf(fgrm, " * reduce with rule %d\n", ar->def);
  692. }
  693. }
  694. enum {
  695. TIdnt,
  696. TTokchr, /* 'c' */
  697. TPP, /* %% */
  698. TLL, /* %{ */
  699. TLangle, /* < */
  700. TRangle, /* > */
  701. TSemi, /* ; */
  702. TBar, /* | */
  703. TColon, /* : */
  704. TLBrack, /* { */
  705. TUnion,
  706. TType,
  707. TToken,
  708. TRight,
  709. TLeft,
  710. TNonassoc,
  711. TPrec,
  712. TStart,
  713. TEof
  714. };
  715. struct {
  716. char *name;
  717. int tok;
  718. } words[] = {
  719. { "%%", TPP },
  720. { "%union", TUnion },
  721. { "%type", TType },
  722. { "%token", TToken },
  723. { "%right", TRight },
  724. { "%left", TLeft },
  725. { "%nonassoc", TNonassoc },
  726. { "%prec", TPrec },
  727. { "%start", TStart },
  728. { 0, 0 }
  729. };
  730. char idnt[IdntSz];
  731. int
  732. istok(int c)
  733. {
  734. return isalnum(c) || c=='_' || c=='%';
  735. }
  736. int
  737. nexttk()
  738. {
  739. int n;
  740. char c, *p;
  741. while (isspace(c=fgetc(fin)))
  742. if (c == '\n')
  743. lineno++;
  744. switch (c) {
  745. case '<':
  746. return TLangle;
  747. case '>':
  748. return TRangle;
  749. case ';':
  750. return TSemi;
  751. case '|':
  752. return TBar;
  753. case ':':
  754. return TColon;
  755. case '{':
  756. return TLBrack;
  757. case EOF:
  758. return TEof;
  759. case '\'':
  760. idnt[0] = '\'';
  761. idnt[1] = fgetc(fin);
  762. idnt[2] = '\'';
  763. idnt[3] = 0;
  764. if (fgetc(fin)!='\'')
  765. die("syntax error, invalid char token");
  766. return TTokchr;
  767. }
  768. p = idnt;
  769. while (istok(c)) {
  770. *p++ = c;
  771. if (p-idnt >= IdntSz-1)
  772. die("identifier too long");
  773. c = fgetc(fin);
  774. }
  775. *p = 0;
  776. if (strcmp(idnt, "%")==0)
  777. if (c=='{')
  778. return TLL;
  779. ungetc(c, fin);
  780. for (n=0; words[n].name; n++)
  781. if (strcmp(idnt, words[n].name) == 0)
  782. return words[n].tok;
  783. return TIdnt;
  784. }
  785. char *
  786. cpycode()
  787. {
  788. int c, nest, in, len, pos;
  789. char *s;
  790. len = 64;
  791. s = yalloc(len+1, 1);
  792. s[0] = '{';
  793. pos = 1;
  794. nest = 1;
  795. in = 0;
  796. while (nest) {
  797. c = fgetc(fin);
  798. if (in) {
  799. if (c == in)
  800. if (s[pos-1] != '\\')
  801. in = 0;
  802. } else {
  803. if (c == '"' || c == '\'')
  804. in = c;
  805. if (c == '{')
  806. nest++;
  807. if (c == '}')
  808. nest--;
  809. if (c == EOF)
  810. die("syntax error, unclosed code block");
  811. if (c == '\n')
  812. lineno++;
  813. }
  814. if (pos>=len)
  815. if (!(s=realloc(s, len=2*len+1)))
  816. die("out of memory");
  817. s[pos++] = c;
  818. }
  819. s[pos] = 0;
  820. return s;
  821. }
  822. int
  823. gettype(char *type)
  824. {
  825. int tk;
  826. tk = nexttk();
  827. if (tk==TLangle) {
  828. if (nexttk()!=TIdnt)
  829. die("syntax error, ident expected after <");
  830. strcpy(type, idnt);
  831. if (nexttk()!=TRangle)
  832. die("syntax error, unclosed <");
  833. return nexttk();
  834. } else {
  835. type[0] = 0;
  836. return tk;
  837. }
  838. }
  839. Sym
  840. findsy(char *name, int add)
  841. {
  842. int n;
  843. for (n=0; n<nsy; n++) {
  844. if (n == ntk) {
  845. if (name[0]=='\'') {
  846. if (ntk>=MaxTk)
  847. die("too many tokens");
  848. ntk++;
  849. strcpy(is[n].name, name);
  850. return n;
  851. }
  852. n = MaxTk;
  853. }
  854. if (strcmp(is[n].name, name)==0)
  855. return n;
  856. }
  857. if (add) {
  858. if (nsy>=MaxTk+MaxNt)
  859. die("too many non-terminals");
  860. strcpy(is[nsy].name, name);
  861. return nsy++;
  862. } else
  863. return nsy;
  864. }
  865. void
  866. getdecls()
  867. {
  868. int tk, prec, p, a, c, c1, n;
  869. Info *si;
  870. char type[IdntSz], *s;
  871. strcpy(is[0].name, "$");
  872. ntk = 1;
  873. strcpy(is[Sym0].name, "@start");
  874. nsy = MaxTk+1;
  875. sstart = S;
  876. prec = 0;
  877. tk = nexttk();
  878. for (;;)
  879. switch (tk) {
  880. case TStart:
  881. tk = nexttk();
  882. if (tk!=TIdnt)
  883. die("syntax error, ident expected after %start");
  884. sstart = findsy(idnt, 1);
  885. if (sstart<ntk)
  886. die("%start cannot specify a token");
  887. tk = nexttk();
  888. break;
  889. case TUnion:
  890. tk = nexttk();
  891. if (tk!=TLBrack)
  892. die("syntax error, { expected after %union");
  893. fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
  894. s = cpycode();
  895. fprintf(fout, "typedef union %s yyunion;\n", s);
  896. fprintf(fout, "#define YYSTYPE yyunion\n");
  897. if (fhdr) {
  898. fprintf(fhdr, "typedef union %s yyunion;\n", s);
  899. fprintf(fhdr, "#define YYSTYPE yyunion\n");
  900. }
  901. free(s);
  902. doty = 1;
  903. tk = nexttk();
  904. break;
  905. case TLeft:
  906. p = ++prec;
  907. a = ALeft;
  908. goto addtoks;
  909. case TRight:
  910. p = ++prec;
  911. a = ARight;
  912. goto addtoks;
  913. case TNonassoc:
  914. p = ++prec;
  915. a = ANonassoc;
  916. goto addtoks;
  917. case TToken:
  918. p = 0;
  919. a = ANone;
  920. addtoks:
  921. tk = gettype(type);
  922. while (tk==TIdnt || tk==TTokchr) {
  923. si = 0;
  924. n = findsy(idnt, 0);
  925. if (n>=MaxTk && n<nsy)
  926. die("non-terminal redeclared as token");
  927. if (n==nsy) {
  928. if (ntk>=MaxTk)
  929. die("too many tokens");
  930. n = ntk++;
  931. }
  932. si = &is[n];
  933. strcpy(si->name, idnt);
  934. strcpy(si->type, type);
  935. si->prec = p;
  936. si->assoc = a;
  937. tk = nexttk();
  938. }
  939. break;
  940. case TType:
  941. tk = gettype(type);
  942. if (type[0]==0)
  943. die("syntax error, type expected");
  944. while (tk==TIdnt) {
  945. si = 0;
  946. n = findsy(idnt, 1);
  947. if (n<ntk)
  948. die("token redeclared as non-terminal");
  949. if (n==nsy) {
  950. nsy++;
  951. }
  952. si = &is[n];
  953. strcpy(si->name, idnt);
  954. strcpy(si->type, type);
  955. tk = nexttk();
  956. }
  957. break;
  958. case TLL:
  959. fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
  960. for (;;) {
  961. c = fgetc(fin);
  962. if (c == EOF)
  963. die("syntax error, unclosed %{");
  964. if (c == '%') {
  965. c1 = fgetc(fin);
  966. if (c1 == '}') {
  967. fputs("\n", fout);
  968. break;
  969. }
  970. ungetc(c1, fin);
  971. }
  972. if (c == '\n')
  973. lineno++;
  974. fputc(c, fout);
  975. }
  976. tk = nexttk();
  977. break;
  978. case TPP:
  979. return;
  980. case TEof:
  981. die("syntax error, unfinished declarations");
  982. default:
  983. die("syntax error, declaration expected");
  984. }
  985. }
  986. void
  987. getgram()
  988. {
  989. extern char *retcode;
  990. int tk;
  991. Sym hd, *p, s;
  992. Rule *r;
  993. for (;;) {
  994. tk = nexttk();
  995. if (tk==TPP || tk==TEof) {
  996. if (sstart==S)
  997. die("syntax error, empty grammar");
  998. r = &rs[nrl++];
  999. r->lhs = Sym0;
  1000. r->rhs[0] = sstart;
  1001. r->rhs[1] = 0;
  1002. r->rhs[2] = S;
  1003. r->act = retcode;
  1004. qsort(rs, nrl, sizeof rs[0], rcmp);
  1005. return;
  1006. }
  1007. if (tk!=TIdnt)
  1008. die("syntax error, production rule expected");
  1009. if (nexttk()!=TColon)
  1010. die("syntax error, colon expected after production's head");
  1011. hd = findsy(idnt, 1);
  1012. if (sstart==S)
  1013. sstart = hd;
  1014. do {
  1015. if (nrl>=MaxRl-1)
  1016. die("too many rules");
  1017. r = &rs[nrl++];
  1018. r->lhs = hd;
  1019. r->act = 0;
  1020. p = r->rhs;
  1021. while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) {
  1022. if (tk==TPrec) {
  1023. tk = nexttk();
  1024. if (tk!=TIdnt
  1025. || (s=findsy(idnt, 0))>=ntk)
  1026. die("token expected after %prec");
  1027. r->prec = is[s].prec;
  1028. continue;
  1029. }
  1030. s = findsy(idnt, 1);
  1031. *p++ = s;
  1032. if (s<ntk && is[s].prec>0)
  1033. r->prec = is[s].prec;
  1034. if (p-r->rhs >= MaxRhs-1)
  1035. die("production rule too long");
  1036. }
  1037. *p = S;
  1038. if (tk==TLBrack) {
  1039. r->actln = lineno;
  1040. r->act = cpycode();
  1041. tk = nexttk();
  1042. }
  1043. } while (tk==TBar);
  1044. if (tk!=TSemi)
  1045. die("syntax error, ; or | expected");
  1046. }
  1047. }
  1048. void
  1049. actout(Rule *r)
  1050. {
  1051. long l;
  1052. int i, ar;
  1053. char c, *p, *ty, tya[IdntSz];
  1054. ar = slen(r->rhs);
  1055. p = r->act;
  1056. i = r->actln;
  1057. if (!p)
  1058. return;
  1059. while ((c=*p++))
  1060. switch (c) {
  1061. case '\n':
  1062. i++;
  1063. default:
  1064. fputc(c, fout);
  1065. break;
  1066. case '$':
  1067. c = *p++;
  1068. if (c == '$') {
  1069. fprintf(fout, "yyval");
  1070. if (doty) {
  1071. ty = is[r->lhs].type;
  1072. if (!ty[0]) {
  1073. lineno = i;
  1074. die("$$ has no type");
  1075. }
  1076. fprintf(fout, ".%s", ty);
  1077. }
  1078. }
  1079. else if (c == '<') {
  1080. ty = tya;
  1081. while (istok(*p) && ty-tya<IdntSz-1)
  1082. *ty++ = *p++;
  1083. *ty = 0;
  1084. if (*p++!='>') {
  1085. lineno = i;
  1086. die("unclosed tag field");
  1087. }
  1088. ty = tya;
  1089. c = *p++;
  1090. if (c == '$') {
  1091. fprintf(fout, "yyval.%s", ty);
  1092. } else {
  1093. if (!isdigit(c)) {
  1094. lineno = i;
  1095. die("number or $ expected afer tag field");
  1096. }
  1097. goto readnum;
  1098. }
  1099. }
  1100. else if (isdigit(c)) {
  1101. ty = 0;
  1102. readnum:
  1103. l = strtol(p-1, &p, 10);
  1104. if (l > ar) {
  1105. lineno = i;
  1106. die("invalid $n");
  1107. }
  1108. fprintf(fout, "ps[%d].val", (int)l);
  1109. if (doty) {
  1110. if (!ty && l>0)
  1111. ty = is[r->rhs[l-1]].type;
  1112. if (!ty || !ty[0]) {
  1113. lineno = i;
  1114. die("$n has no type");
  1115. }
  1116. fprintf(fout, ".%s", ty);
  1117. }
  1118. }
  1119. else {
  1120. fputc('$', fout);
  1121. fputc(c, fout);
  1122. }
  1123. }
  1124. fputs("\n", fout);
  1125. }
  1126. void
  1127. codeout()
  1128. {
  1129. extern char *code0[], *code1[];
  1130. char **p;
  1131. Rule *r;
  1132. int n, c;
  1133. for (p=code0; *p; p++)
  1134. fputs(*p, fout);
  1135. for (n=0; n<nrl; n++) {
  1136. fprintf(fout, "\tcase %d:\n", n);
  1137. r = &rs[n];
  1138. fprintf(fout, "#line %d \"%s\"\n", r->actln, srca);
  1139. actout(r);
  1140. fputs("\t\tbreak;\n", fout);
  1141. }
  1142. for (p=code1; *p; p++)
  1143. fputs(*p, fout);
  1144. fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
  1145. while ((c=fgetc(fin))!=EOF)
  1146. fputc(c, fout);
  1147. }
  1148. void
  1149. init(int ac, char *av[])
  1150. {
  1151. int c, vf, df;
  1152. char *pref, buf[100], *opt;
  1153. (void) ac;
  1154. pref = "y";
  1155. vf = df = 0;
  1156. for (av++; av[0] && av[0][0]=='-'; av++)
  1157. for (opt = &av[0][1]; (c = *opt); opt++)
  1158. switch (c) {
  1159. case 'v':
  1160. vf = 1;
  1161. break;
  1162. case 'd':
  1163. df = 1;
  1164. break;
  1165. case 'b':
  1166. if ((pref = *++av))
  1167. break;
  1168. default:
  1169. usage:
  1170. fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr);
  1171. exit(1);
  1172. }
  1173. if (!(srca = *av))
  1174. goto usage;
  1175. fin = fopen(srca, "r");
  1176. if (strlen(pref) + 10 > sizeof buf)
  1177. die("-b prefix too long");
  1178. sprintf(buf, "%s.tab.c", pref);
  1179. fout = fopen(buf, "w");
  1180. if (vf) {
  1181. sprintf(buf, "%s.output", pref);
  1182. fgrm = fopen(buf, "w");
  1183. }
  1184. if (df) {
  1185. sprintf(buf, "%s.tab.h", pref);
  1186. fhdr = fopen(buf, "w");
  1187. if (fhdr) {
  1188. fprintf(fhdr, "#ifndef Y_TAB_H_\n");
  1189. fprintf(fhdr, "#define Y_TAB_H_\n");
  1190. }
  1191. }
  1192. if (!fin || !fout || (!fgrm && vf) || (!fhdr && df))
  1193. die("cannot open work files");
  1194. }
  1195. int
  1196. main(int ac, char *av[])
  1197. {
  1198. init(ac, av);
  1199. getdecls();
  1200. getgram();
  1201. ginit();
  1202. stgen();
  1203. tblgen();
  1204. stdump();
  1205. actgen();
  1206. tblout();
  1207. codeout();
  1208. if (srconf)
  1209. fprintf(stderr, "%d shift/reduce conflicts\n", srconf);
  1210. if (rrconf)
  1211. fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf);
  1212. exit(0);
  1213. }
  1214. /* Glorious macros.
  1215. |sed 's|.*|"&\\n",|'
  1216. */
  1217. char *retcode = "\t\tyyval = ps[1].val; return 0;";
  1218. char *code0[] = {
  1219. "\n",
  1220. "#ifndef YYSTYPE\n",
  1221. "#define YYSTYPE int\n",
  1222. "#endif\n",
  1223. "YYSTYPE yylval;\n",
  1224. "\n",
  1225. "int\n",
  1226. "yyparse()\n",
  1227. "{\n",
  1228. " enum {\n",
  1229. " StackSize = 100,\n",
  1230. " ActSz = sizeof yyact / sizeof yyact[0],\n",
  1231. " };\n",
  1232. " struct {\n",
  1233. " YYSTYPE val;\n",
  1234. " int state;\n",
  1235. " } stk[StackSize], *ps;\n",
  1236. " int r, h, n, s, tk;\n",
  1237. " YYSTYPE yyval;\n",
  1238. "\n",
  1239. " ps = stk;\n",
  1240. " ps->state = s = yyini;\n",
  1241. " tk = -1;\n",
  1242. "loop:\n",
  1243. " n = yyadsp[s];\n",
  1244. " if (tk < 0 && n > -yyntoks)\n",
  1245. " tk = yytrns[yylex()];\n",
  1246. " n += tk;\n",
  1247. " if (n < 0 || n >= ActSz || yychk[n] != tk) {\n",
  1248. " r = yyadef[s];\n",
  1249. " if (r < 0)\n",
  1250. " return -1;\n",
  1251. " goto reduce;\n",
  1252. " }\n",
  1253. " n = yyact[n];\n",
  1254. " if (n == -1)\n",
  1255. " return -1;\n",
  1256. " if (n < 0) {\n",
  1257. " r = - (n+2);\n",
  1258. " goto reduce;\n",
  1259. " }\n",
  1260. " tk = -1;\n",
  1261. " yyval = yylval;\n",
  1262. "stack:\n",
  1263. " ps++;\n",
  1264. " if (ps-stk >= StackSize)\n",
  1265. " return -2;\n",
  1266. " s = n;\n",
  1267. " ps->state = s;\n",
  1268. " ps->val = yyval;\n",
  1269. " goto loop;\n",
  1270. "reduce:\n",
  1271. " ps -= yyr1[r];\n",
  1272. " h = yyr2[r];\n",
  1273. " s = ps->state;\n",
  1274. " n = yygdsp[h] + s;\n",
  1275. " if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n",
  1276. " n = yygdef[h];\n",
  1277. " else\n",
  1278. " n = yyact[n];\n",
  1279. " switch (r) {\n",
  1280. 0
  1281. };
  1282. char *code1[] = {
  1283. " }\n",
  1284. " goto stack;\n",
  1285. "}\n",
  1286. 0
  1287. };