| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378 |
- /*% clang -g -Wall -Wextra % -o #
- * miniyacc - LALR(1) grammars for C
- * See LICENSE for copyright and license details.
- */
- #include <assert.h>
- #include <ctype.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- typedef int Sym;
- typedef struct Rule Rule;
- typedef struct TSet TSet;
- typedef struct Info Info;
- typedef struct Term Term;
- typedef struct Item Item;
- typedef struct Row Row;
- #define S ((Sym) -1)
- #define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */
- #define GetBit(s,n) (s[n/32] & (1<<(n%32)))
- #define SetBit(s,n) (s[n/32] |= 1<<(n%32))
- enum {
- IdntSz = 64,
- MaxRhs = 32,
- MaxTk = 500,
- MaxNt = 500,
- MaxRl = 800,
- MaxTm = 1000,
- TSetSz = (MaxTk+31)/32,
- Sym0 = MaxTk
- };
- struct Rule {
- Sym lhs;
- Sym rhs[MaxRhs];
- char *act;
- int actln;
- int prec;
- };
- struct TSet {
- unsigned t[TSetSz];
- };
- struct Info {
- int nul;
- TSet fst;
- int prec;
- enum {
- ANone,
- ALeft,
- ARight,
- ANonassoc
- } assoc;
- char name[IdntSz];
- char type[IdntSz];
- };
- struct Term {
- Rule *rule;
- int dot;
- TSet lk;
- };
- struct Item {
- int id;
- int nt;
- Term ts[MaxTm];
- Item **gtbl;
- int dirty;
- };
- struct Row {
- int def;
- int ndef;
- int *t;
- };
- char srs[] = "shift/reduce conflict state %d token %s\n";
- char rrs[] = "reduce/reduce conflict state %d token %s\n";
- Item i0; /* temporary item */
- int nrl, nsy, nst, ntk;
- Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */
- Info is[MaxTk+MaxNt]; /* symbol information */
- Item **st; /* LALR(1) states (ordered, icmp) */
- Row *as; /* action table [state][tok] */
- Row *gs; /* goto table [sym][state] */
- Sym sstart;/* start symbol */
- Item *ini; /* initial state */
- int doty; /* type-checking enabled */
- int srconf, rrconf;
- int actsz;
- int *act;
- int *chk;
- int *adsp;
- int *gdsp;
- int lineno = 1;
- char *srca;
- FILE *fin;
- FILE *fout;
- FILE *fgrm;
- FILE *fhdr;
- void
- die(char *s)
- {
- fprintf(stderr, "%s (on line %d)\n", s, lineno);
- exit(1);
- }
- void *
- yalloc(size_t n, size_t o)
- {
- void *p;
- p = calloc(n, o);
- if (!p)
- die("out of memory");
- return p;
- }
- int
- rcmp(const void *a, const void *b)
- {
- return ((Rule *)a)->lhs - ((Rule *)b)->lhs;
- }
- Rule *
- rfind(Sym lhs)
- {
- Rule *r;
- Rule k;
- k.lhs = lhs;
- r = bsearch(&k, rs, nrl, sizeof *r, rcmp);
- if (r != 0)
- while (r > rs && r[-1].lhs == lhs)
- r--;
- return r;
- }
- int
- slen(Sym *l)
- {
- int n;
- for (n=0; *l!=S; n++, l++);
- return n;
- }
- void
- tszero(TSet *ts)
- {
- memset(ts, 0, sizeof *ts);
- }
- int
- tsunion(TSet *tsa, TSet *tsb)
- {
- int n;
- unsigned *a, *b, c, t;
- c = 0;
- a = tsa->t;
- b = tsb->t;
- n = (31+ntk)/32;
- while (n-- > 0) {
- t = *a;
- *a |= *b++;
- c |= t ^ *a++;
- }
- return !!c;
- }
- void
- first(TSet *ts, Sym *stnc, TSet *last)
- {
- Sym f;
- f = stnc[0];
- if (f == S) {
- if (last)
- tsunion(ts, last);
- return;
- }
- if (f < ntk) {
- SetBit(ts->t, f);
- return;
- }
- if (is[f].nul)
- first(ts, stnc+1, last);
- tsunion(ts, &is[f].fst);
- }
- void
- ginit()
- {
- int chg;
- Rule *r;
- Info *i;
- Sym *s;
- TSet ts;
- do {
- chg = 0;
- for (r=rs; r-rs<nrl; r++) {
- i = &is[r->lhs];
- for (s=r->rhs; *s!=S; s++)
- if (!is[*s].nul)
- goto nonul;
- chg |= i->nul == 0;
- i->nul = 1;
- nonul:
- tszero(&ts);
- first(&ts, r->rhs, 0);
- chg |= tsunion(&i->fst, &ts);
- }
- } while (chg);
- }
- int
- tcmp(Term *a, Term *b)
- {
- int c;
- c = a->rule - b->rule;
- if (c==0)
- c = a->dot - b->dot;
- return c;
- }
- int
- tcmpv(const void *a, const void *b)
- {
- return tcmp((Term *)a, (Term *)b);
- }
- void
- iclose(Item *i)
- {
- int smap[MaxNt];
- Rule *r;
- Term *t, t1;
- Sym s, *rem;
- int chg, n, m;
- t1.dot = 0;
- memset(smap, 0, sizeof smap);
- for (n=0; n<i->nt; n++) {
- t = &i->ts[n];
- s = t->rule->lhs-Sym0;
- if (t->dot==0)
- if (smap[s]==0)
- smap[s] = n;
- }
- do {
- chg = 0;
- for (n=0; n<i->nt; n++) {
- t = &i->ts[n];
- rem = &t->rule->rhs[t->dot];
- s = *rem++;
- if (s < Sym0 || s == S)
- continue;
- r = rfind(s);
- if (!r)
- die("some non-terminals are not defined");
- tszero(&t1.lk);
- first(&t1.lk, rem, &t->lk);
- m = smap[s-Sym0];
- if (m)
- for (; r-rs<nrl && r->lhs==s; r++, m++)
- chg |= tsunion(&i->ts[m].lk, &t1.lk);
- else {
- m = i->nt;
- smap[s-Sym0] = m;
- for (; r-rs<nrl && r->lhs==s; r++, m++) {
- if (m>=MaxTm)
- die("too many terms in item");
- t1.rule = r;
- i->ts[m] = t1;
- }
- i->nt = m;
- chg = 1;
- }
- }
- } while (chg);
- }
- void
- igoto(Item *i, Sym s)
- {
- Term *t, *t1;
- int n;
- i0.nt = 0;
- for (n=0, t=i->ts; n<i->nt; n++, t++) {
- if (t->rule->rhs[t->dot] != s)
- continue;
- t1 = &i0.ts[i0.nt++];
- *t1 = *t;
- t1->dot++;
- }
- qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv);
- }
- int
- icmp(Item *a, Item *b)
- {
- Term *ta, *tb, *ma, *mb;
- int c;
- ta = a->ts;
- tb = b->ts;
- ma = ta+a->nt;
- mb = tb+b->nt;
- for (;;) {
- if (ta==ma || ta->dot==0)
- return -(tb<mb && tb->dot);
- if (tb==mb || tb->dot==0)
- return +(ta<ma && ta->dot);
- if ((c=tcmp(ta++, tb++)))
- return c;
- }
- }
- int
- stadd(Item **pi)
- {
- Item *i, *i1;
- int lo, hi, mid, n, chg;
- /* http://www.iq0.com/duffgram/bsearch.c */
- i = *pi;
- lo = 0;
- hi = nst - 1;
- if (hi<0 || icmp(i, st[hi])>0)
- hi++;
- else if (icmp(i, st[lo])<=0)
- hi = lo;
- else
- while (hi-lo!=1) {
- mid = (lo+hi)/2;
- if (icmp(st[mid], i)<0)
- lo = mid;
- else
- hi = mid;
- }
- if (hi<nst && icmp(st[hi], i)==0) {
- chg = 0;
- i1 = st[hi];
- for (n=0; n<i->nt; n++)
- chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk);
- i1->dirty |= chg;
- *pi = i1;
- return chg;
- } else {
- st = realloc(st, ++nst * sizeof st[0]);
- if (!st)
- die("out of memory");
- memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]);
- i->gtbl = yalloc(nsy, sizeof i->gtbl[0]);
- i->dirty = 1;
- i1 = yalloc(1, sizeof *i1);
- *i1 = *i;
- *pi = st[hi] = i1;
- return 1;
- }
- }
- void
- stgen()
- {
- Sym s;
- Rule *r;
- Item *i, *i1;
- Term tini;
- int n, chg;
- ini = &i0;
- r = rfind(Sym0);
- tini.rule = r;
- tini.dot = 0;
- tszero(&tini.lk);
- SetBit(tini.lk.t, 0);
- i0.nt = 0;
- i0.ts[i0.nt++] = tini;
- stadd(&ini);
- do {
- chg = 0;
- for (n=0; n<nst; n++) {
- i = st[n];
- if (!i->dirty)
- continue;
- i->dirty = 0;
- iclose(i);
- for (s=0; s<nsy; s++) {
- igoto(i, s);
- i1 = &i0;
- if (!i1->nt) {
- i->gtbl[s] = 0;
- continue;
- }
- chg |= stadd(&i1);
- i->gtbl[s] = i1;
- }
- }
- } while (chg);
- }
- int
- resolve(Rule *r, Sym s, int st)
- {
- if (!r->prec || !is[s].prec) {
- conflict:
- if (fgrm)
- fprintf(fgrm, srs, st, is[s].name);
- srconf++;
- return ARight;
- }
- if (r->prec==is[s].prec) {
- if (is[s].assoc == ANone)
- goto conflict;
- return is[s].assoc;
- } else
- if (r->prec<is[s].prec)
- return ARight;
- else
- return ALeft;
- }
- void
- tblset(int *tbl, Item *i, Term *t)
- {
- int act;
- Sym s;
- s = t->rule->rhs[t->dot];
- if (s!=S) {
- /* shift */
- if (s>=ntk)
- return;
- assert(i->gtbl[s]);
- act = ARight;
- if (tbl[s] && tbl[s] != i->gtbl[s]->id) {
- assert(tbl[s]<=0);
- act = resolve(&rs[Red(tbl[s])], s, i->id-1);
- }
- switch (act) {
- case ARight:
- tbl[s] = i->gtbl[s]->id;
- break;
- case ANonassoc:
- tbl[s] = -1;
- break;
- }
- } else
- /* reduce */
- for (s=0; s<ntk; s++) {
- if (!GetBit(t->lk.t, s))
- continue;
- /* default to shift if conflict occurs */
- if (!tbl[s])
- act = ALeft;
- else if (tbl[s]<0) {
- if (fgrm)
- fprintf(fgrm, rrs, i->id-1, is[s].name);
- rrconf++;
- act = ARight;
- } else
- act = resolve(t->rule, s, i->id-1);
- switch (act) {
- case ALeft:
- tbl[s] = Red(t->rule-rs);
- break;
- case ANonassoc:
- tbl[s] = -1;
- break;
- }
- }
- }
- void
- setdef(Row *r, int w, int top)
- {
- int n, m, x, cnt, def, max;
- max = 0;
- def = -1;
- r->ndef = 0;
- for (n=0; n<w; n++) {
- x = r->t[n];
- if (x==0)
- r->ndef++;
- if (x>=top || x==0)
- continue;
- cnt = 1;
- for (m=n+1; m<w; m++)
- if (r->t[m]==x)
- cnt++;
- if (cnt>max) {
- def = x;
- max = cnt;
- }
- }
- r->def = def;
- if (max!=0)
- /* zero out the most frequent entry */
- for (n=0; n<w; n++)
- if (r->t[n]==def) {
- r->t[n] = 0;
- r->ndef++;
- }
- }
- void
- tblgen()
- {
- Row *r;
- Item *i;
- int n, m;
- for (n=0; n<nst; n++)
- st[n]->id = n+1;
- as = yalloc(nst, sizeof as[0]);
- gs = yalloc(nsy-MaxTk, sizeof gs[0]);
- /* fill action table */
- for (n=0; n<nst; n++) {
- r = &as[n];
- r->t = yalloc(ntk, sizeof r->t[0]);
- for (i=st[n], m=0; m<i->nt; m++)
- tblset(r->t, i, &i->ts[m]);
- setdef(r, ntk, -1);
- r->def = Red(r->def); /* Red(-1) == -1 */
- }
- /* fill goto table */
- for (n=MaxTk; n<nsy; n++) {
- r = &gs[n-MaxTk];
- r->t = yalloc(nst, sizeof r->t[0]);
- for (m=0; m<nst; m++)
- if (st[m]->gtbl[n])
- r->t[m] = st[m]->gtbl[n]->id;
- setdef(r, nst, nst+1);
- }
- }
- int
- prcmp(const void *a, const void *b)
- {
- return (*(Row **)a)->ndef - (*(Row **)b)->ndef;
- }
- void
- actgen()
- {
- Row **o, *r;
- int n, m, t, dsp, nnt;
- actsz = 0;
- o = yalloc(nst+nsy, sizeof o[0]);
- act = yalloc(nst*nsy, sizeof act[0]);
- chk = yalloc(nst*nsy, sizeof chk[0]);
- adsp = yalloc(nst, sizeof adsp[0]);
- for (n=0; n<nst*nsy; n++)
- chk[n] = -1;
- /* fill in actions */
- for (n=0; n<nst; n++)
- o[n] = &as[n];
- qsort(o, nst, sizeof o[0], prcmp);
- for (n=0; n<nst; n++) {
- r = o[n];
- dsp = 0;
- for (m=0; m<ntk && r->t[m]==0; m++)
- dsp--;
- retrya:
- /* The invariant here is even
- * trickier than it looks.
- */
- for (t=0; t<ntk; t++)
- if ((m=dsp+t)>=0 && chk[m]>=0)
- if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t]))
- || (!r->t[t] && chk[m]==t)) {
- dsp++;
- goto retrya;
- }
- adsp[r-as] = dsp;
- for (t=0; t<ntk; t++)
- if (r->t[t]) {
- chk[dsp+t] = t;
- act[dsp+t] = r->t[t];
- if (dsp+t>=actsz)
- actsz = dsp+t+1;
- }
- }
- /* fill in gotos */
- nnt = nsy-MaxTk;
- gdsp = yalloc(nnt, sizeof gdsp[0]);
- for (n=0; n<nnt; n++)
- o[n] = &gs[n];
- qsort(o, nnt, sizeof o[0], prcmp);
- for (n=0; n<nnt; n++) {
- r = o[n];
- dsp = 0;
- for (m=0; m<nst && r->t[m]==0; m++)
- dsp--;
- retryg:
- for (t=m; t<nst; t++)
- if (chk[dsp+t]>=0 && r->t[t]) {
- dsp++;
- goto retryg;
- }
- gdsp[r-gs] = dsp;
- for (t=m; t<nst; t++)
- if (r->t[t]) {
- chk[dsp+t] = ntk+(r-gs);
- act[dsp+t] = r->t[t];
- if (dsp+t>=actsz)
- actsz = dsp+t+1;
- }
- }
- free(o);
- }
- void
- aout(char *name, int *t, int n)
- {
- int i;
- fprintf(fout, "short %s[] = {", name);
- for (i=0; i<n; i++) {
- if (i % 10 == 0)
- fprintf(fout, "\n");
- fprintf(fout, "%4d", t[i]);
- if (i != n-1)
- fprintf(fout, ",");
- }
- fprintf(fout, "\n};\n");
- }
- void
- tblout()
- {
- int *o, n, m;
- fprintf(fout, "short yyini = %d;\n", ini->id-1);
- fprintf(fout, "short yyntoks = %d;\n", ntk);
- o = yalloc(nrl+nst+nsy, sizeof o[0]);
- for (n=0; n<nrl; n++)
- o[n] = slen(rs[n].rhs);
- aout("yyr1", o, nrl);
- for (n=0; n<nrl; n++)
- o[n] = rs[n].lhs-MaxTk;
- aout("yyr2", o, nrl);
- for (n=0; n<nst; n++)
- o[n] = as[n].def;
- aout("yyadef", o, nst);
- for (n=0; n<nsy-MaxTk; n++) {
- o[n] = gs[n].def;
- assert(o[n]>0 || o[n]==-1);
- if (o[n]>0)
- o[n]--;
- }
- aout("yygdef", o, nsy-MaxTk);
- aout("yyadsp", adsp, nst);
- aout("yygdsp", gdsp, nsy-MaxTk);
- for (n=0; n<actsz; n++)
- if (act[n]>=0)
- act[n]--;
- aout("yyact", act, actsz);
- aout("yychk", chk, actsz);
- for (n=0; n<128; n++) {
- o[n] = 0;
- for (m=0; m<ntk; m++)
- if (is[m].name[0]=='\'')
- if (is[m].name[1]==n)
- assert(!o[n]), o[n] = m;
- }
- m = 128;
- for (n=1; n<ntk; n++) {
- if (is[n].name[0]=='\'')
- continue;
- fprintf(fout, "#define %s %d\n", is[n].name, m);
- if (fhdr)
- fprintf(fhdr, "#define %s %d\n", is[n].name, m);
- o[m++] = n;
- }
- aout("yytrns", o, m);
- if (fhdr) {
- fputs("int yyparse(void);\n", fhdr);
- fputs("#ifndef YYSTYPE\n", fhdr);
- fputs("#define YYSTYPE int\n", fhdr);
- fputs("#endif\n", fhdr);
- fputs("extern YYSTYPE yylval;\n", fhdr);
- fputs("#endif\n", fhdr);
- }
- free(o);
- }
- void
- stdump()
- {
- Term *t;
- Sym *s1;
- int n, m, d, act;
- Rule *r;
- Row *ar;
- if (!fgrm)
- return;
- for (r=rs; r-rs<nrl; r++) {
- fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name);
- for (s1=r->rhs; *s1!=S; s1++)
- fprintf(fgrm, " %s", is[*s1].name);
- }
- fprintf(fgrm, "\n");
- for (m=0; m<nst; m++) {
- fprintf(fgrm, "\nState %d:\n", m);
- for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) {
- r = t->rule;
- d = t->dot;
- if (d==0 && t!=st[m]->ts)
- continue;
- fprintf(fgrm, " %s ->", is[r->lhs].name);
- for (s1=r->rhs; *s1!=S; s1++, d--)
- fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name);
- if (!d)
- fprintf(fgrm, " .");
- fprintf(fgrm, "\n");
- }
- fprintf(fgrm, "\n");
- ar = &as[m];
- for (n=0; n<ntk; n++) {
- act = ar->t[n];
- if (!act)
- continue;
- if (act==-1)
- fprintf(fgrm, " %s error (nonassoc)\n", is[n].name);
- else if (act<0)
- fprintf(fgrm, " %s reduce with rule %d\n", is[n].name, Red(act));
- else
- fprintf(fgrm, " %s shift and go to %d\n", is[n].name, act-1);
- }
- if (ar->def != -1)
- fprintf(fgrm, " * reduce with rule %d\n", ar->def);
- }
- }
- enum {
- TIdnt,
- TTokchr, /* 'c' */
- TPP, /* %% */
- TLL, /* %{ */
- TLangle, /* < */
- TRangle, /* > */
- TSemi, /* ; */
- TBar, /* | */
- TColon, /* : */
- TLBrack, /* { */
- TUnion,
- TType,
- TToken,
- TRight,
- TLeft,
- TNonassoc,
- TPrec,
- TStart,
- TEof
- };
- struct {
- char *name;
- int tok;
- } words[] = {
- { "%%", TPP },
- { "%union", TUnion },
- { "%type", TType },
- { "%token", TToken },
- { "%right", TRight },
- { "%left", TLeft },
- { "%nonassoc", TNonassoc },
- { "%prec", TPrec },
- { "%start", TStart },
- { 0, 0 }
- };
- char idnt[IdntSz];
- int
- istok(int c)
- {
- return isalnum(c) || c=='_' || c=='%';
- }
- int
- nexttk()
- {
- int n;
- char c, *p;
- while (isspace(c=fgetc(fin)))
- if (c == '\n')
- lineno++;
- switch (c) {
- case '<':
- return TLangle;
- case '>':
- return TRangle;
- case ';':
- return TSemi;
- case '|':
- return TBar;
- case ':':
- return TColon;
- case '{':
- return TLBrack;
- case EOF:
- return TEof;
- case '\'':
- idnt[0] = '\'';
- idnt[1] = fgetc(fin);
- idnt[2] = '\'';
- idnt[3] = 0;
- if (fgetc(fin)!='\'')
- die("syntax error, invalid char token");
- return TTokchr;
- }
- p = idnt;
- while (istok(c)) {
- *p++ = c;
- if (p-idnt >= IdntSz-1)
- die("identifier too long");
- c = fgetc(fin);
- }
- *p = 0;
- if (strcmp(idnt, "%")==0)
- if (c=='{')
- return TLL;
- ungetc(c, fin);
- for (n=0; words[n].name; n++)
- if (strcmp(idnt, words[n].name) == 0)
- return words[n].tok;
- return TIdnt;
- }
- char *
- cpycode()
- {
- int c, nest, in, len, pos;
- char *s;
- len = 64;
- s = yalloc(len+1, 1);
- s[0] = '{';
- pos = 1;
- nest = 1;
- in = 0;
- while (nest) {
- c = fgetc(fin);
- if (in) {
- if (c == in)
- if (s[pos-1] != '\\')
- in = 0;
- } else {
- if (c == '"' || c == '\'')
- in = c;
- if (c == '{')
- nest++;
- if (c == '}')
- nest--;
- if (c == EOF)
- die("syntax error, unclosed code block");
- if (c == '\n')
- lineno++;
- }
- if (pos>=len)
- if (!(s=realloc(s, len=2*len+1)))
- die("out of memory");
- s[pos++] = c;
- }
- s[pos] = 0;
- return s;
- }
- int
- gettype(char *type)
- {
- int tk;
- tk = nexttk();
- if (tk==TLangle) {
- if (nexttk()!=TIdnt)
- die("syntax error, ident expected after <");
- strcpy(type, idnt);
- if (nexttk()!=TRangle)
- die("syntax error, unclosed <");
- return nexttk();
- } else {
- type[0] = 0;
- return tk;
- }
- }
- Sym
- findsy(char *name, int add)
- {
- int n;
- for (n=0; n<nsy; n++) {
- if (n == ntk) {
- if (name[0]=='\'') {
- if (ntk>=MaxTk)
- die("too many tokens");
- ntk++;
- strcpy(is[n].name, name);
- return n;
- }
- n = MaxTk;
- }
- if (strcmp(is[n].name, name)==0)
- return n;
- }
- if (add) {
- if (nsy>=MaxTk+MaxNt)
- die("too many non-terminals");
- strcpy(is[nsy].name, name);
- return nsy++;
- } else
- return nsy;
- }
- void
- getdecls()
- {
- int tk, prec, p, a, c, c1, n;
- Info *si;
- char type[IdntSz], *s;
- strcpy(is[0].name, "$");
- ntk = 1;
- strcpy(is[Sym0].name, "@start");
- nsy = MaxTk+1;
- sstart = S;
- prec = 0;
- tk = nexttk();
- for (;;)
- switch (tk) {
- case TStart:
- tk = nexttk();
- if (tk!=TIdnt)
- die("syntax error, ident expected after %start");
- sstart = findsy(idnt, 1);
- if (sstart<ntk)
- die("%start cannot specify a token");
- tk = nexttk();
- break;
- case TUnion:
- tk = nexttk();
- if (tk!=TLBrack)
- die("syntax error, { expected after %union");
- fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
- s = cpycode();
- fprintf(fout, "typedef union %s yyunion;\n", s);
- fprintf(fout, "#define YYSTYPE yyunion\n");
- if (fhdr) {
- fprintf(fhdr, "typedef union %s yyunion;\n", s);
- fprintf(fhdr, "#define YYSTYPE yyunion\n");
- }
- free(s);
- doty = 1;
- tk = nexttk();
- break;
- case TLeft:
- p = ++prec;
- a = ALeft;
- goto addtoks;
- case TRight:
- p = ++prec;
- a = ARight;
- goto addtoks;
- case TNonassoc:
- p = ++prec;
- a = ANonassoc;
- goto addtoks;
- case TToken:
- p = 0;
- a = ANone;
- addtoks:
- tk = gettype(type);
- while (tk==TIdnt || tk==TTokchr) {
- si = 0;
- n = findsy(idnt, 0);
- if (n>=MaxTk && n<nsy)
- die("non-terminal redeclared as token");
- if (n==nsy) {
- if (ntk>=MaxTk)
- die("too many tokens");
- n = ntk++;
- }
- si = &is[n];
- strcpy(si->name, idnt);
- strcpy(si->type, type);
- si->prec = p;
- si->assoc = a;
- tk = nexttk();
- }
- break;
- case TType:
- tk = gettype(type);
- if (type[0]==0)
- die("syntax error, type expected");
- while (tk==TIdnt) {
- si = 0;
- n = findsy(idnt, 1);
- if (n<ntk)
- die("token redeclared as non-terminal");
- if (n==nsy) {
- nsy++;
- }
- si = &is[n];
- strcpy(si->name, idnt);
- strcpy(si->type, type);
- tk = nexttk();
- }
- break;
- case TLL:
- fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
- for (;;) {
- c = fgetc(fin);
- if (c == EOF)
- die("syntax error, unclosed %{");
- if (c == '%') {
- c1 = fgetc(fin);
- if (c1 == '}') {
- fputs("\n", fout);
- break;
- }
- ungetc(c1, fin);
- }
- if (c == '\n')
- lineno++;
- fputc(c, fout);
- }
- tk = nexttk();
- break;
- case TPP:
- return;
- case TEof:
- die("syntax error, unfinished declarations");
- default:
- die("syntax error, declaration expected");
- }
- }
- void
- getgram()
- {
- extern char *retcode;
- int tk;
- Sym hd, *p, s;
- Rule *r;
- for (;;) {
- tk = nexttk();
- if (tk==TPP || tk==TEof) {
- if (sstart==S)
- die("syntax error, empty grammar");
- r = &rs[nrl++];
- r->lhs = Sym0;
- r->rhs[0] = sstart;
- r->rhs[1] = 0;
- r->rhs[2] = S;
- r->act = retcode;
- qsort(rs, nrl, sizeof rs[0], rcmp);
- return;
- }
- if (tk!=TIdnt)
- die("syntax error, production rule expected");
- if (nexttk()!=TColon)
- die("syntax error, colon expected after production's head");
- hd = findsy(idnt, 1);
- if (sstart==S)
- sstart = hd;
- do {
- if (nrl>=MaxRl-1)
- die("too many rules");
- r = &rs[nrl++];
- r->lhs = hd;
- r->act = 0;
- p = r->rhs;
- while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) {
- if (tk==TPrec) {
- tk = nexttk();
- if (tk!=TIdnt
- || (s=findsy(idnt, 0))>=ntk)
- die("token expected after %prec");
- r->prec = is[s].prec;
- continue;
- }
- s = findsy(idnt, 1);
- *p++ = s;
- if (s<ntk && is[s].prec>0)
- r->prec = is[s].prec;
- if (p-r->rhs >= MaxRhs-1)
- die("production rule too long");
- }
- *p = S;
- if (tk==TLBrack) {
- r->actln = lineno;
- r->act = cpycode();
- tk = nexttk();
- }
- } while (tk==TBar);
- if (tk!=TSemi)
- die("syntax error, ; or | expected");
- }
- }
- void
- actout(Rule *r)
- {
- long l;
- int i, ar;
- char c, *p, *ty, tya[IdntSz];
- ar = slen(r->rhs);
- p = r->act;
- i = r->actln;
- if (!p)
- return;
- while ((c=*p++))
- switch (c) {
- case '\n':
- i++;
- default:
- fputc(c, fout);
- break;
- case '$':
- c = *p++;
- if (c == '$') {
- fprintf(fout, "yyval");
- if (doty) {
- ty = is[r->lhs].type;
- if (!ty[0]) {
- lineno = i;
- die("$$ has no type");
- }
- fprintf(fout, ".%s", ty);
- }
- }
- else if (c == '<') {
- ty = tya;
- while (istok(*p) && ty-tya<IdntSz-1)
- *ty++ = *p++;
- *ty = 0;
- if (*p++!='>') {
- lineno = i;
- die("unclosed tag field");
- }
- ty = tya;
- c = *p++;
- if (c == '$') {
- fprintf(fout, "yyval.%s", ty);
- } else {
- if (!isdigit(c)) {
- lineno = i;
- die("number or $ expected afer tag field");
- }
- goto readnum;
- }
- }
- else if (isdigit(c)) {
- ty = 0;
- readnum:
- l = strtol(p-1, &p, 10);
- if (l > ar) {
- lineno = i;
- die("invalid $n");
- }
- fprintf(fout, "ps[%d].val", (int)l);
- if (doty) {
- if (!ty && l>0)
- ty = is[r->rhs[l-1]].type;
- if (!ty || !ty[0]) {
- lineno = i;
- die("$n has no type");
- }
- fprintf(fout, ".%s", ty);
- }
- }
- else {
- fputc('$', fout);
- fputc(c, fout);
- }
- }
- fputs("\n", fout);
- }
- void
- codeout()
- {
- extern char *code0[], *code1[];
- char **p;
- Rule *r;
- int n, c;
- for (p=code0; *p; p++)
- fputs(*p, fout);
- for (n=0; n<nrl; n++) {
- fprintf(fout, "\tcase %d:\n", n);
- r = &rs[n];
- fprintf(fout, "#line %d \"%s\"\n", r->actln, srca);
- actout(r);
- fputs("\t\tbreak;\n", fout);
- }
- for (p=code1; *p; p++)
- fputs(*p, fout);
- fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
- while ((c=fgetc(fin))!=EOF)
- fputc(c, fout);
- }
- void
- init(int ac, char *av[])
- {
- int c, vf, df;
- char *pref, buf[100], *opt;
- (void) ac;
- pref = "y";
- vf = df = 0;
- for (av++; av[0] && av[0][0]=='-'; av++)
- for (opt = &av[0][1]; (c = *opt); opt++)
- switch (c) {
- case 'v':
- vf = 1;
- break;
- case 'd':
- df = 1;
- break;
- case 'b':
- if ((pref = *++av))
- break;
- default:
- usage:
- fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr);
- exit(1);
- }
- if (!(srca = *av))
- goto usage;
- fin = fopen(srca, "r");
- if (strlen(pref) + 10 > sizeof buf)
- die("-b prefix too long");
- sprintf(buf, "%s.tab.c", pref);
- fout = fopen(buf, "w");
- if (vf) {
- sprintf(buf, "%s.output", pref);
- fgrm = fopen(buf, "w");
- }
- if (df) {
- sprintf(buf, "%s.tab.h", pref);
- fhdr = fopen(buf, "w");
- if (fhdr) {
- fprintf(fhdr, "#ifndef Y_TAB_H_\n");
- fprintf(fhdr, "#define Y_TAB_H_\n");
- }
- }
- if (!fin || !fout || (!fgrm && vf) || (!fhdr && df))
- die("cannot open work files");
- }
- int
- main(int ac, char *av[])
- {
- init(ac, av);
- getdecls();
- getgram();
- ginit();
- stgen();
- tblgen();
- stdump();
- actgen();
- tblout();
- codeout();
- if (srconf)
- fprintf(stderr, "%d shift/reduce conflicts\n", srconf);
- if (rrconf)
- fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf);
- exit(0);
- }
- /* Glorious macros.
- |sed 's|.*|"&\\n",|'
- */
- char *retcode = "\t\tyyval = ps[1].val; return 0;";
- char *code0[] = {
- "\n",
- "#ifndef YYSTYPE\n",
- "#define YYSTYPE int\n",
- "#endif\n",
- "YYSTYPE yylval;\n",
- "\n",
- "int\n",
- "yyparse()\n",
- "{\n",
- " enum {\n",
- " StackSize = 100,\n",
- " ActSz = sizeof yyact / sizeof yyact[0],\n",
- " };\n",
- " struct {\n",
- " YYSTYPE val;\n",
- " int state;\n",
- " } stk[StackSize], *ps;\n",
- " int r, h, n, s, tk;\n",
- " YYSTYPE yyval;\n",
- "\n",
- " ps = stk;\n",
- " ps->state = s = yyini;\n",
- " tk = -1;\n",
- "loop:\n",
- " n = yyadsp[s];\n",
- " if (tk < 0 && n > -yyntoks)\n",
- " tk = yytrns[yylex()];\n",
- " n += tk;\n",
- " if (n < 0 || n >= ActSz || yychk[n] != tk) {\n",
- " r = yyadef[s];\n",
- " if (r < 0)\n",
- " return -1;\n",
- " goto reduce;\n",
- " }\n",
- " n = yyact[n];\n",
- " if (n == -1)\n",
- " return -1;\n",
- " if (n < 0) {\n",
- " r = - (n+2);\n",
- " goto reduce;\n",
- " }\n",
- " tk = -1;\n",
- " yyval = yylval;\n",
- "stack:\n",
- " ps++;\n",
- " if (ps-stk >= StackSize)\n",
- " return -2;\n",
- " s = n;\n",
- " ps->state = s;\n",
- " ps->val = yyval;\n",
- " goto loop;\n",
- "reduce:\n",
- " ps -= yyr1[r];\n",
- " h = yyr2[r];\n",
- " s = ps->state;\n",
- " n = yygdsp[h] + s;\n",
- " if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n",
- " n = yygdef[h];\n",
- " else\n",
- " n = yyact[n];\n",
- " switch (r) {\n",
- 0
- };
- char *code1[] = {
- " }\n",
- " goto stack;\n",
- "}\n",
- 0
- };
|