123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863 |
- /*************************************************************************
- * Copyright (c) 2011 AT&T Intellectual Property
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * https://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors: Details at https://graphviz.org
- *************************************************************************/
- /*
- * Glenn Fowler
- * AT&T Research
- *
- * expression library default lexical analyzer
- */
- #include "config.h"
- #include <cgraph/gv_ctype.h>
- #include <expr/exlib.h>
- #include <stdbool.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <util/agxbuf.h>
- #include <util/streq.h>
- #include <util/unreachable.h>
- #if defined(TRACE_lex) && TRACE_lex
- /*
- * trace c for op
- */
- static void
- trace(Expr_t* ex, int lev, char* op, int c)
- {
- char* s = 0;
- char* t;
- bool free_t = false;
- char buf[16];
- void* x = 0;
- t = "";
- switch (c)
- {
- case 0:
- s = " EOF";
- break;
- case '=':
- s = t = buf;
- *t++ = ' ';
- if (!lev && ex_lval.op != c)
- *t++ = ex_lval.op;
- *t++ = c;
- *t = 0;
- break;
- case AND:
- s = " AND ";
- t = "&&";
- break;
- case DEC:
- s = " DEC ";
- t = "--";
- break;
- case DECLARE:
- s = " DECLARE ";
- t = ex_lval.id->name;
- break;
- case DYNAMIC:
- s = " DYNAMIC ";
- t = ex_lval.id->name;
- x = (void*)ex_lval.id;
- break;
- case EQ:
- s = " EQ ";
- t = "==";
- break;
- case FLOATING:
- s = " FLOATING ";
- snprintf(t = buf, sizeof(buf), "%f", ex_lval.floating);
- break;
- case GE:
- s = " GE ";
- t = ">=";
- break;
- case CONSTANT:
- s = " CONSTANT ";
- t = ex_lval.id->name;
- break;
- case ID:
- s = " ID ";
- t = ex_lval.id->name;
- break;
- case INC:
- s = "INC ";
- t = "++";
- break;
- case INTEGER:
- s = " INTEGER ";
- snprintf(t = buf, sizeof(buf), "%lld", ex_lval.integer);
- break;
- case LABEL:
- s = " LABEL ";
- t = ex_lval.id->name;
- break;
- case LE:
- s = " LE ";
- t = "<=";
- break;
- case LSH:
- s = " LSH ";
- t = "<<";
- break;
- case NAME:
- s = " NAME ";
- t = ex_lval.id->name;
- x = (void*)ex_lval.id;
- break;
- case NE:
- s = " NE ";
- t = "!=";
- break;
- case OR:
- s = " OR ";
- t = "||";
- break;
- case RSH:
- s = " RSH ";
- t = ">>";
- break;
- case STRING:
- s = " STRING ";
- t = fmtesc(ex_lval.string);
- free_t = true;
- break;
- case UNSIGNED:
- s = " UNSIGNED ";
- snprintf(t = buf, sizeof(buf), "%llu", (unsigned long long)ex_lval.integer);
- break;
- case BREAK:
- s = " break";
- break;
- case CASE:
- s = " case";
- break;
- case CONTINUE:
- s = " continue";
- break;
- case DEFAULT:
- s = " default";
- break;
- case ELSE:
- s = " else";
- break;
- case EXIT:
- s = " exit";
- break;
- case FOR:
- s = " for";
- break;
- case ITERATOR:
- s = " forf";
- break;
- case GSUB:
- s = " gsub";
- break;
- case IF:
- s = " if";
- break;
- case IN_OP:
- s = " in";
- break;
- case PRAGMA:
- s = " pragma";
- break;
- case PRINT:
- s = " print";
- break;
- case PRINTF:
- s = " printf";
- break;
- case QUERY:
- s = " query";
- break;
- case RAND:
- s = " rand";
- break;
- case RETURN:
- s = " return";
- break;
- case SPLIT:
- s = " split";
- break;
- case SPRINTF:
- s = " sprintf";
- break;
- case SRAND:
- s = " srand";
- break;
- case SUB:
- s = " sub";
- break;
- case SUBSTR:
- s = " substr";
- break;
- case SWITCH:
- s = " switch";
- break;
- case TOKENS:
- s = " tokens";
- break;
- case UNSET:
- s = " unset";
- break;
- case WHILE:
- s = " while";
- break;
- default:
- if (c < 0177)
- {
- s = buf;
- *s++ = c;
- *s = 0;
- t = fmtesc(buf);
- free_t = true;
- s = " ";
- }
- break;
- }
- if (x)
- error(TRACE_lex + lev, "%s: [%d] %04d%s%s (%x)", op, ex->input->nesting, c, s, t, x);
- else
- error(TRACE_lex + lev, "%s: [%d] %04d%s%s", op, ex->input->nesting, c, s, t);
- if (free_t) {
- free(t);
- }
- }
- /*
- * trace wrapper for extoken()
- */
- extern int _extoken_fn_(Expr_t*);
- int
- extoken_fn(Expr_t* ex)
- {
- int c;
- #define extoken_fn _extoken_fn_
- c = extoken_fn(ex);
- trace(ex, 0, "ex_lex", c);
- return c;
- }
- #else
- #define trace(p,a,b,c) do { } while (0)
- #endif
- /*
- * get the next expression char
- */
- static int
- lex(Expr_t* ex)
- {
- int c;
- for (;;)
- {
- if ((c = ex->input->peek))
- ex->input->peek = 0;
- else if (ex->input->pp)
- {
- if (!(c = *ex->input->pp++))
- {
- ex->input->pp = 0;
- continue;
- }
- }
- else if (ex->input->fp)
- {
- if ((c = getc(ex->input->fp)) == EOF)
- {
- if (!expop(ex))
- continue;
- else trace(ex, -1, "expop fp FAIL", 0);
- c = 0;
- }
- }
- else c = 0;
- if (c == '\n')
- setcontext(ex);
- else if (c)
- putcontext(ex, c);
- trace(ex, -3, "ex--lex", c);
- return c;
- }
- }
- /*
- * get the next expression token
- */
- int
- extoken_fn(Expr_t* ex)
- {
- int c;
- char* s;
- int q;
- char* e;
- Dt_t* v;
- if (ex->eof || ex->errors)
- return 0;
- again:
- for (;;)
- switch (c = lex(ex))
- {
- case 0:
- goto eof;
- case '/':
- switch (q = lex(ex))
- {
- case '*':
- for (;;) switch (lex(ex))
- {
- case '\n':
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- continue;
- case '*':
- switch (lex(ex))
- {
- case 0:
- goto eof;
- case '\n':
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- break;
- case '*':
- exunlex(ex, '*');
- break;
- case '/':
- goto again;
- }
- break;
- }
- break;
- case '/':
- while ((c = lex(ex)) != '\n')
- if (!c)
- goto eof;
- break;
- default:
- goto opeq;
- }
- /*FALLTHROUGH*/
- case '\n':
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- /*FALLTHROUGH*/
- case ' ':
- case '\t':
- case '\r':
- break;
- case '(':
- case '{':
- case '[':
- ex->input->nesting++;
- return ex_lval.op = c;
- case ')':
- case '}':
- case ']':
- ex->input->nesting--;
- return ex_lval.op = c;
- case '+':
- case '-':
- if ((q = lex(ex)) == c)
- return ex_lval.op = c == '+' ? INC : DEC;
- goto opeq;
- case '*':
- case '%':
- case '^':
- q = lex(ex);
- opeq:
- ex_lval.op = c;
- if (q == '=')
- c = '=';
- else if (q == '%' && c == '%')
- {
- goto eof;
- }
- else exunlex(ex, q);
- return c;
- case '&':
- case '|':
- if ((q = lex(ex)) == '=')
- {
- ex_lval.op = c;
- return '=';
- }
- if (q == c)
- c = c == '&' ? AND : OR;
- else exunlex(ex, q);
- return ex_lval.op = c;
- case '<':
- case '>':
- if ((q = lex(ex)) == c)
- {
- ex_lval.op = c = c == '<' ? LSH : RSH;
- if ((q = lex(ex)) == '=')
- c = '=';
- else exunlex(ex, q);
- return c;
- }
- goto relational;
- case '=':
- case '!':
- q = lex(ex);
- relational:
- if (q == '=') switch (c)
- {
- case '<':
- c = LE;
- break;
- case '>':
- c = GE;
- break;
- case '=':
- c = EQ;
- break;
- case '!':
- c = NE;
- break;
- default:
- UNREACHABLE();
- }
- else exunlex(ex, q);
- return ex_lval.op = c;
- case '#':
- if (!ex->linewrap) {
- s = ex->linep - 1;
- while (s > ex->line && gv_isspace(*(s - 1)))
- s--;
- if (s == ex->line)
- {
- switch (extoken_fn(ex))
- {
- case DYNAMIC:
- case ID:
- case NAME:
- s = ex_lval.id->name;
- break;
- default:
- s = "";
- break;
- }
- if (streq(s, "include"))
- {
- if (extoken_fn(ex) != STRING)
- exerror("#%s: string argument expected", s);
- else if (!expush(ex, ex_lval.string, 1, NULL))
- {
- setcontext(ex);
- goto again;
- }
- }
- else exerror("unknown directive");
- }
- }
- return ex_lval.op = c;
- case '\'':
- case '"':
- q = c;
- agxbclear(&ex->tmp);
- ex->input->nesting++;
- while ((c = lex(ex)) != q)
- {
- if (c == '\\')
- {
- agxbputc(&ex->tmp, '\\');
- c = lex(ex);
- }
- if (!c)
- {
- exerror("unterminated %c string", q);
- goto eof;
- }
- if (c == '\n')
- {
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- }
- agxbputc(&ex->tmp, (char)c);
- }
- ex->input->nesting--;
- s = agxbuse(&ex->tmp);
- if (q == '"' || (ex->disc->flags & EX_CHARSTRING))
- {
- if (!(ex_lval.string = vmstrdup(ex->vm, s)))
- goto eof;
- stresc(ex_lval.string);
- return STRING;
- }
- ex_lval.integer = chrtoi(s);
- return INTEGER;
- case '.':
- if (gv_isdigit(c = lex(ex)))
- {
- agxbclear(&ex->tmp);
- agxbput(&ex->tmp, "0.");
- goto floating;
- }
- exunlex(ex, c);
- return ex_lval.op = '.';
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9': {
- agxbclear(&ex->tmp);
- agxbputc(&ex->tmp, (char)c);
- q = INTEGER;
- int b = 0;
- if ((c = lex(ex)) == 'x' || c == 'X')
- {
- b = 16;
- agxbputc(&ex->tmp, (char)c);
- for (c = lex(ex); gv_isxdigit(c); c = lex(ex))
- {
- agxbputc(&ex->tmp, (char)c);
- }
- }
- else
- {
- while (gv_isdigit(c))
- {
- agxbputc(&ex->tmp, (char)c);
- c = lex(ex);
- }
- if (c == '#')
- {
- agxbputc(&ex->tmp, (char)c);
- do
- {
- agxbputc(&ex->tmp, (char)c);
- } while (gv_isalnum(c = lex(ex)));
- }
- else
- {
- if (c == '.')
- {
- floating:
- q = FLOATING;
- agxbputc(&ex->tmp, (char)c);
- while (gv_isdigit(c = lex(ex)))
- agxbputc(&ex->tmp, (char)c);
- }
- if (c == 'e' || c == 'E')
- {
- q = FLOATING;
- agxbputc(&ex->tmp, (char)c);
- if ((c = lex(ex)) == '-' || c == '+')
- {
- agxbputc(&ex->tmp, (char)c);
- c = lex(ex);
- }
- while (gv_isdigit(c))
- {
- agxbputc(&ex->tmp, (char)c);
- c = lex(ex);
- }
- }
- }
- }
- s = agxbuse(&ex->tmp);
- if (q == FLOATING)
- ex_lval.floating = strtod(s, &e);
- else
- {
- if (c == 'u' || c == 'U')
- {
- q = UNSIGNED;
- c = lex(ex);
- ex_lval.integer = strtoull(s, &e, b);
- }
- else
- ex_lval.integer = strtoll(s, &e, b);
- }
- exunlex(ex, c);
- if (*e || gv_isalpha(c) || c == '_' || c == '$')
- {
- exerror("%s: invalid numeric constant", s);
- goto eof;
- }
- return q;
- }
- default:
- if (gv_isalpha(c) || c == '_' || c == '$')
- {
- agxbclear(&ex->tmp);
- agxbputc(&ex->tmp, (char)c);
- while (gv_isalnum(c = lex(ex)) || c == '_' || c == '$')
- agxbputc(&ex->tmp, (char)c);
- exunlex(ex, c);
- s = agxbuse(&ex->tmp);
- /* v = expr.declare ? dtview(ex->symbols, NULL) : (Dt_t*)0; FIX */
- v = (Dt_t*)0;
- ex_lval.id = dtmatch(ex->symbols, s);
- if (v)
- dtview(ex->symbols, v);
- if (!ex_lval.id)
- {
- const size_t size = sizeof(Exid_t) + strlen(s) - EX_NAMELEN + 1;
- if (!(ex_lval.id = vmalloc(ex->vm, size))) {
- exnospace();
- goto eof;
- }
- memset(ex_lval.id, 0, size);
- strcpy(ex_lval.id->name, s);
- ex_lval.id->lex = NAME;
- /*
- * LABELs are in the parent scope!
- */
- if (c == ':' && !expr.nolabel && ex->frame && ex->frame->view)
- dtinsert(ex->frame->view, ex_lval.id);
- else
- dtinsert(ex->symbols, ex_lval.id);
- }
- /*
- * lexical analyzer state controlled by the grammar
- */
- switch (ex_lval.id->lex)
- {
- case DECLARE:
- if (ex_lval.id->index == CHARACTER)
- {
- /*
- * `char*' === `string'
- * the * must immediately follow char
- */
- if (c == '*')
- {
- lex(ex);
- ex_lval.id = id_string;
- }
- }
- break;
- case NAME:
- /*
- * action labels are disambiguated from ?:
- * through the expr.nolabel grammar hook
- * the : must immediately follow labels
- */
- if (c == ':' && !expr.nolabel)
- return LABEL;
- break;
- case PRAGMA:
- /*
- * user specific statement stripped and
- * passed as string
- */
- {
- int b;
- int n;
- int pc = 0;
- int po;
- int t;
- /*UNDENT...*/
- agxbclear(&ex->tmp);
- b = 1;
- n = 0;
- po = 0;
- for (c = t = lex(ex);; c = lex(ex))
- {
- switch (c)
- {
- case 0:
- goto eof;
- case '/':
- switch (q = lex(ex))
- {
- case '*':
- for (;;)
- {
- switch (lex(ex))
- {
- case '\n':
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- continue;
- case '*':
- switch (lex(ex))
- {
- case 0:
- goto eof;
- case '\n':
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- continue;
- case '*':
- exunlex(ex, '*');
- continue;
- case '/':
- break;
- default:
- continue;
- }
- break;
- default: // ignore; keep consuming characters
- break;
- }
- if (!b++)
- goto eof;
- agxbputc(&ex->tmp, ' ');
- break;
- }
- break;
- case '/':
- while ((c = lex(ex)) != '\n')
- if (!c)
- goto eof;
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- b = 1;
- agxbputc(&ex->tmp, '\n');
- break;
- default:
- b = 0;
- agxbputc(&ex->tmp, (char)c);
- agxbputc(&ex->tmp, (char)q);
- break;
- }
- continue;
- case '\n':
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- b = 1;
- agxbputc(&ex->tmp, '\n');
- continue;
- case ' ':
- case '\t':
- if (!b++)
- goto eof;
- agxbputc(&ex->tmp, ' ');
- continue;
- case '(':
- case '{':
- case '[':
- b = 0;
- if (!po)
- {
- switch (po = c)
- {
- case '(':
- pc = ')';
- break;
- case '{':
- pc = '}';
- break;
- case '[':
- pc = ']';
- break;
- default:
- UNREACHABLE();
- }
- n++;
- }
- else if (c == po)
- n++;
- agxbputc(&ex->tmp, (char)c);
- continue;
- case ')':
- case '}':
- case ']':
- b = 0;
- if (!po)
- {
- exunlex(ex, c);
- break;
- }
- agxbputc(&ex->tmp, (char)c);
- if (c == pc && --n <= 0)
- {
- if (t == po)
- break;
- po = 0;
- }
- continue;
- case ';':
- b = 0;
- if (!n)
- break;
- agxbputc(&ex->tmp, (char)c);
- continue;
- case '\'':
- case '"':
- b = 0;
- agxbputc(&ex->tmp, (char)c);
- ex->input->nesting++;
- q = c;
- while ((c = lex(ex)) != q)
- {
- if (c == '\\')
- {
- agxbputc(&ex->tmp, '\\');
- c = lex(ex);
- }
- if (!c)
- {
- exerror("unterminated %c string", q);
- goto eof;
- }
- if (c == '\n')
- {
- if (error_info.line)
- error_info.line++;
- else error_info.line = 2;
- }
- agxbputc(&ex->tmp, (char)c);
- }
- ex->input->nesting--;
- continue;
- default:
- b = 0;
- agxbputc(&ex->tmp, (char)c);
- continue;
- }
- break;
- }
- ex->disc->reff(ex, NULL, ex_lval.id, NULL);
- /*..INDENT*/
- }
- goto again;
- }
- return ex_lval.id->lex;
- }
- return ex_lval.op = c;
- }
- eof:
- ex->eof = 1;
- return ex_lval.op = ';';
- }
|