123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304 |
- /**
- * @file
- * @ingroup cgraph_core
- */
- /*************************************************************************
- * Copyright (c) 2011 AT&T Intellectual Property
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * https://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors: Details at https://graphviz.org
- *************************************************************************/
- /* requires flex (i.e. not lex) */
- /* By default, Flex emits a lexer using symbols prefixed with "yy". Graphviz
- * contains multiple Flex-generated lexers, so we alter this prefix to avoid
- * symbol clashes.
- */
- %option prefix="aag"
- /* Avoid generating an unused input function. See
- https://westes.github.io/flex/manual/Scanner-Options.html
- */
- %option noinput
- %{
- #include <assert.h>
- #include <grammar.h>
- #include <cgraph/cghdr.h>
- #include <cgraph/gv_ctype.h>
- #include <stdbool.h>
- #include <stddef.h>
- #include <string.h>
- #include <util/agxbuf.h>
- #include <util/startswith.h>
- // #define YY_BUF_SIZE 128000
- #define GRAPH_EOF_TOKEN '@' /* lex class must be defined below */
- /* this is a workaround for linux flex */
- static int line_num = 1;
- static int html_nest = 0; /* nesting level for html strings */
- static const char* InputFile;
- static Agdisc_t *Disc;
- static void *Ifile;
- static int graphType;
- /* By default, Flex calls isatty() to determine whether the input it is
- * scanning is coming from the user typing or from a file. However, our input
- * is being provided by Graphviz' I/O channel mechanism, which does not have a
- * valid file descriptor that supports isatty().
- */
- #define isatty(x) gv_isatty_suppression
- int gv_isatty_suppression;
/* YY_INPUT:
 * Refill flex's buffer through the Graphviz I/O discipline instead of stdio.
 * Reads up to max_size bytes into buf with Disc->io->afread() and stores the
 * count in result; a negative return from afread() is a fatal scanner error.
 *
 * The count is first captured in a local int so that the error test does not
 * depend on the type flex gives to result (NOTE(review): some flex releases
 * are believed to declare it unsigned, which would make a direct "< 0" test
 * on result always false). The do { } while (0) wrapper makes the macro expand
 * to exactly one statement, so it cannot capture a following "else".
 */
#ifndef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
	do { \
		const int aag_nread_ = Disc->io->afread(Ifile, (buf), (max_size)); \
		if (aag_nread_ < 0) \
			YY_FATAL_ERROR( "input in flex scanner failed" ); \
		(result) = aag_nread_; \
	} while (0)
#endif
- /* buffer for arbitrary length strings (longer than BUFSIZ) */
- static agxbuf Sbuf;
- static void beginstr(void);
- static void addstr(char *src);
- static void endstr(void);
- static void endstr_html(void);
- static void storeFileName(char* fname, size_t len);
- /* ppDirective:
- * Process a possible preprocessor line directive.
- * aagtext = #.*
- */
- static void ppDirective (void);
- /* twoDots:
- * Return true if token has more than one '.';
- * we know the last character is a '.'.
- */
- static bool twoDots(void);
- /* chkNum:
- * The regexp for NUMBER allows a terminating letter or '.'.
- * This way we can catch a number immediately followed by a name
- * or something like 123.456.78, and report this to the user.
- */
- static int chkNum(void);
/* The LETTER class below consists of ASCII letters, underscore, and all
 * non-ASCII characters. This allows identifiers to contain characters from any
 * character set, independent of locale. The downside is that, for certain
 * character sets, non-letter and, in fact, undefined characters will be
 * accepted. This is unlikely and, from dot's standpoint, shouldn't do any
 * harm. (Presumably undefined characters will be ignored in display.) And,
 * it allows a greater wealth of names. */
- %}
/* End-of-input marker; must agree with the C macro GRAPH_EOF_TOKEN ('@'). */
GRAPH_EOF_TOKEN		[@]
/* ASCII letters, underscore and every byte with the high bit set. */
LETTER			[A-Za-z_\200-\377]
DIGIT			[0-9]
/* An identifier: a letter followed by letters and digits. */
NAME			{LETTER}({LETTER}|{DIGIT})*
/* An optionally negative decimal number. One trailing '.' or letter is also
 * accepted so that chkNum() can diagnose a badly delimited number. */
NUMBER			[-]?(({DIGIT}+(\.{DIGIT}*)?)|(\.{DIGIT}+))(\.|{LETTER})?
ID			({NAME}|{NUMBER})
/* Exclusive start conditions: inside a C-style comment, inside a
 * double-quoted string, and inside an HTML string. */
%x comment
%x qstring
%x hstring
- %%
{GRAPH_EOF_TOKEN}	{ /* marker pushed back by aglexeof() */ return EOF; }
<INITIAL,comment>\n	{ line_num++; }

"/*"			{ BEGIN(comment); }
<comment>[^*\n]*	{ /* skip comment text that contains no asterisk */ }
<comment>"*"+[^*/\n]*	{ /* skip asterisks that are not followed by a slash */ }
<comment>"*"+"/"	{ BEGIN(INITIAL); }
"//".*			{ /* ignore C++-style comments */ }
^"#".*			{ ppDirective(); }
"#".*			{ /* ignore shell-like comments */ }
[ \t\r]			{ /* ignore whitespace */ }
"\xEF\xBB\xBF"		{ /* ignore BOM */ }

"node"			{ /* see tokens in agcanonstr */ return T_node; }
"edge"			{ return T_edge; }
"graph"			{
				/* the first graph/digraph keyword fixes the graph type */
				if (graphType == 0) {
					graphType = T_graph;
				}
				return T_graph;
			}
"digraph"		{
				if (graphType == 0) {
					graphType = T_digraph;
				}
				return T_digraph;
			}
"strict"		{ return T_strict; }
"subgraph"		{ return T_subgraph; }
"->"			{ return graphType == T_digraph ? T_edgeop : '-'; }
"--"			{ return graphType == T_graph ? T_edgeop : '-'; }

{NAME}			{
				aaglval.str = agstrdup(Ag_G_global, aagget_text());
				return T_atom;
			}
{NUMBER}		{
				/* a badly delimited number gives back its last character */
				if (chkNum()) {
					yyless(aagget_leng() - 1);
				}
				aaglval.str = agstrdup(Ag_G_global, aagget_text());
				return T_atom;
			}

["]			{ BEGIN(qstring); beginstr(); }
<qstring>["]		{ BEGIN(INITIAL); endstr(); return T_qatom; }
<qstring>[\\]["]	{ addstr("\""); }
<qstring>[\\][\\]	{ addstr("\\\\"); }
<qstring>[\\][\n]	{ /* ignore escaped newlines */ line_num++; }
<qstring>[\n]		{ addstr("\n"); line_num++; }
<qstring>([^"\\\n]*|[\\])	{ addstr(aagget_text()); }

[<]			{ BEGIN(hstring); html_nest = 1; beginstr(); }
<hstring>[>]		{
				html_nest--;
				if (html_nest != 0) {
					/* still nested: the bracket is part of the string */
					addstr(aagget_text());
				} else {
					BEGIN(INITIAL);
					endstr_html();
					return T_qatom;
				}
			}
<hstring>[<]		{ html_nest++; addstr(aagget_text()); }
<hstring>[\n]		{ /* add newlines */ addstr(aagget_text()); line_num++; }
<hstring>([^><\n]*)	{ addstr(aagget_text()); }

.			{ return aagget_text()[0]; }
- %%
- void aagerror(const char *str);
- void aagerror(const char *str)
- {
- agxbuf xb = {0};
- if (InputFile) {
- agxbprint (&xb, "%s: ", InputFile);
- }
- agxbprint (&xb, "%s in line %d", str, line_num);
- if (*aagget_text()) {
- agxbprint(&xb, " near '%s'", aagget_text());
- }
- else switch (YYSTATE) {
- case qstring: {
- agxbprint(&xb, " scanning a quoted string (missing endquote? longer than %d?)", YY_BUF_SIZE);
- if (agxblen(&Sbuf) > 0) {
- agxbprint(&xb, "\nString starting:\"%.80s", agxbuse(&Sbuf));
- }
- break;
- }
- case hstring: {
- agxbprint(&xb, " scanning a HTML string (missing '>'? bad nesting? longer than %d?)", YY_BUF_SIZE);
- if (agxblen(&Sbuf) > 0) {
- agxbprint(&xb, "\nString starting:<%.80s", agxbuse(&Sbuf));
- }
- break;
- }
- case comment :
- agxbprint(&xb, " scanning a /*...*/ comment (missing '*/? longer than %d?)", YY_BUF_SIZE);
- break;
- default: // nothing extra to note
- break;
- }
- agxbputc (&xb, '\n');
- agerrorf("%s", agxbuse(&xb));
- agxbfree(&xb);
- BEGIN(INITIAL);
- }
- /* must be here to see flex's macro defns */
- void aglexeof(void) { unput(GRAPH_EOF_TOKEN); }
- void aglexbad(void) { YY_FLUSH_BUFFER; }
/* Flex versions that do not define YY_CALL_ONLY_ARG get a plain (void)
 * parameter list for aagwrap().
 */
#ifndef YY_CALL_ONLY_ARG
#  define YY_CALL_ONLY_ARG void
#endif

/* aagwrap:
 * End-of-input hook called by the flex scanner. Always returns 1, which
 * tells flex that there is no further input to switch to.
 */
int aagwrap(YY_CALL_ONLY_ARG)
{
    const int no_more_input = 1;
    return no_more_input;
}
- /* Reset line number */
- void agreadline(int n) { line_num = n; }
- /* (Re)set file:
- */
- void agsetfile(const char* f) { InputFile = f; line_num = 1; }
- /* There is a hole here, because switching channels
- * requires pushing back whatever was previously read.
- * There probably is a right way of doing this.
- */
- void aglexinit(Agdisc_t *disc, void *ifile) { Disc = disc; Ifile = ifile; graphType = 0;}
- static void beginstr(void) {
- // nothing required, but we should not have pending string data
- assert(agxblen(&Sbuf) == 0 &&
- "pending string data that was not consumed (missing "
- "endstr()/endhtmlstr()?)");
- }
- static void addstr(char *src) {
- agxbput(&Sbuf, src);
- }
- static void endstr(void) {
- aaglval.str = agstrdup(Ag_G_global, agxbuse(&Sbuf));
- }
- static void endstr_html(void) {
- aaglval.str = agstrdup_html(Ag_G_global, agxbuse(&Sbuf));
- }
- static void storeFileName(char* fname, size_t len) {
- static size_t cnt;
- static char* buf;
- if (len > cnt) {
- buf = gv_realloc(buf, cnt + 1, len + 1);
- cnt = len;
- }
- strcpy (buf, fname);
- InputFile = buf;
- }
- /* ppDirective:
- * Process a possible preprocessor line directive.
- * aagtext = #.*
- */
- static void ppDirective (void)
- {
- int r, cnt, lineno;
- char buf[2];
- char* s = aagget_text() + 1; /* skip initial # */
- if (startswith(s, "line")) s += strlen("line");
- r = sscanf(s, "%d %1[\"]%n", &lineno, buf, &cnt);
- if (r > 0) { /* got line number */
- // ignore if line number was out of range
- if (lineno <= 0) {
- return;
- }
- line_num = lineno - 1;
- if (r > 1) { /* saw quote */
- char* p = s + cnt;
- char* e = p;
- while (*e && *e != '"') e++;
- if (e != p && *e == '"') {
- *e = '\0';
- storeFileName(p, (size_t)(e - p));
- }
- }
- }
- }
/* twoDots:
 * Return true if the current token has more than one '.'.
 * The caller guarantees that the last character is a '.', so it is enough to
 * look for a '.' among the characters before the last one.
 */
static bool twoDots(void) {
    const char *const text = aagget_text();
    const size_t before_last = (size_t)aagget_leng() - 1;

    return memchr(text, '.', before_last) != NULL;
}
- /* chkNum:
- * The regexp for NUMBER allows a terminating letter or '.'.
- * This way we can catch a number immediately followed by a name
- * or something like 123.456.78, and report this to the user.
- */
- static int chkNum(void) {
- char c = aagget_text()[aagget_leng() - 1]; // last character
- if ((!gv_isdigit(c) && c != '.') || (c == '.' && twoDots())) { // c is letter
- const char* fname;
- if (InputFile)
- fname = InputFile;
- else
- fname = "input";
- agwarningf("syntax ambiguity - badly delimited number '%s' in line %d of "
- "%s splits into two tokens\n", aagget_text(), line_num, fname);
- return 1;
- }
- else return 0;
- }
|