parse.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. /*************************************************************************
  2. * Copyright (c) 2011 AT&T Intellectual Property
  3. * All rights reserved. This program and the accompanying materials
  4. * are made available under the terms of the Eclipse Public License v1.0
  5. * which accompanies this distribution, and is available at
  6. * https://www.eclipse.org/legal/epl-v10.html
  7. *
  8. * Contributors: Details at https://graphviz.org
  9. *************************************************************************/
  10. /*
  11. * Top-level parsing of gpr code into blocks
  12. *
  13. */
  14. #include <ast/ast.h>
  15. #include <ast/error.h>
  16. #include <cgraph/gv_ctype.h>
  17. #include <gvpr/parse.h>
  18. #include <stdbool.h>
  19. #include <stdio.h>
  20. #include <stdlib.h>
  21. #include <string.h>
  22. #include <util/agxbuf.h>
  23. #include <util/alloc.h>
  24. #include <util/unreachable.h>
  25. static int lineno = 1; /* current line number */
  26. static int col0 = 1; /* true if char ptr is at column 0 */
  27. static int startLine = 1; /* set to start line of bracketd content */
  28. static int kwLine = 1; /* set to line of keyword */
  29. static char *case_str[] = {
  30. "BEGIN", "END", "BEG_G", "END_G", "N", "E", "EOF", "ERROR",
  31. };
  32. /// convert case_t to string
  33. static char *caseStr(case_t cs) { return case_str[(int)cs]; }
  34. /// eat characters until eol
  35. static int eol(FILE *str) {
  36. int c;
  37. while ((c = getc(str)) != '\n') {
  38. if (c < 0)
  39. return c;
  40. }
  41. lineno++;
  42. col0 = 1;
  43. return c;
  44. }
  45. /* return character from input stream
  46. * while keeping track of line number.
  47. * Strip out comments, and return space or newline.
  48. * If a newline is seen in comment and ostr
  49. * is non-null, add newline to ostr.
  50. */
  51. static int readc(FILE *str, agxbuf *ostr) {
  52. int c;
  53. int cc;
  54. switch (c = getc(str)) {
  55. case '\n':
  56. lineno++;
  57. col0 = 1;
  58. break;
  59. case '#':
  60. if (col0) { /* shell comment */
  61. c = eol(str);
  62. } else
  63. col0 = 0;
  64. break;
  65. case '/':
  66. cc = getc(str);
  67. switch (cc) {
  68. case '*': /* in C comment */
  69. while (1) {
  70. switch (c = getc(str)) {
  71. case '\n':
  72. lineno++;
  73. if (ostr)
  74. agxbputc(ostr, (char)c);
  75. break;
  76. case '*':
  77. switch (cc = getc(str)) {
  78. case -1:
  79. return cc;
  80. break;
  81. case '\n':
  82. lineno++;
  83. if (ostr)
  84. agxbputc(ostr, (char)cc);
  85. break;
  86. case '*':
  87. ungetc(cc, str);
  88. break;
  89. case '/':
  90. col0 = 0;
  91. return ' ';
  92. break;
  93. }
  94. }
  95. }
  96. break;
  97. case '/': /* in C++ comment */
  98. c = eol(str);
  99. break;
  100. default: /* not a comment */
  101. if (cc >= '\0')
  102. ungetc(cc, str);
  103. break;
  104. }
  105. break;
  106. default:
  107. col0 = 0;
  108. break;
  109. }
  110. return c;
  111. }
  112. /// push character back onto stream; if newline, reduce lineno
  113. static void unreadc(FILE *str, int c) {
  114. ungetc(c, str);
  115. if (c == '\n')
  116. lineno--;
  117. }
  118. static int skipWS(FILE *str) {
  119. int c;
  120. while (true) {
  121. c = readc(str, 0);
  122. if (!gv_isspace(c)) {
  123. return c;
  124. }
  125. }
  126. }
  127. /// Put initial alpha in buffer; add additional alphas, up to buffer size.
  128. static void parseID(FILE *str, int c, char *buf, size_t bsize) {
  129. char *ptr = buf;
  130. char *eptr = buf + (bsize - 1);
  131. *ptr++ = (char)c;
  132. while (true) {
  133. c = readc(str, 0);
  134. if (c < 0)
  135. break;
  136. if (gv_isalpha(c) || c == '_') {
  137. if (ptr == eptr)
  138. break;
  139. *ptr++ = (char)c;
  140. } else {
  141. unreadc(str, c);
  142. break;
  143. }
  144. }
  145. *ptr = '\0';
  146. }
  147. #define BSIZE 8
  148. /* Look for keywords: BEGIN, END, BEG_G, END_G, N, E
  149. * As side-effect, sets kwLine to line of keyword.
  150. */
  151. static case_t parseKind(FILE *str) {
  152. int c;
  153. char buf[BSIZE];
  154. case_t cs = Error;
  155. c = skipWS(str);
  156. if (c < 0)
  157. return Eof;
  158. if (!gv_isalpha(c)) {
  159. error(ERROR_ERROR, "expected keyword BEGIN/END/N/E...; found '%c', line %d",
  160. c, lineno);
  161. return Error;
  162. }
  163. kwLine = lineno;
  164. parseID(str, c, buf, BSIZE);
  165. if (strcmp(buf, "BEGIN") == 0) {
  166. cs = Begin;
  167. } else if (strcmp(buf, "BEG_G") == 0) {
  168. cs = BeginG;
  169. } else if (strcmp(buf, "E") == 0) {
  170. cs = Edge;
  171. } else if (strcmp(buf, "END") == 0) {
  172. cs = End;
  173. } else if (strcmp(buf, "END_G") == 0) {
  174. cs = EndG;
  175. } else if (strcmp(buf, "N") == 0) {
  176. cs = Node;
  177. }
  178. if (cs == Error)
  179. error(ERROR_ERROR, "unexpected keyword \"%s\", line %d", buf, kwLine);
  180. return cs;
  181. }
  182. /* eat characters from ins, putting them into outs,
  183. * up to and including a terminating character ec
  184. * that is not escaped with a back quote.
  185. */
  186. static int endString(FILE *ins, agxbuf *outs, char ec) {
  187. int sline = lineno;
  188. int c;
  189. while ((c = getc(ins)) != ec) {
  190. if (c == '\\') {
  191. agxbputc(outs, (char)c);
  192. c = getc(ins);
  193. }
  194. if (c < 0) {
  195. error(ERROR_ERROR, "unclosed string, start line %d", sline);
  196. return c;
  197. }
  198. if (c == '\n')
  199. lineno++;
  200. agxbputc(outs, (char)c);
  201. }
  202. agxbputc(outs, (char)c);
  203. return 0;
  204. }
  205. /* eat characters from ins, putting them into outs,
  206. * up to a terminating character ec.
  207. * Strings are treated as atomic units: any ec in them
  208. * is ignored. Since matching bc-ec pairs might nest,
  209. * the function is called recursively.
  210. */
  211. static int endBracket(FILE *ins, agxbuf *outs, char bc, char ec) {
  212. int c;
  213. while (true) {
  214. c = readc(ins, outs);
  215. if (c < 0 || c == ec)
  216. return c;
  217. else if (c == bc) {
  218. agxbputc(outs, (char)c);
  219. c = endBracket(ins, outs, bc, ec);
  220. if (c < 0)
  221. return c;
  222. else
  223. agxbputc(outs, (char)c);
  224. } else if (c == '\'' || c == '"') {
  225. agxbputc(outs, (char)c);
  226. if (endString(ins, outs, (char)c))
  227. return -1;
  228. } else
  229. agxbputc(outs, (char)c);
  230. }
  231. }
  232. /* parse paired expression : bc <string> ec
  233. * returning <string>
  234. * As a side-effect, set startLine to beginning of content.
  235. */
  236. static char *parseBracket(FILE *str, agxbuf *buf, int bc, int ec) {
  237. int c;
  238. c = skipWS(str);
  239. if (c < 0)
  240. return 0;
  241. if (c != bc) {
  242. unreadc(str, c);
  243. return 0;
  244. }
  245. startLine = lineno;
  246. c = endBracket(str, buf, bc, (char)ec);
  247. if (c < 0) {
  248. if (!getErrorErrors())
  249. error(ERROR_ERROR, "unclosed bracket %c%c expression, start line %d", bc,
  250. ec, startLine);
  251. return 0;
  252. } else
  253. return agxbdisown(buf);
  254. }
  255. static char *parseAction(FILE *str, agxbuf *buf) {
  256. return parseBracket(str, buf, '{', '}');
  257. }
  258. static char *parseGuard(FILE *str, agxbuf *buf) {
  259. return parseBracket(str, buf, '[', ']');
  260. }
  261. /* Recognize
  262. * BEGIN <optional action>
  263. * END <optional action>
  264. * BEG_G <optional action>
  265. * END_G <optional action>
  266. * N <optional guard> <optional action>
  267. * E <optional guard> <optional action>
  268. * where
  269. * guard = '[' <expr> ']'
  270. * action = '{' <expr> '}'
  271. */
  272. static case_t parseCase(FILE *str, char **guard, int *gline, char **action,
  273. int *aline) {
  274. case_t kind;
  275. agxbuf buf = {0};
  276. kind = parseKind(str);
  277. switch (kind) {
  278. case Begin:
  279. case BeginG:
  280. case End:
  281. case EndG:
  282. *action = parseAction(str, &buf);
  283. *aline = startLine;
  284. if (getErrorErrors())
  285. kind = Error;
  286. break;
  287. case Edge:
  288. case Node:
  289. *guard = parseGuard(str, &buf);
  290. *gline = startLine;
  291. if (!getErrorErrors()) {
  292. *action = parseAction(str, &buf);
  293. *aline = startLine;
  294. }
  295. if (getErrorErrors())
  296. kind = Error;
  297. break;
  298. case Eof:
  299. case Error: /* to silence warnings */
  300. break;
  301. default:
  302. UNREACHABLE();
  303. }
  304. agxbfree(&buf);
  305. return kind;
  306. }
  307. /// create new block and append to list; return new item as tail
  308. static void addBlock(parse_blocks_t *list, char *stmt, int line,
  309. case_infos_t nodelist, case_infos_t edgelist) {
  310. parse_block item = {0};
  311. item.l_beging = line;
  312. item.begg_stmt = stmt;
  313. item.node_stmts = nodelist;
  314. item.edge_stmts = edgelist;
  315. parse_blocks_append(list, item);
  316. }
  317. /// create new case_info and append to list
  318. static void addCase(case_infos_t *list, char *guard, int gline, char *action,
  319. int line) {
  320. if (!guard && !action) {
  321. error(ERROR_WARNING,
  322. "Case with neither guard nor action, line %d - ignored", kwLine);
  323. return;
  324. }
  325. case_info item = {.guard = guard, .action = action};
  326. if (guard)
  327. item.gstart = gline;
  328. if (action)
  329. item.astart = line;
  330. case_infos_append(list, item);
  331. }
  332. static void bindAction(case_t cs, char *action, int aline, char **ap, int *lp) {
  333. if (!action)
  334. error(ERROR_WARNING, "%s with no action, line %d - ignored", caseStr(cs),
  335. kwLine);
  336. else if (*ap)
  337. error(ERROR_ERROR, "additional %s section, line %d", caseStr(cs), kwLine);
  338. else {
  339. *ap = action;
  340. *lp = aline;
  341. }
  342. }
  343. /// parses input into gpr sections
  344. parse_prog *parseProg(char *input, int isFile) {
  345. FILE *str;
  346. char *guard = NULL;
  347. char *action = NULL;
  348. bool more;
  349. parse_blocks_t blocklist = {0};
  350. case_infos_t edgelist = {0};
  351. case_infos_t nodelist = {0};
  352. int line = 0, gline = 0;
  353. int l_beging = 0;
  354. char *begg_stmt;
  355. lineno = col0 = startLine = kwLine = 1;
  356. parse_prog *prog = calloc(1, sizeof(parse_prog));
  357. if (!prog) {
  358. error(ERROR_ERROR, "parseProg: out of memory");
  359. return NULL;
  360. }
  361. if (isFile) {
  362. str = fopen(input, "r");
  363. prog->source = input;
  364. } else {
  365. str = tmpfile();
  366. if (str != NULL) {
  367. fputs(input, str);
  368. rewind(str);
  369. }
  370. prog->source = NULL; /* command line */
  371. }
  372. if (!str) {
  373. if (isFile)
  374. error(ERROR_ERROR, "could not open %s for reading", input);
  375. else
  376. error(ERROR_ERROR, "parseProg : unable to create sfio stream");
  377. free(prog);
  378. return NULL;
  379. }
  380. begg_stmt = NULL;
  381. more = true;
  382. while (more) {
  383. switch (parseCase(str, &guard, &gline, &action, &line)) {
  384. case Begin:
  385. bindAction(Begin, action, line, &prog->begin_stmt, &prog->l_begin);
  386. break;
  387. case BeginG:
  388. if (action && (begg_stmt || !case_infos_is_empty(&nodelist) ||
  389. !case_infos_is_empty(&edgelist))) { // non-empty block
  390. addBlock(&blocklist, begg_stmt, l_beging, nodelist, edgelist);
  391. /* reset values */
  392. edgelist = (case_infos_t){0};
  393. nodelist = (case_infos_t){0};
  394. begg_stmt = NULL;
  395. }
  396. bindAction(BeginG, action, line, &begg_stmt, &l_beging);
  397. break;
  398. case End:
  399. bindAction(End, action, line, &prog->end_stmt, &prog->l_end);
  400. break;
  401. case EndG:
  402. bindAction(EndG, action, line, &prog->endg_stmt, &prog->l_endg);
  403. break;
  404. case Eof:
  405. more = false;
  406. break;
  407. case Node:
  408. addCase(&nodelist, guard, gline, action, line);
  409. break;
  410. case Edge:
  411. addCase(&edgelist, guard, gline, action, line);
  412. break;
  413. case Error: /* to silence warnings */
  414. more = false;
  415. break;
  416. default:
  417. UNREACHABLE();
  418. }
  419. }
  420. if (begg_stmt || !case_infos_is_empty(&nodelist) ||
  421. !case_infos_is_empty(&edgelist)) { // non-empty block
  422. addBlock(&blocklist, begg_stmt, l_beging, nodelist, edgelist);
  423. }
  424. prog->blocks = blocklist;
  425. fclose(str);
  426. if (getErrorErrors()) {
  427. freeParseProg(prog);
  428. prog = NULL;
  429. }
  430. return prog;
  431. }
  432. static void freeBlocks(parse_blocks_t *ip) {
  433. for (size_t i = 0; i < parse_blocks_size(ip); ++i) {
  434. parse_block p = parse_blocks_get(ip, i);
  435. free(p.begg_stmt);
  436. case_infos_free(&p.node_stmts);
  437. case_infos_free(&p.edge_stmts);
  438. }
  439. parse_blocks_free(ip);
  440. }
  441. void freeParseProg(parse_prog *prog) {
  442. if (!prog)
  443. return;
  444. free(prog->begin_stmt);
  445. freeBlocks(&prog->blocks);
  446. free(prog->endg_stmt);
  447. free(prog->end_stmt);
  448. free(prog);
  449. }