extoken.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. /*************************************************************************
  2. * Copyright (c) 2011 AT&T Intellectual Property
  3. * All rights reserved. This program and the accompanying materials
  4. * are made available under the terms of the Eclipse Public License v1.0
  5. * which accompanies this distribution, and is available at
  6. * https://www.eclipse.org/legal/epl-v10.html
  7. *
  8. * Contributors: Details at https://graphviz.org
  9. *************************************************************************/
  10. /*
  11. * Glenn Fowler
  12. * AT&T Research
  13. *
  14. * expression library default lexical analyzer
  15. */
  16. #include "config.h"
  17. #include <cgraph/gv_ctype.h>
  18. #include <expr/exlib.h>
  19. #include <stdbool.h>
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include <util/agxbuf.h>
  24. #include <util/streq.h>
  25. #include <util/unreachable.h>
  26. #if defined(TRACE_lex) && TRACE_lex
  27. /*
  28. * trace c for op
  29. */
  30. static void
  31. trace(Expr_t* ex, int lev, char* op, int c)
  32. {
  33. char* s = 0;
  34. char* t;
  35. bool free_t = false;
  36. char buf[16];
  37. void* x = 0;
  38. t = "";
  39. switch (c)
  40. {
  41. case 0:
  42. s = " EOF";
  43. break;
  44. case '=':
  45. s = t = buf;
  46. *t++ = ' ';
  47. if (!lev && ex_lval.op != c)
  48. *t++ = ex_lval.op;
  49. *t++ = c;
  50. *t = 0;
  51. break;
  52. case AND:
  53. s = " AND ";
  54. t = "&&";
  55. break;
  56. case DEC:
  57. s = " DEC ";
  58. t = "--";
  59. break;
  60. case DECLARE:
  61. s = " DECLARE ";
  62. t = ex_lval.id->name;
  63. break;
  64. case DYNAMIC:
  65. s = " DYNAMIC ";
  66. t = ex_lval.id->name;
  67. x = (void*)ex_lval.id;
  68. break;
  69. case EQ:
  70. s = " EQ ";
  71. t = "==";
  72. break;
  73. case FLOATING:
  74. s = " FLOATING ";
  75. snprintf(t = buf, sizeof(buf), "%f", ex_lval.floating);
  76. break;
  77. case GE:
  78. s = " GE ";
  79. t = ">=";
  80. break;
  81. case CONSTANT:
  82. s = " CONSTANT ";
  83. t = ex_lval.id->name;
  84. break;
  85. case ID:
  86. s = " ID ";
  87. t = ex_lval.id->name;
  88. break;
  89. case INC:
  90. s = "INC ";
  91. t = "++";
  92. break;
  93. case INTEGER:
  94. s = " INTEGER ";
  95. snprintf(t = buf, sizeof(buf), "%lld", ex_lval.integer);
  96. break;
  97. case LABEL:
  98. s = " LABEL ";
  99. t = ex_lval.id->name;
  100. break;
  101. case LE:
  102. s = " LE ";
  103. t = "<=";
  104. break;
  105. case LSH:
  106. s = " LSH ";
  107. t = "<<";
  108. break;
  109. case NAME:
  110. s = " NAME ";
  111. t = ex_lval.id->name;
  112. x = (void*)ex_lval.id;
  113. break;
  114. case NE:
  115. s = " NE ";
  116. t = "!=";
  117. break;
  118. case OR:
  119. s = " OR ";
  120. t = "||";
  121. break;
  122. case RSH:
  123. s = " RSH ";
  124. t = ">>";
  125. break;
  126. case STRING:
  127. s = " STRING ";
  128. t = fmtesc(ex_lval.string);
  129. free_t = true;
  130. break;
  131. case UNSIGNED:
  132. s = " UNSIGNED ";
  133. snprintf(t = buf, sizeof(buf), "%llu", (unsigned long long)ex_lval.integer);
  134. break;
  135. case BREAK:
  136. s = " break";
  137. break;
  138. case CASE:
  139. s = " case";
  140. break;
  141. case CONTINUE:
  142. s = " continue";
  143. break;
  144. case DEFAULT:
  145. s = " default";
  146. break;
  147. case ELSE:
  148. s = " else";
  149. break;
  150. case EXIT:
  151. s = " exit";
  152. break;
  153. case FOR:
  154. s = " for";
  155. break;
  156. case ITERATOR:
  157. s = " forf";
  158. break;
  159. case GSUB:
  160. s = " gsub";
  161. break;
  162. case IF:
  163. s = " if";
  164. break;
  165. case IN_OP:
  166. s = " in";
  167. break;
  168. case PRAGMA:
  169. s = " pragma";
  170. break;
  171. case PRINT:
  172. s = " print";
  173. break;
  174. case PRINTF:
  175. s = " printf";
  176. break;
  177. case QUERY:
  178. s = " query";
  179. break;
  180. case RAND:
  181. s = " rand";
  182. break;
  183. case RETURN:
  184. s = " return";
  185. break;
  186. case SPLIT:
  187. s = " split";
  188. break;
  189. case SPRINTF:
  190. s = " sprintf";
  191. break;
  192. case SRAND:
  193. s = " srand";
  194. break;
  195. case SUB:
  196. s = " sub";
  197. break;
  198. case SUBSTR:
  199. s = " substr";
  200. break;
  201. case SWITCH:
  202. s = " switch";
  203. break;
  204. case TOKENS:
  205. s = " tokens";
  206. break;
  207. case UNSET:
  208. s = " unset";
  209. break;
  210. case WHILE:
  211. s = " while";
  212. break;
  213. default:
  214. if (c < 0177)
  215. {
  216. s = buf;
  217. *s++ = c;
  218. *s = 0;
  219. t = fmtesc(buf);
  220. free_t = true;
  221. s = " ";
  222. }
  223. break;
  224. }
  225. if (x)
  226. error(TRACE_lex + lev, "%s: [%d] %04d%s%s (%x)", op, ex->input->nesting, c, s, t, x);
  227. else
  228. error(TRACE_lex + lev, "%s: [%d] %04d%s%s", op, ex->input->nesting, c, s, t);
  229. if (free_t) {
  230. free(t);
  231. }
  232. }
  233. /*
  234. * trace wrapper for extoken()
  235. */
  236. extern int _extoken_fn_(Expr_t*);
  237. int
  238. extoken_fn(Expr_t* ex)
  239. {
  240. int c;
  241. #define extoken_fn _extoken_fn_
  242. c = extoken_fn(ex);
  243. trace(ex, 0, "ex_lex", c);
  244. return c;
  245. }
  246. #else
  247. #define trace(p,a,b,c) do { } while (0)
  248. #endif
  249. /*
  250. * get the next expression char
  251. */
  252. static int
  253. lex(Expr_t* ex)
  254. {
  255. int c;
  256. for (;;)
  257. {
  258. if ((c = ex->input->peek))
  259. ex->input->peek = 0;
  260. else if (ex->input->pp)
  261. {
  262. if (!(c = *ex->input->pp++))
  263. {
  264. ex->input->pp = 0;
  265. continue;
  266. }
  267. }
  268. else if (ex->input->fp)
  269. {
  270. if ((c = getc(ex->input->fp)) == EOF)
  271. {
  272. if (!expop(ex))
  273. continue;
  274. else trace(ex, -1, "expop fp FAIL", 0);
  275. c = 0;
  276. }
  277. }
  278. else c = 0;
  279. if (c == '\n')
  280. setcontext(ex);
  281. else if (c)
  282. putcontext(ex, c);
  283. trace(ex, -3, "ex--lex", c);
  284. return c;
  285. }
  286. }
  287. /*
  288. * get the next expression token
  289. */
  290. int
  291. extoken_fn(Expr_t* ex)
  292. {
  293. int c;
  294. char* s;
  295. int q;
  296. char* e;
  297. Dt_t* v;
  298. if (ex->eof || ex->errors)
  299. return 0;
  300. again:
  301. for (;;)
  302. switch (c = lex(ex))
  303. {
  304. case 0:
  305. goto eof;
  306. case '/':
  307. switch (q = lex(ex))
  308. {
  309. case '*':
  310. for (;;) switch (lex(ex))
  311. {
  312. case '\n':
  313. if (error_info.line)
  314. error_info.line++;
  315. else error_info.line = 2;
  316. continue;
  317. case '*':
  318. switch (lex(ex))
  319. {
  320. case 0:
  321. goto eof;
  322. case '\n':
  323. if (error_info.line)
  324. error_info.line++;
  325. else error_info.line = 2;
  326. break;
  327. case '*':
  328. exunlex(ex, '*');
  329. break;
  330. case '/':
  331. goto again;
  332. }
  333. break;
  334. }
  335. break;
  336. case '/':
  337. while ((c = lex(ex)) != '\n')
  338. if (!c)
  339. goto eof;
  340. break;
  341. default:
  342. goto opeq;
  343. }
  344. /*FALLTHROUGH*/
  345. case '\n':
  346. if (error_info.line)
  347. error_info.line++;
  348. else error_info.line = 2;
  349. /*FALLTHROUGH*/
  350. case ' ':
  351. case '\t':
  352. case '\r':
  353. break;
  354. case '(':
  355. case '{':
  356. case '[':
  357. ex->input->nesting++;
  358. return ex_lval.op = c;
  359. case ')':
  360. case '}':
  361. case ']':
  362. ex->input->nesting--;
  363. return ex_lval.op = c;
  364. case '+':
  365. case '-':
  366. if ((q = lex(ex)) == c)
  367. return ex_lval.op = c == '+' ? INC : DEC;
  368. goto opeq;
  369. case '*':
  370. case '%':
  371. case '^':
  372. q = lex(ex);
  373. opeq:
  374. ex_lval.op = c;
  375. if (q == '=')
  376. c = '=';
  377. else if (q == '%' && c == '%')
  378. {
  379. goto eof;
  380. }
  381. else exunlex(ex, q);
  382. return c;
  383. case '&':
  384. case '|':
  385. if ((q = lex(ex)) == '=')
  386. {
  387. ex_lval.op = c;
  388. return '=';
  389. }
  390. if (q == c)
  391. c = c == '&' ? AND : OR;
  392. else exunlex(ex, q);
  393. return ex_lval.op = c;
  394. case '<':
  395. case '>':
  396. if ((q = lex(ex)) == c)
  397. {
  398. ex_lval.op = c = c == '<' ? LSH : RSH;
  399. if ((q = lex(ex)) == '=')
  400. c = '=';
  401. else exunlex(ex, q);
  402. return c;
  403. }
  404. goto relational;
  405. case '=':
  406. case '!':
  407. q = lex(ex);
  408. relational:
  409. if (q == '=') switch (c)
  410. {
  411. case '<':
  412. c = LE;
  413. break;
  414. case '>':
  415. c = GE;
  416. break;
  417. case '=':
  418. c = EQ;
  419. break;
  420. case '!':
  421. c = NE;
  422. break;
  423. default:
  424. UNREACHABLE();
  425. }
  426. else exunlex(ex, q);
  427. return ex_lval.op = c;
  428. case '#':
  429. if (!ex->linewrap) {
  430. s = ex->linep - 1;
  431. while (s > ex->line && gv_isspace(*(s - 1)))
  432. s--;
  433. if (s == ex->line)
  434. {
  435. switch (extoken_fn(ex))
  436. {
  437. case DYNAMIC:
  438. case ID:
  439. case NAME:
  440. s = ex_lval.id->name;
  441. break;
  442. default:
  443. s = "";
  444. break;
  445. }
  446. if (streq(s, "include"))
  447. {
  448. if (extoken_fn(ex) != STRING)
  449. exerror("#%s: string argument expected", s);
  450. else if (!expush(ex, ex_lval.string, 1, NULL))
  451. {
  452. setcontext(ex);
  453. goto again;
  454. }
  455. }
  456. else exerror("unknown directive");
  457. }
  458. }
  459. return ex_lval.op = c;
  460. case '\'':
  461. case '"':
  462. q = c;
  463. agxbclear(&ex->tmp);
  464. ex->input->nesting++;
  465. while ((c = lex(ex)) != q)
  466. {
  467. if (c == '\\')
  468. {
  469. agxbputc(&ex->tmp, '\\');
  470. c = lex(ex);
  471. }
  472. if (!c)
  473. {
  474. exerror("unterminated %c string", q);
  475. goto eof;
  476. }
  477. if (c == '\n')
  478. {
  479. if (error_info.line)
  480. error_info.line++;
  481. else error_info.line = 2;
  482. }
  483. agxbputc(&ex->tmp, (char)c);
  484. }
  485. ex->input->nesting--;
  486. s = agxbuse(&ex->tmp);
  487. if (q == '"' || (ex->disc->flags & EX_CHARSTRING))
  488. {
  489. if (!(ex_lval.string = vmstrdup(ex->vm, s)))
  490. goto eof;
  491. stresc(ex_lval.string);
  492. return STRING;
  493. }
  494. ex_lval.integer = chrtoi(s);
  495. return INTEGER;
  496. case '.':
  497. if (gv_isdigit(c = lex(ex)))
  498. {
  499. agxbclear(&ex->tmp);
  500. agxbput(&ex->tmp, "0.");
  501. goto floating;
  502. }
  503. exunlex(ex, c);
  504. return ex_lval.op = '.';
  505. case '0': case '1': case '2': case '3': case '4':
  506. case '5': case '6': case '7': case '8': case '9': {
  507. agxbclear(&ex->tmp);
  508. agxbputc(&ex->tmp, (char)c);
  509. q = INTEGER;
  510. int b = 0;
  511. if ((c = lex(ex)) == 'x' || c == 'X')
  512. {
  513. b = 16;
  514. agxbputc(&ex->tmp, (char)c);
  515. for (c = lex(ex); gv_isxdigit(c); c = lex(ex))
  516. {
  517. agxbputc(&ex->tmp, (char)c);
  518. }
  519. }
  520. else
  521. {
  522. while (gv_isdigit(c))
  523. {
  524. agxbputc(&ex->tmp, (char)c);
  525. c = lex(ex);
  526. }
  527. if (c == '#')
  528. {
  529. agxbputc(&ex->tmp, (char)c);
  530. do
  531. {
  532. agxbputc(&ex->tmp, (char)c);
  533. } while (gv_isalnum(c = lex(ex)));
  534. }
  535. else
  536. {
  537. if (c == '.')
  538. {
  539. floating:
  540. q = FLOATING;
  541. agxbputc(&ex->tmp, (char)c);
  542. while (gv_isdigit(c = lex(ex)))
  543. agxbputc(&ex->tmp, (char)c);
  544. }
  545. if (c == 'e' || c == 'E')
  546. {
  547. q = FLOATING;
  548. agxbputc(&ex->tmp, (char)c);
  549. if ((c = lex(ex)) == '-' || c == '+')
  550. {
  551. agxbputc(&ex->tmp, (char)c);
  552. c = lex(ex);
  553. }
  554. while (gv_isdigit(c))
  555. {
  556. agxbputc(&ex->tmp, (char)c);
  557. c = lex(ex);
  558. }
  559. }
  560. }
  561. }
  562. s = agxbuse(&ex->tmp);
  563. if (q == FLOATING)
  564. ex_lval.floating = strtod(s, &e);
  565. else
  566. {
  567. if (c == 'u' || c == 'U')
  568. {
  569. q = UNSIGNED;
  570. c = lex(ex);
  571. ex_lval.integer = strtoull(s, &e, b);
  572. }
  573. else
  574. ex_lval.integer = strtoll(s, &e, b);
  575. }
  576. exunlex(ex, c);
  577. if (*e || gv_isalpha(c) || c == '_' || c == '$')
  578. {
  579. exerror("%s: invalid numeric constant", s);
  580. goto eof;
  581. }
  582. return q;
  583. }
  584. default:
  585. if (gv_isalpha(c) || c == '_' || c == '$')
  586. {
  587. agxbclear(&ex->tmp);
  588. agxbputc(&ex->tmp, (char)c);
  589. while (gv_isalnum(c = lex(ex)) || c == '_' || c == '$')
  590. agxbputc(&ex->tmp, (char)c);
  591. exunlex(ex, c);
  592. s = agxbuse(&ex->tmp);
  593. /* v = expr.declare ? dtview(ex->symbols, NULL) : (Dt_t*)0; FIX */
  594. v = (Dt_t*)0;
  595. ex_lval.id = dtmatch(ex->symbols, s);
  596. if (v)
  597. dtview(ex->symbols, v);
  598. if (!ex_lval.id)
  599. {
  600. const size_t size = sizeof(Exid_t) + strlen(s) - EX_NAMELEN + 1;
  601. if (!(ex_lval.id = vmalloc(ex->vm, size))) {
  602. exnospace();
  603. goto eof;
  604. }
  605. memset(ex_lval.id, 0, size);
  606. strcpy(ex_lval.id->name, s);
  607. ex_lval.id->lex = NAME;
  608. /*
  609. * LABELs are in the parent scope!
  610. */
  611. if (c == ':' && !expr.nolabel && ex->frame && ex->frame->view)
  612. dtinsert(ex->frame->view, ex_lval.id);
  613. else
  614. dtinsert(ex->symbols, ex_lval.id);
  615. }
  616. /*
  617. * lexical analyzer state controlled by the grammar
  618. */
  619. switch (ex_lval.id->lex)
  620. {
  621. case DECLARE:
  622. if (ex_lval.id->index == CHARACTER)
  623. {
  624. /*
  625. * `char*' === `string'
  626. * the * must immediately follow char
  627. */
  628. if (c == '*')
  629. {
  630. lex(ex);
  631. ex_lval.id = id_string;
  632. }
  633. }
  634. break;
  635. case NAME:
  636. /*
  637. * action labels are disambiguated from ?:
  638. * through the expr.nolabel grammar hook
  639. * the : must immediately follow labels
  640. */
  641. if (c == ':' && !expr.nolabel)
  642. return LABEL;
  643. break;
  644. case PRAGMA:
  645. /*
  646. * user specific statement stripped and
  647. * passed as string
  648. */
  649. {
  650. int b;
  651. int n;
  652. int pc = 0;
  653. int po;
  654. int t;
  655. /*UNDENT...*/
  656. agxbclear(&ex->tmp);
  657. b = 1;
  658. n = 0;
  659. po = 0;
  660. for (c = t = lex(ex);; c = lex(ex))
  661. {
  662. switch (c)
  663. {
  664. case 0:
  665. goto eof;
  666. case '/':
  667. switch (q = lex(ex))
  668. {
  669. case '*':
  670. for (;;)
  671. {
  672. switch (lex(ex))
  673. {
  674. case '\n':
  675. if (error_info.line)
  676. error_info.line++;
  677. else error_info.line = 2;
  678. continue;
  679. case '*':
  680. switch (lex(ex))
  681. {
  682. case 0:
  683. goto eof;
  684. case '\n':
  685. if (error_info.line)
  686. error_info.line++;
  687. else error_info.line = 2;
  688. continue;
  689. case '*':
  690. exunlex(ex, '*');
  691. continue;
  692. case '/':
  693. break;
  694. default:
  695. continue;
  696. }
  697. break;
  698. default: // ignore; keep consuming characters
  699. break;
  700. }
  701. if (!b++)
  702. goto eof;
  703. agxbputc(&ex->tmp, ' ');
  704. break;
  705. }
  706. break;
  707. case '/':
  708. while ((c = lex(ex)) != '\n')
  709. if (!c)
  710. goto eof;
  711. if (error_info.line)
  712. error_info.line++;
  713. else error_info.line = 2;
  714. b = 1;
  715. agxbputc(&ex->tmp, '\n');
  716. break;
  717. default:
  718. b = 0;
  719. agxbputc(&ex->tmp, (char)c);
  720. agxbputc(&ex->tmp, (char)q);
  721. break;
  722. }
  723. continue;
  724. case '\n':
  725. if (error_info.line)
  726. error_info.line++;
  727. else error_info.line = 2;
  728. b = 1;
  729. agxbputc(&ex->tmp, '\n');
  730. continue;
  731. case ' ':
  732. case '\t':
  733. if (!b++)
  734. goto eof;
  735. agxbputc(&ex->tmp, ' ');
  736. continue;
  737. case '(':
  738. case '{':
  739. case '[':
  740. b = 0;
  741. if (!po)
  742. {
  743. switch (po = c)
  744. {
  745. case '(':
  746. pc = ')';
  747. break;
  748. case '{':
  749. pc = '}';
  750. break;
  751. case '[':
  752. pc = ']';
  753. break;
  754. default:
  755. UNREACHABLE();
  756. }
  757. n++;
  758. }
  759. else if (c == po)
  760. n++;
  761. agxbputc(&ex->tmp, (char)c);
  762. continue;
  763. case ')':
  764. case '}':
  765. case ']':
  766. b = 0;
  767. if (!po)
  768. {
  769. exunlex(ex, c);
  770. break;
  771. }
  772. agxbputc(&ex->tmp, (char)c);
  773. if (c == pc && --n <= 0)
  774. {
  775. if (t == po)
  776. break;
  777. po = 0;
  778. }
  779. continue;
  780. case ';':
  781. b = 0;
  782. if (!n)
  783. break;
  784. agxbputc(&ex->tmp, (char)c);
  785. continue;
  786. case '\'':
  787. case '"':
  788. b = 0;
  789. agxbputc(&ex->tmp, (char)c);
  790. ex->input->nesting++;
  791. q = c;
  792. while ((c = lex(ex)) != q)
  793. {
  794. if (c == '\\')
  795. {
  796. agxbputc(&ex->tmp, '\\');
  797. c = lex(ex);
  798. }
  799. if (!c)
  800. {
  801. exerror("unterminated %c string", q);
  802. goto eof;
  803. }
  804. if (c == '\n')
  805. {
  806. if (error_info.line)
  807. error_info.line++;
  808. else error_info.line = 2;
  809. }
  810. agxbputc(&ex->tmp, (char)c);
  811. }
  812. ex->input->nesting--;
  813. continue;
  814. default:
  815. b = 0;
  816. agxbputc(&ex->tmp, (char)c);
  817. continue;
  818. }
  819. break;
  820. }
  821. ex->disc->reff(ex, NULL, ex_lval.id, NULL);
  822. /*..INDENT*/
  823. }
  824. goto again;
  825. }
  826. return ex_lval.id->lex;
  827. }
  828. return ex_lval.op = c;
  829. }
  830. eof:
  831. ex->eof = 1;
  832. return ex_lval.op = ';';
  833. }