sfvscanf.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. /*************************************************************************
  2. * Copyright (c) 2011 AT&T Intellectual Property
  3. * All rights reserved. This program and the accompanying materials
  4. * are made available under the terms of the Eclipse Public License v1.0
  5. * which accompanies this distribution, and is available at
  6. * https://www.eclipse.org/legal/epl-v10.html
  7. *
  8. * Contributors: Details at https://graphviz.org
  9. *************************************************************************/
  10. #include <assert.h>
  11. #include <cgraph/gv_ctype.h>
  12. #include <inttypes.h>
  13. #include <limits.h>
  14. #include <sfio/sfhdr.h>
  15. #include <stdbool.h>
  16. #include <stddef.h>
  17. #include <stdio.h>
  18. /* The main engine for reading formatted data
  19. **
  20. ** Written by Kiem-Phong Vo.
  21. */
  22. #define MAXWIDTH INT_MAX // max amount to scan
  23. /**
  24. * @param form format string
  25. * @param accept accepted characters are set to 1
  26. */
  27. static const unsigned char *setclass(const unsigned char *form, bool *accept) {
  28. int fmt, c;
  29. bool yes;
  30. if ((fmt = *form++) == '^') { /* we want the complement of this set */
  31. yes = false;
  32. fmt = *form++;
  33. } else
  34. yes = true;
  35. for (c = 0; c <= UCHAR_MAX; ++c)
  36. accept[c] = !yes;
  37. if (fmt == ']' || fmt == '-') { /* special first char */
  38. accept[fmt] = yes;
  39. fmt = *form++;
  40. }
  41. for (; fmt != ']'; fmt = *form++) { /* done */
  42. if (!fmt)
  43. return (form - 1);
  44. /* interval */
  45. if (fmt != '-' || form[0] == ']' || form[-2] > form[0])
  46. accept[fmt] = yes;
  47. else
  48. for (c = form[-2] + 1; c < form[0]; ++c)
  49. accept[c] = yes;
  50. }
  51. return form;
  52. }
  53. /**
  54. * @param f file to be scanned
  55. * @param ft Formatting instructions
  56. */
  57. int sfvscanf(FILE *f, Sffmt_t *ft) {
  58. int inp, shift, base, width;
  59. ssize_t size;
  60. int fmt, flags, dot, n_assign, v, n, n_input;
  61. char *sp;
  62. char accept[SF_MAXDIGITS];
  63. Argv_t argv;
  64. int argp, argn;
  65. void *value; /* location to assign scanned value */
  66. const char *t_str;
  67. ssize_t n_str;
  68. #define SFGETC(f,c) (((c) = getc(f)) < 0 ? c : (++n_input, c))
  69. #define SFUNGETC(f,c) do { \
  70. ungetc((c), (f)); \
  71. --n_input; \
  72. } while (0)
  73. assert(f != NULL);
  74. n_assign = n_input = 0;
  75. inp = -1;
  76. const char *form;
  77. argv.ft = ft;
  78. form = argv.ft->form;
  79. argn = -1;
  80. assert(ft != NULL && ft->extf != NULL);
  81. loop_fmt:
  82. while ((fmt = *form++)) {
  83. if (fmt != '%') {
  84. if (gv_isspace(fmt)) {
  85. if (fmt != '\n')
  86. fmt = -1;
  87. for (;;) {
  88. if (SFGETC(f, inp) < 0 || inp == fmt)
  89. goto loop_fmt;
  90. else if (!gv_isspace(inp)) {
  91. SFUNGETC(f, inp);
  92. goto loop_fmt;
  93. }
  94. }
  95. } else {
  96. match_1:
  97. if (SFGETC(f, inp) != fmt) {
  98. if (inp >= 0)
  99. SFUNGETC(f, inp);
  100. goto done;
  101. }
  102. }
  103. continue;
  104. }
  105. if (*form == '%') {
  106. form += 1;
  107. goto match_1;
  108. }
  109. if (*form == '\0')
  110. goto done;
  111. if (*form == '*') {
  112. flags = SFFMT_SKIP;
  113. form += 1;
  114. } else
  115. flags = 0;
  116. /* matching some pattern */
  117. base = 10;
  118. size = -1;
  119. width = dot = 0;
  120. t_str = NULL;
  121. n_str = 0;
  122. value = NULL;
  123. argp = -1;
  124. loop_flags: /* LOOP FOR FLAGS, WIDTH, BASE, TYPE */
  125. switch ((fmt = *form++)) {
  126. case LEFTP: /* get the type which is enclosed in balanced () */
  127. t_str = form;
  128. for (v = 1;;) {
  129. switch (*form++) {
  130. case 0: /* not balanceable, retract */
  131. form = t_str;
  132. t_str = NULL;
  133. n_str = 0;
  134. goto loop_flags;
  135. case LEFTP: /* increasing nested level */
  136. v += 1;
  137. continue;
  138. case RIGHTP: /* decreasing nested level */
  139. if ((v -= 1) != 0)
  140. continue;
  141. if (*t_str != '*')
  142. n_str = (form - 1) - t_str;
  143. else {
  144. t_str = _Sffmtintf(t_str + 1, &n);
  145. FP_SET(-1, argn);
  146. FMTSET(ft, form, LEFTP, 0, 0, 0, 0, 0, NULL, 0);
  147. n = ft->extf(&argv, ft);
  148. if (n < 0)
  149. goto done;
  150. assert(ft->flags & SFFMT_VALUE);
  151. if ((t_str = argv.s) && (n_str = (int)ft->size) < 0)
  152. n_str = (ssize_t)strlen(t_str);
  153. }
  154. goto loop_flags;
  155. default:
  156. // skip over
  157. break;
  158. }
  159. }
  160. case '#': /* alternative format */
  161. flags |= SFFMT_ALTER;
  162. goto loop_flags;
  163. case '.': /* width & base */
  164. dot += 1;
  165. if (gv_isdigit(*form)) {
  166. fmt = *form++;
  167. goto dot_size;
  168. } else if (*form == '*') {
  169. form = _Sffmtintf(form + 1, &n);
  170. n = FP_SET(-1, argn);
  171. FMTSET(ft, form, '.', dot, 0, 0, 0, 0, NULL, 0);
  172. if (ft->extf(&argv, ft) < 0)
  173. goto done;
  174. assert(ft->flags & SFFMT_VALUE);
  175. v = argv.i;
  176. if (v < 0)
  177. v = 0;
  178. goto dot_set;
  179. } else
  180. goto loop_flags;
  181. case '0':
  182. case '1':
  183. case '2':
  184. case '3':
  185. case '4':
  186. case '5':
  187. case '6':
  188. case '7':
  189. case '8':
  190. case '9':
  191. dot_size:
  192. for (v = fmt - '0'; gv_isdigit(*form); ++form)
  193. v = v * 10 + (*form - '0');
  194. dot_set:
  195. if (dot == 0 || dot == 1)
  196. width = v;
  197. else if (dot == 2)
  198. base = v;
  199. goto loop_flags;
  200. case 'I': /* object size */
  201. size = 0;
  202. flags = (flags & ~SFFMT_TYPES) | SFFMT_IFLAG;
  203. if (gv_isdigit(*form)) {
  204. for (n = *form; gv_isdigit(n); n = *++form)
  205. size = size * 10 + (n - '0');
  206. } else if (*form == '*') {
  207. form = _Sffmtintf(form + 1, &n);
  208. n = FP_SET(-1, argn);
  209. FMTSET(ft, form, 'I', sizeof(int), 0, 0, 0, 0, NULL, 0);
  210. if (ft->extf(&argv, ft) < 0)
  211. goto done;
  212. assert(ft->flags & SFFMT_VALUE);
  213. size = argv.i;
  214. }
  215. goto loop_flags;
  216. case 'l':
  217. size = -1;
  218. flags &= ~SFFMT_TYPES;
  219. if (*form == 'l') {
  220. form += 1;
  221. flags |= SFFMT_LLONG;
  222. } else
  223. flags |= SFFMT_LONG;
  224. goto loop_flags;
  225. case 'h':
  226. size = -1;
  227. flags &= ~SFFMT_TYPES;
  228. if (*form == 'h') {
  229. form += 1;
  230. flags |= SFFMT_SSHORT;
  231. } else
  232. flags |= SFFMT_SHORT;
  233. goto loop_flags;
  234. case 'L':
  235. size = -1;
  236. flags = (flags & ~SFFMT_TYPES) | SFFMT_LDOUBLE;
  237. goto loop_flags;
  238. case 'j':
  239. size = -1;
  240. flags = (flags & ~SFFMT_TYPES) | SFFMT_JFLAG;
  241. goto loop_flags;
  242. case 'z':
  243. size = -1;
  244. flags = (flags & ~SFFMT_TYPES) | SFFMT_ZFLAG;
  245. goto loop_flags;
  246. case 't':
  247. size = -1;
  248. flags = (flags & ~SFFMT_TYPES) | SFFMT_TFLAG;
  249. goto loop_flags;
  250. default: // continue with logic below
  251. break;
  252. }
  253. /* set object size */
  254. if (flags & (SFFMT_TYPES & ~SFFMT_IFLAG)) {
  255. if ((_Sftype[fmt] & (SFFMT_INT | SFFMT_UINT)) || fmt == 'n') {
  256. size = (flags & SFFMT_LLONG) ? (ssize_t)sizeof(long long) :
  257. (flags & SFFMT_LONG) ? (ssize_t)sizeof(long) :
  258. (flags & SFFMT_SHORT) ? (ssize_t)sizeof(short) :
  259. (flags & SFFMT_SSHORT) ? (ssize_t)sizeof(char) :
  260. (flags & SFFMT_JFLAG) ? (ssize_t)sizeof(long long) :
  261. (flags & SFFMT_TFLAG) ? (ssize_t)sizeof(ptrdiff_t) :
  262. (flags & SFFMT_ZFLAG) ? (ssize_t)sizeof(size_t) : -1;
  263. } else if (_Sftype[fmt] & SFFMT_FLOAT) {
  264. size = (flags & SFFMT_LDOUBLE) ? (ssize_t)sizeof(long double) :
  265. (flags & (SFFMT_LONG | SFFMT_LLONG)) ? (ssize_t)sizeof(double) : -1;
  266. }
  267. }
  268. argp = FP_SET(argp, argn);
  269. FMTSET(ft, form, fmt, size, flags, width, 0, base, t_str, n_str);
  270. v = ft->extf(&argv, ft);
  271. if (v < 0)
  272. goto done;
  273. else if (v == 0) { // extf did not use input stream
  274. FMTGET(ft, form, fmt, size, flags, width, n, base);
  275. if ((ft->flags & SFFMT_VALUE) && !(ft->flags & SFFMT_SKIP))
  276. value = argv.vp;
  277. } else { // v > 0: number of input bytes consumed
  278. n_input += v;
  279. if (!(ft->flags & SFFMT_SKIP))
  280. n_assign += 1;
  281. continue;
  282. }
  283. if (_Sftype[fmt] == 0) /* unknown pattern */
  284. continue;
  285. assert(!(!value && !(flags & SFFMT_SKIP)));
  286. if (fmt == 'n') { /* return length of consumed input */
  287. if (sizeof(long) > sizeof(int) && FMTCMP(size, long, long long))
  288. *((long *) value) = (long)n_input;
  289. else if (sizeof(short) < sizeof(int) && FMTCMP(size, short, long long))
  290. *((short *) value) = (short)n_input;
  291. else if (size == sizeof(char))
  292. *((char *) value) = (char)n_input;
  293. else
  294. *((int *) value) = (int)n_input;
  295. continue;
  296. }
  297. /* if get here, start scanning input */
  298. if (width == 0)
  299. width = fmt == 'c' ? 1 : MAXWIDTH;
  300. /* define the first input character */
  301. if (fmt == 'c' || fmt == '[')
  302. SFGETC(f, inp);
  303. else {
  304. do {
  305. SFGETC(f, inp);
  306. }
  307. while (gv_isspace(inp)) // skip starting blanks
  308. ;
  309. }
  310. if (inp < 0)
  311. goto done;
  312. if (_Sftype[fmt] == SFFMT_FLOAT) {
  313. char *val;
  314. val = accept;
  315. if (width >= 0 && (size_t)width >= SF_MAXDIGITS)
  316. width = SF_MAXDIGITS - 1;
  317. int exponent = 0;
  318. bool seen_dot = false;
  319. do {
  320. if (gv_isdigit(inp))
  321. *val++ = inp;
  322. else if (inp == '.') { /* too many dots */
  323. if (seen_dot)
  324. break;
  325. seen_dot = true;
  326. *val++ = '.';
  327. } else if (inp == 'e' || inp == 'E') { /* too many e,E */
  328. if (exponent++ > 0)
  329. break;
  330. *val++ = inp;
  331. if (--width <= 0 || SFGETC(f, inp) < 0 ||
  332. (inp != '-' && inp != '+' && !gv_isdigit(inp)))
  333. break;
  334. *val++ = inp;
  335. } else if (inp == '-' || inp == '+') { /* too many signs */
  336. if (val > accept)
  337. break;
  338. *val++ = inp;
  339. } else
  340. break;
  341. } while (--width > 0 && SFGETC(f, inp) >= 0);
  342. if (value) {
  343. *val = '\0';
  344. argv.d = strtod(accept, NULL);
  345. n_assign += 1;
  346. if (FMTCMP(size, double, long double))
  347. *((double *) value) = argv.d;
  348. else
  349. *((float *) value) = (float) argv.d;
  350. }
  351. } else if (_Sftype[fmt] == SFFMT_UINT || fmt == 'p') {
  352. if (inp == '-') {
  353. SFUNGETC(f, inp);
  354. goto done;
  355. } else
  356. goto int_cvt;
  357. } else if (_Sftype[fmt] == SFFMT_INT) {
  358. int_cvt:
  359. if (inp == '-' || inp == '+') {
  360. if (inp == '-')
  361. flags |= SFFMT_MINUS;
  362. while (--width > 0 && SFGETC(f, inp) >= 0)
  363. if (!gv_isspace(inp))
  364. break;
  365. }
  366. if (inp < 0)
  367. goto done;
  368. if (fmt == 'o')
  369. base = 8;
  370. else if (fmt == 'x' || fmt == 'p')
  371. base = 16;
  372. else if (fmt == 'i' && inp == '0') { /* self-described data */
  373. base = 8;
  374. if (width > 1) { /* peek to see if it's a base-16 */
  375. if (SFGETC(f, inp) >= 0) {
  376. if (inp == 'x' || inp == 'X')
  377. base = 16;
  378. SFUNGETC(f, inp);
  379. }
  380. inp = '0';
  381. }
  382. }
  383. /* now convert */
  384. argv.lu = 0;
  385. if (base == 16) {
  386. sp = (char *) _Sfcv36;
  387. shift = 4;
  388. if (sp[inp] >= 16) {
  389. SFUNGETC(f, inp);
  390. goto done;
  391. }
  392. if (inp == '0' && --width > 0) { /* skip leading 0x or 0X */
  393. if (SFGETC(f, inp) >= 0 &&
  394. (inp == 'x' || inp == 'X') && --width > 0)
  395. SFGETC(f, inp);
  396. }
  397. if (inp >= 0 && sp[inp] < 16)
  398. goto base_shift;
  399. } else if (base == 10) { /* fast base 10 conversion */
  400. if (inp < '0' || inp > '9') {
  401. SFUNGETC(f, inp);
  402. goto done;
  403. }
  404. do {
  405. argv.lu =
  406. (argv.lu << 3) + (argv.lu << 1) + (inp - '0');
  407. } while (--width > 0 && SFGETC(f, inp) >= '0'
  408. && inp <= '9');
  409. if (fmt == 'i' && inp == '#' && !(flags & SFFMT_ALTER)) {
  410. base = (int) argv.lu;
  411. if (base < 2 || base > SF_RADIX)
  412. goto done;
  413. argv.lu = 0;
  414. sp = base <= 36 ? (char *) _Sfcv36 : (char *) _Sfcv64;
  415. if (--width > 0 &&
  416. SFGETC(f, inp) >= 0 && sp[inp] < base)
  417. goto base_conv;
  418. }
  419. } else { /* other bases */
  420. sp = base <= 36 ? (char *) _Sfcv36 : (char *) _Sfcv64;
  421. if (base < 2 || base > SF_RADIX || sp[inp] >= base) {
  422. SFUNGETC(f, inp);
  423. goto done;
  424. }
  425. base_conv: /* check for power of 2 conversions */
  426. if ((base & ~(base - 1)) == base) {
  427. if (base < 8)
  428. shift = base < 4 ? 1 : 2;
  429. else if (base < 32)
  430. shift = base < 16 ? 3 : 4;
  431. else
  432. shift = base < 64 ? 5 : 6;
  433. base_shift:do {
  434. argv.lu = (argv.lu << shift) + sp[inp];
  435. } while (--width > 0 &&
  436. SFGETC(f, inp) >= 0 && sp[inp] < base);
  437. } else {
  438. do {
  439. argv.lu = (argv.lu * base) + sp[inp];
  440. } while (--width > 0 &&
  441. SFGETC(f, inp) >= 0 && sp[inp] < base);
  442. }
  443. }
  444. if (flags & SFFMT_MINUS)
  445. argv.ll = -argv.ll;
  446. if (value) {
  447. n_assign += 1;
  448. if (fmt == 'p') {
  449. *((void **) value) = (void *)(uintptr_t)argv.lu;
  450. } else if (sizeof(long) > sizeof(int) && FMTCMP(size, long, long long)) {
  451. if (fmt == 'd' || fmt == 'i')
  452. *((long *) value) = (long) argv.ll;
  453. else
  454. *((ulong *) value) = (ulong) argv.lu;
  455. } else if (sizeof(short) < sizeof(int) && FMTCMP(size, short, long long)) {
  456. if (fmt == 'd' || fmt == 'i')
  457. *((short *) value) = (short) argv.ll;
  458. else
  459. *((ushort *) value) = (ushort) argv.lu;
  460. } else if (size == sizeof(char)) {
  461. if (fmt == 'd' || fmt == 'i')
  462. *((char *) value) = (char) argv.ll;
  463. else
  464. *((uchar *) value) = (uchar) argv.lu;
  465. } else {
  466. if (fmt == 'd' || fmt == 'i')
  467. *((int *) value) = (int) argv.ll;
  468. else
  469. *((unsigned*)value) = (unsigned)argv.lu;
  470. }
  471. }
  472. } else if (fmt == 's' || fmt == 'c' || fmt == '[') {
  473. if (size < 0)
  474. size = MAXWIDTH;
  475. if (value) {
  476. argv.s = (char *) value;
  477. if (fmt != 'c')
  478. size -= 1;
  479. } else
  480. size = 0;
  481. n = 0;
  482. if (fmt == 's') {
  483. do {
  484. if (gv_isspace(inp))
  485. break;
  486. if ((n += 1) <= size)
  487. *argv.s++ = inp;
  488. } while (--width > 0 && SFGETC(f, inp) >= 0);
  489. } else if (fmt == 'c') {
  490. do {
  491. if ((n += 1) <= size)
  492. *argv.s++ = inp;
  493. } while (--width > 0 && SFGETC(f, inp) >= 0);
  494. } else { /* if(fmt == '[') */
  495. bool accepted[UCHAR_MAX + 1];
  496. form = (const char*)setclass((const unsigned char*)form, accepted);
  497. do {
  498. if (!accepted[inp]) {
  499. if (n > 0 || (flags & SFFMT_ALTER))
  500. break;
  501. else {
  502. SFUNGETC(f, inp);
  503. goto done;
  504. }
  505. }
  506. if ((n += 1) <= size)
  507. *argv.s++ = inp;
  508. } while (--width > 0 && SFGETC(f, inp) >= 0);
  509. }
  510. if (value && (n > 0 || fmt == '[')) {
  511. n_assign += 1;
  512. if (fmt != 'c' && size >= 0)
  513. *argv.s = '\0';
  514. }
  515. }
  516. if (width > 0 && inp >= 0)
  517. SFUNGETC(f, inp);
  518. }
  519. done:
  520. if (n_assign == 0 && inp < 0)
  521. n_assign = -1;
  522. return n_assign;
  523. }