123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587 |
- /*************************************************************************
- * Copyright (c) 2011 AT&T Intellectual Property
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * https://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors: Details at https://graphviz.org
- *************************************************************************/
- #include <assert.h>
- #include <cgraph/gv_ctype.h>
- #include <inttypes.h>
- #include <limits.h>
- #include <sfio/sfhdr.h>
- #include <stdbool.h>
- #include <stddef.h>
- #include <stdio.h>
- /* The main engine for reading formatted data
- **
- ** Written by Kiem-Phong Vo.
- */
- #define MAXWIDTH INT_MAX // default scan width, and default string capacity, when the conversion gives none
/**
 * Translate a scanf-style scan set into a per-character membership table.
 *
 * A leading '^' complements the set. A ']' or '-' in first position (after
 * any '^') is an ordinary member. "x-y" with x <= y adds every character
 * strictly between x and y; the end points are added as ordinary members.
 *
 * @param form format string, starting at the first character of the set
 * @param accept table of UCHAR_MAX + 1 entries; on return, accepted
 *   characters are set to true and all others to false
 * @return pointer just past the closing ']', or to the terminating NUL when
 *   the set is not closed
 */
static const unsigned char *setclass(const unsigned char *form, bool *accept) {
  const unsigned char *cursor = form;
  int ch = *cursor++;

  /* '^' up front means the listed characters are the rejected ones */
  const bool member = ch != '^';
  if (!member)
    ch = *cursor++;

  /* everything starts out with the opposite of "listed" */
  for (int i = 0; i <= UCHAR_MAX; ++i)
    accept[i] = !member;

  /* a ']' or '-' in first position is taken literally */
  if (ch == ']' || ch == '-') {
    accept[ch] = member;
    ch = *cursor++;
  }

  while (ch != ']') {
    if (ch == '\0')
      return cursor - 1; /* unterminated set: hand back the NUL */

    /* evaluation order matters: cursor[-2] is only read once ch is a '-'
     * that is known not to be the first character of the set */
    const bool is_interval =
        ch == '-' && cursor[0] != ']' && cursor[-2] <= cursor[0];

    if (is_interval) {
      /* fill in the interior; both end points go through the literal path */
      for (int i = cursor[-2] + 1; i < cursor[0]; ++i)
        accept[i] = member;
    } else {
      accept[ch] = member;
    }

    ch = *cursor++;
  }

  return cursor;
}
- /**
- * Scan formatted input from a stdio stream under control of a format string
- * and an extension callback.
- *
- * The format is taken from ft->form. For each conversion specification, and
- * for each '*' that stands in for a type string, a width/base, or an 'I'
- * size, ft->extf is called with a local Argv_t whose ft member is set to ft.
- * A negative return from extf stops the scan.
- *
- * @param f file to be scanned; asserted to be non-NULL
- * @param ft Formatting instructions; ft and ft->extf are asserted non-NULL
- * @return number of conversions counted as assigned, or -1 when none were
- * assigned and the last read failed or no read was attempted
- */
- int sfvscanf(FILE *f, Sffmt_t *ft) {
- int inp, shift, base, width;
- ssize_t size;
- int fmt, flags, dot, n_assign, v, n, n_input;
- char *sp;
- char accept[SF_MAXDIGITS]; // text of a floating-point number, later handed to strtod
- Argv_t argv;
- int argp, argn;
- void *value; /* location to assign scanned value */
- const char *t_str;
- ssize_t n_str;
- #define SFGETC(f,c) (((c) = getc(f)) < 0 ? c : (++n_input, c)) /* read one char into c; count it in n_input unless getc failed */
- #define SFUNGETC(f,c) do { \
- ungetc((c), (f)); \
- --n_input; \
- } while (0) /* push c back onto f and uncount it */
- assert(f != NULL);
- n_assign = n_input = 0;
- inp = -1; // nothing read yet; stays negative if no read is ever attempted
- const char *form;
- argv.ft = ft;
- form = argv.ft->form;
- argn = -1;
- assert(ft != NULL && ft->extf != NULL); // NOTE(review): ft is already dereferenced two lines above
- loop_fmt:
- while ((fmt = *form++)) { // consume the format one character at a time
- if (fmt != '%') {
- if (gv_isspace(fmt)) { // whitespace in the format skips whitespace in the input
- if (fmt != '\n')
- fmt = -1; // cannot equal a successfully read char, so only '\n' ends the skip early
- for (;;) {
- if (SFGETC(f, inp) < 0 || inp == fmt) // read failed, or the newline a '\n' directive asked for
- goto loop_fmt;
- else if (!gv_isspace(inp)) {
- SFUNGETC(f, inp); // first non-blank char is left for the next directive
- goto loop_fmt;
- }
- }
- } else {
- match_1: // literal format char: the next input char must equal it
- if (SFGETC(f, inp) != fmt) {
- if (inp >= 0)
- SFUNGETC(f, inp);
- goto done;
- }
- }
- continue;
- }
- if (*form == '%') { // "%%" matches a literal '%'
- form += 1;
- goto match_1;
- }
- if (*form == '\0') // format ends right after '%'
- goto done;
- if (*form == '*') { // '*' marks the conversion with SFFMT_SKIP
- flags = SFFMT_SKIP;
- form += 1;
- } else
- flags = 0;
- /* matching some pattern: start from per-conversion defaults */
- base = 10;
- size = -1;
- width = dot = 0;
- t_str = NULL;
- n_str = 0;
- value = NULL;
- argp = -1;
- loop_flags: /* LOOP FOR FLAGS, WIDTH, BASE, TYPE */
- switch ((fmt = *form++)) {
- case LEFTP: /* get the type which is enclosed in balanced () */
- t_str = form;
- for (v = 1;;) { // v tracks the nesting depth
- switch (*form++) {
- case 0: /* not balanceable, retract */
- form = t_str;
- t_str = NULL;
- n_str = 0;
- goto loop_flags;
- case LEFTP: /* increasing nested level */
- v += 1;
- continue;
- case RIGHTP: /* decreasing nested level */
- if ((v -= 1) != 0)
- continue;
- if (*t_str != '*')
- n_str = (form - 1) - t_str; // literal type string: length up to the matching RIGHTP
- else {
- t_str = _Sffmtintf(t_str + 1, &n);
- FP_SET(-1, argn);
- FMTSET(ft, form, LEFTP, 0, 0, 0, 0, 0, NULL, 0);
- n = ft->extf(&argv, ft); // type string starts with '*': ask extf to supply it
- if (n < 0)
- goto done;
- assert(ft->flags & SFFMT_VALUE);
- if ((t_str = argv.s) && (n_str = (int)ft->size) < 0)
- n_str = (ssize_t)strlen(t_str); // negative ft->size: measure the string instead
- }
- goto loop_flags;
- default:
- // skip over
- break;
- }
- }
- case '#': /* alternative format */
- flags |= SFFMT_ALTER;
- goto loop_flags;
- case '.': /* width & base */
- dot += 1; // number of dots seen so far; dot_set uses it to pick width or base
- if (gv_isdigit(*form)) {
- fmt = *form++;
- goto dot_size;
- } else if (*form == '*') {
- form = _Sffmtintf(form + 1, &n);
- n = FP_SET(-1, argn);
- FMTSET(ft, form, '.', dot, 0, 0, 0, 0, NULL, 0);
- if (ft->extf(&argv, ft) < 0)
- goto done;
- assert(ft->flags & SFFMT_VALUE);
- v = argv.i;
- if (v < 0)
- v = 0; // negative values supplied by extf are clamped to zero
- goto dot_set;
- } else
- goto loop_flags;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- dot_size:
- for (v = fmt - '0'; gv_isdigit(*form); ++form) // accumulate the decimal number
- v = v * 10 + (*form - '0');
- dot_set:
- if (dot == 0 || dot == 1) // no dot or one dot before the number: it is the width
- width = v;
- else if (dot == 2) // two dots before the number: it is the base
- base = v;
- goto loop_flags;
- case 'I': /* object size */
- size = 0;
- flags = (flags & ~SFFMT_TYPES) | SFFMT_IFLAG;
- if (gv_isdigit(*form)) { // explicit decimal size follows the 'I'
- for (n = *form; gv_isdigit(n); n = *++form)
- size = size * 10 + (n - '0');
- } else if (*form == '*') { // size is requested from extf
- form = _Sffmtintf(form + 1, &n);
- n = FP_SET(-1, argn);
- FMTSET(ft, form, 'I', sizeof(int), 0, 0, 0, 0, NULL, 0);
- if (ft->extf(&argv, ft) < 0)
- goto done;
- assert(ft->flags & SFFMT_VALUE);
- size = argv.i;
- }
- goto loop_flags;
- case 'l': // "l" or "ll"
- size = -1;
- flags &= ~SFFMT_TYPES;
- if (*form == 'l') {
- form += 1;
- flags |= SFFMT_LLONG;
- } else
- flags |= SFFMT_LONG;
- goto loop_flags;
- case 'h': // "h" or "hh"
- size = -1;
- flags &= ~SFFMT_TYPES;
- if (*form == 'h') {
- form += 1;
- flags |= SFFMT_SSHORT;
- } else
- flags |= SFFMT_SHORT;
- goto loop_flags;
- case 'L':
- size = -1;
- flags = (flags & ~SFFMT_TYPES) | SFFMT_LDOUBLE;
- goto loop_flags;
- case 'j':
- size = -1;
- flags = (flags & ~SFFMT_TYPES) | SFFMT_JFLAG;
- goto loop_flags;
- case 'z':
- size = -1;
- flags = (flags & ~SFFMT_TYPES) | SFFMT_ZFLAG;
- goto loop_flags;
- case 't':
- size = -1;
- flags = (flags & ~SFFMT_TYPES) | SFFMT_TFLAG;
- goto loop_flags;
- default: // continue with logic below
- break;
- }
- /* set object size */
- if (flags & (SFFMT_TYPES & ~SFFMT_IFLAG)) { // a type flag other than the 'I' flag is set
- if ((_Sftype[fmt] & (SFFMT_INT | SFFMT_UINT)) || fmt == 'n') {
- size = (flags & SFFMT_LLONG) ? (ssize_t)sizeof(long long) :
- (flags & SFFMT_LONG) ? (ssize_t)sizeof(long) :
- (flags & SFFMT_SHORT) ? (ssize_t)sizeof(short) :
- (flags & SFFMT_SSHORT) ? (ssize_t)sizeof(char) :
- (flags & SFFMT_JFLAG) ? (ssize_t)sizeof(long long) :
- (flags & SFFMT_TFLAG) ? (ssize_t)sizeof(ptrdiff_t) :
- (flags & SFFMT_ZFLAG) ? (ssize_t)sizeof(size_t) : -1;
- } else if (_Sftype[fmt] & SFFMT_FLOAT) {
- size = (flags & SFFMT_LDOUBLE) ? (ssize_t)sizeof(long double) :
- (flags & (SFFMT_LONG | SFFMT_LLONG)) ? (ssize_t)sizeof(double) : -1;
- }
- }
- argp = FP_SET(argp, argn);
- FMTSET(ft, form, fmt, size, flags, width, 0, base, t_str, n_str); // presumably publishes the parsed specification in ft for extf; confirm against FMTSET
- v = ft->extf(&argv, ft);
- if (v < 0)
- goto done;
- else if (v == 0) { // extf did not use input stream
- FMTGET(ft, form, fmt, size, flags, width, n, base); // presumably reloads these locals from ft in case extf changed them; confirm against FMTGET
- if ((ft->flags & SFFMT_VALUE) && !(ft->flags & SFFMT_SKIP))
- value = argv.vp; // destination supplied by extf
- } else { // v > 0: number of input bytes consumed
- n_input += v;
- if (!(ft->flags & SFFMT_SKIP))
- n_assign += 1;
- continue;
- }
- if (_Sftype[fmt] == 0) /* unknown pattern */
- continue;
- assert(!(!value && !(flags & SFFMT_SKIP))); // a destination is required unless SFFMT_SKIP is set
- if (fmt == 'n') { /* return length of consumed input */
- if (sizeof(long) > sizeof(int) && FMTCMP(size, long, long long))
- *((long *) value) = (long)n_input;
- else if (sizeof(short) < sizeof(int) && FMTCMP(size, short, long long))
- *((short *) value) = (short)n_input;
- else if (size == sizeof(char))
- *((char *) value) = (char)n_input;
- else
- *((int *) value) = (int)n_input;
- continue;
- }
- /* if get here, start scanning input */
- if (width == 0)
- width = fmt == 'c' ? 1 : MAXWIDTH; // no width given: one char for 'c', MAXWIDTH otherwise
- /* define the first input character */
- if (fmt == 'c' || fmt == '[')
- SFGETC(f, inp); // 'c' and '[' take the very next char, blank or not
- else {
- do {
- SFGETC(f, inp);
- }
- while (gv_isspace(inp)) // skip starting blanks
- ;
- }
- if (inp < 0)
- goto done;
- if (_Sftype[fmt] == SFFMT_FLOAT) {
- char *val;
- val = accept;
- if (width >= 0 && (size_t)width >= SF_MAXDIGITS)
- width = SF_MAXDIGITS - 1; // leave room in accept[] for the terminating NUL
- int exponent = 0;
- bool seen_dot = false;
- do { // copy the characters of the number into accept[]
- if (gv_isdigit(inp))
- *val++ = inp;
- else if (inp == '.') { /* too many dots */
- if (seen_dot)
- break;
- seen_dot = true;
- *val++ = '.';
- } else if (inp == 'e' || inp == 'E') { /* too many e,E */
- if (exponent++ > 0)
- break;
- *val++ = inp;
- if (--width <= 0 || SFGETC(f, inp) < 0 ||
- (inp != '-' && inp != '+' && !gv_isdigit(inp))) // the exponent must continue with a sign or a digit
- break;
- *val++ = inp;
- } else if (inp == '-' || inp == '+') { /* too many signs */
- if (val > accept) // a sign is accepted here only as the first character
- break;
- *val++ = inp;
- } else
- break;
- } while (--width > 0 && SFGETC(f, inp) >= 0);
- if (value) {
- *val = '\0';
- argv.d = strtod(accept, NULL); // NOTE(review): if no character was accepted the text is empty, yet n_assign is still incremented below; confirm this is intended
- n_assign += 1;
- if (FMTCMP(size, double, long double))
- *((double *) value) = argv.d;
- else
- *((float *) value) = (float) argv.d;
- }
- } else if (_Sftype[fmt] == SFFMT_UINT || fmt == 'p') {
- if (inp == '-') { // a leading '-' fails the match for these conversions
- SFUNGETC(f, inp);
- goto done;
- } else
- goto int_cvt;
- } else if (_Sftype[fmt] == SFFMT_INT) {
- int_cvt:
- if (inp == '-' || inp == '+') {
- if (inp == '-')
- flags |= SFFMT_MINUS;
- while (--width > 0 && SFGETC(f, inp) >= 0) // move past the sign and any whitespace after it
- if (!gv_isspace(inp))
- break;
- }
- if (inp < 0)
- goto done;
- if (fmt == 'o')
- base = 8;
- else if (fmt == 'x' || fmt == 'p')
- base = 16;
- else if (fmt == 'i' && inp == '0') { /* self-described data */
- base = 8;
- if (width > 1) { /* peek to see if it's a base-16 */
- if (SFGETC(f, inp) >= 0) {
- if (inp == 'x' || inp == 'X')
- base = 16;
- SFUNGETC(f, inp);
- }
- inp = '0'; // resume from the leading '0'; any peeked char was pushed back
- }
- }
- /* now convert */
- argv.lu = 0;
- if (base == 16) {
- sp = (char *) _Sfcv36; // indexed by input char; the entry is used as that char's digit value
- shift = 4;
- if (sp[inp] >= 16) {
- SFUNGETC(f, inp);
- goto done;
- }
- if (inp == '0' && --width > 0) { /* skip leading 0x or 0X */
- if (SFGETC(f, inp) >= 0 &&
- (inp == 'x' || inp == 'X') && --width > 0)
- SFGETC(f, inp);
- }
- if (inp >= 0 && sp[inp] < 16)
- goto base_shift; // enters the shift loop below with shift == 4 and base == 16
- } else if (base == 10) { /* fast base 10 conversion */
- if (inp < '0' || inp > '9') {
- SFUNGETC(f, inp);
- goto done;
- }
- do {
- argv.lu =
- (argv.lu << 3) + (argv.lu << 1) + (inp - '0'); // lu * 8 + lu * 2 == lu * 10
- } while (--width > 0 && SFGETC(f, inp) >= '0'
- && inp <= '9');
- if (fmt == 'i' && inp == '#' && !(flags & SFFMT_ALTER)) { // "base#digits": the decimal value just read is the base
- base = (int) argv.lu;
- if (base < 2 || base > SF_RADIX)
- goto done;
- argv.lu = 0;
- sp = base <= 36 ? (char *) _Sfcv36 : (char *) _Sfcv64;
- if (--width > 0 &&
- SFGETC(f, inp) >= 0 && sp[inp] < base)
- goto base_conv;
- }
- } else { /* other bases */
- sp = base <= 36 ? (char *) _Sfcv36 : (char *) _Sfcv64;
- if (base < 2 || base > SF_RADIX || sp[inp] >= base) {
- SFUNGETC(f, inp);
- goto done;
- }
- base_conv: /* check for power of 2 conversions */
- if ((base & ~(base - 1)) == base) {
- if (base < 8) // for bases 2, 4, 8, 16, 32 and 64 the chain below yields log2(base)
- shift = base < 4 ? 1 : 2;
- else if (base < 32)
- shift = base < 16 ? 3 : 4;
- else
- shift = base < 64 ? 5 : 6;
- base_shift:do {
- argv.lu = (argv.lu << shift) + sp[inp];
- } while (--width > 0 &&
- SFGETC(f, inp) >= 0 && sp[inp] < base);
- } else { // base is not a power of two: multiply instead of shifting
- do {
- argv.lu = (argv.lu * base) + sp[inp];
- } while (--width > 0 &&
- SFGETC(f, inp) >= 0 && sp[inp] < base);
- }
- }
- if (flags & SFFMT_MINUS)
- argv.ll = -argv.ll; // presumably ll and lu share storage in Argv_t, so this negates the value built in lu; confirm
- if (value) { // store through a pointer type chosen by fmt and size
- n_assign += 1;
- if (fmt == 'p') {
- *((void **) value) = (void *)(uintptr_t)argv.lu;
- } else if (sizeof(long) > sizeof(int) && FMTCMP(size, long, long long)) {
- if (fmt == 'd' || fmt == 'i')
- *((long *) value) = (long) argv.ll;
- else
- *((ulong *) value) = (ulong) argv.lu;
- } else if (sizeof(short) < sizeof(int) && FMTCMP(size, short, long long)) {
- if (fmt == 'd' || fmt == 'i')
- *((short *) value) = (short) argv.ll;
- else
- *((ushort *) value) = (ushort) argv.lu;
- } else if (size == sizeof(char)) {
- if (fmt == 'd' || fmt == 'i')
- *((char *) value) = (char) argv.ll;
- else
- *((uchar *) value) = (uchar) argv.lu;
- } else {
- if (fmt == 'd' || fmt == 'i')
- *((int *) value) = (int) argv.ll;
- else
- *((unsigned*)value) = (unsigned)argv.lu;
- }
- }
- } else if (fmt == 's' || fmt == 'c' || fmt == '[') {
- if (size < 0)
- size = MAXWIDTH; // no explicit size: destination capacity is taken as MAXWIDTH
- if (value) {
- argv.s = (char *) value;
- if (fmt != 'c')
- size -= 1; // keep one byte for the terminating NUL
- } else
- size = 0; // no destination: consume input without storing
- n = 0;
- if (fmt == 's') {
- do {
- if (gv_isspace(inp))
- break;
- if ((n += 1) <= size) // every char is counted, but only the first `size` are stored
- *argv.s++ = inp;
- } while (--width > 0 && SFGETC(f, inp) >= 0);
- } else if (fmt == 'c') {
- do {
- if ((n += 1) <= size)
- *argv.s++ = inp;
- } while (--width > 0 && SFGETC(f, inp) >= 0);
- } else { /* if(fmt == '[') */
- bool accepted[UCHAR_MAX + 1];
- form = (const char*)setclass((const unsigned char*)form, accepted); // parse the scan set and move form past it
- do {
- if (!accepted[inp]) {
- if (n > 0 || (flags & SFFMT_ALTER)) // something matched already, or '#' was given: just stop
- break;
- else {
- SFUNGETC(f, inp); // nothing matched: fail the scan
- goto done;
- }
- }
- if ((n += 1) <= size)
- *argv.s++ = inp;
- } while (--width > 0 && SFGETC(f, inp) >= 0);
- }
- if (value && (n > 0 || fmt == '[')) {
- n_assign += 1;
- if (fmt != 'c' && size >= 0)
- *argv.s = '\0';
- }
- }
- if (width > 0 && inp >= 0)
- SFUNGETC(f, inp); // scan ended before the width limit and not on a failed read: push the lookahead char back
- }
- done:
- if (n_assign == 0 && inp < 0) // nothing assigned and the last read failed (or none was attempted)
- n_assign = -1;
- return n_assign;
- }
|