Graphviz 14.1.3~dev.20260126.0926
Loading...
Searching...
No Matches
sfvscanf.c
Go to the documentation of this file.
/*************************************************************************
 * Copyright (c) 2011 AT&T Intellectual Property
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors: Details at https://graphviz.org
 *************************************************************************/

#include "config.h"

#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <sfio/sfhdr.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <util/gv_ctype.h>

/* The main engine for reading formatted data
**
** Written by Kiem-Phong Vo.
*/

/* Field width used when a conversion specifies none: effectively unbounded. */
#define MAXWIDTH INT_MAX // max amount to scan
28
/**
 * Parse the body of a `%[...]` scan set and build its membership table.
 *
 * `form` points just past the opening '['. A leading '^' complements the
 * set. A ']' or '-' appearing as the first set member is taken literally.
 * `a-z` denotes an interval; a '-' that is last in the set, or whose left
 * neighbour is greater than its right neighbour, is taken literally.
 *
 * @param form   scan-set text, positioned after the '['
 * @param accept table of UCHAR_MAX + 1 entries; on return accept[c] tells
 *               whether byte value c belongs to the set
 * @return pointer just past the closing ']', or to the terminating NUL if
 *         the set is not closed
 */
static const unsigned char *setclass(const unsigned char *form, bool *accept) {
    int fmt, c;
    bool yes;

    if ((fmt = *form++) == '^') { /* we want the complement of this set */
        yes = false;
        fmt = *form++;
    } else {
        yes = true;
    }

    /* start with every byte on the "not listed" side */
    for (c = 0; c <= UCHAR_MAX; ++c)
        accept[c] = !yes;

    if (fmt == ']' || fmt == '-') { /* special first char */
        accept[fmt] = yes;
        fmt = *form++;
    }

    for (; fmt != ']'; fmt = *form++) {
        if (!fmt) /* unterminated set: stop at the NUL */
            return form - 1;

        if (fmt != '-' || form[0] == ']' || form[-2] > form[0]) {
            /* ordinary member, or a '-' that cannot form an interval */
            accept[fmt] = yes;
        } else {
            /* interval: the two end points are marked by their own
             * iterations, so only the interior is filled in here */
            for (c = form[-2] + 1; c < form[0]; ++c)
                accept[c] = yes;
        }
    }

    return form;
}
65
70int sfvscanf(FILE *f, Sffmt_t *ft) {
71 int inp, shift, base, width;
72 ssize_t size;
73 int fmt, flags, dot, n_assign, v, n_input;
74 char *sp;
75 char accept[SF_MAXDIGITS];
76
77 Argv_t argv;
78
79 void *value; /* location to assign scanned value */
80 const char *t_str;
81 ssize_t n_str;
82
83#define SFGETC(f,c) (((c) = getc(f)) < 0 ? c : (++n_input, c))
84#define SFUNGETC(f,c) do { \
85 ungetc((c), (f)); \
86 --n_input; \
87} while (0)
88
89 assert(f != NULL);
90
91 n_assign = n_input = 0;
92
93 inp = -1;
94
95 const char *form;
96 argv.ft = ft;
97
98 form = argv.ft->form;
99
100 assert(ft != NULL && ft->extf != NULL);
101
102 loop_fmt:
103 while ((fmt = *form++)) {
104 if (fmt != '%') {
105 if (gv_isspace(fmt)) {
106 if (fmt != '\n')
107 fmt = -1;
108 for (;;) {
109 if (SFGETC(f, inp) < 0 || inp == fmt)
110 goto loop_fmt;
111 else if (!gv_isspace(inp)) {
112 SFUNGETC(f, inp);
113 goto loop_fmt;
114 }
115 }
116 } else {
117 match_1:
118 if (SFGETC(f, inp) != fmt) {
119 if (inp >= 0)
120 SFUNGETC(f, inp);
121 goto done;
122 }
123 }
124 continue;
125 }
126
127 if (*form == '%') {
128 form += 1;
129 goto match_1;
130 }
131
132 if (*form == '\0')
133 goto done;
134
135 if (*form == '*') {
137 form += 1;
138 } else
139 flags = 0;
140
141 /* matching some pattern */
142 base = 10;
143 size = -1;
144 width = dot = 0;
145 t_str = NULL;
146 n_str = 0;
147 value = NULL;
148
149 loop_flags: /* LOOP FOR FLAGS, WIDTH, BASE, TYPE */
150 switch ((fmt = *form++)) {
151 case LEFTP: /* get the type which is enclosed in balanced () */
152 t_str = form;
153 for (v = 1;;) {
154 switch (*form++) {
155 case 0: /* not balanceable, retract */
156 form = t_str;
157 t_str = NULL;
158 n_str = 0;
159 goto loop_flags;
160 case LEFTP: /* increasing nested level */
161 v += 1;
162 continue;
163 case RIGHTP: /* decreasing nested level */
164 if ((v -= 1) != 0)
165 continue;
166 if (*t_str != '*')
167 n_str = (form - 1) - t_str;
168 else {
169
170 FMTSET(ft, form, LEFTP, 0, 0, 0, 0, 0, NULL, 0);
171 if (ft->extf(&argv, ft) < 0)
172 goto done;
173 assert(ft->flags & SFFMT_VALUE);
174 if ((t_str = argv.s) && (n_str = (int)ft->size) < 0)
175 n_str = (ssize_t)strlen(t_str);
176 }
177 goto loop_flags;
178 default:
179 // skip over
180 break;
181 }
182 }
183
184 case '#': /* alternative format */
186 goto loop_flags;
187
188 case '.': /* width & base */
189 dot += 1;
190 if (gv_isdigit(*form)) {
191 fmt = *form++;
192 goto dot_size;
193 } else if (*form == '*') {
194 form = _Sffmtintf(form + 1);
195
196 FMTSET(ft, form, '.', dot, 0, 0, 0, 0, NULL, 0);
197 if (ft->extf(&argv, ft) < 0)
198 goto done;
199 assert(ft->flags & SFFMT_VALUE);
200 v = argv.i;
201 if (v < 0)
202 v = 0;
203 goto dot_set;
204 } else
205 goto loop_flags;
206
207 case '0':
208 case '1':
209 case '2':
210 case '3':
211 case '4':
212 case '5':
213 case '6':
214 case '7':
215 case '8':
216 case '9':
217 dot_size:
218 for (v = fmt - '0'; gv_isdigit(*form); ++form)
219 v = v * 10 + (*form - '0');
220
221 dot_set:
222 if (dot == 0 || dot == 1)
223 width = v;
224 else if (dot == 2)
225 base = v;
226 goto loop_flags;
227
228 case 'I': /* object size */
229 size = 0;
230 flags = (flags & ~SFFMT_TYPES) | SFFMT_IFLAG;
231 if (gv_isdigit(*form)) {
232 for (; gv_isdigit(*form); ++form)
233 size = size * 10 + (*form - '0');
234 } else if (*form == '*') {
235 form = _Sffmtintf(form + 1);
236
237 FMTSET(ft, form, 'I', sizeof(int), 0, 0, 0, 0, NULL, 0);
238 if (ft->extf(&argv, ft) < 0)
239 goto done;
240 assert(ft->flags & SFFMT_VALUE);
241 size = argv.i;
242 }
243 goto loop_flags;
244
245 case 'l':
246 size = -1;
247 flags &= ~SFFMT_TYPES;
248 if (*form == 'l') {
249 form += 1;
251 } else
252 flags |= SFFMT_LONG;
253 goto loop_flags;
254 case 'h':
255 size = -1;
256 flags &= ~SFFMT_TYPES;
257 if (*form == 'h') {
258 form += 1;
260 } else
262 goto loop_flags;
263 case 'L':
264 size = -1;
265 flags = (flags & ~SFFMT_TYPES) | SFFMT_LDOUBLE;
266 goto loop_flags;
267 case 'j':
268 size = -1;
269 flags = (flags & ~SFFMT_TYPES) | SFFMT_JFLAG;
270 goto loop_flags;
271 case 'z':
272 size = -1;
273 flags = (flags & ~SFFMT_TYPES) | SFFMT_ZFLAG;
274 goto loop_flags;
275 case 't':
276 size = -1;
277 flags = (flags & ~SFFMT_TYPES) | SFFMT_TFLAG;
278 goto loop_flags;
279 default: // continue with logic below
280 break;
281 }
282
283 /* set object size */
284 if (flags & (SFFMT_TYPES & ~SFFMT_IFLAG)) {
285 if ((_Sftype[fmt] & (SFFMT_INT | SFFMT_UINT)) || fmt == 'n') {
286 size = (flags & SFFMT_LLONG) ? (ssize_t)sizeof(long long) :
287 (flags & SFFMT_LONG) ? (ssize_t)sizeof(long) :
288 (flags & SFFMT_SHORT) ? (ssize_t)sizeof(short) :
289 (flags & SFFMT_SSHORT) ? (ssize_t)sizeof(char) :
290 (flags & SFFMT_JFLAG) ? (ssize_t)sizeof(long long) :
291 (flags & SFFMT_TFLAG) ? (ssize_t)sizeof(ptrdiff_t) :
292 (flags & SFFMT_ZFLAG) ? (ssize_t)sizeof(size_t) : -1;
293 } else if (_Sftype[fmt] & SFFMT_FLOAT) {
294 size = (flags & SFFMT_LDOUBLE) ? (ssize_t)sizeof(long double) :
295 (flags & (SFFMT_LONG | SFFMT_LLONG)) ? (ssize_t)sizeof(double) : -1;
296 }
297 }
298
299 FMTSET(ft, form, fmt, size, flags, width, 0, base, t_str, n_str);
300 v = ft->extf(&argv, ft);
301
302 if (v < 0)
303 goto done;
304 else if (v == 0) { // extf did not use input stream
305 FMTGET(ft, form, fmt, size, flags, width, (int){0}, base);
306 if ((ft->flags & SFFMT_VALUE) && !(ft->flags & SFFMT_SKIP))
307 value = argv.vp;
308 } else { // v > 0: number of input bytes consumed
309 n_input += v;
310 if (!(ft->flags & SFFMT_SKIP))
311 n_assign += 1;
312 continue;
313 }
314
315 if (_Sftype[fmt] == 0) /* unknown pattern */
316 continue;
317
318 assert(!(!value && !(flags & SFFMT_SKIP)));
319
320 if (fmt == 'n') { /* return length of consumed input */
321 if (sizeof(long) > sizeof(int) && FMTCMP(size, long, long long))
322 *((long *) value) = (long)n_input;
323 else if (sizeof(short) < sizeof(int) && FMTCMP(size, short, long long))
324 *((short *) value) = (short)n_input;
325 else if (size == sizeof(char))
326 *((char *) value) = (char)n_input;
327 else
328 *((int *) value) = (int)n_input;
329 continue;
330 }
331
332 /* if get here, start scanning input */
333 if (width == 0)
334 width = fmt == 'c' ? 1 : MAXWIDTH;
335
336 /* define the first input character */
337 if (fmt == 'c' || fmt == '[')
338 SFGETC(f, inp);
339 else {
340 do {
341 SFGETC(f, inp);
342 }
343 while (gv_isspace(inp)) // skip starting blanks
344 ;
345 }
346 if (inp < 0)
347 goto done;
348
349 if (_Sftype[fmt] == SFFMT_FLOAT) {
350 char *val;
351
352 val = accept;
353 if (width >= 0 && (size_t)width >= SF_MAXDIGITS)
354 width = SF_MAXDIGITS - 1;
355 int exponent = 0;
356 bool seen_dot = false;
357 do {
358 if (gv_isdigit(inp))
359 *val++ = inp;
360 else if (inp == '.') { /* too many dots */
361 if (seen_dot)
362 break;
363 seen_dot = true;
364 *val++ = '.';
365 } else if (inp == 'e' || inp == 'E') { /* too many e,E */
366 if (exponent++ > 0)
367 break;
368 *val++ = inp;
369 if (--width <= 0 || SFGETC(f, inp) < 0 ||
370 (inp != '-' && inp != '+' && !gv_isdigit(inp)))
371 break;
372 *val++ = inp;
373 } else if (inp == '-' || inp == '+') { /* too many signs */
374 if (val > accept)
375 break;
376 *val++ = inp;
377 } else
378 break;
379
380 } while (--width > 0 && SFGETC(f, inp) >= 0);
381
382 if (value) {
383 *val = '\0';
384 argv.d = strtod(accept, NULL);
385
386 n_assign += 1;
387 if (FMTCMP(size, double, long double))
388 *((double *) value) = argv.d;
389 else
390 *((float *) value) = (float) argv.d;
391 }
392 } else if (_Sftype[fmt] == SFFMT_UINT || fmt == 'p') {
393 if (inp == '-') {
394 SFUNGETC(f, inp);
395 goto done;
396 } else
397 goto int_cvt;
398 } else if (_Sftype[fmt] == SFFMT_INT) {
399 int_cvt:
400 if (inp == '-' || inp == '+') {
401 if (inp == '-')
403 while (--width > 0 && SFGETC(f, inp) >= 0)
404 if (!gv_isspace(inp))
405 break;
406 }
407 if (inp < 0)
408 goto done;
409
410 if (fmt == 'o')
411 base = 8;
412 else if (fmt == 'x' || fmt == 'p')
413 base = 16;
414 else if (fmt == 'i' && inp == '0') { /* self-described data */
415 base = 8;
416 if (width > 1) { /* peek to see if it's a base-16 */
417 if (SFGETC(f, inp) >= 0) {
418 if (inp == 'x' || inp == 'X')
419 base = 16;
420 SFUNGETC(f, inp);
421 }
422 inp = '0';
423 }
424 }
425
426 /* now convert */
427 argv.lu = 0;
428 if (base == 16) {
429 sp = (char *) _Sfcv36;
430 shift = 4;
431 if (sp[inp] >= 16) {
432 SFUNGETC(f, inp);
433 goto done;
434 }
435 if (inp == '0' && --width > 0) { /* skip leading 0x or 0X */
436 if (SFGETC(f, inp) >= 0 &&
437 (inp == 'x' || inp == 'X') && --width > 0)
438 SFGETC(f, inp);
439 }
440 if (inp >= 0 && sp[inp] < 16)
441 goto base_shift;
442 } else if (base == 10) { /* fast base 10 conversion */
443 if (inp < '0' || inp > '9') {
444 SFUNGETC(f, inp);
445 goto done;
446 }
447
448 do {
449 argv.lu =
450 (argv.lu << 3) + (argv.lu << 1) + (inp - '0');
451 } while (--width > 0 && SFGETC(f, inp) >= '0'
452 && inp <= '9');
453
454 if (fmt == 'i' && inp == '#' && !(flags & SFFMT_ALTER)) {
455 base = (int) argv.lu;
456 if (base < 2 || base > SF_RADIX)
457 goto done;
458 argv.lu = 0;
459 sp = base <= 36 ? (char *) _Sfcv36 : (char *) _Sfcv64;
460 if (--width > 0 &&
461 SFGETC(f, inp) >= 0 && sp[inp] < base)
462 goto base_conv;
463 }
464 } else { /* other bases */
465 sp = base <= 36 ? (char *) _Sfcv36 : (char *) _Sfcv64;
466 if (base < 2 || base > SF_RADIX || sp[inp] >= base) {
467 SFUNGETC(f, inp);
468 goto done;
469 }
470
471 base_conv: /* check for power of 2 conversions */
472 if ((base & ~(base - 1)) == base) {
473 if (base < 8)
474 shift = base < 4 ? 1 : 2;
475 else if (base < 32)
476 shift = base < 16 ? 3 : 4;
477 else
478 shift = base < 64 ? 5 : 6;
479
480 base_shift:do {
481 argv.lu = (argv.lu << shift) + sp[inp];
482 } while (--width > 0 &&
483 SFGETC(f, inp) >= 0 && sp[inp] < base);
484 } else {
485 do {
486 argv.lu = (argv.lu * base) + sp[inp];
487 } while (--width > 0 &&
488 SFGETC(f, inp) >= 0 && sp[inp] < base);
489 }
490 }
491
492 if (flags & SFFMT_MINUS)
493 argv.ll = -argv.ll;
494
495 if (value) {
496 n_assign += 1;
497
498 if (fmt == 'p') {
499 *((void **) value) = (void *)(uintptr_t)argv.lu;
500 } else if (sizeof(long) > sizeof(int) && FMTCMP(size, long, long long)) {
501 if (fmt == 'd' || fmt == 'i')
502 *((long *) value) = (long) argv.ll;
503 else
504 *((ulong *) value) = (ulong) argv.lu;
505 } else if (sizeof(short) < sizeof(int) && FMTCMP(size, short, long long)) {
506 if (fmt == 'd' || fmt == 'i')
507 *((short *) value) = (short) argv.ll;
508 else
509 *((ushort *) value) = (ushort) argv.lu;
510 } else if (size == sizeof(char)) {
511 if (fmt == 'd' || fmt == 'i')
512 *((char *) value) = (char) argv.ll;
513 else
514 *((uchar *) value) = (uchar) argv.lu;
515 } else {
516 if (fmt == 'd' || fmt == 'i')
517 *((int *) value) = (int) argv.ll;
518 else
519 *((unsigned*)value) = (unsigned)argv.lu;
520 }
521 }
522 } else if (fmt == 's' || fmt == 'c' || fmt == '[') {
523 if (size < 0)
524 size = MAXWIDTH;
525 if (value) {
526 argv.s = value;
527 if (fmt != 'c')
528 size -= 1;
529 } else
530 size = 0;
531
532 int n = 0;
533 if (fmt == 's') {
534 do {
535 if (gv_isspace(inp))
536 break;
537 if ((n += 1) <= size)
538 *argv.s++ = inp;
539 } while (--width > 0 && SFGETC(f, inp) >= 0);
540 } else if (fmt == 'c') {
541 do {
542 if ((n += 1) <= size)
543 *argv.s++ = inp;
544 } while (--width > 0 && SFGETC(f, inp) >= 0);
545 } else { /* if(fmt == '[') */
546 bool accepted[UCHAR_MAX + 1];
547 form = (const char*)setclass((const unsigned char*)form, accepted);
548 do {
549 if (!accepted[inp]) {
550 if (n > 0 || (flags & SFFMT_ALTER))
551 break;
552 else {
553 SFUNGETC(f, inp);
554 goto done;
555 }
556 }
557 if ((n += 1) <= size)
558 *argv.s++ = inp;
559 } while (--width > 0 && SFGETC(f, inp) >= 0);
560 }
561
562 if (value && (n > 0 || fmt == '[')) {
563 n_assign += 1;
564 if (fmt != 'c' && size >= 0)
565 *argv.s = '\0';
566 }
567 }
568
569 if (width > 0 && inp >= 0)
570 SFUNGETC(f, inp);
571 }
572
573 done:
574
575 if (n_assign == 0 && inp < 0)
576 n_assign = -1;
577
578 return n_assign;
579}
static int flags
Definition gc.c:63
#define dot(v, w)
Definition geom.c:191
node NULL
Definition grammar.y:181
replacements for ctype.h functions
static bool gv_isdigit(int c)
Definition gv_ctype.h:41
static bool gv_isspace(int c)
Definition gv_ctype.h:55
#define RIGHTP
Definition sfhdr.h:83
#define FMTCMP(sz, type, maxtype)
Definition sfhdr.h:96
#define LEFTP
Definition sfhdr.h:82
#define FMTGET(ft, frm, fv, sz, flgs, wid, pr, bs)
Definition sfhdr.h:92
#define _Sfcv36
Definition sfhdr.h:130
#define SF_MAXDIGITS
Definition sfhdr.h:122
#define _Sfcv64
Definition sfhdr.h:131
#define _Sftype
Definition sfhdr.h:132
#define ushort
Definition sfhdr.h:47
#define ulong
Definition sfhdr.h:44
#define _Sffmtintf
Definition sfhdr.h:129
#define SF_RADIX
Definition sfhdr.h:116
#define SFFMT_TYPES
Definition sfhdr.h:104
#define uchar
Definition sfhdr.h:41
#define FMTSET(ft, frm, fv, sz, flgs, wid, pr, bs, ts, ns)
Definition sfhdr.h:86
#define SFFMT_MINUS
Definition sfhdr.h:102
#define SFFMT_INT
Definition sfhdr.h:109
#define SFFMT_UINT
Definition sfhdr.h:110
#define SFFMT_FLOAT
Definition sfhdr.h:111
#define SFFMT_ZFLAG
Definition sfio.h:49
#define SFFMT_ALTER
Definition sfio.h:55
#define SFFMT_LONG
Definition sfio.h:59
#define SFFMT_JFLAG
Definition sfio.h:65
#define SFFMT_SSHORT
Definition sfio.h:47
#define SFFMT_SHORT
Definition sfio.h:58
#define SFFMT_IFLAG
Definition sfio.h:64
#define SFFMT_LDOUBLE
Definition sfio.h:61
#define SFFMT_VALUE
Definition sfio.h:62
#define SFFMT_TFLAG
Definition sfio.h:48
#define SFFMT_SKIP
Definition sfio.h:57
#define SFFMT_LLONG
Definition sfio.h:60
static const unsigned char * setclass(const unsigned char *form, bool *accept)
Definition sfvscanf.c:33
#define SFUNGETC(f, c)
#define MAXWIDTH
Definition sfvscanf.c:27
int sfvscanf(FILE *f, Sffmt_t *ft)
Definition sfvscanf.c:70
#define SFGETC(f, c)
char * form
Definition sfio.h:34
int flags
Definition sfio.h:38
ssize_t size
Definition sfio.h:37
Sffmtext_f extf
Definition sfio.h:32
Definition sfhdr.h:65
unsigned long long lu
Definition sfhdr.h:73
Sffmt_t * ft
Definition sfhdr.h:79
void * vp
Definition sfhdr.h:78
long long ll
Definition sfhdr.h:72
double d
Definition sfhdr.h:75
char * s
Definition sfhdr.h:77
int i
Definition sfhdr.h:66