Graphviz 13.0.0~dev.20250607.1528
Loading...
Searching...
No Matches
scan.l
Go to the documentation of this file.
1
5/*************************************************************************
6 * Copyright (c) 2011 AT&T Intellectual Property
7 * All rights reserved. This program and the accompanying materials
8 * are made available under the terms of the Eclipse Public License v1.0
9 * which accompanies this distribution, and is available at
10 * https://www.eclipse.org/legal/epl-v10.html
11 *
12 * Contributors: Details at https://graphviz.org
13 *************************************************************************/
14
15
16/* requires flex (i.e. not lex) */
17
18 /* By default, Flex emits a lexer using symbols prefixed with "yy". Graphviz
19 * contains multiple Flex-generated lexers, so we alter this prefix to avoid
20 * symbol clashes.
21 */
22%option prefix="aag"
23
24 /* Generate a reentrant parser with no global variables.
25 * All state will be contained in an opaque structure.
26 */
27%option reentrant bison-bridge
28
29 /* The generated structure will have space for user data.
30 */
31%option extra-type="struct aagextra_s *"
32
33 /* Avoid generating an unused input function. See
34 https://westes.github.io/flex/manual/Scanner-Options.html
35 */
36%option noinput
37
38%{
#include <assert.h>
#include <errno.h>
#include <grammar.h>
#include <cgraph/cghdr.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <util/agxbuf.h>
#include <util/gv_ctype.h>
#include <util/startswith.h>
48// #define YY_BUF_SIZE 128000
49#define GRAPH_EOF_TOKEN '@' /* lex class must be defined below */
50 /* this is a workaround for linux flex */
51
52#define YYSTYPE AAGSTYPE
53
/* By default, Flex calls isatty() to determine whether the input it is
 * scanning is coming from the user typing or from a file. However, our input
 * is being provided by Graphviz' I/O channel mechanism, which does not have a
 * valid file descriptor that supports isatty().
 *
 * Every isatty() call in the generated scanner is therefore replaced by a read
 * of the variable below. It is never assigned in this file, so it keeps its
 * static-storage initial value of 0 ("not a terminal").
 */
#define isatty(x) gv_isatty_suppression
int gv_isatty_suppression;
61
62static int read_input(aagscan_t yyscanner, char *buf, int max_size);
63
64#ifndef YY_INPUT
65#define YY_INPUT(buf,result,max_size) \
66 if ((result = read_input(yyscanner, buf, max_size)) < 0) \
67 YY_FATAL_ERROR( "input in flex scanner failed" )
68#endif
69
70/* buffer for arbitrary length strings (longer than BUFSIZ) */
71
72static void beginstr(aagscan_t yyscanner);
73static void addstr(aagscan_t yyscanner, char *src);
74static void endstr(aagscan_t yyscanner);
75static void endstr_html(aagscan_t yyscanner);
76static void storeFileName(aagscan_t yyscanner, const char* fname, size_t len);
77
78/* ppDirective:
79 * Process a possible preprocessor line directive.
80 * aagtext = #.*
81 */
82static void ppDirective (aagscan_t yyscanner);
83
84/* twoDots:
85 * Return true if token has more than one '.';
86 * we know the last character is a '.'.
87 */
88static bool twoDots(aagscan_t yyscanner);
89
90/* chkNum:
91 * The regexp for NUMBER allows a terminating letter or '.'.
92 * This way we can catch a number immediately followed by a name
93 * or something like 123.456.78, and report this to the user.
94 */
95static int chkNum(aagscan_t yyscanner);
96
97/* The LETTER class below consists of ascii letters, underscore, all non-ascii
98 * characters. This allows identifiers to have characters from any
99 * character set independent of locale. The downside is that, for certain
100 * character sets, non-letter and, in fact, undefined characters will be
101 * accepted. This is not likely and, from dot's stand, shouldn't do any
102 * harm. (Presumably undefined characters will be ignored in display.) And,
103 * it allows a greater wealth of names. */
cgraph.h additions
void * aagscan_t
Definition cghdr.h:107
static char * fname
static double len(glCompPoint p)
Definition glutils.c:150
static void endstr(void)
Definition gmlscan.c:876
static void beginstr(void)
Definition gmlscan.c:868
replacements for ctype.h functions
static void storeFileName(aagscan_t yyscanner, const char *fname, size_t len)
Definition scan.l:236
static bool twoDots(aagscan_t yyscanner)
Definition scan.l:277
static void addstr(aagscan_t yyscanner, char *src)
Definition scan.l:221
static int read_input(aagscan_t yyscanner, char *buf, int max_size)
Definition scan.l:308
static int chkNum(aagscan_t yyscanner)
Definition scan.l:288
static void endstr_html(aagscan_t yyscanner)
Definition scan.l:231
int gv_isatty_suppression
Definition scan.l:60
static void ppDirective(aagscan_t yyscanner)
Definition scan.l:247
104%}
105GRAPH_EOF_TOKEN [@]
106LETTER [A-Za-z_\200-\377]
107DIGIT [0-9]
108NAME {LETTER}({LETTER}|{DIGIT})*
109NUMBER [-]?(({DIGIT}+(\.{DIGIT}*)?)|(\.{DIGIT}+))(\.|{LETTER})?
110ID ({NAME}|{NUMBER})
111%x comment
112%x qstring
113%x hstring
115{GRAPH_EOF_TOKEN} return(EOF);
116<INITIAL,comment>\n yyextra->line_num++;
#define yyextra
Definition scan.c:346
117"/*" BEGIN(comment);
#define BEGIN
Definition gmlscan.c:377
#define comment
Definition scan.c:830
118<comment>[^*\n]* /* eat anything not a '*' */
119<comment>"*"+[^*/\n]* /* eat up '*'s not followed by '/'s */
120<comment>"*"+"/" BEGIN(INITIAL);
#define INITIAL
Definition gmlscan.c:885
121"//".* /* ignore C++-style comments */
122^"#".* ppDirective (yyscanner);
123"#".* /* ignore shell-like comments */
124[ \t\r] /* ignore whitespace */
125"\xEF\xBB\xBF" /* ignore BOM */
126"node" return(T_node); /* see tokens in agcanonstr */
#define T_node
Definition grammar.h:113
127"edge" return(T_edge);
#define T_edge
Definition grammar.h:114
128"graph" if (!yyextra->graphType) yyextra->graphType = T_graph; return(T_graph);
#define T_graph
Definition grammar.h:112
129"digraph" if (!yyextra->graphType) yyextra->graphType = T_digraph; return(T_digraph);
#define T_digraph
Definition grammar.h:115
130"strict" return(T_strict);
#define T_strict
Definition grammar.h:117
131"subgraph" return(T_subgraph);
#define T_subgraph
Definition grammar.h:116
132"->" if (yyextra->graphType == T_digraph) return(T_edgeop); else return('-');
#define T_edgeop
Definition grammar.h:118
133"--" if (yyextra->graphType == T_graph) return(T_edgeop); else return('-');
134{NAME} { yylval->str = agstrdup(yyextra->G,aagget_text(yyscanner)); return(T_atom); }
#define yylval
Definition gmlparse.c:74
#define T_atom
Definition grammar.h:121
char * agstrdup(Agraph_t *, const char *)
returns a pointer to a reference-counted copy of the argument string, creating one if necessary
Definition refstr.c:399
135{NUMBER} { if (chkNum(yyscanner)) yyless(aagget_leng(yyscanner)-1); yylval->str = agstrdup(yyextra->G,aagget_text(yyscanner)); return(T_atom); }
#define yyless(n)
Definition gmlscan.c:429
136["] BEGIN(qstring); beginstr(yyscanner);
#define qstring
Definition gmlscan.c:886
137<qstring>["] BEGIN(INITIAL); endstr(yyscanner); return (T_qatom);
#define T_qatom
Definition grammar.h:122
138<qstring>[\\]["] addstr (yyscanner,"\"");
139<qstring>[\\][\\] addstr (yyscanner,"\\\\");
140<qstring>[\\][\n] yyextra->line_num++; /* ignore escaped newlines */
141<qstring>[\n] addstr (yyscanner,"\n"); yyextra->line_num++;
142<qstring>([^"\\\n]*|[\\]) addstr(yyscanner,aagget_text(yyscanner));
143[<] BEGIN(hstring); yyextra->html_nest = 1; beginstr(yyscanner);
#define hstring
Definition scan.c:832
144<hstring>[>] yyextra->html_nest--; if (yyextra->html_nest) addstr(yyscanner,aagget_text(yyscanner)); else {BEGIN(INITIAL); endstr_html(yyscanner); return (T_qatom);}
145<hstring>[<] yyextra->html_nest++; addstr(yyscanner,aagget_text(yyscanner));
146<hstring>[\n] addstr(yyscanner,aagget_text(yyscanner)); yyextra->line_num++; /* add newlines */
147<hstring>([^><\n]*) addstr(yyscanner,aagget_text(yyscanner));
148. return aagget_text(yyscanner)[0];
149%%
150
151void aagerror(aagscan_t yyscanner, const char *str)
152{
153 /* for YYSTATE macro */
154 struct yyguts_t *yyg = yyscanner;
155
156 agxbuf xb = {0};
157 if (yyextra->InputFile) {
158 agxbprint (&xb, "%s: ", yyextra->InputFile);
159 }
160 agxbprint (&xb, "%s in line %d", str, yyextra->line_num);
161 if (*aagget_text(yyscanner)) {
162 agxbprint(&xb, " near '%s'", aagget_text(yyscanner));
163 }
164 else switch (YYSTATE) {
165 case qstring: {
166 agxbprint(&xb, " scanning a quoted string (missing endquote? longer than %d?)", YY_BUF_SIZE);
167 if (agxblen(&yyextra->Sbuf) > 0) {
168 agxbprint(&xb, "\nString starting:\"%.80s", agxbuse(&yyextra->Sbuf));
169 }
170 break;
171 }
172 case hstring: {
173 agxbprint(&xb, " scanning a HTML string (missing '>'? bad nesting? longer than %d?)", YY_BUF_SIZE);
174 if (agxblen(&yyextra->Sbuf) > 0) {
175 agxbprint(&xb, "\nString starting:<%.80s", agxbuse(&yyextra->Sbuf));
176 }
177 break;
178 }
179 case comment :
180 agxbprint(&xb, " scanning a /*...*/ comment (missing '*/? longer than %d?)", YY_BUF_SIZE);
181 break;
182 default: // nothing extra to note
183 break;
184 }
185 agxbputc (&xb, '\n');
186 agerrorf("%s", agxbuse(&xb));
187 agxbfree(&xb);
188 BEGIN(INITIAL);
189}
190/* must be here to see flex's macro defns */
191void aglexeof(aagscan_t yyscanner) {
192 struct yyguts_t *yyg = yyscanner;
194}
195
196void aglexbad(aagscan_t yyscanner) {
197 struct yyguts_t *yyg = yyscanner;
199}
200
201#ifndef YY_CALL_ONLY_ARG
202# define YY_CALL_ONLY_ARG aagscan_t yyscanner
203#endif
204
206{
207 (void)yyscanner;
208 return 1;
209}
210
211static void beginstr(aagscan_t yyscanner) {
212 aagextra_t *ctx = aagget_extra(yyscanner);
213 // avoid unused variable warning in case assert() is compiled away.
214 (void)ctx;
215 // nothing required, but we should not have pending string data
216 assert(agxblen(&ctx->Sbuf) == 0 &&
217 "pending string data that was not consumed (missing "
218 "endstr()/endhtmlstr()?)");
219}
220
221static void addstr(aagscan_t yyscanner, char *src) {
222 aagextra_t *ctx = aagget_extra(yyscanner);
223 agxbput(&ctx->Sbuf, src);
224}
225
226static void endstr(aagscan_t yyscanner) {
227 aagextra_t *ctx = aagget_extra(yyscanner);
228 aagget_lval(yyscanner)->str = agstrdup(ctx->G, agxbuse(&ctx->Sbuf));
229}
230
231static void endstr_html(aagscan_t yyscanner) {
232 aagextra_t *ctx = aagget_extra(yyscanner);
233 aagget_lval(yyscanner)->str = agstrdup_html(ctx->G, agxbuse(&ctx->Sbuf));
234}
235
236static void storeFileName(aagscan_t yyscanner, const char* fname, size_t len) {
237 aagextra_t *ctx = aagget_extra(yyscanner);
238 agxbuf *buffer = &ctx->InputFileBuffer;
239 agxbput_n(buffer, fname, len);
240 ctx->InputFile = agxbuse(buffer);
241}
242
243/* ppDirective:
244 * Process a possible preprocessor line directive.
245 * aagtext = #.*
246 */
247static void ppDirective (aagscan_t yyscanner)
248{
249 int r, cnt, lineno;
250 char buf[2];
251 char* s = aagget_text(yyscanner) + 1; /* skip initial # */
252
253 if (startswith(s, "line")) s += strlen("line");
254 r = sscanf(s, "%d %1[\"]%n", &lineno, buf, &cnt);
255 if (r > 0) { /* got line number */
256 // ignore if line number was out of range
257 if (lineno <= 0) {
258 return;
259 }
260 aagget_extra(yyscanner)->line_num = lineno - 1;
261 if (r > 1) { /* saw quote */
262 char* p = s + cnt;
263 char* e = p;
264 while (*e && *e != '"') e++;
265 if (e != p && *e == '"') {
266 *e = '\0';
267 storeFileName(yyscanner, p, (size_t)(e - p));
268 }
269 }
270 }
271}
272
273/* twoDots:
274 * Return true if token has more than one '.';
275 * we know the last character is a '.'.
276 */
277static bool twoDots(aagscan_t yyscanner) {
278 const char *dot = strchr(aagget_text(yyscanner), '.');
279 // was there a dot and was it not the last character?
280 return dot != NULL && dot != &aagget_text(yyscanner)[aagget_leng(yyscanner) - 1];
281}
282
283/* chkNum:
284 * The regexp for NUMBER allows a terminating letter or '.'.
285 * This way we can catch a number immediately followed by a name
286 * or something like 123.456.78, and report this to the user.
287 */
288static int chkNum(aagscan_t yyscanner) {
289 char c = aagget_text(yyscanner)[aagget_leng(yyscanner) - 1]; // last character
290 if ((!gv_isdigit(c) && c != '.') || (c == '.' && twoDots(yyscanner))) { // c is letter
291 const char* fname;
292 aagextra_t *ctx = aagget_extra(yyscanner);
293
294 if (ctx->InputFile)
295 fname = ctx->InputFile;
296 else
297 fname = "input";
298
299 agwarningf("syntax ambiguity - badly delimited number '%s' in line %d of "
300 "%s splits into two tokens\n", aagget_text(yyscanner), ctx->line_num, fname);
301
302 return 1;
303 }
304 else return 0;
305}
306
307
308static int read_input(aagscan_t scanner, char *buf, int max_size)
309{
311 return ctx->Disc->io->afread(ctx->Ifile, buf, max_size);
312}
static void agxbfree(agxbuf *xb)
free any malloced resources
Definition agxbuf.h:78
static size_t agxbput_n(agxbuf *xb, const char *s, size_t ssz)
append string s of length ssz into xb
Definition agxbuf.h:250
static int agxbprint(agxbuf *xb, const char *fmt,...)
Printf-style output to an agxbuf.
Definition agxbuf.h:234
static WUR char * agxbuse(agxbuf *xb)
Definition agxbuf.h:307
static size_t agxblen(const agxbuf *xb)
return number of characters currently stored
Definition agxbuf.h:89
static int agxbputc(agxbuf *xb, char c)
add character to buffer
Definition agxbuf.h:277
aagextra_t * aagget_extra(aagscan_t yyscanner)
#define dot(v, w)
Definition geom.c:228
#define YYSTATE
Definition gmlscan.c:383
#define unput(c)
Definition gmlscan.c:441
#define YY_FLUSH_BUFFER
Definition gmlscan.c:553
#define YY_BUF_SIZE
Definition gmlscan.c:399
node NULL
Definition grammar.y:180
static int cnt(Dict_t *d, Dtlink_t **set)
Definition graph.c:198
void agwarningf(const char *fmt,...)
Definition agerror.c:173
void agerrorf(const char *fmt,...)
Definition agerror.c:165
char * agstrdup_html(Agraph_t *, const char *)
returns a pointer to a reference-counted HTML-like copy of the argument string, creating one if neces...
Definition refstr.c:395
static bool gv_isdigit(int c)
Definition gv_ctype.h:41
agxbput(xb, staging)
textitem scanner parser str
Definition htmlparse.y:224
cleanup & scanner
Definition htmlparse.y:295
static int lineno
Definition parse.c:28
struct yyguts_t * yyg
Definition scan.c:1074
#define GRAPH_EOF_TOKEN
Definition scan.l:49
void aglexeof(aagscan_t yyscanner)
Definition scan.l:191
int aagwrap(YY_CALL_ONLY_ARG)
Definition scan.l:205
void aagerror(aagscan_t yyscanner, const char *str)
Definition scan.l:151
void aglexbad(aagscan_t yyscanner)
Definition scan.l:196
#define YY_CALL_ONLY_ARG
Definition scan.l:202
static bool startswith(const char *s, const char *prefix)
does the string s begin with the string prefix?
Definition startswith.h:11
Agiodisc_t * io
Definition cgraph.h:338
int(* afread)(void *chan, char *buf, int bufsize)
Definition cgraph.h:327
Agraph_t * G
Definition grammar.h:67
void * Ifile
Definition grammar.h:66
const char * InputFile
Definition grammar.h:74
int line_num
Definition grammar.h:72
agxbuf Sbuf
Definition grammar.h:78
Agdisc_t * Disc
Definition grammar.h:65
agxbuf InputFileBuffer
Definition grammar.h:75
Definition grammar.c:89