Graphviz 13.0.0~dev.20241220.2304
Loading...
Searching...
No Matches
scan.l
Go to the documentation of this file.
1
5/*************************************************************************
6 * Copyright (c) 2011 AT&T Intellectual Property
7 * All rights reserved. This program and the accompanying materials
8 * are made available under the terms of the Eclipse Public License v1.0
9 * which accompanies this distribution, and is available at
10 * https://www.eclipse.org/legal/epl-v10.html
11 *
12 * Contributors: Details at https://graphviz.org
13 *************************************************************************/
14
15
16/* requires flex (i.e. not lex) */
17
18 /* By default, Flex emits a lexer using symbols prefixed with "yy". Graphviz
19 * contains multiple Flex-generated lexers, so we alter this prefix to avoid
20 * symbol clashes.
21 */
22%option prefix="aag"
23
24 /* Avoid generating an unused input function. See
25 https://westes.github.io/flex/manual/Scanner-Options.html
26 */
27%option noinput
28
29%{
30#include <assert.h>
31#include <grammar.h>
32#include <cgraph/cghdr.h>
33#include <stdbool.h>
34#include <stddef.h>
35#include <string.h>
36#include <util/agxbuf.h>
37#include <util/gv_ctype.h>
38#include <util/startswith.h>
39// #define YY_BUF_SIZE 128000
40#define GRAPH_EOF_TOKEN '@' /* lex class must be defined below */
41 /* this is a workaround for linux flex */
42static int line_num = 1;
43static int html_nest = 0; /* nesting level for html strings */
44static const char* InputFile;
45static Agdisc_t *Disc;
46static void *Ifile;
47static int graphType;
48
49/* By default, Flex calls isatty() to determine whether the input it is
50 * scanning is coming from the user typing or from a file. However, our input
51 * is being provided by Graphviz' I/O channel mechanism, which does not have a
52 * valid file descriptor that supports isatty().
53 */
#define isatty(x) gv_isatty_suppression
/* Object the isatty() macro above expands to. It has static storage duration
 * and no initializer, so it is zero: the scanner always sees "not a terminal".
 */
int gv_isatty_suppression;

57#ifndef YY_INPUT
58#define YY_INPUT(buf,result,max_size) \
59 if ((result = Disc->io->afread(Ifile, buf, max_size)) < 0) \
60 YY_FATAL_ERROR( "input in flex scanner failed" )
61#endif
62
63/* buffer for arbitrary length strings (longer than BUFSIZ) */
64static agxbuf Sbuf;
65
66static void beginstr(void);
67static void addstr(char *src);
68static void endstr(void);
69static void endstr_html(void);
70static void storeFileName(char* fname, size_t len);
71
72/* ppDirective:
73 * Process a possible preprocessor line directive.
74 * aagtext = #.*
75 */
76static void ppDirective (void);
77
78/* twoDots:
79 * Return true if token has more than one '.';
80 * we know the last character is a '.'.
81 */
82static bool twoDots(void);
83
84/* chkNum:
85 * The regexp for NUMBER allows a terminating letter or '.'.
86 * This way we can catch a number immediately followed by a name
87 * or something like 123.456.78, and report this to the user.
88 */
89static int chkNum(void);
90
91
92/* The LETTER class below consists of ascii letters, underscore, all non-ascii
93 * characters. This allows identifiers to have characters from any
94 * character set independent of locale. The downside is that, for certain
95 * character sets, non-letter and, in fact, undefined characters will be
96 * accepted. This is not likely and, from dot's stand, shouldn't do any
97 * harm. (Presumably undefined characters will be ignored in display.) And,
98 * it allows a greater wealth of names. */
cgraph.h additions
static char * fname
static double len(glCompPoint p)
Definition glutils.c:150
replacements for ctype.h functions
static int chkNum(void)
Definition scan.l:284
static const char * InputFile
Definition scan.l:44
static Agdisc_t * Disc
Definition scan.l:45
static void endstr(void)
Definition scan.l:219
static void storeFileName(char *fname, size_t len)
Definition scan.l:227
static void * Ifile
Definition scan.l:46
static bool twoDots(void)
Definition scan.l:273
static int line_num
Definition scan.l:42
static void ppDirective(void)
Definition scan.l:243
static void endstr_html(void)
Definition scan.l:223
static void addstr(char *src)
Definition scan.l:215
static int html_nest
Definition scan.l:43
int gv_isatty_suppression
Definition scan.l:55
static agxbuf Sbuf
Definition scan.l:64
static void beginstr(void)
Definition scan.l:208
static int graphType
Definition scan.l:47
user's discipline
Definition cgraph.h:337
99%}
100GRAPH_EOF_TOKEN [@]
101LETTER [A-Za-z_\200-\377]
102DIGIT [0-9]
103NAME {LETTER}({LETTER}|{DIGIT})*
104NUMBER [-]?(({DIGIT}+(\.{DIGIT}*)?)|(\.{DIGIT}+))(\.|{LETTER})?
105ID ({NAME}|{NUMBER})
106%x comment
107%x qstring
108%x hstring
110{GRAPH_EOF_TOKEN} return(EOF);
111<INITIAL,comment>\n line_num++;
112"/*" BEGIN(comment);
#define BEGIN
Definition gmlscan.c:377
#define comment
Definition scan.c:889
113<comment>[^*\n]* /* eat anything not a '*' */
114<comment>"*"+[^*/\n]* /* eat up '*'s not followed by '/'s */
115<comment>"*"+"/" BEGIN(INITIAL);
#define INITIAL
Definition gmlscan.c:897
116"//".* /* ignore C++-style comments */
117^"#".* ppDirective ();
118"#".* /* ignore shell-like comments */
119[ \t\r] /* ignore whitespace */
120"\xEF\xBB\xBF" /* ignore BOM */
121"node" return(T_node); /* see tokens in agcanonstr */
#define T_node
Definition grammar.c:235
122"edge" return(T_edge);
#define T_edge
Definition grammar.c:236
123"graph" if (!graphType) graphType = T_graph; return(T_graph);
#define T_graph
Definition grammar.c:234
124"digraph" if (!graphType) graphType = T_digraph; return(T_digraph);
#define T_digraph
Definition grammar.c:237
125"strict" return(T_strict);
#define T_strict
Definition grammar.c:239
126"subgraph" return(T_subgraph);
#define T_subgraph
Definition grammar.c:238
127"->" if (graphType == T_digraph) return(T_edgeop); else return('-');
#define T_edgeop
Definition grammar.c:240
128"--" if (graphType == T_graph) return(T_edgeop); else return('-');
129{NAME} { aaglval.str = agstrdup(Ag_G_global,aagget_text()); return(T_atom); }
Agraph_t * Ag_G_global
Definition graph.c:24
#define T_atom
Definition grammar.c:243
AAGSTYPE aaglval
char * agstrdup(Agraph_t *, const char *)
returns a pointer to a reference-counted copy of the argument string, creating one if necessary
Definition refstr.c:131
char * str
Definition grammar.c:253
130{NUMBER} { if (chkNum()) yyless(aagget_leng()-1); aaglval.str = agstrdup(Ag_G_global,aagget_text()); return(T_atom); }
#define yyless(n)
Definition gmlscan.c:429
131["] BEGIN(qstring); beginstr();
#define qstring
Definition gmlscan.c:898
132<qstring>["] BEGIN(INITIAL); endstr(); return (T_qatom);
#define T_qatom
Definition grammar.c:244
133<qstring>[\\]["] addstr ("\"");
134<qstring>[\\][\\] addstr ("\\\\");
135<qstring>[\\][\n] line_num++; /* ignore escaped newlines */
136<qstring>[\n] addstr ("\n"); line_num++;
137<qstring>([^"\\\n]*|[\\]) addstr(aagget_text());
138[<] BEGIN(hstring); html_nest = 1; beginstr();
#define hstring
Definition scan.c:891
139<hstring>[>] html_nest--; if (html_nest) addstr(aagget_text()); else {BEGIN(INITIAL); endstr_html(); return (T_qatom);}
140<hstring>[<] html_nest++; addstr(aagget_text());
141<hstring>[\n] addstr(aagget_text()); line_num++; /* add newlines */
142<hstring>([^><\n]*) addstr(aagget_text());
143. return aagget_text()[0];
144%%
145
146void aagerror(const char *str);
147void aagerror(const char *str)
148{
149 agxbuf xb = {0};
150 if (InputFile) {
151 agxbprint (&xb, "%s: ", InputFile);
152 }
153 agxbprint (&xb, "%s in line %d", str, line_num);
154 if (*aagget_text()) {
155 agxbprint(&xb, " near '%s'", aagget_text());
156 }
157 else switch (YYSTATE) {
158 case qstring: {
159 agxbprint(&xb, " scanning a quoted string (missing endquote? longer than %d?)", YY_BUF_SIZE);
160 if (agxblen(&Sbuf) > 0) {
161 agxbprint(&xb, "\nString starting:\"%.80s", agxbuse(&Sbuf));
162 }
163 break;
164 }
165 case hstring: {
166 agxbprint(&xb, " scanning a HTML string (missing '>'? bad nesting? longer than %d?)", YY_BUF_SIZE);
167 if (agxblen(&Sbuf) > 0) {
168 agxbprint(&xb, "\nString starting:<%.80s", agxbuse(&Sbuf));
169 }
170 break;
171 }
172 case comment :
173 agxbprint(&xb, " scanning a /*...*/ comment (missing '*/? longer than %d?)", YY_BUF_SIZE);
174 break;
175 default: // nothing extra to note
176 break;
177 }
178 agxbputc (&xb, '\n');
179 agerrorf("%s", agxbuse(&xb));
180 agxbfree(&xb);
181 BEGIN(INITIAL);
182}
183/* must be here to see flex's macro defns */
185
187
188#ifndef YY_CALL_ONLY_ARG
189# define YY_CALL_ONLY_ARG void
190#endif
191
/* aagwrap:
 * End-of-input hook for the flex scanner. Always returns 1, i.e. there is
 * never a further input source to continue with.
 */
int aagwrap(YY_CALL_ONLY_ARG)
{
  return 1;
}
196
197 /* (Re)set file:
198 */
199void agsetfile(const char* f) { InputFile = f; line_num = 1; }
200
201/* There is a hole here, because switching channels
202 * requires pushing back whatever was previously read.
203 * There probably is a right way of doing this.
204 */
205void aglexinit(Agdisc_t *disc, void *ifile) { Disc = disc; Ifile = ifile; graphType = 0;}
206
207
208static void beginstr(void) {
209 // nothing required, but we should not have pending string data
210 assert(agxblen(&Sbuf) == 0 &&
211 "pending string data that was not consumed (missing "
212 "endstr()/endhtmlstr()?)");
213}
214
215static void addstr(char *src) {
216 agxbput(&Sbuf, src);
217}
218
219static void endstr(void) {
221}
222
223static void endstr_html(void) {
225}
226
227static void storeFileName(char* fname, size_t len) {
228 static size_t cnt;
229 static char* buf;
230
231 if (len > cnt) {
232 buf = gv_realloc(buf, cnt + 1, len + 1);
233 cnt = len;
234 }
235 strcpy (buf, fname);
236 InputFile = buf;
237}
238
239/* ppDirective:
240 * Process a possible preprocessor line directive.
241 * aagtext = #.*
242 */
243static void ppDirective (void)
244{
245 int r, cnt, lineno;
246 char buf[2];
247 char* s = aagget_text() + 1; /* skip initial # */
248
249 if (startswith(s, "line")) s += strlen("line");
250 r = sscanf(s, "%d %1[\"]%n", &lineno, buf, &cnt);
251 if (r > 0) { /* got line number */
252 // ignore if line number was out of range
253 if (lineno <= 0) {
254 return;
255 }
256 line_num = lineno - 1;
257 if (r > 1) { /* saw quote */
258 char* p = s + cnt;
259 char* e = p;
260 while (*e && *e != '"') e++;
261 if (e != p && *e == '"') {
262 *e = '\0';
263 storeFileName(p, (size_t)(e - p));
264 }
265 }
266 }
267}
268
/* twoDots:
 * Return true if the current token has more than one '.'.
 * The caller guarantees the last character is a '.', so it is enough to
 * check whether the first '.' found is somewhere before the last character.
 */
static bool twoDots(void) {
  const char *const text = aagget_text();
  const char *const last = &text[aagget_leng() - 1];
  const char *const first_dot = strchr(text, '.');

  if (first_dot == NULL)
    return false;
  return first_dot != last;
}
278
279/* chkNum:
280 * The regexp for NUMBER allows a terminating letter or '.'.
281 * This way we can catch a number immediately followed by a name
282 * or something like 123.456.78, and report this to the user.
283 */
284static int chkNum(void) {
285 char c = aagget_text()[aagget_leng() - 1]; // last character
286 if ((!gv_isdigit(c) && c != '.') || (c == '.' && twoDots())) { // c is letter
287 const char* fname;
288
289 if (InputFile)
291 else
292 fname = "input";
293
294 agwarningf("syntax ambiguity - badly delimited number '%s' in line %d of "
295 "%s splits into two tokens\n", aagget_text(), line_num, fname);
296
297 return 1;
298 }
299 else return 0;
300}
static void agxbfree(agxbuf *xb)
free any malloced resources
Definition agxbuf.h:78
static int agxbprint(agxbuf *xb, const char *fmt,...)
Printf-style output to an agxbuf.
Definition agxbuf.h:234
static WUR char * agxbuse(agxbuf *xb)
Definition agxbuf.h:307
static size_t agxblen(const agxbuf *xb)
return number of characters currently stored
Definition agxbuf.h:89
static int agxbputc(agxbuf *xb, char c)
add character to buffer
Definition agxbuf.h:277
static void * gv_realloc(void *ptr, size_t old_size, size_t new_size)
Definition alloc.h:49
static Dtdisc_t disc
Definition exparse.y:209
#define dot(v, w)
Definition geom.c:228
#define YYSTATE
Definition gmlscan.c:383
#define unput(c)
Definition gmlscan.c:441
#define YY_FLUSH_BUFFER
Definition gmlscan.c:553
#define YY_BUF_SIZE
Definition gmlscan.c:399
node NULL
Definition grammar.y:163
static int cnt(Dict_t *d, Dtlink_t **set)
Definition graph.c:210
void agwarningf(const char *fmt,...)
Definition agerror.c:173
void agerrorf(const char *fmt,...)
Definition agerror.c:165
void agsetfile(const char *f)
sets the current file name for subsequent error reporting
Definition scan.l:199
char * agstrdup_html(Agraph_t *, const char *)
Definition refstr.c:135
static bool gv_isdigit(int c)
Definition gv_ctype.h:41
agxbput(xb, staging)
textitem scanner parser str
Definition htmlparse.y:224
static int lineno
Definition parse.c:28
void aglexbad(void)
Definition scan.l:186
#define GRAPH_EOF_TOKEN
Definition scan.l:40
void aglexinit(Agdisc_t *disc, void *ifile)
Definition scan.l:205
int aagwrap(YY_CALL_ONLY_ARG)
Definition scan.l:192
#define YY_CALL_ONLY_ARG
Definition scan.l:189
void aglexeof(void)
Definition scan.l:184
void aagerror(const char *str)
Definition scan.l:147
static bool startswith(const char *s, const char *prefix)
does the string s begin with the string prefix?
Definition startswith.h:11
Definition grammar.c:93