Graphviz 14.1.3~dev.20260124.0732
Loading...
Searching...
No Matches
strmatch.c
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (c) 2011 AT&T Intellectual Property
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * https://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors: Details at https://graphviz.org
9 *************************************************************************/
10
11
12/*
13 * D. G. Korn
14 * G. S. Fowler
15 * AT&T Research
16 *
17 * match shell file patterns -- derived from Bourne and Korn shell gmatch()
18 *
19 * sh pattern egrep RE description
20 * ---------- -------- -----------
21 * * .* 0 or more chars
22 * ? . any single char
23 * [.] [.] char class
24 * [!.] [^.] negated char class
25 * [[:.:]] [[:.:]] ctype class
26 * [[=.=]] [[=.=]] equivalence class
27 * [[...]] [[...]] collation element
28 * *(.) (.)* 0 or more of
29 * +(.) (.)+ 1 or more of
30 * ?(.) (.)? 0 or 1 of
31 * (.) (.) 1 of
32 * @(.) (.) 1 of
33 * a|b a|b a or b
34 * \# () subgroup back reference [1-9]
35 * a&b a and b
36 * !(.) none of
37 *
38 * \ used to escape metacharacters
39 *
40 * *, ?, (, |, &, ), [, \ must be \'d outside of [...]
41 * only ] must be \'d inside [...]
42 *
43 * BUG: unbalanced ) terminates top level pattern
44 */
45
46#include "config.h"
47
48#include <ast/ast.h>
49#include <ctype.h>
50#include <stddef.h>
51#include <string.h>
52#include <util/gv_ctype.h>
53#include <util/strview.h>
54
55#ifdef _DEBUG_MATCH
56#include <ast/error.h>
57#endif
58
59#define MAXGROUP 10
60
61typedef struct {
62 char *beg[MAXGROUP];
63 char *end[MAXGROUP];
64 char *next_s;
65 int groups;
66} Group_t;
67
68typedef struct {
71 char *last_s;
72 char *next_p;
73} Match_t;
74
75#define mbgetchar(p) (*p++)
76
77#define getsource(s,e) (((s)>=(e))?0:mbgetchar(s))
78
79/*
80 * gobble chars up to <sub> or ) keeping track of (...) and [...]
81 * sub must be one of { '|', '&', 0 }
82 * 0 returned if s runs out
83 */
84
85static char *gobble(Match_t * mp, char *s, int sub,
86 int *g, int clear)
87{
88 int p = 0;
89 char *b = 0;
90 int c = 0;
91 int n;
92
93 for (;;)
94 switch (mbgetchar(s)) {
95 case '\\':
96 if (mbgetchar(s))
97 break;
98 /*FALLTHROUGH*/ case 0:
99 return 0;
100 case '[':
101 if (!b) {
102 if (*s == '!')
103 (void)mbgetchar(s);
104 b = s;
105 } else if (*s == '.' || *s == '=' || *s == ':')
106 c = *s;
107 break;
108 case ']':
109 if (b) {
110 if (*(s - 2) == c)
111 c = 0;
112 else if (b != (s - 1))
113 b = 0;
114 }
115 break;
116 case '(':
117 if (!b) {
118 p++;
119 n = (*g)++;
120 if (clear) {
121 if (!sub)
122 n++;
123 if (n < MAXGROUP)
124 mp->current.beg[n] = mp->current.end[n] = 0;
125 }
126 }
127 break;
128 case ')':
129 if (!b && p-- <= 0)
130 return sub ? 0 : s;
131 break;
132 case '|':
133 if (!b && !p && sub == '|')
134 return s;
135 break;
136 default: // nothing required
137 break;
138 }
139}
140
141static int grpmatch(Match_t *, int, char *, char *, char *, int);
142
143#ifdef _DEBUG_MATCH
144#define RETURN(v) {error_info.indent--;return (v);}
145#else
146#define RETURN(v) {return (v);}
147#endif
148
149/*
150 * match a single pattern
151 * e is the end (0) of the substring in s
152 * r marks the start of a repeated subgroup pattern
153 */
154
155static int
156onematch(Match_t * mp, int g, char *s, char *p, char *e, char *r,
157 int flags)
158{
159 int pc;
160 int sc;
161 int n;
162 char *olds;
163 char *oldp;
164
165#ifdef _DEBUG_MATCH
167 error(-1, "onematch g=%d s=%-.*s p=%s r=%p flags=%o", g, e - s, s, p,
168 r, flags);
169#endif
170 do {
171 olds = s;
172 sc = getsource(s, e);
173 oldp = p;
174 switch (pc = mbgetchar(p)) {
175 case '(':
176 case '*':
177 case '?':
178 case '+':
179 case '@':
180 case '!':
181 if (pc == '(' || *p == '(') {
182 char *subp;
183 int oldg;
184
185 s = olds;
186 subp = p + (pc != '(');
187 oldg = g;
188 n = ++g;
189 if (g < MAXGROUP && (!r || g > mp->current.groups))
190 mp->current.beg[g] = mp->current.end[g] = 0;
191 if (!(p = gobble(mp, subp, 0, &g, !r)))
192 RETURN(0);
193 if (pc == '*' || pc == '?' || (pc == '+' && oldp == r)) {
194 if (onematch(mp, g, s, p, e, NULL, flags))
195 RETURN(1);
196 if (!sc || !getsource(s, e)) {
197 mp->current.groups = oldg;
198 RETURN(0);
199 }
200 }
201 if (pc == '*' || pc == '+') {
202 p = oldp;
203 sc = n - 1;
204 } else
205 sc = g;
206 pc = (pc != '!');
207 do {
208 if (grpmatch(mp, n, olds, subp, s, flags) == pc) {
209 if (n < MAXGROUP) {
210 if (!mp->current.beg[n] || mp->current.beg[n] > olds)
211 mp->current.beg[n] = olds;
212 if (s > mp->current.end[n])
213 mp->current.end[n] = s;
214#ifdef _DEBUG_MATCH
215 error(-4,
216 "subgroup#%d n=%d beg=%p end=%p len=%d",
217 __LINE__, n, mp->current.beg[n],
218 mp->current.end[n],
219 mp->current.end[n] - mp->current.beg[n]);
220#endif
221 }
222 if (onematch(mp, sc, s, p, e, oldp, flags)) {
223 if (p == oldp && n < MAXGROUP) {
224 if (!mp->current.beg[n] || mp->current.beg[n] > olds)
225 mp->current.beg[n] = olds;
226 if (s > mp->current.end[n])
227 mp->current.end[n] = s;
228#ifdef _DEBUG_MATCH
229 error(-4,
230 "subgroup#%d n=%d beg=%p end=%p len=%d",
231 __LINE__, n, mp->current.beg[n],
232 mp->current.end[n],
233 mp->current.end[n] -
234 mp->current.beg[n]);
235#endif
236 }
237 RETURN(1);
238 }
239 }
240 } while (s < e && mbgetchar(s));
241 mp->current.groups = oldg;
242 RETURN(0);
243 } else if (pc == '*') {
244 /*
245 * several stars are the same as one
246 */
247
248 while (*p == '*' && *(p + 1) != '(')
249 p++;
250 oldp = p;
251 switch (pc = mbgetchar(p)) {
252 case '@':
253 case '!':
254 case '+':
255 n = *p == '(';
256 break;
257 case '(':
258 case '[':
259 case '?':
260 case '*':
261 n = 1;
262 break;
263 case 0:
264 case '|':
265 case '&':
266 case ')':
267 mp->current.next_s = e;
268 mp->next_p = oldp;
269 mp->current.groups = g;
270 if (!pc && (!mp->best.next_s || mp->current.next_s > mp->best.next_s)) {
271 mp->best = mp->current;
272#ifdef _DEBUG_MATCH
273 error(-3, "best#%d groups=%d next=\"%s\"",
274 __LINE__, mp->best.groups, mp->best.next_s);
275#endif
276 }
277 RETURN(1);
278 case '\\':
279 if (!(pc = mbgetchar(p)))
280 RETURN(0);
281 if (pc >= '0' && pc <= '9') {
282 n = pc - '0';
283#ifdef _DEBUG_MATCH
284 error(-2,
285 "backref#%d n=%d g=%d beg=%p end=%p len=%d",
286 __LINE__, n, g, mp->current.beg[n],
287 mp->current.end[n],
288 mp->current.end[n] - mp->current.beg[n]);
289#endif
290 if (n <= g && mp->current.beg[n])
291 pc = *mp->current.beg[n];
292 }
293 /*FALLTHROUGH*/ default:
294 n = 0;
295 break;
296 }
297 p = oldp;
298 for (;;) {
299 if ((n || pc == sc) && onematch(mp, g, olds, p, e, NULL, flags))
300 RETURN(1);
301 if (!sc)
302 RETURN(0);
303 olds = s;
304 sc = getsource(s, e);
305 }
306 } else if (pc != '?' && pc != sc)
307 RETURN(0);
308 break;
309 case 0:
310 case '|':
311 case '&':
312 case ')':
313 if (!sc) {
314 mp->current.next_s = olds;
315 mp->next_p = oldp;
316 mp->current.groups = g;
317 }
318 if (!pc && (!mp->best.next_s || olds > mp->best.next_s)) {
319 mp->best = mp->current;
320 mp->best.next_s = olds;
321 mp->best.groups = g;
322#ifdef _DEBUG_MATCH
323 error(-3, "best#%d groups=%d next=\"%s\"", __LINE__,
324 mp->best.groups, mp->best.next_s);
325#endif
326 }
327 RETURN(!sc);
328 case '[':
329 {
330 /*UNDENT... */
331
332 int invert;
333 int x;
334 int ok = 0;
335 char *range;
336
337 if (!sc)
338 RETURN(0);
339 range = 0;
340 n = 0;
341 if ((invert = (*p == '!')))
342 p++;
343 for (;;) {
344 oldp = p;
345 if (!(pc = mbgetchar(p))) {
346 RETURN(0);
347 } else if (pc == '[' && (*p == ':' || *p == '=' || *p == '.')) {
348 n = mbgetchar(p);
349 oldp = p;
350 strview_t callee = {.data = oldp};
351 for (;;) {
352 if (!(pc = mbgetchar(p)))
353 RETURN(0);
354 if (pc == n && *p == ']')
355 break;
356 ++callee.size;
357 }
358 (void)mbgetchar(p);
359 if (ok)
360 /*NOP*/;
361 else if (n == ':') {
362 if (strview_str_eq(callee, "alnum")) {
363 if (gv_isalnum(sc))
364 ok = 1;
365 } else if (strview_str_eq(callee, "alpha")) {
366 if (gv_isalpha(sc))
367 ok = 1;
368 } else if (strview_str_eq(callee, "blank")) {
369 if (gv_isblank(sc))
370 ok = 1;
371 } else if (strview_str_eq(callee, "cntrl")) {
372 if (gv_iscntrl(sc))
373 ok = 1;
374 } else if (strview_str_eq(callee, "digit")) {
375 if (gv_isdigit(sc))
376 ok = 1;
377 } else if (strview_str_eq(callee, "graph")) {
378 if (gv_isgraph(sc))
379 ok = 1;
380 } else if (strview_str_eq(callee, "lower")) {
381 if (gv_islower(sc))
382 ok = 1;
383 } else if (strview_str_eq(callee, "print")) {
384 if (gv_isprint(sc))
385 ok = 1;
386 } else if (strview_str_eq(callee, "punct")) {
387 if (gv_ispunct(sc))
388 ok = 1;
389 } else if (strview_str_eq(callee, "space")) {
390 if (gv_isspace(sc))
391 ok = 1;
392 } else if (strview_str_eq(callee, "upper")) {
393 if (gv_isupper(sc))
394 ok = 1;
395 } else if (strview_str_eq(callee, "xdigit")) {
396 if (gv_isxdigit(sc))
397 ok = 1;
398 }
399 }
400 else if (range)
401 goto getrange;
402 else if (*p == '-' && *(p + 1) != ']') {
403 (void)mbgetchar(p);
404 range = oldp;
405 } else
406 if ((gv_isalpha(*oldp) && gv_isalpha(*olds)
407 && tolower(*oldp) == tolower(*olds))
408 || sc == mbgetchar(oldp))
409 ok = 1;
410 n = 1;
411 } else if (pc == ']' && n) {
412 if (ok != invert)
413 break;
414 RETURN(0);
415 } else if (pc == '\\' && (oldp = p, !(pc = mbgetchar(p)))) {
416 RETURN(0);
417 } else if (ok)
418 /*NOP*/;
419 else if (range)
420 {
421 getrange:
422 x = mbgetchar(range);
423 if (sc == x || sc == pc || (sc > x && sc < pc))
424 ok = 1;
425 if (*p == '-' && *(p + 1) != ']') {
426 (void)mbgetchar(p);
427 range = oldp;
428 } else
429 range = 0;
430 n = 1;
431 } else if (*p == '-' && *(p + 1) != ']') {
432 (void)mbgetchar(p);
433 range = oldp;
434 n = 1;
435 } else {
436 if (sc == pc)
437 ok = 1;
438 n = pc;
439 }
440 }
441
442 /*...INDENT */
443 }
444 break;
445 case '\\':
446 if (!(pc = mbgetchar(p)))
447 RETURN(0);
448 if (pc >= '0' && pc <= '9') {
449 n = pc - '0';
450#ifdef _DEBUG_MATCH
451 error(-2, "backref#%d n=%d g=%d beg=%p end=%p len=%d",
452 __LINE__, n, g, mp->current.beg[n],
453 mp->current.end[n],
454 mp->current.end[n] - mp->current.beg[n]);
455#endif
456 if (n <= g && (oldp = mp->current.beg[n])) {
457 while (oldp < mp->current.end[n])
458 if (!*olds || *olds++ != *oldp++)
459 RETURN(0);
460 s = olds;
461 break;
462 }
463 }
464 /*FALLTHROUGH*/ default:
465 if (pc != sc)
466 RETURN(0);
467 break;
468 }
469 } while (sc);
470 RETURN(0);
471}
472
473/*
474 * match any pattern in a group
475 * | and & subgroups are parsed here
476 */
477
478static int
479grpmatch(Match_t * mp, int g, char *s, char *p, char *e,
480 int flags)
481{
482 char *a;
483
484#ifdef _DEBUG_MATCH
486 error(-1, "grpmatch g=%d s=%-.*s p=%s flags=%o", g, e - s, s, p,
487 flags);
488#endif
489 do {
490 for (a = p; onematch(mp, g, s, a, e, NULL, flags); a++)
491 if (*(a = mp->next_p) != '&')
492 RETURN(1);
493 } while ((p = gobble(mp, p, '|', &g, 1)));
494 RETURN(0);
495}
496
497/*
498 * subgroup match
499 * 0 returned if no match
500 * otherwise number of subgroups matched returned
501 * match group begin offsets are even elements of sub
502 * match group end offsets are odd elements of sub
503 * the matched string is from s+sub[0] up to but not
504 * including s+sub[1]
505 */
506
507int strgrpmatch(char *b, char *p, size_t *sub, int n, int flags) {
508 int i;
509 char *s;
510 char *e;
512
513 s = b;
514 match.last_s = e = s + strlen(s);
515 for (;;) {
516 match.best.next_s = 0;
517 match.current.groups = 0;
518 match.current.beg[0] = 0;
519 if ((i = grpmatch(&match, 0, s, p, e, flags)) || match.best.next_s) {
520 if (!(flags & STR_RIGHT) || (match.current.next_s == e)) {
521 if (!i)
522 match.current = match.best;
523 match.current.groups++;
524 match.current.end[0] = match.current.next_s;
525#ifdef _DEBUG_MATCH
526 error(-1, "match i=%d s=\"%s\" p=\"%s\" flags=%o groups=%d next=\"%s\"",
527 i, s, p, flags, match.current.groups, match.current.next_s);
528#endif
529 break;
530 }
531 }
532 if ((flags & STR_LEFT) || s >= e)
533 return 0;
534 s++;
535 }
536 if ((flags & STR_RIGHT) && match.current.next_s != e)
537 return 0;
538 if (!sub)
539 return 1;
540 match.current.beg[0] = s;
541 s = b;
542 if (n > match.current.groups)
543 n = match.current.groups;
544 for (i = 0; i < n; i++) {
545 sub[i * 2] = match.current.end[i] ? (size_t)(match.current.beg[i] - s) : 0;
546 sub[i * 2 + 1] = match.current.end[i] ? (size_t)(match.current.end[i] - s) : 0;
547 }
548 return n;
549}
550
551/*
552 * compare the string s with the shell pattern p
553 * returns 1 for match 0 otherwise
554 */
555
556int strmatch(char *s, char *p) {
557 return strgrpmatch(s, p, NULL, 0, STR_LEFT | STR_RIGHT);
558}
size_t match(char *str, char *pat)
Definition actions.c:97
#define STR_LEFT
Definition ast.h:26
#define STR_RIGHT
Definition ast.h:27
#define sub(h, i)
Definition closest.c:62
Error_info_t error_info
Definition error.c:24
static int flags
Definition gc.c:63
node NULL
Definition grammar.y:181
bool ok(Agraph_t *g)
Definition gv.cpp:366
replacements for ctype.h functions
static bool gv_isgraph(int c)
Definition gv_ctype.h:45
static bool gv_islower(int c)
Definition gv_ctype.h:25
static bool gv_isxdigit(int c)
Definition gv_ctype.h:71
static bool gv_isupper(int c)
Definition gv_ctype.h:27
static bool gv_isalnum(int c)
Definition gv_ctype.h:43
static bool gv_iscntrl(int c)
Definition gv_ctype.h:33
static bool gv_isblank(int c)
Definition gv_ctype.h:31
static bool gv_isdigit(int c)
Definition gv_ctype.h:41
static bool gv_isalpha(int c)
Definition gv_ctype.h:29
static bool gv_isprint(int c)
Definition gv_ctype.h:47
static bool gv_isspace(int c)
Definition gv_ctype.h:55
static bool gv_ispunct(int c)
Definition gv_ctype.h:49
table Syntax error
Definition htmlparse.y:288
#define MAXGROUP
Definition strmatch.c:59
int strmatch(char *s, char *p)
Definition strmatch.c:556
static int onematch(Match_t *mp, int g, char *s, char *p, char *e, char *r, int flags)
Definition strmatch.c:156
static int grpmatch(Match_t *, int, char *, char *, char *, int)
Definition strmatch.c:479
int strgrpmatch(char *b, char *p, size_t *sub, int n, int flags)
Definition strmatch.c:507
#define getsource(s, e)
Definition strmatch.c:77
#define RETURN(v)
Definition strmatch.c:146
#define mbgetchar(p)
Definition strmatch.c:75
static char * gobble(Match_t *mp, char *s, int sub, int *g, int clear)
Definition strmatch.c:85
int indent
Definition error.h:26
int groups
Definition strmatch.c:65
char * end[MAXGROUP]
Definition strmatch.c:63
char * beg[MAXGROUP]
Definition strmatch.c:62
char * next_s
Definition strmatch.c:64
char * next_p
Definition strmatch.c:72
Group_t best
Definition strmatch.c:70
Group_t current
Definition strmatch.c:69
char * last_s
Definition strmatch.c:71
a non-owning string reference
Definition strview.h:20
const char * data
start of the pointed to string
Definition strview.h:21
size_t size
extent of the string in bytes
Definition strview.h:22
Non-owning string references.
static bool strview_str_eq(strview_t a, const char *b)
compare a string reference to a string for equality
Definition strview.h:98
Definition grammar.c:90
static bool clear(Ppoint_t pti, Ppoint_t ptj, int start, int end, int V, Ppoint_t pts[], int nextPt[])
Definition visibility.c:147