Graphviz 12.0.1~dev.20240716.0800
Loading...
Searching...
No Matches
strmatch.c
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (c) 2011 AT&T Intellectual Property
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * https://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors: Details at https://graphviz.org
9 *************************************************************************/
10
11
12/*
13 * D. G. Korn
14 * G. S. Fowler
15 * AT&T Research
16 *
17 * match shell file patterns -- derived from Bourne and Korn shell gmatch()
18 *
19 * sh pattern egrep RE description
20 * ---------- -------- -----------
21 * * .* 0 or more chars
22 * ? . any single char
23 * [.] [.] char class
24 * [!.] [^.] negated char class
25 * [[:.:]] [[:.:]] ctype class
26 * [[=.=]] [[=.=]] equivalence class
27 * [[...]] [[...]] collation element
28 * *(.) (.)* 0 or more of
29 * +(.) (.)+ 1 or more of
30 * ?(.) (.)? 0 or 1 of
31 * (.) (.) 1 of
32 * @(.) (.) 1 of
33 * a|b a|b a or b
34 * \# () subgroup back reference [1-9]
35 * a&b a and b
36 * !(.) none of
37 *
38 * \ used to escape metacharacters
39 *
40 * *, ?, (, |, &, ), [, \ must be \'d outside of [...]
41 * only ] must be \'d inside [...]
42 *
43 * BUG: unbalanced ) terminates top level pattern
44 */
45
46#include <ast/ast.h>
47#include <cgraph/gv_ctype.h>
48#include <cgraph/strview.h>
49#include <ctype.h>
50#include <stddef.h>
51#include <string.h>
52
53#ifdef _DEBUG_MATCH
54#include <ast/error.h>
55#endif
56
57#define MAXGROUP 10
58
59typedef struct {
60 char *beg[MAXGROUP];
61 char *end[MAXGROUP];
62 char *next_s;
63 int groups;
64} Group_t;
65
66typedef struct {
69 char *last_s;
70 char *next_p;
71} Match_t;
72
73#define mbgetchar(p) (*p++)
74
75#define getsource(s,e) (((s)>=(e))?0:mbgetchar(s))
76
77/*
78 * gobble chars up to <sub> or ) keeping track of (...) and [...]
79 * sub must be one of { '|', '&', 0 }
80 * 0 returned if s runs out
81 */
82
83static char *gobble(Match_t * mp, char *s, int sub,
84 int *g, int clear)
85{
86 int p = 0;
87 char *b = 0;
88 int c = 0;
89 int n;
90
91 for (;;)
92 switch (mbgetchar(s)) {
93 case '\\':
94 if (mbgetchar(s))
95 break;
96 /*FALLTHROUGH*/ case 0:
97 return 0;
98 case '[':
99 if (!b) {
100 if (*s == '!')
101 (void)mbgetchar(s);
102 b = s;
103 } else if (*s == '.' || *s == '=' || *s == ':')
104 c = *s;
105 break;
106 case ']':
107 if (b) {
108 if (*(s - 2) == c)
109 c = 0;
110 else if (b != (s - 1))
111 b = 0;
112 }
113 break;
114 case '(':
115 if (!b) {
116 p++;
117 n = (*g)++;
118 if (clear) {
119 if (!sub)
120 n++;
121 if (n < MAXGROUP)
122 mp->current.beg[n] = mp->current.end[n] = 0;
123 }
124 }
125 break;
126 case ')':
127 if (!b && p-- <= 0)
128 return sub ? 0 : s;
129 break;
130 case '|':
131 if (!b && !p && sub == '|')
132 return s;
133 break;
134 default: // nothing required
135 break;
136 }
137}
138
139static int grpmatch(Match_t *, int, char *, char *, char *, int);
140
141#ifdef _DEBUG_MATCH
142#define RETURN(v) {error_info.indent--;return (v);}
143#else
144#define RETURN(v) {return (v);}
145#endif
146
147/*
148 * match a single pattern
149 * e is the end (0) of the substring in s
150 * r marks the start of a repeated subgroup pattern
151 */
152
153static int
154onematch(Match_t * mp, int g, char *s, char *p, char *e, char *r,
155 int flags)
156{
157 int pc;
158 int sc;
159 int n;
160 char *olds;
161 char *oldp;
162
163#ifdef _DEBUG_MATCH
165 error(-1, "onematch g=%d s=%-.*s p=%s r=%p flags=%o", g, e - s, s, p,
166 r, flags);
167#endif
168 do {
169 olds = s;
170 sc = getsource(s, e);
171 oldp = p;
172 switch (pc = mbgetchar(p)) {
173 case '(':
174 case '*':
175 case '?':
176 case '+':
177 case '@':
178 case '!':
179 if (pc == '(' || *p == '(') {
180 char *subp;
181 int oldg;
182
183 s = olds;
184 subp = p + (pc != '(');
185 oldg = g;
186 n = ++g;
187 if (g < MAXGROUP && (!r || g > mp->current.groups))
188 mp->current.beg[g] = mp->current.end[g] = 0;
189 if (!(p = gobble(mp, subp, 0, &g, !r)))
190 RETURN(0);
191 if (pc == '*' || pc == '?' || (pc == '+' && oldp == r)) {
192 if (onematch(mp, g, s, p, e, NULL, flags))
193 RETURN(1);
194 if (!sc || !getsource(s, e)) {
195 mp->current.groups = oldg;
196 RETURN(0);
197 }
198 }
199 if (pc == '*' || pc == '+') {
200 p = oldp;
201 sc = n - 1;
202 } else
203 sc = g;
204 pc = (pc != '!');
205 do {
206 if (grpmatch(mp, n, olds, subp, s, flags) == pc) {
207 if (n < MAXGROUP) {
208 if (!mp->current.beg[n] || mp->current.beg[n] > olds)
209 mp->current.beg[n] = olds;
210 if (s > mp->current.end[n])
211 mp->current.end[n] = s;
212#ifdef _DEBUG_MATCH
213 error(-4,
214 "subgroup#%d n=%d beg=%p end=%p len=%d",
215 __LINE__, n, mp->current.beg[n],
216 mp->current.end[n],
217 mp->current.end[n] - mp->current.beg[n]);
218#endif
219 }
220 if (onematch(mp, sc, s, p, e, oldp, flags)) {
221 if (p == oldp && n < MAXGROUP) {
222 if (!mp->current.beg[n] || mp->current.beg[n] > olds)
223 mp->current.beg[n] = olds;
224 if (s > mp->current.end[n])
225 mp->current.end[n] = s;
226#ifdef _DEBUG_MATCH
227 error(-4,
228 "subgroup#%d n=%d beg=%p end=%p len=%d",
229 __LINE__, n, mp->current.beg[n],
230 mp->current.end[n],
231 mp->current.end[n] -
232 mp->current.beg[n]);
233#endif
234 }
235 RETURN(1);
236 }
237 }
238 } while (s < e && mbgetchar(s));
239 mp->current.groups = oldg;
240 RETURN(0);
241 } else if (pc == '*') {
242 /*
243 * several stars are the same as one
244 */
245
246 while (*p == '*' && *(p + 1) != '(')
247 p++;
248 oldp = p;
249 switch (pc = mbgetchar(p)) {
250 case '@':
251 case '!':
252 case '+':
253 n = *p == '(';
254 break;
255 case '(':
256 case '[':
257 case '?':
258 case '*':
259 n = 1;
260 break;
261 case 0:
262 case '|':
263 case '&':
264 case ')':
265 mp->current.next_s = e;
266 mp->next_p = oldp;
267 mp->current.groups = g;
268 if (!pc && (!mp->best.next_s || mp->current.next_s > mp->best.next_s)) {
269 mp->best = mp->current;
270#ifdef _DEBUG_MATCH
271 error(-3, "best#%d groups=%d next=\"%s\"",
272 __LINE__, mp->best.groups, mp->best.next_s);
273#endif
274 }
275 RETURN(1);
276 case '\\':
277 if (!(pc = mbgetchar(p)))
278 RETURN(0);
279 if (pc >= '0' && pc <= '9') {
280 n = pc - '0';
281#ifdef _DEBUG_MATCH
282 error(-2,
283 "backref#%d n=%d g=%d beg=%p end=%p len=%d",
284 __LINE__, n, g, mp->current.beg[n],
285 mp->current.end[n],
286 mp->current.end[n] - mp->current.beg[n]);
287#endif
288 if (n <= g && mp->current.beg[n])
289 pc = *mp->current.beg[n];
290 }
291 /*FALLTHROUGH*/ default:
292 n = 0;
293 break;
294 }
295 p = oldp;
296 for (;;) {
297 if ((n || pc == sc) && onematch(mp, g, olds, p, e, NULL, flags))
298 RETURN(1);
299 if (!sc)
300 RETURN(0);
301 olds = s;
302 sc = getsource(s, e);
303 }
304 } else if (pc != '?' && pc != sc)
305 RETURN(0);
306 break;
307 case 0:
308 case '|':
309 case '&':
310 case ')':
311 if (!sc) {
312 mp->current.next_s = olds;
313 mp->next_p = oldp;
314 mp->current.groups = g;
315 }
316 if (!pc && (!mp->best.next_s || olds > mp->best.next_s)) {
317 mp->best = mp->current;
318 mp->best.next_s = olds;
319 mp->best.groups = g;
320#ifdef _DEBUG_MATCH
321 error(-3, "best#%d groups=%d next=\"%s\"", __LINE__,
322 mp->best.groups, mp->best.next_s);
323#endif
324 }
325 RETURN(!sc);
326 case '[':
327 {
328 /*UNDENT... */
329
330 int invert;
331 int x;
332 int ok = 0;
333 char *range;
334
335 if (!sc)
336 RETURN(0);
337 range = 0;
338 n = 0;
339 if ((invert = (*p == '!')))
340 p++;
341 for (;;) {
342 oldp = p;
343 if (!(pc = mbgetchar(p))) {
344 RETURN(0);
345 } else if (pc == '[' && (*p == ':' || *p == '=' || *p == '.')) {
346 n = mbgetchar(p);
347 oldp = p;
348 strview_t callee = {.data = oldp};
349 for (;;) {
350 if (!(pc = mbgetchar(p)))
351 RETURN(0);
352 if (pc == n && *p == ']')
353 break;
354 ++callee.size;
355 }
356 (void)mbgetchar(p);
357 if (ok)
358 /*NOP*/;
359 else if (n == ':') {
360 if (strview_str_eq(callee, "alnum")) {
361 if (gv_isalnum(sc))
362 ok = 1;
363 } else if (strview_str_eq(callee, "alpha")) {
364 if (gv_isalpha(sc))
365 ok = 1;
366 } else if (strview_str_eq(callee, "blank")) {
367 if (gv_isblank(sc))
368 ok = 1;
369 } else if (strview_str_eq(callee, "cntrl")) {
370 if (gv_iscntrl(sc))
371 ok = 1;
372 } else if (strview_str_eq(callee, "digit")) {
373 if (gv_isdigit(sc))
374 ok = 1;
375 } else if (strview_str_eq(callee, "graph")) {
376 if (gv_isgraph(sc))
377 ok = 1;
378 } else if (strview_str_eq(callee, "lower")) {
379 if (gv_islower(sc))
380 ok = 1;
381 } else if (strview_str_eq(callee, "print")) {
382 if (gv_isprint(sc))
383 ok = 1;
384 } else if (strview_str_eq(callee, "punct")) {
385 if (gv_ispunct(sc))
386 ok = 1;
387 } else if (strview_str_eq(callee, "space")) {
388 if (gv_isspace(sc))
389 ok = 1;
390 } else if (strview_str_eq(callee, "upper")) {
391 if (gv_isupper(sc))
392 ok = 1;
393 } else if (strview_str_eq(callee, "xdigit")) {
394 if (gv_isxdigit(sc))
395 ok = 1;
396 }
397 }
398 else if (range)
399 goto getrange;
400 else if (*p == '-' && *(p + 1) != ']') {
401 (void)mbgetchar(p);
402 range = oldp;
403 } else
404 if ((gv_isalpha(*oldp) && gv_isalpha(*olds)
405 && tolower(*oldp) == tolower(*olds))
406 || sc == mbgetchar(oldp))
407 ok = 1;
408 n = 1;
409 } else if (pc == ']' && n) {
410 if (ok != invert)
411 break;
412 RETURN(0);
413 } else if (pc == '\\' && (oldp = p, !(pc = mbgetchar(p)))) {
414 RETURN(0);
415 } else if (ok)
416 /*NOP*/;
417 else if (range)
418 {
419 getrange:
420 x = mbgetchar(range);
421 if (sc == x || sc == pc || (sc > x && sc < pc))
422 ok = 1;
423 if (*p == '-' && *(p + 1) != ']') {
424 (void)mbgetchar(p);
425 range = oldp;
426 } else
427 range = 0;
428 n = 1;
429 } else if (*p == '-' && *(p + 1) != ']') {
430 (void)mbgetchar(p);
431 range = oldp;
432 n = 1;
433 } else {
434 if (sc == pc)
435 ok = 1;
436 n = pc;
437 }
438 }
439
440 /*...INDENT */
441 }
442 break;
443 case '\\':
444 if (!(pc = mbgetchar(p)))
445 RETURN(0);
446 if (pc >= '0' && pc <= '9') {
447 n = pc - '0';
448#ifdef _DEBUG_MATCH
449 error(-2, "backref#%d n=%d g=%d beg=%p end=%p len=%d",
450 __LINE__, n, g, mp->current.beg[n],
451 mp->current.end[n],
452 mp->current.end[n] - mp->current.beg[n]);
453#endif
454 if (n <= g && (oldp = mp->current.beg[n])) {
455 while (oldp < mp->current.end[n])
456 if (!*olds || *olds++ != *oldp++)
457 RETURN(0);
458 s = olds;
459 break;
460 }
461 }
462 /*FALLTHROUGH*/ default:
463 if (pc != sc)
464 RETURN(0);
465 break;
466 }
467 } while (sc);
468 RETURN(0);
469}
470
471/*
472 * match any pattern in a group
473 * | and & subgroups are parsed here
474 */
475
476static int
477grpmatch(Match_t * mp, int g, char *s, char *p, char *e,
478 int flags)
479{
480 char *a;
481
482#ifdef _DEBUG_MATCH
484 error(-1, "grpmatch g=%d s=%-.*s p=%s flags=%o", g, e - s, s, p,
485 flags);
486#endif
487 do {
488 for (a = p; onematch(mp, g, s, a, e, NULL, flags); a++)
489 if (*(a = mp->next_p) != '&')
490 RETURN(1);
491 } while ((p = gobble(mp, p, '|', &g, 1)));
492 RETURN(0);
493}
494
495/*
496 * subgroup match
497 * 0 returned if no match
498 * otherwise number of subgroups matched returned
499 * match group begin offsets are even elements of sub
500 * match group end offsets are odd elements of sub
501 * the matched string is from s+sub[0] up to but not
502 * including s+sub[1]
503 */
504
505int strgrpmatch(char *b, char *p, size_t *sub, int n, int flags) {
506 int i;
507 char *s;
508 char *e;
510
511 s = b;
512 match.last_s = e = s + strlen(s);
513 for (;;) {
514 match.best.next_s = 0;
515 match.current.groups = 0;
516 match.current.beg[0] = 0;
517 if ((i = grpmatch(&match, 0, s, p, e, flags)) || match.best.next_s) {
518 if (!(flags & STR_RIGHT) || (match.current.next_s == e)) {
519 if (!i)
520 match.current = match.best;
521 match.current.groups++;
522 match.current.end[0] = match.current.next_s;
523#ifdef _DEBUG_MATCH
524 error(-1, "match i=%d s=\"%s\" p=\"%s\" flags=%o groups=%d next=\"%s\"",
525 i, s, p, flags, match.current.groups, match.current.next_s);
526#endif
527 break;
528 }
529 }
530 if ((flags & STR_LEFT) || s >= e)
531 return 0;
532 s++;
533 }
534 if ((flags & STR_RIGHT) && match.current.next_s != e)
535 return 0;
536 if (!sub)
537 return 1;
538 match.current.beg[0] = s;
539 s = b;
540 if (n > match.current.groups)
541 n = match.current.groups;
542 for (i = 0; i < n; i++) {
543 sub[i * 2] = match.current.end[i] ? (size_t)(match.current.beg[i] - s) : 0;
544 sub[i * 2 + 1] = match.current.end[i] ? (size_t)(match.current.end[i] - s) : 0;
545 }
546 return n;
547}
548
549/*
550 * compare the string s with the shell pattern p
551 * returns 1 for match 0 otherwise
552 */
553
554int strmatch(char *s, char *p) {
555 return strgrpmatch(s, p, NULL, 0, STR_LEFT | STR_RIGHT);
556}
size_t match(char *str, char *pat)
Definition actions.c:106
#define STR_LEFT
Definition ast.h:26
#define STR_RIGHT
Definition ast.h:27
#define sub(h, i)
Definition closest.c:65
void error(int level, const char *s,...)
Definition error.c:83
Error_info_t error_info
Definition error.c:23
static int flags
Definition gc.c:61
node NULL
Definition grammar.y:149
bool ok(Agraph_t *g)
Definition gv.cpp:368
replacements for ctype.h functions
static bool gv_isgraph(int c)
Definition gv_ctype.h:45
static bool gv_islower(int c)
Definition gv_ctype.h:25
static bool gv_isxdigit(int c)
Definition gv_ctype.h:71
static bool gv_isupper(int c)
Definition gv_ctype.h:27
static bool gv_isalnum(int c)
Definition gv_ctype.h:43
static bool gv_iscntrl(int c)
Definition gv_ctype.h:33
static bool gv_isblank(int c)
Definition gv_ctype.h:31
static bool gv_isdigit(int c)
Definition gv_ctype.h:41
static bool gv_isalpha(int c)
Definition gv_ctype.h:29
static bool gv_isprint(int c)
Definition gv_ctype.h:47
static bool gv_isspace(int c)
Definition gv_ctype.h:55
static bool gv_ispunct(int c)
Definition gv_ctype.h:49
#define MAXGROUP
Definition strmatch.c:57
int strmatch(char *s, char *p)
Definition strmatch.c:554
static int onematch(Match_t *mp, int g, char *s, char *p, char *e, char *r, int flags)
Definition strmatch.c:154
static int grpmatch(Match_t *, int, char *, char *, char *, int)
Definition strmatch.c:477
int strgrpmatch(char *b, char *p, size_t *sub, int n, int flags)
Definition strmatch.c:505
#define getsource(s, e)
Definition strmatch.c:75
#define RETURN(v)
Definition strmatch.c:144
#define mbgetchar(p)
Definition strmatch.c:73
static char * gobble(Match_t *mp, char *s, int sub, int *g, int clear)
Definition strmatch.c:83
int indent
Definition error.h:26
int groups
Definition strmatch.c:63
char * end[MAXGROUP]
Definition strmatch.c:61
char * beg[MAXGROUP]
Definition strmatch.c:60
char * next_s
Definition strmatch.c:62
char * next_p
Definition strmatch.c:70
Group_t best
Definition strmatch.c:68
Group_t current
Definition strmatch.c:67
char * last_s
Definition strmatch.c:69
a non-owning string reference
Definition strview.h:20
const char * data
start of the pointed to string
Definition strview.h:21
size_t size
extent of the string in bytes
Definition strview.h:22
Non-owning string references.
static bool strview_str_eq(strview_t a, const char *b)
compare a string reference to a string for equality
Definition strview.h:98
Definition grammar.c:93
static bool clear(Ppoint_t pti, Ppoint_t ptj, int start, int end, int V, Ppoint_t pts[], int nextPt[])
Definition visibility.c:145