Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libast/astsa/strmatch.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1985-2012 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* David Korn <[email protected]> *
19
* Phong Vo <[email protected]> *
20
* *
21
***********************************************************************/
22
#pragma prototyped
23
24
/*
25
* D. G. Korn
26
* G. S. Fowler
27
* AT&T Research
28
*
29
* match shell file patterns -- derived from Bourne and Korn shell gmatch()
30
*
31
* sh pattern egrep RE description
32
* ---------- -------- -----------
33
* * .* 0 or more chars
34
* ? . any single char
35
* [.] [.] char class
36
* [!.] [^.] negated char class
37
* [[:.:]] [[:.:]] ctype class
38
* [[=.=]] [[=.=]] equivalence class
39
* [[...]] [[...]] collation element
40
* *(.) (.)* 0 or more of
41
* +(.) (.)+ 1 or more of
42
* ?(.) (.)? 0 or 1 of
43
* (.) (.) 1 of
44
* @(.) (.) 1 of
45
* a|b a|b a or b
46
* \# () subgroup back reference [1-9]
47
* a&b a and b
48
* !(.) none of
49
*
50
* \ used to escape metacharacters
51
*
52
* *, ?, (, |, &, ), [, \ must be \'d outside of [...]
53
* only ] must be \'d inside [...]
54
*
55
* BUG: unbalanced ) terminates top level pattern
56
*/
57
58
#include <ast.h>
59
#include <ctype.h>
60
#include <hashkey.h>
61
62
#ifndef isblank
63
#define isblank(x) ((x)==' '||(x)=='\t')
64
#endif
65
66
#ifndef isgraph
67
#define isgraph(x) (isprint(x)&&!isblank(x))
68
#endif
69
70
#define MAXGROUP 10
71
72
typedef struct
73
{
74
char* beg[MAXGROUP];
75
char* end[MAXGROUP];
76
char* next_s;
77
short groups;
78
} Group_t;
79
80
typedef struct
81
{
82
Group_t current;
83
Group_t best;
84
char* last_s;
85
char* next_p;
86
} Match_t;
87
88
#define mbgetchar(p) (*p++)
89
90
#ifndef isxdigit
91
#define isxdigit(c) ((c)>='0'&&(c)<='9'||(c)>='a'&&(c)<='f'||(c)>='A'&&(c)<='F')
92
#endif
93
94
#define getsource(s,e) (((s)>=(e))?0:mbgetchar(s))
95
96
#define COLL_MAX 3
97
98
/*
99
* gobble chars up to <sub> or ) keeping track of (...) and [...]
100
* sub must be one of { '|', '&', 0 }
101
* 0 returned if s runs out
102
*/
103
104
static char*
105
gobble(Match_t* mp, register char* s, register int sub, int* g, int clear)
106
{
107
register int p = 0;
108
register char* b = 0;
109
int c = 0;
110
int n;
111
112
for (;;)
113
switch (mbgetchar(s))
114
{
115
case '\\':
116
if (mbgetchar(s))
117
break;
118
/*FALLTHROUGH*/
119
case 0:
120
return 0;
121
case '[':
122
if (!b)
123
{
124
if (*s == '!' || *s == '^')
125
mbgetchar(s);
126
b = s;
127
}
128
else if (*s == '.' || *s == '=' || *s == ':')
129
c = *s;
130
break;
131
case ']':
132
if (b)
133
{
134
if (*(s - 2) == c)
135
c = 0;
136
else if (b != (s - 1))
137
b = 0;
138
}
139
break;
140
case '(':
141
if (!b)
142
{
143
p++;
144
n = (*g)++;
145
if (clear)
146
{
147
if (!sub)
148
n++;
149
if (n < MAXGROUP)
150
mp->current.beg[n] = mp->current.end[n] = 0;
151
}
152
}
153
break;
154
case ')':
155
if (!b && p-- <= 0)
156
return sub ? 0 : s;
157
break;
158
case '|':
159
if (!b && !p && sub == '|')
160
return s;
161
break;
162
}
163
}
164
165
static int grpmatch(Match_t*, int, char*, register char*, char*, int);
166
167
/*
168
* match a single pattern
169
* e is the end (0) of the substring in s
170
* r marks the start of a repeated subgroup pattern
171
*/
172
173
static int
174
onematch(Match_t* mp, int g, char* s, char* p, char* e, char* r, int flags)
175
{
176
register int pc;
177
register int sc;
178
register int n;
179
register int icase;
180
char* olds;
181
char* oldp;
182
183
icase = flags & STR_ICASE;
184
do
185
{
186
olds = s;
187
sc = getsource(s, e);
188
if (icase && isupper(sc))
189
sc = tolower(sc);
190
oldp = p;
191
switch (pc = mbgetchar(p))
192
{
193
case '(':
194
case '*':
195
case '?':
196
case '+':
197
case '@':
198
case '!':
199
if (pc == '(' || *p == '(')
200
{
201
char* subp;
202
int oldg;
203
204
s = olds;
205
subp = p + (pc != '(');
206
oldg = g;
207
n = ++g;
208
if (g < MAXGROUP && (!r || g > mp->current.groups))
209
mp->current.beg[g] = mp->current.end[g] = 0;
210
if (!(p = gobble(mp, subp, 0, &g, !r)))
211
return 0;
212
if (pc == '*' || pc == '?' || pc == '+' && oldp == r)
213
{
214
if (onematch(mp, g, s, p, e, NiL, flags))
215
return 1;
216
if (!sc || !getsource(s, e))
217
{
218
mp->current.groups = oldg;
219
return 0;
220
}
221
}
222
if (pc == '*' || pc == '+')
223
{
224
p = oldp;
225
sc = n - 1;
226
}
227
else
228
sc = g;
229
pc = (pc != '!');
230
do
231
{
232
if (grpmatch(mp, n, olds, subp, s, flags) == pc)
233
{
234
if (n < MAXGROUP)
235
{
236
if (!mp->current.beg[n] || mp->current.beg[n] > olds)
237
mp->current.beg[n] = olds;
238
if (s > mp->current.end[n])
239
mp->current.end[n] = s;
240
}
241
if (onematch(mp, sc, s, p, e, oldp, flags))
242
{
243
if (p == oldp && n < MAXGROUP)
244
{
245
if (!mp->current.beg[n] || mp->current.beg[n] > olds)
246
mp->current.beg[n] = olds;
247
if (s > mp->current.end[n])
248
mp->current.end[n] = s;
249
}
250
return 1;
251
}
252
}
253
} while (s < e && mbgetchar(s));
254
mp->current.groups = oldg;
255
return 0;
256
}
257
else if (pc == '*')
258
{
259
/*
260
* several stars are the same as one
261
*/
262
263
while (*p == '*' && *(p + 1) != '(')
264
p++;
265
oldp = p;
266
switch (pc = mbgetchar(p))
267
{
268
case '@':
269
case '!':
270
case '+':
271
n = *p == '(';
272
break;
273
case '(':
274
case '[':
275
case '?':
276
case '*':
277
n = 1;
278
break;
279
case 0:
280
case '|':
281
case '&':
282
case ')':
283
mp->current.next_s = (flags & STR_MAXIMAL) ? e : olds;
284
mp->next_p = oldp;
285
mp->current.groups = g;
286
if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && mp->current.next_s > mp->best.next_s || !(flags & STR_MAXIMAL) && mp->current.next_s < mp->best.next_s))
287
mp->best = mp->current;
288
return 1;
289
case '\\':
290
if (!(pc = mbgetchar(p)))
291
return 0;
292
if (pc >= '0' && pc <= '9')
293
{
294
n = pc - '0';
295
if (n <= g && mp->current.beg[n])
296
pc = *mp->current.beg[n];
297
}
298
/*FALLTHROUGH*/
299
default:
300
if (icase && isupper(pc))
301
pc = tolower(pc);
302
n = 0;
303
break;
304
}
305
p = oldp;
306
for (;;)
307
{
308
if ((n || pc == sc) && onematch(mp, g, olds, p, e, NiL, flags))
309
return 1;
310
if (!sc)
311
return 0;
312
olds = s;
313
sc = getsource(s, e);
314
if ((flags & STR_ICASE) && isupper(sc))
315
sc = tolower(sc);
316
}
317
}
318
else if (pc != '?' && pc != sc)
319
return 0;
320
break;
321
case 0:
322
if (!(flags & STR_MAXIMAL))
323
sc = 0;
324
/*FALLTHROUGH*/
325
case '|':
326
case '&':
327
case ')':
328
if (!sc)
329
{
330
mp->current.next_s = olds;
331
mp->next_p = oldp;
332
mp->current.groups = g;
333
}
334
if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && olds > mp->best.next_s || !(flags & STR_MAXIMAL) && olds < mp->best.next_s))
335
{
336
mp->best = mp->current;
337
mp->best.next_s = olds;
338
mp->best.groups = g;
339
}
340
return !sc;
341
case '[':
342
{
343
/*UNDENT...*/
344
345
int invert;
346
int x;
347
int ok = 0;
348
char* range;
349
350
if (!sc)
351
return 0;
352
range = 0;
353
n = 0;
354
if (invert = *p == '!' || *p == '^')
355
p++;
356
for (;;)
357
{
358
oldp = p;
359
if (!(pc = mbgetchar(p)))
360
return 0;
361
else if (pc == '[' && (*p == ':' || *p == '=' || *p == '.'))
362
{
363
x = 0;
364
n = mbgetchar(p);
365
oldp = p;
366
for (;;)
367
{
368
if (!(pc = mbgetchar(p)))
369
return 0;
370
if (pc == n && *p == ']')
371
break;
372
x++;
373
}
374
mbgetchar(p);
375
if (ok)
376
/*NOP*/;
377
else if (n == ':')
378
{
379
switch (HASHNKEY5(x, oldp[0], oldp[1], oldp[2], oldp[3], oldp[4]))
380
{
381
case HASHNKEY5(5,'a','l','n','u','m'):
382
if (isalnum(sc))
383
ok = 1;
384
break;
385
case HASHNKEY5(5,'a','l','p','h','a'):
386
if (isalpha(sc))
387
ok = 1;
388
break;
389
case HASHNKEY5(5,'b','l','a','n','k'):
390
if (isblank(sc))
391
ok = 1;
392
break;
393
case HASHNKEY5(5,'c','n','t','r','l'):
394
if (iscntrl(sc))
395
ok = 1;
396
break;
397
case HASHNKEY5(5,'d','i','g','i','t'):
398
if (isdigit(sc))
399
ok = 1;
400
break;
401
case HASHNKEY5(5,'g','r','a','p','h'):
402
if (isgraph(sc))
403
ok = 1;
404
break;
405
case HASHNKEY5(5,'l','o','w','e','r'):
406
if (islower(sc))
407
ok = 1;
408
break;
409
case HASHNKEY5(5,'p','r','i','n','t'):
410
if (isprint(sc))
411
ok = 1;
412
break;
413
case HASHNKEY5(5,'p','u','n','c','t'):
414
if (ispunct(sc))
415
ok = 1;
416
break;
417
case HASHNKEY5(5,'s','p','a','c','e'):
418
if (isspace(sc))
419
ok = 1;
420
break;
421
case HASHNKEY5(5,'u','p','p','e','r'):
422
if (icase ? islower(sc) : isupper(sc))
423
ok = 1;
424
break;
425
case HASHNKEY5(6,'x','d','i','g','i'):
426
if (oldp[5] == 't' && isxdigit(sc))
427
ok = 1;
428
break;
429
}
430
}
431
else if (range)
432
goto getrange;
433
else if (*p == '-' && *(p + 1) != ']')
434
{
435
mbgetchar(p);
436
range = oldp;
437
}
438
else if (isalpha(*oldp) && isalpha(*olds) && tolower(*oldp) == tolower(*olds) || sc == mbgetchar(oldp))
439
ok = 1;
440
n = 1;
441
}
442
else if (pc == ']' && n)
443
{
444
if (ok != invert)
445
break;
446
return 0;
447
}
448
else if (pc == '\\' && (oldp = p, !(pc = mbgetchar(p))))
449
return 0;
450
else if (ok)
451
/*NOP*/;
452
else if (range)
453
{
454
getrange:
455
if (icase && isupper(pc))
456
pc = tolower(pc);
457
x = mbgetchar(range);
458
if (icase && isupper(x))
459
x = tolower(x);
460
if (sc == x || sc == pc || sc > x && sc < pc)
461
ok = 1;
462
if (*p == '-' && *(p + 1) != ']')
463
{
464
mbgetchar(p);
465
range = oldp;
466
}
467
else
468
range = 0;
469
n = 1;
470
}
471
else if (*p == '-' && *(p + 1) != ']')
472
{
473
mbgetchar(p);
474
range = oldp;
475
n = 1;
476
}
477
else
478
{
479
if (icase && isupper(pc))
480
pc = tolower(pc);
481
if (sc == pc)
482
ok = 1;
483
n = pc;
484
}
485
}
486
487
/*...INDENT*/
488
}
489
break;
490
case '\\':
491
if (!(pc = mbgetchar(p)))
492
return 0;
493
if (pc >= '0' && pc <= '9')
494
{
495
n = pc - '0';
496
if (n <= g && (oldp = mp->current.beg[n]))
497
{
498
while (oldp < mp->current.end[n])
499
if (!*olds || *olds++ != *oldp++)
500
return 0;
501
s = olds;
502
break;
503
}
504
}
505
/*FALLTHROUGH*/
506
default:
507
if (icase && isupper(pc))
508
pc = tolower(pc);
509
if (pc != sc)
510
return 0;
511
break;
512
}
513
} while (sc);
514
return 0;
515
}
516
517
/*
518
* match any pattern in a group
519
* | and & subgroups are parsed here
520
*/
521
522
static int
523
grpmatch(Match_t* mp, int g, char* s, register char* p, char* e, int flags)
524
{
525
register char* a;
526
527
do
528
{
529
for (a = p; onematch(mp, g, s, a, e, NiL, flags); a++)
530
if (*(a = mp->next_p) != '&')
531
return 1;
532
} while (p = gobble(mp, p, '|', &g, 1));
533
return 0;
534
}
535
536
/*
537
* subgroup match
538
* 0 returned if no match
539
* otherwise number of subgroups matched returned
540
* match group begin offsets are even elements of sub
541
* match group end offsets are odd elements of sub
542
* the matched string is from s+sub[0] up to but not
543
* including s+sub[1]
544
*/
545
546
int
547
strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, int flags)
548
{
549
register int i;
550
register char* s;
551
char* e;
552
Match_t match;
553
554
s = (char*)b;
555
match.last_s = e = s + strlen(s);
556
for (;;)
557
{
558
match.best.next_s = 0;
559
match.current.groups = 0;
560
if ((i = grpmatch(&match, 0, s, (char*)p, e, flags)) || match.best.next_s)
561
{
562
if (!i)
563
match.current = match.best;
564
match.current.groups++;
565
match.current.end[0] = match.current.next_s;
566
break;
567
}
568
if ((flags & STR_LEFT) || s >= e)
569
return 0;
570
s++;
571
}
572
if ((flags & STR_RIGHT) && match.current.next_s != e)
573
return 0;
574
if (!sub)
575
return 1;
576
match.current.beg[0] = s;
577
s = (char*)b;
578
if (n > match.current.groups)
579
n = match.current.groups;
580
for (i = 0; i < n; i++)
581
{
582
sub[i * 2] = match.current.end[i] ? match.current.beg[i] - s : 0;
583
sub[i * 2 + 1] = match.current.end[i] ? match.current.end[i] - s : 0;
584
}
585
return n;
586
}
587
588
/*
589
* compare the string s with the shell pattern p
590
* returns 1 for match 0 otherwise
591
*/
592
593
int
594
strmatch(const char* s, const char* p)
595
{
596
return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT);
597
}
598
599