Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libast/regex/regnexec.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1985-2012 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* David Korn <[email protected]> *
19
* Phong Vo <[email protected]> *
20
* *
21
***********************************************************************/
22
#pragma prototyped
23
24
/*
25
* posix regex executor
26
* single sized-record interface
27
*/
28
29
#include "reglib.h"
30
31
#if _AST_REGEX_DEBUG
32
33
#define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
34
#define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
35
#define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36
37
static unsigned long debug;
38
static unsigned long debug_flag;
39
40
static const char* rexnames[] =
41
{
42
"REX_NULL",
43
"REX_ALT",
44
"REX_ALT_CATCH",
45
"REX_BACK",
46
"REX_BEG",
47
"REX_BEG_STR",
48
"REX_BM",
49
"REX_CAT",
50
"REX_CLASS",
51
"REX_COLL_CLASS",
52
"REX_CONJ",
53
"REX_CONJ_LEFT",
54
"REX_CONJ_RIGHT",
55
"REX_DONE",
56
"REX_DOT",
57
"REX_END",
58
"REX_END_STR",
59
"REX_EXEC",
60
"REX_FIN_STR",
61
"REX_GROUP",
62
"REX_GROUP_CATCH",
63
"REX_GROUP_AHEAD",
64
"REX_GROUP_AHEAD_CATCH",
65
"REX_GROUP_AHEAD_NOT",
66
"REX_GROUP_BEHIND",
67
"REX_GROUP_BEHIND_CATCH",
68
"REX_GROUP_BEHIND_NOT",
69
"REX_GROUP_BEHIND_NOT_CATCH",
70
"REX_GROUP_COND",
71
"REX_GROUP_COND_CATCH",
72
"REX_GROUP_CUT",
73
"REX_GROUP_CUT_CATCH",
74
"REX_KMP",
75
"REX_NEG",
76
"REX_NEG_CATCH",
77
"REX_NEST",
78
"REX_ONECHAR",
79
"REX_REP",
80
"REX_REP_CATCH",
81
"REX_STRING",
82
"REX_TRIE",
83
"REX_WBEG",
84
"REX_WEND",
85
"REX_WORD",
86
"REX_WORD_NOT"
87
};
88
89
static const char* rexname(Rex_t* rex)
90
{
91
if (!rex)
92
return "NIL";
93
if (rex->type >= elementsof(rexnames))
94
return "ERROR";
95
return rexnames[rex->type];
96
}
97
98
#else
99
100
#define DEBUG_INIT()
101
#define DEBUG_TEST(f,y,n) (n)
102
#define DEBUG_CODE(f,y,n) do {n} while(0)
103
104
#endif
105
106
#define BEG_ALT 1 /* beginning of an alt */
107
#define BEG_ONE 2 /* beginning of one iteration of a rep */
108
#define BEG_REP 3 /* beginning of a repetition */
109
#define BEG_SUB 4 /* beginning of a subexpression */
110
#define END_ANY 5 /* end of any of above */
111
112
/*
113
* returns from parse()
114
*/
115
116
#define NONE 0 /* no parse found */
117
#define GOOD 1 /* some parse was found */
118
#define CUT 2 /* no match and no backtrack */
119
#define BEST 3 /* an unbeatable parse was found */
120
#define BAD 4 /* error ocurred */
121
122
/*
123
* REG_SHELL_DOT test
124
*/
125
126
#define LEADING(e,r,s) (*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127
128
/*
129
* Pos_t is for comparing parses. An entry is made in the
130
* array at the beginning and at the end of each Group_t,
131
* each iteration in a Group_t, and each Binary_t.
132
*/
133
134
typedef struct
135
{
136
unsigned char* p; /* where in string */
137
size_t length; /* length in string */
138
short serial; /* preorder subpattern number */
139
short be; /* which end of pair */
140
} Pos_t;
141
142
/* ===== begin library support ===== */
143
144
#define vector(t,v,i) (((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145
146
static Vector_t*
147
vecopen(int inc, int siz)
148
{
149
Vector_t* v;
150
Stk_t* sp;
151
152
if (inc <= 0)
153
inc = 16;
154
if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155
return 0;
156
if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157
{
158
stkclose(sp);
159
return 0;
160
}
161
v->stk = sp;
162
v->vec = (char*)v + sizeof(Vector_t);
163
v->max = v->inc = inc;
164
v->siz = siz;
165
v->cur = 0;
166
return v;
167
}
168
169
static void*
170
vecseek(Vector_t** p, int index)
171
{
172
Vector_t* v = *p;
173
174
if (index >= v->max)
175
{
176
while ((v->max += v->inc) <= index);
177
if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178
return 0;
179
*p = v;
180
v->vec = (char*)v + sizeof(Vector_t);
181
}
182
return v->vec + index * v->siz;
183
}
184
185
static void
186
vecclose(Vector_t* v)
187
{
188
if (v)
189
stkclose(v->stk);
190
}
191
192
typedef struct
193
{
194
Stk_pos_t pos;
195
char data[1];
196
} Stk_frame_t;
197
198
#define stknew(s,p) ((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199
#define stkold(s,p) stkset(s,(p)->base,(p)->offset)
200
201
#define stkframe(s) (*((Stk_frame_t**)stktop(s)-1))
202
#define stkdata(s,t) ((t*)stkframe(s)->data)
203
#define stkpop(s) stkold(s,&(stkframe(s)->pos))
204
205
static void*
206
stkpush(Stk_t* sp, size_t size)
207
{
208
Stk_frame_t* f;
209
Stk_pos_t p;
210
211
stknew(sp, &p);
212
size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213
if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214
return 0;
215
f->pos = p;
216
stkframe(sp) = f;
217
return f->data;
218
}
219
220
/* ===== end library support ===== */
221
222
/*
223
* Match_frame_t is for saving and restoring match records
224
* around alternate attempts, so that fossils will not be
225
* left in the match array. These are the only entries in
226
* the match array that are not otherwise guaranteed to
227
* have current data in them when they get used.
228
*/
229
230
typedef struct
231
{
232
size_t size;
233
regmatch_t* match;
234
regmatch_t save[1];
235
} Match_frame_t;
236
237
#define matchpush(e,x) ((x)->re.group.number?_matchpush(e,x):0)
238
#define matchcopy(e,x) do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); } while (0)
239
#define matchpop(e,x) do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); stkpop(stkstd); } while (0)
240
241
#define pospop(e) (--(e)->pos->cur)
242
243
/*
244
* allocate a frame and push a match onto the stack
245
*/
246
247
static int
248
_matchpush(Env_t* env, Rex_t* rex)
249
{
250
Match_frame_t* f;
251
regmatch_t* m;
252
regmatch_t* e;
253
regmatch_t* s;
254
int num;
255
256
if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
257
num = 0;
258
if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
259
{
260
env->error = REG_ESPACE;
261
return 1;
262
}
263
f->size = num * sizeof(regmatch_t);
264
f->match = m = env->match + rex->re.group.number;
265
e = m + num;
266
s = f->save;
267
while (m < e)
268
{
269
*s++ = *m;
270
*m++ = state.nomatch;
271
}
272
return 0;
273
}
274
275
/*
276
* allocate a frame and push a pos onto the stack
277
*/
278
279
static int
280
pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
281
{
282
Pos_t* pos;
283
284
if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
285
{
286
env->error = REG_ESPACE;
287
return 1;
288
}
289
pos->serial = rex->serial;
290
pos->p = p;
291
pos->be = be;
292
env->pos->cur++;
293
return 0;
294
}
295
296
/*
297
* two matches are known to have the same length
298
* os is start of old pos array, ns is start of new,
299
* oend and nend are end+1 pointers to ends of arrays.
300
* oe and ne are ends (not end+1) of subarrays.
301
* returns 1 if new is better, -1 if old, else 0.
302
*/
303
304
static int
305
better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
306
{
307
Pos_t* oe;
308
Pos_t* ne;
309
int k;
310
int n;
311
312
if (env->error)
313
return -1;
314
for (;;)
315
{
316
DEBUG_CODE(0x0080,{sfprintf(sfstdout, " %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
317
if (ns >= nend)
318
return DEBUG_TEST(0x8000,(os < oend),(0));
319
if (os >= oend)
320
return DEBUG_TEST(0x8000,(-1),(1));
321
n = os->serial;
322
if (ns->serial > n)
323
return -1;
324
if (n > ns->serial)
325
{
326
env->error = REG_PANIC;
327
return -1;
328
}
329
if (ns->p > os->p)
330
return 1;
331
if (os->p > ns->p)
332
return -1;
333
oe = os;
334
k = 0;
335
for (;;)
336
if ((++oe)->serial == n)
337
{
338
if (oe->be != END_ANY)
339
k++;
340
else if (k-- <= 0)
341
break;
342
}
343
ne = ns;
344
k = 0;
345
for (;;)
346
if ((++ne)->serial == n)
347
{
348
if (ne->be != END_ANY)
349
k++;
350
else if (k-- <= 0)
351
break;
352
}
353
if (ne->p > oe->p)
354
return 1;
355
if (oe->p > ne->p)
356
return -1;
357
if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
358
return k;
359
os = oe + 1;
360
ns = ne + 1;
361
}
362
}
363
364
#if _AST_REGEX_DEBUG
365
366
static void
367
showmatch(regmatch_t* p)
368
{
369
sfputc(sfstdout, '(');
370
if (p->rm_so < 0)
371
sfputc(sfstdout, '?');
372
else
373
sfprintf(sfstdout, "%z", p->rm_so);
374
sfputc(sfstdout, ',');
375
if (p->rm_eo < 0)
376
sfputc(sfstdout, '?');
377
else
378
sfprintf(sfstdout, "%z", p->rm_eo);
379
sfputc(sfstdout, ')');
380
}
381
382
static int
383
_better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
384
{
385
int i;
386
387
DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
388
i = better(env, os, ns, oend, nend, 0);
389
DEBUG_TEST(0x0040,(sfprintf(sfstdout, " %s\n", i <= 0 ? "OLD" : "NEW")),(0));
390
return i;
391
}
392
393
#define better _better
394
395
#endif
396
397
#define follow(e,r,c,s) ((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
398
399
static int parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
400
401
static int
402
parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
403
{
404
int i;
405
int r = NONE;
406
Rex_t catcher;
407
408
DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
409
if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
410
{
411
if (env->stack && pospush(env, rex, s, END_ANY))
412
return BAD;
413
i = follow(env, rex, cont, s);
414
if (env->stack)
415
pospop(env);
416
switch (i)
417
{
418
case BAD:
419
return BAD;
420
case CUT:
421
return CUT;
422
case BEST:
423
case GOOD:
424
return BEST;
425
}
426
}
427
if (n < rex->hi)
428
{
429
catcher.type = REX_REP_CATCH;
430
catcher.serial = rex->serial;
431
catcher.re.rep_catch.ref = rex;
432
catcher.re.rep_catch.cont = cont;
433
catcher.re.rep_catch.beg = s;
434
catcher.re.rep_catch.n = n + 1;
435
catcher.next = rex->next;
436
if (n == 0)
437
rex->re.rep_catch.beg = s;
438
if (env->stack)
439
{
440
if (matchpush(env, rex))
441
return BAD;
442
if (pospush(env, rex, s, BEG_ONE))
443
return BAD;
444
DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
445
}
446
r = parse(env, rex->re.group.expr.rex, &catcher, s);
447
DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
448
if (env->stack)
449
{
450
pospop(env);
451
matchpop(env, rex);
452
DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP %d %d (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
453
}
454
switch (r)
455
{
456
case BAD:
457
return BAD;
458
case BEST:
459
return BEST;
460
case CUT:
461
r = NONE;
462
break;
463
case GOOD:
464
if (rex->flags & REG_MINIMAL)
465
return BEST;
466
r = GOOD;
467
break;
468
}
469
}
470
if (n < rex->lo)
471
return r;
472
if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
473
{
474
if (env->stack && pospush(env, rex, s, END_ANY))
475
return BAD;
476
i = follow(env, rex, cont, s);
477
if (env->stack)
478
pospop(env);
479
switch (i)
480
{
481
case BAD:
482
r = BAD;
483
break;
484
case CUT:
485
r = CUT;
486
break;
487
case BEST:
488
r = BEST;
489
break;
490
case GOOD:
491
r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
492
break;
493
}
494
}
495
return r;
496
}
497
498
static int
499
parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
500
{
501
unsigned char* p;
502
int r;
503
504
if (p = rex->map)
505
{
506
for (;;)
507
{
508
if (s >= env->end)
509
return NONE;
510
while (x->c != p[*s])
511
if (!(x = x->sib))
512
return NONE;
513
if (x->end)
514
break;
515
x = x->son;
516
s++;
517
}
518
}
519
else
520
{
521
for (;;)
522
{
523
if (s >= env->end)
524
return NONE;
525
while (x->c != *s)
526
if (!(x = x->sib))
527
return NONE;
528
if (x->end)
529
break;
530
x = x->son;
531
s++;
532
}
533
}
534
s++;
535
if (rex->flags & REG_MINIMAL)
536
switch (follow(env, rex, cont, s))
537
{
538
case BAD:
539
return BAD;
540
case CUT:
541
return CUT;
542
case BEST:
543
case GOOD:
544
return BEST;
545
}
546
if (x->son)
547
switch (parsetrie(env, x->son, rex, cont, s))
548
{
549
case BAD:
550
return BAD;
551
case CUT:
552
return CUT;
553
case BEST:
554
return BEST;
555
case GOOD:
556
if (rex->flags & REG_MINIMAL)
557
return BEST;
558
r = GOOD;
559
break;
560
default:
561
r = NONE;
562
break;
563
}
564
else
565
r = NONE;
566
if (!(rex->flags & REG_MINIMAL))
567
switch (follow(env, rex, cont, s))
568
{
569
case BAD:
570
return BAD;
571
case CUT:
572
return CUT;
573
case BEST:
574
return BEST;
575
case GOOD:
576
return GOOD;
577
}
578
return r;
579
}
580
581
static int
582
collelt(register Celt_t* ce, char* key, int c, int x)
583
{
584
Ckey_t elt;
585
586
mbxfrm(elt, key, COLL_KEY_MAX);
587
for (;; ce++)
588
{
589
switch (ce->typ)
590
{
591
case COLL_call:
592
if (!x && (*ce->fun)(c))
593
return 1;
594
continue;
595
case COLL_char:
596
if (!strcmp((char*)ce->beg, (char*)elt))
597
return 1;
598
continue;
599
case COLL_range:
600
if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
601
return 1;
602
continue;
603
case COLL_range_lc:
604
if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
605
return 1;
606
continue;
607
case COLL_range_uc:
608
if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
609
return 1;
610
continue;
611
}
612
break;
613
}
614
return 0;
615
}
616
617
static int
618
collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
619
{
620
if (!x)
621
{
622
if (collelt(ce, key, c, x))
623
return 1;
624
if (iswlower(c))
625
c = towupper(c);
626
else if (iswupper(c))
627
c = towlower(c);
628
else
629
return 0;
630
x = mbconv(key, c);
631
key[x] = 0;
632
return collelt(ce, key, c, 0);
633
}
634
while (*nxt)
635
{
636
if (collic(ce, key, nxt + 1, c, x))
637
return 1;
638
if (islower(*nxt))
639
*nxt = toupper(*nxt);
640
else if (isupper(*nxt))
641
*nxt = tolower(*nxt);
642
else
643
return 0;
644
nxt++;
645
}
646
return collelt(ce, key, c, x);
647
}
648
649
static int
650
collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
651
{
652
unsigned char* t;
653
wchar_t c;
654
int w;
655
int r;
656
int x;
657
int ic;
658
Ckey_t key;
659
Ckey_t elt;
660
661
ic = (rex->flags & REG_ICASE);
662
if ((w = MBSIZE(s)) > 1)
663
{
664
memcpy((char*)key, (char*)s, w);
665
key[w] = 0;
666
t = s;
667
c = mbchar(t);
668
#if !_lib_wctype
669
c &= 0xff;
670
#endif
671
x = 0;
672
}
673
else
674
{
675
c = s[0];
676
if (ic && isupper(c))
677
c = tolower(c);
678
key[0] = c;
679
key[1] = 0;
680
if (isalpha(c))
681
{
682
x = e - s;
683
if (x > COLL_KEY_MAX)
684
x = COLL_KEY_MAX;
685
while (w < x)
686
{
687
c = s[w];
688
if (!isalpha(c))
689
break;
690
r = mbxfrm(elt, key, COLL_KEY_MAX);
691
if (ic && isupper(c))
692
c = tolower(c);
693
key[w] = c;
694
key[w + 1] = 0;
695
if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
696
break;
697
w++;
698
}
699
}
700
key[w] = 0;
701
c = key[0];
702
x = w - 1;
703
}
704
r = 1;
705
for (;;)
706
{
707
if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
708
break;
709
if (!x)
710
{
711
r = 0;
712
break;
713
}
714
w = x--;
715
key[w] = 0;
716
}
717
*p = s + w;
718
return rex->re.collate.invert ? !r : r;
719
}
720
721
static unsigned char*
722
nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
723
{
724
register int c;
725
register int cc;
726
unsigned int n;
727
int oc;
728
729
if (type[co] & (REX_NEST_literal|REX_NEST_quote))
730
{
731
n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
732
while (s < e)
733
{
734
c = *s++;
735
if (c == co)
736
return s;
737
else if (type[c] & n)
738
{
739
if (s >= e || (type[c] & REX_NEST_terminator))
740
break;
741
s++;
742
}
743
}
744
}
745
else
746
{
747
cc = type[co] >> REX_NEST_SHIFT;
748
oc = type[co] & (REX_NEST_open|REX_NEST_close);
749
n = 1;
750
while (s < e)
751
{
752
c = *s++;
753
switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
754
{
755
case REX_NEST_delimiter:
756
case REX_NEST_terminator:
757
return oc ? 0 : s;
758
case REX_NEST_separator:
759
if (!oc)
760
return s;
761
break;
762
case REX_NEST_escape:
763
if (s >= e)
764
return 0;
765
s++;
766
break;
767
case REX_NEST_open|REX_NEST_close:
768
if (c == cc)
769
{
770
if (!--n)
771
return s;
772
}
773
/*FALLTHROUGH*/
774
case REX_NEST_open:
775
if (c == co)
776
{
777
if (!++n)
778
return 0;
779
}
780
else if (!(s = nestmatch(s, e, type, c)))
781
return 0;
782
break;
783
case REX_NEST_close:
784
if (c != cc)
785
return 0;
786
if (!--n)
787
return s;
788
break;
789
}
790
}
791
return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
792
}
793
return 0;
794
}
795
796
static int
797
parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
798
{
799
int c;
800
int d;
801
int m;
802
int r;
803
ssize_t i;
804
ssize_t n;
805
int* f;
806
unsigned char* p;
807
unsigned char* t;
808
unsigned char* b;
809
unsigned char* e;
810
char* u;
811
regmatch_t* o;
812
Trie_node_t* x;
813
Rex_t* q;
814
Rex_t catcher;
815
Rex_t next;
816
817
for (;;)
818
{
819
DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
820
switch (rex->type)
821
{
822
case REX_ALT:
823
if (env->stack)
824
{
825
if (matchpush(env, rex))
826
return BAD;
827
if (pospush(env, rex, s, BEG_ALT))
828
return BAD;
829
catcher.type = REX_ALT_CATCH;
830
catcher.serial = rex->serial;
831
catcher.re.alt_catch.cont = cont;
832
catcher.next = rex->next;
833
r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
834
if (r < BEST || (rex->flags & REG_MINIMAL))
835
{
836
matchcopy(env, rex);
837
((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
838
n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
839
if (n != NONE)
840
r = n;
841
}
842
pospop(env);
843
matchpop(env, rex);
844
}
845
else
846
{
847
if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
848
r = parse(env, rex->re.group.expr.binary.right, cont, s);
849
if (r == GOOD)
850
r = BEST;
851
}
852
return r;
853
case REX_ALT_CATCH:
854
if (pospush(env, rex, s, END_ANY))
855
return BAD;
856
r = follow(env, rex, rex->re.alt_catch.cont, s);
857
pospop(env);
858
return r;
859
case REX_BACK:
860
o = &env->match[rex->lo];
861
if (o->rm_so < 0)
862
return NONE;
863
i = o->rm_eo - o->rm_so;
864
e = s + i;
865
if (e > env->end)
866
return NONE;
867
t = env->beg + o->rm_so;
868
if (!(p = rex->map))
869
{
870
while (s < e)
871
if (*s++ != *t++)
872
return NONE;
873
}
874
else if (!mbwide())
875
{
876
while (s < e)
877
if (p[*s++] != p[*t++])
878
return NONE;
879
}
880
else
881
{
882
while (s < e)
883
{
884
c = mbchar(s);
885
d = mbchar(t);
886
if (towupper(c) != towupper(d))
887
return NONE;
888
}
889
}
890
break;
891
case REX_BEG:
892
if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
893
return NONE;
894
break;
895
case REX_CLASS:
896
if (LEADING(env, rex, s))
897
return NONE;
898
n = rex->hi;
899
if (n > env->end - s)
900
n = env->end - s;
901
m = rex->lo;
902
if (m > n)
903
return NONE;
904
r = NONE;
905
if (!(rex->flags & REG_MINIMAL))
906
{
907
for (i = 0; i < n; i++)
908
if (!settst(rex->re.charclass, s[i]))
909
{
910
n = i;
911
break;
912
}
913
for (s += n; n-- >= m; s--)
914
switch (follow(env, rex, cont, s))
915
{
916
case BAD:
917
return BAD;
918
case CUT:
919
return CUT;
920
case BEST:
921
return BEST;
922
case GOOD:
923
r = GOOD;
924
break;
925
}
926
}
927
else
928
{
929
for (e = s + m; s < e; s++)
930
if (!settst(rex->re.charclass, *s))
931
return r;
932
e += n - m;
933
for (;;)
934
{
935
switch (follow(env, rex, cont, s))
936
{
937
case BAD:
938
return BAD;
939
case CUT:
940
return CUT;
941
case BEST:
942
case GOOD:
943
return BEST;
944
}
945
if (s >= e || !settst(rex->re.charclass, *s))
946
break;
947
s++;
948
}
949
}
950
return r;
951
case REX_COLL_CLASS:
952
if (LEADING(env, rex, s))
953
return NONE;
954
n = rex->hi;
955
if (n > env->end - s)
956
n = env->end - s;
957
m = rex->lo;
958
if (m > n)
959
return NONE;
960
r = NONE;
961
e = env->end;
962
if (!(rex->flags & REG_MINIMAL))
963
{
964
if (!(b = (unsigned char*)stkpush(stkstd, n)))
965
{
966
env->error = REG_ESPACE;
967
return BAD;
968
}
969
for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
970
{
971
b[i] = t - s;
972
s = t;
973
}
974
for (; i-- >= rex->lo; s -= b[i])
975
switch (follow(env, rex, cont, s))
976
{
977
case BAD:
978
stkpop(stkstd);
979
return BAD;
980
case CUT:
981
stkpop(stkstd);
982
return CUT;
983
case BEST:
984
stkpop(stkstd);
985
return BEST;
986
case GOOD:
987
r = GOOD;
988
break;
989
}
990
stkpop(stkstd);
991
}
992
else
993
{
994
for (i = 0; i < m && s < e; i++, s = t)
995
if (!collmatch(rex, s, e, &t))
996
return r;
997
while (i++ <= n)
998
{
999
switch (follow(env, rex, cont, s))
1000
{
1001
case BAD:
1002
return BAD;
1003
case CUT:
1004
return CUT;
1005
case BEST:
1006
case GOOD:
1007
return BEST;
1008
}
1009
if (s >= e || !collmatch(rex, s, e, &s))
1010
break;
1011
}
1012
}
1013
return r;
1014
case REX_CONJ:
1015
next.type = REX_CONJ_RIGHT;
1016
next.re.conj_right.cont = cont;
1017
next.next = rex->next;
1018
catcher.type = REX_CONJ_LEFT;
1019
catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1020
catcher.re.conj_left.cont = &next;
1021
catcher.re.conj_left.beg = s;
1022
catcher.next = 0;
1023
return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1024
case REX_CONJ_LEFT:
1025
rex->re.conj_left.cont->re.conj_right.end = s;
1026
cont = rex->re.conj_left.cont;
1027
s = rex->re.conj_left.beg;
1028
rex = rex->re.conj_left.right;
1029
continue;
1030
case REX_CONJ_RIGHT:
1031
if (rex->re.conj_right.end != s)
1032
return NONE;
1033
cont = rex->re.conj_right.cont;
1034
break;
1035
case REX_DONE:
1036
if (!env->stack)
1037
return BEST;
1038
n = s - env->beg;
1039
r = env->nsub;
1040
DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1041
if ((i = env->best[0].rm_eo) >= 0)
1042
{
1043
if (rex->flags & REG_MINIMAL)
1044
{
1045
if (n > i)
1046
return GOOD;
1047
}
1048
else
1049
{
1050
if (n < i)
1051
return GOOD;
1052
}
1053
if (n == i && better(env,
1054
(Pos_t*)env->bestpos->vec,
1055
(Pos_t*)env->pos->vec,
1056
(Pos_t*)env->bestpos->vec+env->bestpos->cur,
1057
(Pos_t*)env->pos->vec+env->pos->cur,
1058
0) <= 0)
1059
return GOOD;
1060
}
1061
env->best[0].rm_eo = n;
1062
memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1063
n = env->pos->cur;
1064
if (!vector(Pos_t, env->bestpos, n))
1065
{
1066
env->error = REG_ESPACE;
1067
return BAD;
1068
}
1069
env->bestpos->cur = n;
1070
memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1071
DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1072
return GOOD;
1073
case REX_DOT:
1074
if (LEADING(env, rex, s))
1075
return NONE;
1076
n = rex->hi;
1077
if (n > env->end - s)
1078
n = env->end - s;
1079
m = rex->lo;
1080
if (m > n)
1081
return NONE;
1082
if ((c = rex->explicit) >= 0 && !mbwide())
1083
for (i = 0; i < n; i++)
1084
if (s[i] == c)
1085
{
1086
n = i;
1087
break;
1088
}
1089
r = NONE;
1090
if (!(rex->flags & REG_MINIMAL))
1091
{
1092
if (!mbwide())
1093
{
1094
for (s += n; n-- >= m; s--)
1095
switch (follow(env, rex, cont, s))
1096
{
1097
case BAD:
1098
return BAD;
1099
case CUT:
1100
return CUT;
1101
case BEST:
1102
return BEST;
1103
case GOOD:
1104
r = GOOD;
1105
break;
1106
}
1107
}
1108
else
1109
{
1110
if (!(b = (unsigned char*)stkpush(stkstd, n)))
1111
{
1112
env->error = REG_ESPACE;
1113
return BAD;
1114
}
1115
e = env->end;
1116
for (i = 0; s < e && i < n && *s != c; i++)
1117
s += b[i] = MBSIZE(s);
1118
for (; i-- >= m; s -= b[i])
1119
switch (follow(env, rex, cont, s))
1120
{
1121
case BAD:
1122
stkpop(stkstd);
1123
return BAD;
1124
case CUT:
1125
stkpop(stkstd);
1126
return CUT;
1127
case BEST:
1128
stkpop(stkstd);
1129
return BEST;
1130
case GOOD:
1131
r = GOOD;
1132
break;
1133
}
1134
stkpop(stkstd);
1135
}
1136
}
1137
else
1138
{
1139
if (!mbwide())
1140
{
1141
e = s + n;
1142
for (s += m; s <= e; s++)
1143
switch (follow(env, rex, cont, s))
1144
{
1145
case BAD:
1146
return BAD;
1147
case CUT:
1148
return CUT;
1149
case BEST:
1150
case GOOD:
1151
return BEST;
1152
}
1153
}
1154
else
1155
{
1156
e = env->end;
1157
for (i = 0; s < e && i < m && *s != c; i++)
1158
s += MBSIZE(s);
1159
if (i >= m)
1160
for (; s <= e && i <= n; s += MBSIZE(s), i++)
1161
switch (follow(env, rex, cont, s))
1162
{
1163
case BAD:
1164
return BAD;
1165
case CUT:
1166
return CUT;
1167
case BEST:
1168
case GOOD:
1169
return BEST;
1170
}
1171
}
1172
}
1173
return r;
1174
case REX_END:
1175
if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1176
return NONE;
1177
break;
1178
case REX_GROUP:
1179
DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1180
if (env->stack)
1181
{
1182
if (rex->re.group.number)
1183
env->match[rex->re.group.number].rm_so = s - env->beg;
1184
if (pospush(env, rex, s, BEG_SUB))
1185
return BAD;
1186
catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1187
}
1188
catcher.type = REX_GROUP_CATCH;
1189
catcher.serial = rex->serial;
1190
catcher.re.group_catch.cont = cont;
1191
catcher.next = rex->next;
1192
r = parse(env, rex->re.group.expr.rex, &catcher, s);
1193
if (env->stack)
1194
{
1195
pospop(env);
1196
if (rex->re.group.number)
1197
env->match[rex->re.group.number].rm_so = -1;
1198
}
1199
return r;
1200
case REX_GROUP_CATCH:
1201
DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1202
if (env->stack)
1203
{
1204
if (rex->re.group_catch.eo)
1205
*rex->re.group_catch.eo = s - env->beg;
1206
if (pospush(env, rex, s, END_ANY))
1207
return BAD;
1208
}
1209
r = follow(env, rex, rex->re.group_catch.cont, s);
1210
if (env->stack)
1211
{
1212
pospop(env);
1213
if (rex->re.group_catch.eo)
1214
*rex->re.group_catch.eo = -1;
1215
}
1216
return r;
1217
case REX_GROUP_AHEAD:
1218
catcher.type = REX_GROUP_AHEAD_CATCH;
1219
catcher.flags = rex->flags;
1220
catcher.serial = rex->serial;
1221
catcher.re.rep_catch.beg = s;
1222
catcher.re.rep_catch.cont = cont;
1223
catcher.next = rex->next;
1224
return parse(env, rex->re.group.expr.rex, &catcher, s);
1225
case REX_GROUP_AHEAD_CATCH:
1226
return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1227
case REX_GROUP_AHEAD_NOT:
1228
r = parse(env, rex->re.group.expr.rex, NiL, s);
1229
if (r == NONE)
1230
r = follow(env, rex, cont, s);
1231
else if (r != BAD)
1232
r = NONE;
1233
return r;
1234
case REX_GROUP_BEHIND:
1235
if ((s - env->beg) < rex->re.group.size)
1236
return NONE;
1237
catcher.type = REX_GROUP_BEHIND_CATCH;
1238
catcher.flags = rex->flags;
1239
catcher.serial = rex->serial;
1240
catcher.re.behind_catch.beg = s;
1241
catcher.re.behind_catch.end = e = env->end;
1242
catcher.re.behind_catch.cont = cont;
1243
catcher.next = rex->next;
1244
for (t = s - rex->re.group.size; t >= env->beg; t--)
1245
{
1246
env->end = s;
1247
r = parse(env, rex->re.group.expr.rex, &catcher, t);
1248
env->end = e;
1249
if (r != NONE)
1250
return r;
1251
}
1252
return NONE;
1253
case REX_GROUP_BEHIND_CATCH:
1254
if (s != rex->re.behind_catch.beg)
1255
return NONE;
1256
env->end = rex->re.behind_catch.end;
1257
return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1258
case REX_GROUP_BEHIND_NOT:
1259
if ((s - env->beg) < rex->re.group.size)
1260
r = NONE;
1261
else
1262
{
1263
catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1264
catcher.re.neg_catch.beg = s;
1265
catcher.next = 0;
1266
e = env->end;
1267
env->end = s;
1268
for (t = s - rex->re.group.size; t >= env->beg; t--)
1269
{
1270
r = parse(env, rex->re.group.expr.rex, &catcher, t);
1271
if (r != NONE)
1272
break;
1273
}
1274
env->end = e;
1275
}
1276
if (r == NONE)
1277
r = follow(env, rex, cont, s);
1278
else if (r != BAD)
1279
r = NONE;
1280
return r;
1281
case REX_GROUP_BEHIND_NOT_CATCH:
1282
return s == rex->re.neg_catch.beg ? GOOD : NONE;
1283
case REX_GROUP_COND:
1284
if (q = rex->re.group.expr.binary.right)
1285
{
1286
catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1287
catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1288
}
1289
else
1290
catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1291
if (q = rex->re.group.expr.binary.left)
1292
{
1293
catcher.type = REX_GROUP_COND_CATCH;
1294
catcher.flags = rex->flags;
1295
catcher.serial = rex->serial;
1296
catcher.re.cond_catch.yes = 0;
1297
catcher.re.cond_catch.beg = s;
1298
catcher.re.cond_catch.cont = cont;
1299
catcher.next = rex->next;
1300
r = parse(env, q, &catcher, s);
1301
if (r == BAD || catcher.re.cond_catch.yes)
1302
return r;
1303
}
1304
else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1305
r = GOOD;
1306
else
1307
r = NONE;
1308
if (q = catcher.re.cond_catch.next[r != NONE])
1309
{
1310
catcher.type = REX_CAT;
1311
catcher.flags = q->flags;
1312
catcher.serial = q->serial;
1313
catcher.re.group_catch.cont = cont;
1314
catcher.next = rex->next;
1315
return parse(env, q, &catcher, s);
1316
}
1317
return follow(env, rex, cont, s);
1318
case REX_GROUP_COND_CATCH:
1319
rex->re.cond_catch.yes = 1;
1320
catcher.type = REX_CAT;
1321
catcher.flags = rex->flags;
1322
catcher.serial = rex->serial;
1323
catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1324
catcher.next = rex->next;
1325
return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1326
case REX_CAT:
1327
return follow(env, rex, rex->re.group_catch.cont, s);
1328
case REX_GROUP_CUT:
1329
catcher.type = REX_GROUP_CUT_CATCH;
1330
catcher.flags = rex->flags;
1331
catcher.serial = rex->serial;
1332
catcher.re.group_catch.cont = cont;
1333
catcher.next = rex->next;
1334
return parse(env, rex->re.group.expr.rex, &catcher, s);
1335
case REX_GROUP_CUT_CATCH:
1336
switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1337
{
1338
case GOOD:
1339
r = BEST;
1340
break;
1341
case NONE:
1342
r = CUT;
1343
break;
1344
}
1345
return r;
1346
case REX_KMP:
1347
f = rex->re.string.fail;
1348
b = rex->re.string.base;
1349
n = rex->re.string.size;
1350
t = s;
1351
e = env->end;
1352
if (p = rex->map)
1353
{
1354
while (t + n <= e)
1355
{
1356
for (i = -1; t < e; t++)
1357
{
1358
while (i >= 0 && b[i+1] != p[*t])
1359
i = f[i];
1360
if (b[i+1] == p[*t])
1361
i++;
1362
if (i + 1 == n)
1363
{
1364
t++;
1365
if (env->stack)
1366
env->best[0].rm_so = t - s - n;
1367
switch (follow(env, rex, cont, t))
1368
{
1369
case BAD:
1370
return BAD;
1371
case CUT:
1372
return CUT;
1373
case BEST:
1374
case GOOD:
1375
return BEST;
1376
}
1377
t -= n - 1;
1378
break;
1379
}
1380
}
1381
}
1382
}
1383
else
1384
{
1385
while (t + n <= e)
1386
{
1387
for (i = -1; t < e; t++)
1388
{
1389
while (i >= 0 && b[i+1] != *t)
1390
i = f[i];
1391
if (b[i+1] == *t)
1392
i++;
1393
if (i + 1 == n)
1394
{
1395
t++;
1396
if (env->stack)
1397
env->best[0].rm_so = t - s - n;
1398
switch (follow(env, rex, cont, t))
1399
{
1400
case BAD:
1401
return BAD;
1402
case CUT:
1403
return CUT;
1404
case BEST:
1405
case GOOD:
1406
return BEST;
1407
}
1408
t -= n - 1;
1409
break;
1410
}
1411
}
1412
}
1413
}
1414
return NONE;
1415
case REX_NEG:
1416
if (LEADING(env, rex, s))
1417
return NONE;
1418
i = env->end - s;
1419
n = ((i + 7) >> 3) + 1;
1420
catcher.type = REX_NEG_CATCH;
1421
catcher.re.neg_catch.beg = s;
1422
if (!(p = (unsigned char*)stkpush(stkstd, n)))
1423
return BAD;
1424
memset(catcher.re.neg_catch.index = p, 0, n);
1425
catcher.next = rex->next;
1426
if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1427
r = BAD;
1428
else
1429
{
1430
r = NONE;
1431
for (; i >= 0; i--)
1432
if (!bittst(p, i))
1433
{
1434
switch (follow(env, rex, cont, s + i))
1435
{
1436
case BAD:
1437
r = BAD;
1438
break;
1439
case BEST:
1440
r = BEST;
1441
break;
1442
case CUT:
1443
r = CUT;
1444
break;
1445
case GOOD:
1446
r = GOOD;
1447
/*FALLTHROUGH*/
1448
default:
1449
continue;
1450
}
1451
break;
1452
}
1453
}
1454
stkpop(stkstd);
1455
return r;
1456
case REX_NEG_CATCH:
1457
bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1458
return NONE;
1459
case REX_NEST:
1460
if (s >= env->end)
1461
return NONE;
1462
do
1463
{
1464
if ((c = *s++) == rex->re.nest.primary)
1465
{
1466
if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1467
return NONE;
1468
break;
1469
}
1470
if (rex->re.nest.primary >= 0)
1471
return NONE;
1472
if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1473
break;
1474
if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1475
return NONE;
1476
} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1477
break;
1478
case REX_NULL:
1479
break;
1480
case REX_ONECHAR:
1481
n = rex->hi;
1482
if (n > env->end - s)
1483
n = env->end - s;
1484
m = rex->lo;
1485
if (m > n)
1486
return NONE;
1487
r = NONE;
1488
c = rex->re.onechar;
1489
if (!(rex->flags & REG_MINIMAL))
1490
{
1491
if (!mbwide())
1492
{
1493
if (p = rex->map)
1494
{
1495
for (i = 0; i < n; i++, s++)
1496
if (p[*s] != c)
1497
break;
1498
}
1499
else
1500
{
1501
for (i = 0; i < n; i++, s++)
1502
if (*s != c)
1503
break;
1504
}
1505
for (; i-- >= m; s--)
1506
switch (follow(env, rex, cont, s))
1507
{
1508
case BAD:
1509
return BAD;
1510
case BEST:
1511
return BEST;
1512
case CUT:
1513
return CUT;
1514
case GOOD:
1515
r = GOOD;
1516
break;
1517
}
1518
}
1519
else
1520
{
1521
if (!(b = (unsigned char*)stkpush(stkstd, n)))
1522
{
1523
env->error = REG_ESPACE;
1524
return BAD;
1525
}
1526
e = env->end;
1527
if (!(rex->flags & REG_ICASE))
1528
{
1529
for (i = 0; s < e && i < n; i++, s = t)
1530
{
1531
t = s;
1532
if (mbchar(t) != c)
1533
break;
1534
b[i] = t - s;
1535
}
1536
}
1537
else
1538
{
1539
for (i = 0; s < e && i < n; i++, s = t)
1540
{
1541
t = s;
1542
if (towupper(mbchar(t)) != c)
1543
break;
1544
b[i] = t - s;
1545
}
1546
}
1547
for (; i-- >= m; s -= b[i])
1548
switch (follow(env, rex, cont, s))
1549
{
1550
case BAD:
1551
stkpop(stkstd);
1552
return BAD;
1553
case BEST:
1554
stkpop(stkstd);
1555
return BEST;
1556
case CUT:
1557
stkpop(stkstd);
1558
return CUT;
1559
case GOOD:
1560
r = GOOD;
1561
break;
1562
}
1563
stkpop(stkstd);
1564
}
1565
}
1566
else
1567
{
1568
if (!mbwide())
1569
{
1570
e = s + m;
1571
if (p = rex->map)
1572
{
1573
for (; s < e; s++)
1574
if (p[*s] != c)
1575
return r;
1576
e += n - m;
1577
for (;;)
1578
{
1579
switch (follow(env, rex, cont, s))
1580
{
1581
case BAD:
1582
return BAD;
1583
case CUT:
1584
return CUT;
1585
case BEST:
1586
case GOOD:
1587
return BEST;
1588
}
1589
if (s >= e || p[*s++] != c)
1590
break;
1591
}
1592
}
1593
else
1594
{
1595
for (; s < e; s++)
1596
if (*s != c)
1597
return r;
1598
e += n - m;
1599
for (;;)
1600
{
1601
switch (follow(env, rex, cont, s))
1602
{
1603
case BAD:
1604
return BAD;
1605
case CUT:
1606
return CUT;
1607
case BEST:
1608
case GOOD:
1609
return BEST;
1610
}
1611
if (s >= e || *s++ != c)
1612
break;
1613
}
1614
}
1615
}
1616
else
1617
{
1618
e = env->end;
1619
if (!(rex->flags & REG_ICASE))
1620
{
1621
for (i = 0; i < m && s < e; i++, s = t)
1622
{
1623
t = s;
1624
if (mbchar(t) != c)
1625
return r;
1626
}
1627
while (i++ <= n)
1628
{
1629
switch (follow(env, rex, cont, s))
1630
{
1631
case BAD:
1632
return BAD;
1633
case CUT:
1634
return CUT;
1635
case BEST:
1636
case GOOD:
1637
return BEST;
1638
}
1639
if (s >= e || mbchar(s) != c)
1640
break;
1641
}
1642
}
1643
else
1644
{
1645
for (i = 0; i < m && s < e; i++, s = t)
1646
{
1647
t = s;
1648
if (towupper(mbchar(t)) != c)
1649
return r;
1650
}
1651
while (i++ <= n)
1652
{
1653
switch (follow(env, rex, cont, s))
1654
{
1655
case BAD:
1656
return BAD;
1657
case CUT:
1658
return CUT;
1659
case BEST:
1660
case GOOD:
1661
return BEST;
1662
}
1663
if (s >= e || towupper(mbchar(s)) != c)
1664
break;
1665
}
1666
}
1667
}
1668
}
1669
return r;
1670
case REX_REP:
1671
if (env->stack && pospush(env, rex, s, BEG_REP))
1672
return BAD;
1673
r = parserep(env, rex, cont, s, 0);
1674
if (env->stack)
1675
pospop(env);
1676
return r;
1677
case REX_REP_CATCH:
1678
DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1679
if (env->stack && pospush(env, rex, s, END_ANY))
1680
return BAD;
1681
if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1682
{
1683
/*
1684
* optional empty iteration
1685
*/
1686
1687
DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1688
if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1689
r = NONE;
1690
else if (pospush(env, rex, s, END_ANY))
1691
r = BAD;
1692
else
1693
{
1694
r = follow(env, rex, rex->re.rep_catch.cont, s);
1695
pospop(env);
1696
}
1697
}
1698
else
1699
r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1700
if (env->stack)
1701
pospop(env);
1702
return r;
1703
case REX_STRING:
1704
DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1705
if (rex->re.string.size > (env->end - s))
1706
return NONE;
1707
t = rex->re.string.base;
1708
e = t + rex->re.string.size;
1709
if (!(p = rex->map))
1710
{
1711
while (t < e)
1712
if (*s++ != *t++)
1713
return NONE;
1714
}
1715
else if (!mbwide())
1716
{
1717
while (t < e)
1718
if (p[*s++] != *t++)
1719
return NONE;
1720
}
1721
else
1722
{
1723
while (t < e)
1724
{
1725
c = mbchar(s);
1726
d = mbchar(t);
1727
if (towupper(c) != d)
1728
return NONE;
1729
}
1730
}
1731
break;
1732
case REX_TRIE:
1733
if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1734
return NONE;
1735
return parsetrie(env, x, rex, cont, s);
1736
case REX_EXEC:
1737
u = 0;
1738
r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1739
e = (unsigned char*)u;
1740
if (e >= s && e <= env->end)
1741
s = e;
1742
switch (r)
1743
{
1744
case 0:
1745
break;
1746
case REG_NOMATCH:
1747
return NONE;
1748
default:
1749
env->error = r;
1750
return BAD;
1751
}
1752
break;
1753
case REX_WBEG:
1754
if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1755
return NONE;
1756
break;
1757
case REX_WEND:
1758
if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1759
return NONE;
1760
break;
1761
case REX_WORD:
1762
if (s > env->beg && isword(*(s - 1)) == isword(*s))
1763
return NONE;
1764
break;
1765
case REX_WORD_NOT:
1766
if (s == env->beg || isword(*(s - 1)) != isword(*s))
1767
return NONE;
1768
break;
1769
case REX_BEG_STR:
1770
if (s != env->beg)
1771
return NONE;
1772
break;
1773
case REX_END_STR:
1774
for (t = s; t < env->end && *t == '\n'; t++);
1775
if (t < env->end)
1776
return NONE;
1777
break;
1778
case REX_FIN_STR:
1779
if (s < env->end)
1780
return NONE;
1781
break;
1782
}
1783
if (!(rex = rex->next))
1784
{
1785
if (!(rex = cont))
1786
break;
1787
cont = 0;
1788
}
1789
}
1790
return GOOD;
1791
}
1792
1793
#if _AST_REGEX_DEBUG
1794
1795
static void
1796
listnode(Rex_t* e, int level)
1797
{
1798
int i;
1799
1800
if (e)
1801
{
1802
do
1803
{
1804
for (i = 0; i < level; i++)
1805
sfprintf(sfstderr, " ");
1806
sfprintf(sfstderr, "%s\n", rexname(e));
1807
switch (e->type)
1808
{
1809
case REX_ALT:
1810
case REX_CONJ:
1811
listnode(e->re.group.expr.binary.left, level + 1);
1812
listnode(e->re.group.expr.binary.right, level + 1);
1813
break;
1814
case REX_GROUP:
1815
case REX_GROUP_AHEAD:
1816
case REX_GROUP_AHEAD_NOT:
1817
case REX_GROUP_BEHIND:
1818
case REX_GROUP_BEHIND_NOT:
1819
case REX_GROUP_CUT:
1820
case REX_NEG:
1821
case REX_REP:
1822
listnode(e->re.group.expr.rex, level + 1);
1823
break;
1824
}
1825
} while (e = e->next);
1826
}
1827
}
1828
1829
static int
1830
list(Env_t* env, Rex_t* rex)
1831
{
1832
sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1833
if (rex)
1834
listnode(rex, 1);
1835
return 0;
1836
}
1837
1838
#endif
1839
1840
/*
1841
* returning REG_BADPAT or REG_ESPACE is not explicitly
1842
* countenanced by the standard
1843
*/
1844
1845
int
1846
regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1847
{
1848
register ssize_t n;
1849
register int i;
1850
int j;
1851
int k;
1852
int m;
1853
int advance;
1854
Env_t* env;
1855
Rex_t* e;
1856
1857
DEBUG_INIT();
1858
DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1859
if (!p || !(env = p->env))
1860
return REG_BADPAT;
1861
if (!s)
1862
return fatal(env->disc, REG_BADPAT, NiL);
1863
if (len < env->min)
1864
{
1865
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1866
return REG_NOMATCH;
1867
}
1868
env->regex = p;
1869
env->beg = (unsigned char*)s;
1870
env->end = env->beg + len;
1871
stknew(stkstd, &env->stk);
1872
env->flags &= ~REG_EXEC;
1873
env->flags |= (flags & REG_EXEC);
1874
advance = 0;
1875
if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1876
{
1877
n = env->nsub;
1878
if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1879
!env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1880
!env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1881
{
1882
k = REG_ESPACE;
1883
goto done;
1884
}
1885
env->pos->cur = env->bestpos->cur = 0;
1886
env->best = &env->match[n + 1];
1887
env->best[0].rm_so = 0;
1888
env->best[0].rm_eo = -1;
1889
for (i = 0; i <= n; i++)
1890
env->match[i] = state.nomatch;
1891
if (flags & REG_ADVANCE)
1892
advance = 1;
1893
}
1894
DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1895
k = REG_NOMATCH;
1896
if ((e = env->rex)->type == REX_BM)
1897
{
1898
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1899
if (len < e->re.bm.right)
1900
{
1901
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1902
goto done;
1903
}
1904
else if (!(flags & REG_LEFT))
1905
{
1906
register unsigned char* buf = (unsigned char*)s;
1907
register size_t index = e->re.bm.left + e->re.bm.size;
1908
register size_t mid = len - e->re.bm.right;
1909
register size_t* skip = e->re.bm.skip;
1910
register size_t* fail = e->re.bm.fail;
1911
register Bm_mask_t** mask = e->re.bm.mask;
1912
Bm_mask_t m;
1913
size_t x;
1914
1915
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1916
for (;;)
1917
{
1918
while ((index += skip[buf[index]]) < mid);
1919
if (index < HIT)
1920
{
1921
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1922
goto done;
1923
}
1924
index -= HIT;
1925
m = mask[n = e->re.bm.size - 1][buf[index]];
1926
do
1927
{
1928
if (!n--)
1929
{
1930
if (e->re.bm.back < 0)
1931
goto possible;
1932
if (advance)
1933
{
1934
i = index - e->re.bm.back;
1935
s += i;
1936
if (env->stack)
1937
env->best[0].rm_so += i;
1938
goto possible;
1939
}
1940
x = index;
1941
if (index < e->re.bm.back)
1942
index = 0;
1943
else
1944
index -= e->re.bm.back;
1945
while (index <= x)
1946
{
1947
if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1948
{
1949
if (env->stack)
1950
env->best[0].rm_so = index;
1951
n = env->nsub;
1952
goto hit;
1953
}
1954
index++;
1955
}
1956
index += e->re.bm.size;
1957
break;
1958
}
1959
} while (m &= mask[n][buf[--index]]);
1960
if ((index += fail[n + 1]) >= len)
1961
goto done;
1962
}
1963
}
1964
possible:
1965
n = env->nsub;
1966
e = e->next;
1967
}
1968
j = env->once || (flags & REG_LEFT);
1969
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1970
while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1971
{
1972
if (j)
1973
goto done;
1974
i = MBSIZE(s);
1975
s += i;
1976
if ((unsigned char*)s > env->end - env->min)
1977
goto done;
1978
if (env->stack)
1979
env->best[0].rm_so += i;
1980
}
1981
if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1982
goto done;
1983
hit:
1984
if (k = env->error)
1985
goto done;
1986
if (i == CUT)
1987
{
1988
k = env->error = REG_NOMATCH;
1989
goto done;
1990
}
1991
if (!(env->flags & REG_NOSUB))
1992
{
1993
k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1994
for (i = j = m = 0; j < nmatch; i++)
1995
if (!i || !k || (i & 1))
1996
{
1997
if (i > n)
1998
match[j] = state.nomatch;
1999
else
2000
match[m = j] = env->best[i];
2001
j++;
2002
}
2003
if (k)
2004
{
2005
while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2006
m--;
2007
((regex_t*)p)->re_nsub = m;
2008
}
2009
}
2010
k = 0;
2011
done:
2012
stkold(stkstd, &env->stk);
2013
env->stk.base = 0;
2014
if (k > REG_NOMATCH)
2015
fatal(p->env->disc, k, NiL);
2016
return k;
2017
}
2018
2019
void
2020
regfree(regex_t* p)
2021
{
2022
Env_t* env;
2023
2024
if (p && (env = p->env))
2025
{
2026
#if _REG_subcomp
2027
if (env->sub)
2028
{
2029
regsubfree(p);
2030
p->re_sub = 0;
2031
}
2032
#endif
2033
p->env = 0;
2034
if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2035
{
2036
drop(env->disc, env->rex);
2037
if (env->pos)
2038
vecclose(env->pos);
2039
if (env->bestpos)
2040
vecclose(env->bestpos);
2041
if (env->stk.base)
2042
stkold(stkstd, &env->stk);
2043
alloc(env->disc, env, 0);
2044
}
2045
}
2046
}
2047
2048
/*
2049
* 20120528: regoff_t changed from int to ssize_t
2050
*/
2051
2052
#if defined(__EXPORT__)
2053
#define extern __EXPORT__
2054
#endif
2055
2056
#undef regnexec
2057
#if _map_libc
2058
#define regnexec _ast_regnexec
2059
#endif
2060
2061
extern int
2062
regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, oldregmatch_t* oldmatch, regflags_t flags)
2063
{
2064
if (oldmatch)
2065
{
2066
regmatch_t* match;
2067
ssize_t i;
2068
int r;
2069
2070
if (!(match = oldof(0, regmatch_t, nmatch, 0)))
2071
return -1;
2072
if (!(r = regnexec_20120528(p, s, len, nmatch, match, flags)))
2073
for (i = 0; i < nmatch; i++)
2074
{
2075
oldmatch[i].rm_so = match[i].rm_so;
2076
oldmatch[i].rm_eo = match[i].rm_eo;
2077
}
2078
free(match);
2079
return r;
2080
}
2081
return regnexec_20120528(p, s, len, 0, NiL, flags);
2082
}
2083
2084