Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
srohatgi01
GitHub Repository: srohatgi01/cups
Path: blob/master/vcnet/regex/engine.c
1090 views
1
/*
2
* The matching engine and friends. This file is #included by regexec.c
3
* after suitable #defines of a variety of macros used herein, so that
4
* different state representations can be used without duplicating masses
5
* of code.
6
*/
7
8
#ifdef SNAMES
9
#define matcher smatcher
10
#define fast sfast
11
#define slow sslow
12
#define dissect sdissect
13
#define backref sbackref
14
#define step sstep
15
#define print sprint
16
#define at sat
17
#define match smat
18
#endif
19
#ifdef LNAMES
20
#define matcher lmatcher
21
#define fast lfast
22
#define slow lslow
23
#define dissect ldissect
24
#define backref lbackref
25
#define step lstep
26
#define print lprint
27
#define at lat
28
#define match lmat
29
#endif
30
31
/* another structure passed up and down to avoid zillions of parameters */
32
struct match {
33
struct re_guts *g;
34
int eflags;
35
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
36
char *offp; /* offsets work from here */
37
char *beginp; /* start of string -- virtual NUL precedes */
38
char *endp; /* end of string -- virtual NUL here */
39
char *coldp; /* can be no match starting before here */
40
char **lastpos; /* [nplus+1] */
41
STATEVARS;
42
states st; /* current states */
43
states fresh; /* states for a fresh start */
44
states tmp; /* temporary */
45
states empty; /* empty set of states */
46
};
47
48
#ifdef REDEBUG
49
#define SP(t, s, c) print(m, t, s, c, stdout)
50
#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
51
#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
52
#else
53
#define SP(t, s, c) /* nothing */
54
#define AT(t, p1, p2, s1, s2) /* nothing */
55
#define NOTE(s) /* nothing */
56
#endif
57
58
static char *dissect(struct match *m, char *start,
59
char *stop, sopno startst, sopno stopst);
60
static char *backref(struct match *m, char *start,
61
char *stop, sopno startst, sopno stopst, sopno lev);
62
static char *fast(struct match *m, char *start,
63
char *stop, sopno startst, sopno stopst);
64
static char *slow(struct match *m, char *start,
65
char *stop, sopno startst, sopno stopst);
66
static states step(struct re_guts *g, sopno start, sopno stop,
67
states bef, int ch, states aft);
68
#ifdef REDEBUG
69
static void print(struct match *m, char *caption, states st,
70
int ch, FILE *d);
71
static void at(struct match *m, char *title, char *start, char *stop,
72
sopno startst, sopno stopst);
73
static char *pchar(int ch);
74
#endif
75
76
#define BOL (OUT+1)
77
#define EOL (BOL+1)
78
#define BOLEOL (BOL+2)
79
#define NOTHING (BOL+3)
80
#define BOW (BOL+4)
81
#define EOW (BOL+5)
82
#define CODEMAX (BOL+5) // highest code used
83
#define NONCHAR(c) ((c) > CHAR_MAX)
84
#define NNONCHAR (CODEMAX-CHAR_MAX)
85
86
/*
87
- matcher - the actual matching engine
88
*/
89
static int /* 0 success, REG_NOMATCH failure */
90
matcher(g, string, nmatch, pmatch, eflags)
91
struct re_guts *g;
92
char *string;
93
size_t nmatch;
94
regmatch_t pmatch[];
95
int eflags;
96
{
97
char *endp;
98
int i;
99
struct match mv;
100
struct match *m = &mv;
101
char *dp;
102
const sopno gf = g->firststate+1; /* +1 for OEND */
103
const sopno gl = g->laststate;
104
char *start;
105
char *stop;
106
107
/* simplify the situation where possible */
108
if (g->cflags&REG_NOSUB)
109
nmatch = 0;
110
if (eflags&REG_STARTEND) {
111
start = string + pmatch[0].rm_so;
112
stop = string + pmatch[0].rm_eo;
113
} else {
114
start = string;
115
stop = start + strlen(start);
116
}
117
if (stop < start)
118
return(REG_INVARG);
119
120
/* prescreening; this does wonders for this rather slow code */
121
if (g->must != NULL) {
122
for (dp = start; dp < stop; dp++)
123
if (*dp == g->must[0] && stop - dp >= g->mlen &&
124
memcmp(dp, g->must, (size_t)g->mlen) == 0)
125
break;
126
if (dp == stop) /* we didn't find g->must */
127
return(REG_NOMATCH);
128
}
129
130
/* match struct setup */
131
m->g = g;
132
m->eflags = eflags;
133
m->pmatch = NULL;
134
m->lastpos = NULL;
135
m->offp = string;
136
m->beginp = start;
137
m->endp = stop;
138
STATESETUP(m, 4);
139
SETUP(m->st);
140
SETUP(m->fresh);
141
SETUP(m->tmp);
142
SETUP(m->empty);
143
CLEAR(m->empty);
144
145
/* this loop does only one repetition except for backrefs */
146
for (;;) {
147
endp = fast(m, start, stop, gf, gl);
148
if (endp == NULL) { /* a miss */
149
STATETEARDOWN(m);
150
return(REG_NOMATCH);
151
}
152
if (nmatch == 0 && !g->backrefs)
153
break; /* no further info needed */
154
155
/* where? */
156
assert(m->coldp != NULL);
157
for (;;) {
158
NOTE("finding start");
159
endp = slow(m, m->coldp, stop, gf, gl);
160
if (endp != NULL)
161
break;
162
assert(m->coldp < m->endp);
163
m->coldp++;
164
}
165
if (nmatch == 1 && !g->backrefs)
166
break; /* no further info needed */
167
168
/* oh my, he wants the subexpressions... */
169
if (m->pmatch == NULL)
170
m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
171
sizeof(regmatch_t));
172
if (m->pmatch == NULL) {
173
STATETEARDOWN(m);
174
return(REG_ESPACE);
175
}
176
for (i = 1; (size_t) i <= m->g->nsub; i++)
177
m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
178
if (!g->backrefs && !(m->eflags&REG_BACKR)) {
179
NOTE("dissecting");
180
dp = dissect(m, m->coldp, endp, gf, gl);
181
} else {
182
if (g->nplus > 0 && m->lastpos == NULL)
183
m->lastpos = (char **)malloc(((size_t)g->nplus+1) *
184
sizeof(char *));
185
if (g->nplus > 0 && m->lastpos == NULL) {
186
free(m->pmatch);
187
STATETEARDOWN(m);
188
return(REG_ESPACE);
189
}
190
NOTE("backref dissect");
191
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
192
}
193
if (dp != NULL)
194
break;
195
196
/* uh-oh... we couldn't find a subexpression-level match */
197
assert(g->backrefs); /* must be back references doing it */
198
assert(g->nplus == 0 || m->lastpos != NULL);
199
for (;;) {
200
if (dp != NULL || endp <= m->coldp)
201
break; /* defeat */
202
NOTE("backoff");
203
endp = slow(m, m->coldp, endp-1, gf, gl);
204
if (endp == NULL)
205
break; /* defeat */
206
/* try it on a shorter possibility */
207
#ifndef NDEBUG
208
for (i = 1; (size_t) i <= m->g->nsub; i++) {
209
assert(m->pmatch[i].rm_so == -1);
210
assert(m->pmatch[i].rm_eo == -1);
211
}
212
#endif
213
NOTE("backoff dissect");
214
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
215
}
216
assert(dp == NULL || dp == endp);
217
if (dp != NULL) /* found a shorter one */
218
break;
219
220
/* despite initial appearances, there is no match here */
221
NOTE("false alarm");
222
start = m->coldp + 1; /* recycle starting later */
223
assert(start <= stop);
224
}
225
226
/* fill in the details if requested */
227
if (nmatch > 0) {
228
pmatch[0].rm_so = (regoff_t)(m->coldp - m->offp);
229
pmatch[0].rm_eo = (regoff_t)(endp - m->offp);
230
}
231
if (nmatch > 1) {
232
assert(m->pmatch != NULL);
233
for (i = 1; (size_t) i < nmatch; i++)
234
if ((size_t) i <= m->g->nsub)
235
pmatch[i] = m->pmatch[i];
236
else {
237
pmatch[i].rm_so = -1;
238
pmatch[i].rm_eo = -1;
239
}
240
}
241
242
if (m->pmatch != NULL)
243
free((char *)m->pmatch);
244
if (m->lastpos != NULL)
245
free((char *)m->lastpos);
246
STATETEARDOWN(m);
247
return(0);
248
}
249
250
/*
251
- dissect - figure out what matched what, no back references
252
*/
253
static char * /* == stop (success) always */
254
dissect(m, start, stop, startst, stopst)
255
struct match *m;
256
char *start;
257
char *stop;
258
sopno startst;
259
sopno stopst;
260
{
261
int i;
262
sopno ss; /* start sop of current subRE */
263
sopno es; /* end sop of current subRE */
264
char *sp; /* start of string matched by it */
265
char *stp; /* string matched by it cannot pass here */
266
char *rest; /* start of rest of string */
267
char *tail; /* string unmatched by rest of RE */
268
sopno ssub; /* start sop of subsubRE */
269
sopno esub; /* end sop of subsubRE */
270
char *ssp; /* start of string matched by subsubRE */
271
char *sep; /* end of string matched by subsubRE */
272
char *oldssp; /* previous ssp */
273
char *dp;
274
275
AT("diss", start, stop, startst, stopst);
276
sp = start;
277
for (ss = startst; ss < stopst; ss = es) {
278
/* identify end of subRE */
279
es = ss;
280
switch (OP(m->g->strip[es])) {
281
case OPLUS_:
282
case OQUEST_:
283
es += OPND(m->g->strip[es]);
284
break;
285
case OCH_:
286
while (OP(m->g->strip[es]) != O_CH)
287
es += OPND(m->g->strip[es]);
288
break;
289
}
290
es++;
291
292
/* figure out what it matched */
293
switch (OP(m->g->strip[ss])) {
294
case OEND:
295
assert(nope);
296
break;
297
case OCHAR:
298
sp++;
299
break;
300
case OBOL:
301
case OEOL:
302
case OBOW:
303
case OEOW:
304
break;
305
case OANY:
306
case OANYOF:
307
sp++;
308
break;
309
case OBACK_:
310
case O_BACK:
311
assert(nope);
312
break;
313
/* cases where length of match is hard to find */
314
case OQUEST_:
315
stp = stop;
316
for (;;) {
317
/* how long could this one be? */
318
rest = slow(m, sp, stp, ss, es);
319
assert(rest != NULL); /* it did match */
320
/* could the rest match the rest? */
321
tail = slow(m, rest, stop, es, stopst);
322
if (tail == stop)
323
break; /* yes! */
324
/* no -- try a shorter match for this one */
325
stp = rest - 1;
326
assert(stp >= sp); /* it did work */
327
}
328
ssub = ss + 1;
329
esub = es - 1;
330
/* did innards match? */
331
if (slow(m, sp, rest, ssub, esub) != NULL) {
332
dp = dissect(m, sp, rest, ssub, esub);
333
assert(dp == rest);
334
} else /* no */
335
assert(sp == rest);
336
sp = rest;
337
break;
338
case OPLUS_:
339
stp = stop;
340
for (;;) {
341
/* how long could this one be? */
342
rest = slow(m, sp, stp, ss, es);
343
assert(rest != NULL); /* it did match */
344
/* could the rest match the rest? */
345
tail = slow(m, rest, stop, es, stopst);
346
if (tail == stop)
347
break; /* yes! */
348
/* no -- try a shorter match for this one */
349
stp = rest - 1;
350
assert(stp >= sp); /* it did work */
351
}
352
ssub = ss + 1;
353
esub = es - 1;
354
ssp = sp;
355
oldssp = ssp;
356
for (;;) { /* find last match of innards */
357
sep = slow(m, ssp, rest, ssub, esub);
358
if (sep == NULL || sep == ssp)
359
break; /* failed or matched null */
360
oldssp = ssp; /* on to next try */
361
ssp = sep;
362
}
363
if (sep == NULL) {
364
/* last successful match */
365
sep = ssp;
366
ssp = oldssp;
367
}
368
assert(sep == rest); /* must exhaust substring */
369
assert(slow(m, ssp, sep, ssub, esub) == rest);
370
dp = dissect(m, ssp, sep, ssub, esub);
371
assert(dp == sep);
372
sp = rest;
373
break;
374
case OCH_:
375
stp = stop;
376
for (;;) {
377
/* how long could this one be? */
378
rest = slow(m, sp, stp, ss, es);
379
assert(rest != NULL); /* it did match */
380
/* could the rest match the rest? */
381
tail = slow(m, rest, stop, es, stopst);
382
if (tail == stop)
383
break; /* yes! */
384
/* no -- try a shorter match for this one */
385
stp = rest - 1;
386
assert(stp >= sp); /* it did work */
387
}
388
ssub = ss + 1;
389
esub = ss + OPND(m->g->strip[ss]) - 1;
390
assert(OP(m->g->strip[esub]) == OOR1);
391
for (;;) { /* find first matching branch */
392
if (slow(m, sp, rest, ssub, esub) == rest)
393
break; /* it matched all of it */
394
/* that one missed, try next one */
395
assert(OP(m->g->strip[esub]) == OOR1);
396
esub++;
397
assert(OP(m->g->strip[esub]) == OOR2);
398
ssub = esub + 1;
399
esub += OPND(m->g->strip[esub]);
400
if (OP(m->g->strip[esub]) == OOR2)
401
esub--;
402
else
403
assert(OP(m->g->strip[esub]) == O_CH);
404
}
405
dp = dissect(m, sp, rest, ssub, esub);
406
assert(dp == rest);
407
sp = rest;
408
break;
409
case O_PLUS:
410
case O_QUEST:
411
case OOR1:
412
case OOR2:
413
case O_CH:
414
assert(nope);
415
break;
416
case OLPAREN:
417
i = OPND(m->g->strip[ss]);
418
assert(0 < i && (size_t) i <= m->g->nsub);
419
m->pmatch[i].rm_so = (regoff_t)(sp - m->offp);
420
break;
421
case ORPAREN:
422
i = OPND(m->g->strip[ss]);
423
assert(0 < i && (size_t) i <= m->g->nsub);
424
m->pmatch[i].rm_eo = (regoff_t)(sp - m->offp);
425
break;
426
default: /* uh oh */
427
assert(nope);
428
break;
429
}
430
}
431
432
assert(sp == stop);
433
return(sp);
434
}
435
436
/*
437
- backref - figure out what matched what, figuring in back references
438
*/
439
static char * /* == stop (success) or NULL (failure) */
440
backref(m, start, stop, startst, stopst, lev)
441
struct match *m;
442
char *start;
443
char *stop;
444
sopno startst;
445
sopno stopst;
446
sopno lev; /* PLUS nesting level */
447
{
448
int i;
449
sopno ss; /* start sop of current subRE */
450
char *sp; /* start of string matched by it */
451
sopno ssub; /* start sop of subsubRE */
452
sopno esub; /* end sop of subsubRE */
453
char *ssp; /* start of string matched by subsubRE */
454
char *dp;
455
size_t len;
456
int hard;
457
sop s;
458
regoff_t offsave;
459
cset *cs;
460
461
AT("back", start, stop, startst, stopst);
462
sp = start;
463
464
/* get as far as we can with easy stuff */
465
hard = 0;
466
for (ss = startst; !hard && ss < stopst; ss++)
467
switch (OP(s = m->g->strip[ss])) {
468
case OCHAR:
469
if (sp == stop || *sp++ != (char)OPND(s))
470
return(NULL);
471
break;
472
case OANY:
473
if (sp == stop)
474
return(NULL);
475
sp++;
476
break;
477
case OANYOF:
478
cs = &m->g->sets[OPND(s)];
479
if (sp == stop || !CHIN(cs, *sp++))
480
return(NULL);
481
break;
482
case OBOL:
483
if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
484
(sp < m->endp && *(sp-1) == '\n' &&
485
(m->g->cflags&REG_NEWLINE)) )
486
{ /* yes */ }
487
else
488
return(NULL);
489
break;
490
case OEOL:
491
if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
492
(sp < m->endp && *sp == '\n' &&
493
(m->g->cflags&REG_NEWLINE)) )
494
{ /* yes */ }
495
else
496
return(NULL);
497
break;
498
case OBOW:
499
if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
500
(sp < m->endp && *(sp-1) == '\n' &&
501
(m->g->cflags&REG_NEWLINE)) ||
502
(sp > m->beginp &&
503
!ISWORD(*(sp-1))) ) &&
504
(sp < m->endp && ISWORD(*sp)) )
505
{ /* yes */ }
506
else
507
return(NULL);
508
break;
509
case OEOW:
510
if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
511
(sp < m->endp && *sp == '\n' &&
512
(m->g->cflags&REG_NEWLINE)) ||
513
(sp < m->endp && !ISWORD(*sp)) ) &&
514
(sp > m->beginp && ISWORD(*(sp-1))) )
515
{ /* yes */ }
516
else
517
return(NULL);
518
break;
519
case O_QUEST:
520
break;
521
case OOR1: /* matches null but needs to skip */
522
ss++;
523
s = m->g->strip[ss];
524
do {
525
assert(OP(s) == OOR2);
526
ss += OPND(s);
527
} while (OP(s = m->g->strip[ss]) != O_CH);
528
/* note that the ss++ gets us past the O_CH */
529
break;
530
default: /* have to make a choice */
531
hard = 1;
532
break;
533
}
534
if (!hard) { /* that was it! */
535
if (sp != stop)
536
return(NULL);
537
return(sp);
538
}
539
ss--; /* adjust for the for's final increment */
540
541
/* the hard stuff */
542
AT("hard", sp, stop, ss, stopst);
543
s = m->g->strip[ss];
544
switch (OP(s)) {
545
case OBACK_: /* the vilest depths */
546
i = OPND(s);
547
assert(0 < i && (size_t) i <= m->g->nsub);
548
if (m->pmatch[i].rm_eo == -1)
549
return(NULL);
550
assert(m->pmatch[i].rm_so != -1);
551
len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
552
assert((size_t) (stop - m->beginp) >= len);
553
if (sp > stop - len)
554
return(NULL); /* not enough left to match */
555
ssp = m->offp + m->pmatch[i].rm_so;
556
if (memcmp(sp, ssp, len) != 0)
557
return(NULL);
558
while (m->g->strip[ss] != SOP(O_BACK, i))
559
ss++;
560
return(backref(m, sp+len, stop, ss+1, stopst, lev));
561
break;
562
case OQUEST_: /* to null or not */
563
dp = backref(m, sp, stop, ss+1, stopst, lev);
564
if (dp != NULL)
565
return(dp); /* not */
566
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
567
break;
568
case OPLUS_:
569
assert(m->lastpos != NULL);
570
assert(lev+1 <= m->g->nplus);
571
m->lastpos[lev+1] = sp;
572
return(backref(m, sp, stop, ss+1, stopst, lev+1));
573
break;
574
case O_PLUS:
575
if (sp == m->lastpos[lev]) /* last pass matched null */
576
return(backref(m, sp, stop, ss+1, stopst, lev-1));
577
/* try another pass */
578
m->lastpos[lev] = sp;
579
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
580
if (dp == NULL)
581
return(backref(m, sp, stop, ss+1, stopst, lev-1));
582
else
583
return(dp);
584
break;
585
case OCH_: /* find the right one, if any */
586
ssub = ss + 1;
587
esub = ss + OPND(s) - 1;
588
assert(OP(m->g->strip[esub]) == OOR1);
589
for (;;) { /* find first matching branch */
590
dp = backref(m, sp, stop, ssub, esub, lev);
591
if (dp != NULL)
592
return(dp);
593
/* that one missed, try next one */
594
if (OP(m->g->strip[esub]) == O_CH)
595
return(NULL); /* there is none */
596
esub++;
597
assert(OP(m->g->strip[esub]) == OOR2);
598
ssub = esub + 1;
599
esub += OPND(m->g->strip[esub]);
600
if (OP(m->g->strip[esub]) == OOR2)
601
esub--;
602
else
603
assert(OP(m->g->strip[esub]) == O_CH);
604
}
605
break;
606
case OLPAREN: /* must undo assignment if rest fails */
607
i = OPND(s);
608
assert(0 < i && (size_t) i <= m->g->nsub);
609
offsave = m->pmatch[i].rm_so;
610
m->pmatch[i].rm_so = (regoff_t)(sp - m->offp);
611
dp = backref(m, sp, stop, ss+1, stopst, lev);
612
if (dp != NULL)
613
return(dp);
614
m->pmatch[i].rm_so = offsave;
615
return(NULL);
616
break;
617
case ORPAREN: /* must undo assignment if rest fails */
618
i = OPND(s);
619
assert(0 < i && (size_t) i <= m->g->nsub);
620
offsave = m->pmatch[i].rm_eo;
621
m->pmatch[i].rm_eo = (regoff_t)(sp - m->offp);
622
dp = backref(m, sp, stop, ss+1, stopst, lev);
623
if (dp != NULL)
624
return(dp);
625
m->pmatch[i].rm_eo = offsave;
626
return(NULL);
627
break;
628
default: /* uh oh */
629
assert(nope);
630
break;
631
}
632
633
/* "can't happen" */
634
assert(nope);
635
/* NOTREACHED */
636
return((char *)NULL); /* dummy */
637
}
638
639
/*
640
- fast - step through the string at top speed
641
*/
642
static char * /* where tentative match ended, or NULL */
643
fast(m, start, stop, startst, stopst)
644
struct match *m;
645
char *start;
646
char *stop;
647
sopno startst;
648
sopno stopst;
649
{
650
states st = m->st;
651
states fresh = m->fresh;
652
states tmp = m->tmp;
653
char *p = start;
654
int c = (start == m->beginp) ? OUT : *(start-1);
655
int lastc; /* previous c */
656
int flagch;
657
int i;
658
char *coldp; /* last p after which no match was underway */
659
660
CLEAR(st);
661
SET1(st, startst);
662
st = step(m->g, startst, stopst, st, NOTHING, st);
663
ASSIGN(fresh, st);
664
SP("start", st, *p);
665
coldp = NULL;
666
for (;;) {
667
/* next character */
668
lastc = c;
669
c = (p == m->endp) ? OUT : *p;
670
if (EQ(st, fresh))
671
coldp = p;
672
673
/* is there an EOL and/or BOL between lastc and c? */
674
flagch = '\0';
675
i = 0;
676
if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
677
(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
678
flagch = BOL;
679
i = m->g->nbol;
680
}
681
if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
682
(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
683
flagch = (flagch == BOL) ? BOLEOL : EOL;
684
i += m->g->neol;
685
}
686
if (i != 0) {
687
for (; i > 0; i--)
688
st = step(m->g, startst, stopst, st, flagch, st);
689
SP("boleol", st, c);
690
}
691
692
/* how about a word boundary? */
693
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
694
(c != OUT && ISWORD(c)) ) {
695
flagch = BOW;
696
}
697
if ( (lastc != OUT && ISWORD(lastc)) &&
698
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
699
flagch = EOW;
700
}
701
if (flagch == BOW || flagch == EOW) {
702
st = step(m->g, startst, stopst, st, flagch, st);
703
SP("boweow", st, c);
704
}
705
706
/* are we done? */
707
if (ISSET(st, stopst) || p == stop)
708
break; /* NOTE BREAK OUT */
709
710
/* no, we must deal with this character */
711
ASSIGN(tmp, st);
712
ASSIGN(st, fresh);
713
assert(c != OUT);
714
st = step(m->g, startst, stopst, tmp, c, st);
715
SP("aft", st, c);
716
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
717
p++;
718
}
719
720
assert(coldp != NULL);
721
m->coldp = coldp;
722
if (ISSET(st, stopst))
723
return(p+1);
724
else
725
return(NULL);
726
}
727
728
/*
729
- slow - step through the string more deliberately
730
*/
731
static char * /* where it ended */
732
slow(m, start, stop, startst, stopst)
733
struct match *m;
734
char *start;
735
char *stop;
736
sopno startst;
737
sopno stopst;
738
{
739
states st = m->st;
740
states empty = m->empty;
741
char *p = start;
742
int c = (start == m->beginp) ? OUT : *(start-1);
743
int lastc; /* previous c */
744
int flagch;
745
int i;
746
char *matchp; /* last p at which a match ended */
747
748
AT("slow", start, stop, startst, stopst);
749
CLEAR(st);
750
SET1(st, startst);
751
SP("sstart", st, *p);
752
st = step(m->g, startst, stopst, st, NOTHING, st);
753
matchp = NULL;
754
for (;;) {
755
/* next character */
756
lastc = c;
757
c = (p == m->endp) ? OUT : *p;
758
759
/* is there an EOL and/or BOL between lastc and c? */
760
flagch = '\0';
761
i = 0;
762
if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
763
(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
764
flagch = BOL;
765
i = m->g->nbol;
766
}
767
if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
768
(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
769
flagch = (flagch == BOL) ? BOLEOL : EOL;
770
i += m->g->neol;
771
}
772
if (i != 0) {
773
for (; i > 0; i--)
774
st = step(m->g, startst, stopst, st, flagch, st);
775
SP("sboleol", st, c);
776
}
777
778
/* how about a word boundary? */
779
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
780
(c != OUT && ISWORD(c)) ) {
781
flagch = BOW;
782
}
783
if ( (lastc != OUT && ISWORD(lastc)) &&
784
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
785
flagch = EOW;
786
}
787
if (flagch == BOW || flagch == EOW) {
788
st = step(m->g, startst, stopst, st, flagch, st);
789
SP("sboweow", st, c);
790
}
791
792
/* are we done? */
793
if (ISSET(st, stopst))
794
matchp = p;
795
if (EQ(st, empty) || p == stop)
796
break; /* NOTE BREAK OUT */
797
798
/* no, we must deal with this character */
799
assert(c != OUT);
800
st = step(m->g, startst, stopst, st, c, empty);
801
SP("saft", st, c);
802
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
803
p++;
804
}
805
806
return(matchp);
807
}
808
809
810
/*
811
- step - map set of states reachable before char to set reachable after
812
*/
813
static states
814
step(g, start, stop, bef, ch, aft)
815
struct re_guts *g;
816
sopno start; /* start state within strip */
817
sopno stop; /* state after stop state within strip */
818
states bef; /* states reachable before */
819
int ch; /* character or NONCHAR code */
820
states aft; /* states already known reachable after */
821
{
822
cset *cs;
823
sop s;
824
sopno pc;
825
onestate here; /* note, macros know this name */
826
sopno look;
827
long i;
828
829
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
830
s = g->strip[pc];
831
switch (OP(s)) {
832
case OEND:
833
assert(pc == stop-1);
834
break;
835
case OCHAR:
836
/* only characters can match */
837
assert(!NONCHAR(ch) || ch != (char)OPND(s));
838
if (ch == (char)OPND(s))
839
FWD(aft, bef, 1);
840
break;
841
case OBOL:
842
if (ch == BOL || ch == BOLEOL)
843
FWD(aft, bef, 1);
844
break;
845
case OEOL:
846
if (ch == EOL || ch == BOLEOL)
847
FWD(aft, bef, 1);
848
break;
849
case OBOW:
850
if (ch == BOW)
851
FWD(aft, bef, 1);
852
break;
853
case OEOW:
854
if (ch == EOW)
855
FWD(aft, bef, 1);
856
break;
857
case OANY:
858
if (!NONCHAR(ch))
859
FWD(aft, bef, 1);
860
break;
861
case OANYOF:
862
cs = &g->sets[OPND(s)];
863
if (!NONCHAR(ch) && CHIN(cs, ch))
864
FWD(aft, bef, 1);
865
break;
866
case OBACK_: /* ignored here */
867
case O_BACK:
868
FWD(aft, aft, 1);
869
break;
870
case OPLUS_: /* forward, this is just an empty */
871
FWD(aft, aft, 1);
872
break;
873
case O_PLUS: /* both forward and back */
874
FWD(aft, aft, 1);
875
i = ISSETBACK(aft, OPND(s));
876
BACK(aft, aft, OPND(s));
877
if (!i && ISSETBACK(aft, OPND(s))) {
878
/* oho, must reconsider loop body */
879
pc -= OPND(s) + 1;
880
INIT(here, pc);
881
}
882
break;
883
case OQUEST_: /* two branches, both forward */
884
FWD(aft, aft, 1);
885
FWD(aft, aft, OPND(s));
886
break;
887
case O_QUEST: /* just an empty */
888
FWD(aft, aft, 1);
889
break;
890
case OLPAREN: /* not significant here */
891
case ORPAREN:
892
FWD(aft, aft, 1);
893
break;
894
case OCH_: /* mark the first two branches */
895
FWD(aft, aft, 1);
896
assert(OP(g->strip[pc+OPND(s)]) == OOR2);
897
FWD(aft, aft, OPND(s));
898
break;
899
case OOR1: /* done a branch, find the O_CH */
900
if (ISSTATEIN(aft, here)) {
901
for (look = 1;
902
OP(s = g->strip[pc+look]) != O_CH;
903
look += OPND(s))
904
assert(OP(s) == OOR2);
905
FWD(aft, aft, look);
906
}
907
break;
908
case OOR2: /* propagate OCH_'s marking */
909
FWD(aft, aft, 1);
910
if (OP(g->strip[pc+OPND(s)]) != O_CH) {
911
assert(OP(g->strip[pc+OPND(s)]) == OOR2);
912
FWD(aft, aft, OPND(s));
913
}
914
break;
915
case O_CH: /* just empty */
916
FWD(aft, aft, 1);
917
break;
918
default: /* ooooops... */
919
assert(nope);
920
break;
921
}
922
}
923
924
return(aft);
925
}
926
927
#ifdef REDEBUG
928
/*
929
- print - print a set of states
930
*/
931
static void
932
print(m, caption, st, ch, d)
933
struct match *m;
934
char *caption;
935
states st;
936
int ch;
937
FILE *d;
938
{
939
struct re_guts *g = m->g;
940
int i;
941
int first = 1;
942
943
if (!(m->eflags&REG_TRACE))
944
return;
945
946
fprintf(d, "%s", caption);
947
if (ch != '\0')
948
fprintf(d, " %s", pchar(ch));
949
for (i = 0; i < g->nstates; i++)
950
if (ISSET(st, i)) {
951
fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
952
first = 0;
953
}
954
fprintf(d, "\n");
955
}
956
957
/*
958
- at - print current situation
959
*/
960
static void
961
at(m, title, start, stop, startst, stopst)
962
struct match *m;
963
char *title;
964
char *start;
965
char *stop;
966
sopno startst;
967
sopno stopst;
968
{
969
if (!(m->eflags&REG_TRACE))
970
return;
971
972
printf("%s %s-", title, pchar(*start));
973
printf("%s ", pchar(*stop));
974
printf("%ld-%ld\n", (long)startst, (long)stopst);
975
}
976
977
#ifndef PCHARDONE
978
#define PCHARDONE /* never again */
979
/*
980
- pchar - make a character printable
981
*
982
* Is this identical to regchar() over in debug.c? Well, yes. But a
983
* duplicate here avoids having a debugging-capable regexec.o tied to
984
* a matching debug.o, and this is convenient. It all disappears in
985
* the non-debug compilation anyway, so it doesn't matter much.
986
*/
987
static char * /* -> representation */
988
pchar(ch)
989
int ch;
990
{
991
static char pbuf[10];
992
993
if (isprint(ch) || ch == ' ')
994
sprintf(pbuf, "%c", ch);
995
else
996
sprintf(pbuf, "\\%o", ch);
997
return(pbuf);
998
}
999
#endif
1000
#endif
1001
1002
#undef matcher
1003
#undef fast
1004
#undef slow
1005
#undef dissect
1006
#undef backref
1007
#undef step
1008
#undef print
1009
#undef at
1010
#undef match
1011
1012