Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_sre/sre_lib.h
12 views
1
/*
2
* Secret Labs' Regular Expression Engine
3
*
4
* regular expression matching engine
5
*
6
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
7
*
8
* See the sre.c file for information on usage and redistribution.
9
*/
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
{
18
/* check if pointer is at given position */
19
20
Py_ssize_t thisp, thatp;
21
22
switch (at) {
23
24
case SRE_AT_BEGINNING:
25
case SRE_AT_BEGINNING_STRING:
26
return ((void*) ptr == state->beginning);
27
28
case SRE_AT_BEGINNING_LINE:
29
return ((void*) ptr == state->beginning ||
30
SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
case SRE_AT_END:
33
return (((SRE_CHAR *)state->end - ptr == 1 &&
34
SRE_IS_LINEBREAK((int) ptr[0])) ||
35
((void*) ptr == state->end));
36
37
case SRE_AT_END_LINE:
38
return ((void*) ptr == state->end ||
39
SRE_IS_LINEBREAK((int) ptr[0]));
40
41
case SRE_AT_END_STRING:
42
return ((void*) ptr == state->end);
43
44
case SRE_AT_BOUNDARY:
45
if (state->beginning == state->end)
46
return 0;
47
thatp = ((void*) ptr > state->beginning) ?
48
SRE_IS_WORD((int) ptr[-1]) : 0;
49
thisp = ((void*) ptr < state->end) ?
50
SRE_IS_WORD((int) ptr[0]) : 0;
51
return thisp != thatp;
52
53
case SRE_AT_NON_BOUNDARY:
54
if (state->beginning == state->end)
55
return 0;
56
thatp = ((void*) ptr > state->beginning) ?
57
SRE_IS_WORD((int) ptr[-1]) : 0;
58
thisp = ((void*) ptr < state->end) ?
59
SRE_IS_WORD((int) ptr[0]) : 0;
60
return thisp == thatp;
61
62
case SRE_AT_LOC_BOUNDARY:
63
if (state->beginning == state->end)
64
return 0;
65
thatp = ((void*) ptr > state->beginning) ?
66
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67
thisp = ((void*) ptr < state->end) ?
68
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69
return thisp != thatp;
70
71
case SRE_AT_LOC_NON_BOUNDARY:
72
if (state->beginning == state->end)
73
return 0;
74
thatp = ((void*) ptr > state->beginning) ?
75
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76
thisp = ((void*) ptr < state->end) ?
77
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78
return thisp == thatp;
79
80
case SRE_AT_UNI_BOUNDARY:
81
if (state->beginning == state->end)
82
return 0;
83
thatp = ((void*) ptr > state->beginning) ?
84
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85
thisp = ((void*) ptr < state->end) ?
86
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87
return thisp != thatp;
88
89
case SRE_AT_UNI_NON_BOUNDARY:
90
if (state->beginning == state->end)
91
return 0;
92
thatp = ((void*) ptr > state->beginning) ?
93
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94
thisp = ((void*) ptr < state->end) ?
95
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96
return thisp == thatp;
97
98
}
99
100
return 0;
101
}
102
103
LOCAL(int)
104
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
105
{
106
/* check if character is a member of the given set */
107
108
int ok = 1;
109
110
for (;;) {
111
switch (*set++) {
112
113
case SRE_OP_FAILURE:
114
return !ok;
115
116
case SRE_OP_LITERAL:
117
/* <LITERAL> <code> */
118
if (ch == set[0])
119
return ok;
120
set++;
121
break;
122
123
case SRE_OP_CATEGORY:
124
/* <CATEGORY> <code> */
125
if (sre_category(set[0], (int) ch))
126
return ok;
127
set++;
128
break;
129
130
case SRE_OP_CHARSET:
131
/* <CHARSET> <bitmap> */
132
if (ch < 256 &&
133
(set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134
return ok;
135
set += 256/SRE_CODE_BITS;
136
break;
137
138
case SRE_OP_RANGE:
139
/* <RANGE> <lower> <upper> */
140
if (set[0] <= ch && ch <= set[1])
141
return ok;
142
set += 2;
143
break;
144
145
case SRE_OP_RANGE_UNI_IGNORE:
146
/* <RANGE_UNI_IGNORE> <lower> <upper> */
147
{
148
SRE_CODE uch;
149
/* ch is already lower cased */
150
if (set[0] <= ch && ch <= set[1])
151
return ok;
152
uch = sre_upper_unicode(ch);
153
if (set[0] <= uch && uch <= set[1])
154
return ok;
155
set += 2;
156
break;
157
}
158
159
case SRE_OP_NEGATE:
160
ok = !ok;
161
break;
162
163
case SRE_OP_BIGCHARSET:
164
/* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165
{
166
Py_ssize_t count, block;
167
count = *(set++);
168
169
if (ch < 0x10000u)
170
block = ((unsigned char*)set)[ch >> 8];
171
else
172
block = -1;
173
set += 256/sizeof(SRE_CODE);
174
if (block >=0 &&
175
(set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176
(1u << (ch & (SRE_CODE_BITS-1)))))
177
return ok;
178
set += count * (256/SRE_CODE_BITS);
179
break;
180
}
181
182
default:
183
/* internal error -- there's not much we can do about it
184
here, so let's just pretend it didn't match... */
185
return 0;
186
}
187
}
188
}
189
190
LOCAL(int)
191
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
192
{
193
SRE_CODE lo, up;
194
lo = sre_lower_locale(ch);
195
if (SRE(charset)(state, set, lo))
196
return 1;
197
198
up = sre_upper_locale(ch);
199
return up != lo && SRE(charset)(state, set, up);
200
}
201
202
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
203
204
LOCAL(Py_ssize_t)
205
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
206
{
207
SRE_CODE chr;
208
SRE_CHAR c;
209
const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
210
const SRE_CHAR* end = (const SRE_CHAR *)state->end;
211
Py_ssize_t i;
212
213
/* adjust end */
214
if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
215
end = ptr + maxcount;
216
217
switch (pattern[0]) {
218
219
case SRE_OP_IN:
220
/* repeated set */
221
TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
222
while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
223
ptr++;
224
break;
225
226
case SRE_OP_ANY:
227
/* repeated dot wildcard. */
228
TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
229
while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
230
ptr++;
231
break;
232
233
case SRE_OP_ANY_ALL:
234
/* repeated dot wildcard. skip to the end of the target
235
string, and backtrack from there */
236
TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
237
ptr = end;
238
break;
239
240
case SRE_OP_LITERAL:
241
/* repeated literal */
242
chr = pattern[1];
243
TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
244
c = (SRE_CHAR) chr;
245
#if SIZEOF_SRE_CHAR < 4
246
if ((SRE_CODE) c != chr)
247
; /* literal can't match: doesn't fit in char width */
248
else
249
#endif
250
while (ptr < end && *ptr == c)
251
ptr++;
252
break;
253
254
case SRE_OP_LITERAL_IGNORE:
255
/* repeated literal */
256
chr = pattern[1];
257
TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
258
while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
259
ptr++;
260
break;
261
262
case SRE_OP_LITERAL_UNI_IGNORE:
263
/* repeated literal */
264
chr = pattern[1];
265
TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
266
while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
267
ptr++;
268
break;
269
270
case SRE_OP_LITERAL_LOC_IGNORE:
271
/* repeated literal */
272
chr = pattern[1];
273
TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
274
while (ptr < end && char_loc_ignore(chr, *ptr))
275
ptr++;
276
break;
277
278
case SRE_OP_NOT_LITERAL:
279
/* repeated non-literal */
280
chr = pattern[1];
281
TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
282
c = (SRE_CHAR) chr;
283
#if SIZEOF_SRE_CHAR < 4
284
if ((SRE_CODE) c != chr)
285
ptr = end; /* literal can't match: doesn't fit in char width */
286
else
287
#endif
288
while (ptr < end && *ptr != c)
289
ptr++;
290
break;
291
292
case SRE_OP_NOT_LITERAL_IGNORE:
293
/* repeated non-literal */
294
chr = pattern[1];
295
TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
296
while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
297
ptr++;
298
break;
299
300
case SRE_OP_NOT_LITERAL_UNI_IGNORE:
301
/* repeated non-literal */
302
chr = pattern[1];
303
TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
304
while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
305
ptr++;
306
break;
307
308
case SRE_OP_NOT_LITERAL_LOC_IGNORE:
309
/* repeated non-literal */
310
chr = pattern[1];
311
TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
312
while (ptr < end && !char_loc_ignore(chr, *ptr))
313
ptr++;
314
break;
315
316
default:
317
/* repeated single character pattern */
318
TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
319
while ((SRE_CHAR*) state->ptr < end) {
320
i = SRE(match)(state, pattern, 0);
321
if (i < 0)
322
return i;
323
if (!i)
324
break;
325
}
326
TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
327
(SRE_CHAR*) state->ptr - ptr));
328
return (SRE_CHAR*) state->ptr - ptr;
329
}
330
331
TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
332
ptr - (SRE_CHAR*) state->ptr));
333
return ptr - (SRE_CHAR*) state->ptr;
334
}
335
336
/* The macros below should be used to protect recursive SRE(match)()
337
* calls that *failed* and do *not* return immediately (IOW, those
338
* that will backtrack). Explaining:
339
*
340
* - Recursive SRE(match)() returned true: that's usually a success
341
* (besides atypical cases like ASSERT_NOT), therefore there's no
342
* reason to restore lastmark;
343
*
344
* - Recursive SRE(match)() returned false but the current SRE(match)()
345
* is returning to the caller: If the current SRE(match)() is the
346
* top function of the recursion, returning false will be a matching
347
* failure, and it doesn't matter where lastmark is pointing to.
348
* If it's *not* the top function, it will be a recursive SRE(match)()
349
* failure by itself, and the calling SRE(match)() will have to deal
350
* with the failure by the same rules explained here (it will restore
351
* lastmark by itself if necessary);
352
*
353
* - Recursive SRE(match)() returned false, and will continue the
354
* outside 'for' loop: must be protected when breaking, since the next
355
* OP could potentially depend on lastmark;
356
*
357
* - Recursive SRE(match)() returned false, and will be called again
358
* inside a local for/while loop: must be protected between each
359
* loop iteration, since the recursive SRE(match)() could do anything,
360
* and could potentially depend on lastmark.
361
*
362
* For more information, check the discussion at SF patch #712900.
363
*/
364
#define LASTMARK_SAVE() \
365
do { \
366
ctx->lastmark = state->lastmark; \
367
ctx->lastindex = state->lastindex; \
368
} while (0)
369
#define LASTMARK_RESTORE() \
370
do { \
371
state->lastmark = ctx->lastmark; \
372
state->lastindex = ctx->lastindex; \
373
} while (0)
374
375
#define RETURN_ERROR(i) do { return i; } while(0)
376
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
377
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
378
379
#define RETURN_ON_ERROR(i) \
380
do { if (i < 0) RETURN_ERROR(i); } while (0)
381
#define RETURN_ON_SUCCESS(i) \
382
do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
383
#define RETURN_ON_FAILURE(i) \
384
do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
385
386
#define DATA_STACK_ALLOC(state, type, ptr) \
387
do { \
388
alloc_pos = state->data_stack_base; \
389
TRACE(("allocating %s in %zd (%zd)\n", \
390
Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
391
if (sizeof(type) > state->data_stack_size - alloc_pos) { \
392
int j = data_stack_grow(state, sizeof(type)); \
393
if (j < 0) return j; \
394
if (ctx_pos != -1) \
395
DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
396
} \
397
ptr = (type*)(state->data_stack+alloc_pos); \
398
state->data_stack_base += sizeof(type); \
399
} while (0)
400
401
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
402
do { \
403
TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
404
ptr = (type*)(state->data_stack+pos); \
405
} while (0)
406
407
#define DATA_STACK_PUSH(state, data, size) \
408
do { \
409
TRACE(("copy data in %p to %zd (%zd)\n", \
410
data, state->data_stack_base, size)); \
411
if (size > state->data_stack_size - state->data_stack_base) { \
412
int j = data_stack_grow(state, size); \
413
if (j < 0) return j; \
414
if (ctx_pos != -1) \
415
DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
416
} \
417
memcpy(state->data_stack+state->data_stack_base, data, size); \
418
state->data_stack_base += size; \
419
} while (0)
420
421
/* We add an explicit cast to memcpy here because MSVC has a bug when
422
compiling C code where it believes that `const void**` cannot be
423
safely casted to `void*`, see bpo-39943 for details. */
424
#define DATA_STACK_POP(state, data, size, discard) \
425
do { \
426
TRACE(("copy data to %p from %zd (%zd)\n", \
427
data, state->data_stack_base-size, size)); \
428
memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
429
if (discard) \
430
state->data_stack_base -= size; \
431
} while (0)
432
433
#define DATA_STACK_POP_DISCARD(state, size) \
434
do { \
435
TRACE(("discard data from %zd (%zd)\n", \
436
state->data_stack_base-size, size)); \
437
state->data_stack_base -= size; \
438
} while(0)
439
440
#define DATA_PUSH(x) \
441
DATA_STACK_PUSH(state, (x), sizeof(*(x)))
442
#define DATA_POP(x) \
443
DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
444
#define DATA_POP_DISCARD(x) \
445
DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
446
#define DATA_ALLOC(t,p) \
447
DATA_STACK_ALLOC(state, t, p)
448
#define DATA_LOOKUP_AT(t,p,pos) \
449
DATA_STACK_LOOKUP_AT(state,t,p,pos)
450
451
#define MARK_PUSH(lastmark) \
452
do if (lastmark >= 0) { \
453
size_t _marks_size = (lastmark+1) * sizeof(void*); \
454
DATA_STACK_PUSH(state, state->mark, _marks_size); \
455
} while (0)
456
#define MARK_POP(lastmark) \
457
do if (lastmark >= 0) { \
458
size_t _marks_size = (lastmark+1) * sizeof(void*); \
459
DATA_STACK_POP(state, state->mark, _marks_size, 1); \
460
} while (0)
461
#define MARK_POP_KEEP(lastmark) \
462
do if (lastmark >= 0) { \
463
size_t _marks_size = (lastmark+1) * sizeof(void*); \
464
DATA_STACK_POP(state, state->mark, _marks_size, 0); \
465
} while (0)
466
#define MARK_POP_DISCARD(lastmark) \
467
do if (lastmark >= 0) { \
468
size_t _marks_size = (lastmark+1) * sizeof(void*); \
469
DATA_STACK_POP_DISCARD(state, _marks_size); \
470
} while (0)
471
472
#define JUMP_NONE 0
473
#define JUMP_MAX_UNTIL_1 1
474
#define JUMP_MAX_UNTIL_2 2
475
#define JUMP_MAX_UNTIL_3 3
476
#define JUMP_MIN_UNTIL_1 4
477
#define JUMP_MIN_UNTIL_2 5
478
#define JUMP_MIN_UNTIL_3 6
479
#define JUMP_REPEAT 7
480
#define JUMP_REPEAT_ONE_1 8
481
#define JUMP_REPEAT_ONE_2 9
482
#define JUMP_MIN_REPEAT_ONE 10
483
#define JUMP_BRANCH 11
484
#define JUMP_ASSERT 12
485
#define JUMP_ASSERT_NOT 13
486
#define JUMP_POSS_REPEAT_1 14
487
#define JUMP_POSS_REPEAT_2 15
488
#define JUMP_ATOMIC_GROUP 16
489
490
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
491
ctx->pattern = pattern; \
492
ctx->ptr = ptr; \
493
DATA_ALLOC(SRE(match_context), nextctx); \
494
nextctx->pattern = nextpattern; \
495
nextctx->toplevel = toplevel_; \
496
nextctx->jump = jumpvalue; \
497
nextctx->last_ctx_pos = ctx_pos; \
498
pattern = nextpattern; \
499
ctx_pos = alloc_pos; \
500
ctx = nextctx; \
501
goto entrance; \
502
jumplabel: \
503
pattern = ctx->pattern; \
504
ptr = ctx->ptr;
505
506
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
507
DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
508
509
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
510
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
511
512
typedef struct {
513
Py_ssize_t count;
514
union {
515
SRE_CODE chr;
516
SRE_REPEAT* rep;
517
} u;
518
int lastmark;
519
int lastindex;
520
const SRE_CODE* pattern;
521
const SRE_CHAR* ptr;
522
int toplevel;
523
int jump;
524
Py_ssize_t last_ctx_pos;
525
} SRE(match_context);
526
527
#define MAYBE_CHECK_SIGNALS \
528
do { \
529
if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
530
RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
531
} \
532
} while (0)
533
534
#ifdef HAVE_COMPUTED_GOTOS
535
#ifndef USE_COMPUTED_GOTOS
536
#define USE_COMPUTED_GOTOS 1
537
#endif
538
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
539
#error "Computed gotos are not supported on this compiler."
540
#else
541
#undef USE_COMPUTED_GOTOS
542
#define USE_COMPUTED_GOTOS 0
543
#endif
544
545
#if USE_COMPUTED_GOTOS
546
#define TARGET(OP) TARGET_ ## OP
547
#define DISPATCH \
548
do { \
549
MAYBE_CHECK_SIGNALS; \
550
goto *sre_targets[*pattern++]; \
551
} while (0)
552
#else
553
#define TARGET(OP) case OP
554
#define DISPATCH goto dispatch
555
#endif
556
557
/* check if string matches the given pattern. returns <0 for
558
error, 0 for failure, and 1 for success */
559
LOCAL(Py_ssize_t)
560
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
561
{
562
const SRE_CHAR* end = (const SRE_CHAR *)state->end;
563
Py_ssize_t alloc_pos, ctx_pos = -1;
564
Py_ssize_t ret = 0;
565
int jump;
566
unsigned int sigcount=0;
567
568
SRE(match_context)* ctx;
569
SRE(match_context)* nextctx;
570
571
TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
572
573
DATA_ALLOC(SRE(match_context), ctx);
574
ctx->last_ctx_pos = -1;
575
ctx->jump = JUMP_NONE;
576
ctx->toplevel = toplevel;
577
ctx_pos = alloc_pos;
578
579
#if USE_COMPUTED_GOTOS
580
#include "sre_targets.h"
581
#endif
582
583
entrance:
584
585
; // Fashion statement.
586
const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
587
588
if (pattern[0] == SRE_OP_INFO) {
589
/* optimization info block */
590
/* <INFO> <1=skip> <2=flags> <3=min> ... */
591
if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
592
TRACE(("reject (got %zd chars, need %zd)\n",
593
end - ptr, (Py_ssize_t) pattern[3]));
594
RETURN_FAILURE;
595
}
596
pattern += pattern[1] + 1;
597
}
598
599
#if USE_COMPUTED_GOTOS
600
DISPATCH;
601
#else
602
dispatch:
603
MAYBE_CHECK_SIGNALS;
604
switch (*pattern++)
605
#endif
606
{
607
608
TARGET(SRE_OP_MARK):
609
/* set mark */
610
/* <MARK> <gid> */
611
TRACE(("|%p|%p|MARK %d\n", pattern,
612
ptr, pattern[0]));
613
{
614
int i = pattern[0];
615
if (i & 1)
616
state->lastindex = i/2 + 1;
617
if (i > state->lastmark) {
618
/* state->lastmark is the highest valid index in the
619
state->mark array. If it is increased by more than 1,
620
the intervening marks must be set to NULL to signal
621
that these marks have not been encountered. */
622
int j = state->lastmark + 1;
623
while (j < i)
624
state->mark[j++] = NULL;
625
state->lastmark = i;
626
}
627
state->mark[i] = ptr;
628
}
629
pattern++;
630
DISPATCH;
631
632
TARGET(SRE_OP_LITERAL):
633
/* match literal string */
634
/* <LITERAL> <code> */
635
TRACE(("|%p|%p|LITERAL %d\n", pattern,
636
ptr, *pattern));
637
if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
638
RETURN_FAILURE;
639
pattern++;
640
ptr++;
641
DISPATCH;
642
643
TARGET(SRE_OP_NOT_LITERAL):
644
/* match anything that is not literal character */
645
/* <NOT_LITERAL> <code> */
646
TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
647
ptr, *pattern));
648
if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
649
RETURN_FAILURE;
650
pattern++;
651
ptr++;
652
DISPATCH;
653
654
TARGET(SRE_OP_SUCCESS):
655
/* end of pattern */
656
TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
657
if (ctx->toplevel &&
658
((state->match_all && ptr != state->end) ||
659
(state->must_advance && ptr == state->start)))
660
{
661
RETURN_FAILURE;
662
}
663
state->ptr = ptr;
664
RETURN_SUCCESS;
665
666
TARGET(SRE_OP_AT):
667
/* match at given position */
668
/* <AT> <code> */
669
TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
670
if (!SRE(at)(state, ptr, *pattern))
671
RETURN_FAILURE;
672
pattern++;
673
DISPATCH;
674
675
TARGET(SRE_OP_CATEGORY):
676
/* match at given category */
677
/* <CATEGORY> <code> */
678
TRACE(("|%p|%p|CATEGORY %d\n", pattern,
679
ptr, *pattern));
680
if (ptr >= end || !sre_category(pattern[0], ptr[0]))
681
RETURN_FAILURE;
682
pattern++;
683
ptr++;
684
DISPATCH;
685
686
TARGET(SRE_OP_ANY):
687
/* match anything (except a newline) */
688
/* <ANY> */
689
TRACE(("|%p|%p|ANY\n", pattern, ptr));
690
if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
691
RETURN_FAILURE;
692
ptr++;
693
DISPATCH;
694
695
TARGET(SRE_OP_ANY_ALL):
696
/* match anything */
697
/* <ANY_ALL> */
698
TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
699
if (ptr >= end)
700
RETURN_FAILURE;
701
ptr++;
702
DISPATCH;
703
704
TARGET(SRE_OP_IN):
705
/* match set member (or non_member) */
706
/* <IN> <skip> <set> */
707
TRACE(("|%p|%p|IN\n", pattern, ptr));
708
if (ptr >= end ||
709
!SRE(charset)(state, pattern + 1, *ptr))
710
RETURN_FAILURE;
711
pattern += pattern[0];
712
ptr++;
713
DISPATCH;
714
715
TARGET(SRE_OP_LITERAL_IGNORE):
716
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
717
pattern, ptr, pattern[0]));
718
if (ptr >= end ||
719
sre_lower_ascii(*ptr) != *pattern)
720
RETURN_FAILURE;
721
pattern++;
722
ptr++;
723
DISPATCH;
724
725
TARGET(SRE_OP_LITERAL_UNI_IGNORE):
726
TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
727
pattern, ptr, pattern[0]));
728
if (ptr >= end ||
729
sre_lower_unicode(*ptr) != *pattern)
730
RETURN_FAILURE;
731
pattern++;
732
ptr++;
733
DISPATCH;
734
735
TARGET(SRE_OP_LITERAL_LOC_IGNORE):
736
TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
737
pattern, ptr, pattern[0]));
738
if (ptr >= end
739
|| !char_loc_ignore(*pattern, *ptr))
740
RETURN_FAILURE;
741
pattern++;
742
ptr++;
743
DISPATCH;
744
745
TARGET(SRE_OP_NOT_LITERAL_IGNORE):
746
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
747
pattern, ptr, *pattern));
748
if (ptr >= end ||
749
sre_lower_ascii(*ptr) == *pattern)
750
RETURN_FAILURE;
751
pattern++;
752
ptr++;
753
DISPATCH;
754
755
TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
756
TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
757
pattern, ptr, *pattern));
758
if (ptr >= end ||
759
sre_lower_unicode(*ptr) == *pattern)
760
RETURN_FAILURE;
761
pattern++;
762
ptr++;
763
DISPATCH;
764
765
TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
766
TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
767
pattern, ptr, *pattern));
768
if (ptr >= end
769
|| char_loc_ignore(*pattern, *ptr))
770
RETURN_FAILURE;
771
pattern++;
772
ptr++;
773
DISPATCH;
774
775
TARGET(SRE_OP_IN_IGNORE):
776
TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
777
if (ptr >= end
778
|| !SRE(charset)(state, pattern+1,
779
(SRE_CODE)sre_lower_ascii(*ptr)))
780
RETURN_FAILURE;
781
pattern += pattern[0];
782
ptr++;
783
DISPATCH;
784
785
TARGET(SRE_OP_IN_UNI_IGNORE):
786
TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
787
if (ptr >= end
788
|| !SRE(charset)(state, pattern+1,
789
(SRE_CODE)sre_lower_unicode(*ptr)))
790
RETURN_FAILURE;
791
pattern += pattern[0];
792
ptr++;
793
DISPATCH;
794
795
TARGET(SRE_OP_IN_LOC_IGNORE):
796
TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
797
if (ptr >= end
798
|| !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
799
RETURN_FAILURE;
800
pattern += pattern[0];
801
ptr++;
802
DISPATCH;
803
804
TARGET(SRE_OP_JUMP):
805
TARGET(SRE_OP_INFO):
806
/* jump forward */
807
/* <JUMP> <offset> */
808
TRACE(("|%p|%p|JUMP %d\n", pattern,
809
ptr, pattern[0]));
810
pattern += pattern[0];
811
DISPATCH;
812
813
TARGET(SRE_OP_BRANCH):
814
/* alternation */
815
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
816
TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
817
LASTMARK_SAVE();
818
if (state->repeat)
819
MARK_PUSH(ctx->lastmark);
820
for (; pattern[0]; pattern += pattern[0]) {
821
if (pattern[1] == SRE_OP_LITERAL &&
822
(ptr >= end ||
823
(SRE_CODE) *ptr != pattern[2]))
824
continue;
825
if (pattern[1] == SRE_OP_IN &&
826
(ptr >= end ||
827
!SRE(charset)(state, pattern + 3,
828
(SRE_CODE) *ptr)))
829
continue;
830
state->ptr = ptr;
831
DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
832
if (ret) {
833
if (state->repeat)
834
MARK_POP_DISCARD(ctx->lastmark);
835
RETURN_ON_ERROR(ret);
836
RETURN_SUCCESS;
837
}
838
if (state->repeat)
839
MARK_POP_KEEP(ctx->lastmark);
840
LASTMARK_RESTORE();
841
}
842
if (state->repeat)
843
MARK_POP_DISCARD(ctx->lastmark);
844
RETURN_FAILURE;
845
846
TARGET(SRE_OP_REPEAT_ONE):
847
/* match repeated sequence (maximizing regexp) */
848
849
/* this operator only works if the repeated item is
850
exactly one character wide, and we're not already
851
collecting backtracking points. for other cases,
852
use the MAX_REPEAT operator */
853
854
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
855
856
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
857
pattern[1], pattern[2]));
858
859
if ((Py_ssize_t) pattern[1] > end - ptr)
860
RETURN_FAILURE; /* cannot match */
861
862
state->ptr = ptr;
863
864
ret = SRE(count)(state, pattern+3, pattern[2]);
865
RETURN_ON_ERROR(ret);
866
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
867
ctx->count = ret;
868
ptr += ctx->count;
869
870
/* when we arrive here, count contains the number of
871
matches, and ptr points to the tail of the target
872
string. check if the rest of the pattern matches,
873
and backtrack if not. */
874
875
if (ctx->count < (Py_ssize_t) pattern[1])
876
RETURN_FAILURE;
877
878
if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
879
ptr == state->end &&
880
!(ctx->toplevel && state->must_advance && ptr == state->start))
881
{
882
/* tail is empty. we're finished */
883
state->ptr = ptr;
884
RETURN_SUCCESS;
885
}
886
887
LASTMARK_SAVE();
888
if (state->repeat)
889
MARK_PUSH(ctx->lastmark);
890
891
if (pattern[pattern[0]] == SRE_OP_LITERAL) {
892
/* tail starts with a literal. skip positions where
893
the rest of the pattern cannot possibly match */
894
ctx->u.chr = pattern[pattern[0]+1];
895
for (;;) {
896
while (ctx->count >= (Py_ssize_t) pattern[1] &&
897
(ptr >= end || *ptr != ctx->u.chr)) {
898
ptr--;
899
ctx->count--;
900
}
901
if (ctx->count < (Py_ssize_t) pattern[1])
902
break;
903
state->ptr = ptr;
904
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
905
pattern+pattern[0]);
906
if (ret) {
907
if (state->repeat)
908
MARK_POP_DISCARD(ctx->lastmark);
909
RETURN_ON_ERROR(ret);
910
RETURN_SUCCESS;
911
}
912
if (state->repeat)
913
MARK_POP_KEEP(ctx->lastmark);
914
LASTMARK_RESTORE();
915
916
ptr--;
917
ctx->count--;
918
}
919
if (state->repeat)
920
MARK_POP_DISCARD(ctx->lastmark);
921
} else {
922
/* general case */
923
while (ctx->count >= (Py_ssize_t) pattern[1]) {
924
state->ptr = ptr;
925
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
926
pattern+pattern[0]);
927
if (ret) {
928
if (state->repeat)
929
MARK_POP_DISCARD(ctx->lastmark);
930
RETURN_ON_ERROR(ret);
931
RETURN_SUCCESS;
932
}
933
if (state->repeat)
934
MARK_POP_KEEP(ctx->lastmark);
935
LASTMARK_RESTORE();
936
937
ptr--;
938
ctx->count--;
939
}
940
if (state->repeat)
941
MARK_POP_DISCARD(ctx->lastmark);
942
}
943
RETURN_FAILURE;
944
945
TARGET(SRE_OP_MIN_REPEAT_ONE):
946
/* match repeated sequence (minimizing regexp) */
947
948
/* this operator only works if the repeated item is
949
exactly one character wide, and we're not already
950
collecting backtracking points. for other cases,
951
use the MIN_REPEAT operator */
952
953
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
954
955
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
956
pattern[1], pattern[2]));
957
958
if ((Py_ssize_t) pattern[1] > end - ptr)
959
RETURN_FAILURE; /* cannot match */
960
961
state->ptr = ptr;
962
963
if (pattern[1] == 0)
964
ctx->count = 0;
965
else {
966
/* count using pattern min as the maximum */
967
ret = SRE(count)(state, pattern+3, pattern[1]);
968
RETURN_ON_ERROR(ret);
969
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
970
if (ret < (Py_ssize_t) pattern[1])
971
/* didn't match minimum number of times */
972
RETURN_FAILURE;
973
/* advance past minimum matches of repeat */
974
ctx->count = ret;
975
ptr += ctx->count;
976
}
977
978
if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
979
!(ctx->toplevel &&
980
((state->match_all && ptr != state->end) ||
981
(state->must_advance && ptr == state->start))))
982
{
983
/* tail is empty. we're finished */
984
state->ptr = ptr;
985
RETURN_SUCCESS;
986
987
} else {
988
/* general case */
989
LASTMARK_SAVE();
990
if (state->repeat)
991
MARK_PUSH(ctx->lastmark);
992
993
while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
994
|| ctx->count <= (Py_ssize_t)pattern[2]) {
995
state->ptr = ptr;
996
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
997
pattern+pattern[0]);
998
if (ret) {
999
if (state->repeat)
1000
MARK_POP_DISCARD(ctx->lastmark);
1001
RETURN_ON_ERROR(ret);
1002
RETURN_SUCCESS;
1003
}
1004
if (state->repeat)
1005
MARK_POP_KEEP(ctx->lastmark);
1006
LASTMARK_RESTORE();
1007
1008
state->ptr = ptr;
1009
ret = SRE(count)(state, pattern+3, 1);
1010
RETURN_ON_ERROR(ret);
1011
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1012
if (ret == 0)
1013
break;
1014
assert(ret == 1);
1015
ptr++;
1016
ctx->count++;
1017
}
1018
if (state->repeat)
1019
MARK_POP_DISCARD(ctx->lastmark);
1020
}
1021
RETURN_FAILURE;
1022
1023
TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1024
/* match repeated sequence (maximizing regexp) without
1025
backtracking */
1026
1027
/* this operator only works if the repeated item is
1028
exactly one character wide, and we're not already
1029
collecting backtracking points. for other cases,
1030
use the MAX_REPEAT operator */
1031
1032
/* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1033
tail */
1034
1035
TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1036
ptr, pattern[1], pattern[2]));
1037
1038
if (ptr + pattern[1] > end) {
1039
RETURN_FAILURE; /* cannot match */
1040
}
1041
1042
state->ptr = ptr;
1043
1044
ret = SRE(count)(state, pattern + 3, pattern[2]);
1045
RETURN_ON_ERROR(ret);
1046
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1047
ctx->count = ret;
1048
ptr += ctx->count;
1049
1050
/* when we arrive here, count contains the number of
1051
matches, and ptr points to the tail of the target
1052
string. check if the rest of the pattern matches,
1053
and fail if not. */
1054
1055
/* Test for not enough repetitions in match */
1056
if (ctx->count < (Py_ssize_t) pattern[1]) {
1057
RETURN_FAILURE;
1058
}
1059
1060
/* Update the pattern to point to the next op code */
1061
pattern += pattern[0];
1062
1063
/* Let the tail be evaluated separately and consider this
1064
match successful. */
1065
if (*pattern == SRE_OP_SUCCESS &&
1066
ptr == state->end &&
1067
!(ctx->toplevel && state->must_advance && ptr == state->start))
1068
{
1069
/* tail is empty. we're finished */
1070
state->ptr = ptr;
1071
RETURN_SUCCESS;
1072
}
1073
1074
/* Attempt to match the rest of the string */
1075
DISPATCH;
1076
1077
TARGET(SRE_OP_REPEAT):
1078
/* create repeat context. all the hard work is done
1079
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1080
/* <REPEAT> <skip> <1=min> <2=max>
1081
<3=repeat_index> item <UNTIL> tail */
1082
TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1083
pattern[1], pattern[2]));
1084
1085
/* install new repeat context */
1086
/* TODO(https://github.com/python/cpython/issues/67877): Fix this
1087
* potential memory leak. */
1088
ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep));
1089
if (!ctx->u.rep) {
1090
PyErr_NoMemory();
1091
RETURN_FAILURE;
1092
}
1093
ctx->u.rep->count = -1;
1094
ctx->u.rep->pattern = pattern;
1095
ctx->u.rep->prev = state->repeat;
1096
ctx->u.rep->last_ptr = NULL;
1097
state->repeat = ctx->u.rep;
1098
1099
state->ptr = ptr;
1100
DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1101
state->repeat = ctx->u.rep->prev;
1102
PyObject_Free(ctx->u.rep);
1103
1104
if (ret) {
1105
RETURN_ON_ERROR(ret);
1106
RETURN_SUCCESS;
1107
}
1108
RETURN_FAILURE;
1109
1110
TARGET(SRE_OP_MAX_UNTIL):
1111
/* maximizing repeat */
1112
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1113
1114
/* FIXME: we probably need to deal with zero-width
1115
matches in here... */
1116
1117
ctx->u.rep = state->repeat;
1118
if (!ctx->u.rep)
1119
RETURN_ERROR(SRE_ERROR_STATE);
1120
1121
state->ptr = ptr;
1122
1123
ctx->count = ctx->u.rep->count+1;
1124
1125
TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1126
ptr, ctx->count));
1127
1128
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1129
/* not enough matches */
1130
ctx->u.rep->count = ctx->count;
1131
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1132
ctx->u.rep->pattern+3);
1133
if (ret) {
1134
RETURN_ON_ERROR(ret);
1135
RETURN_SUCCESS;
1136
}
1137
ctx->u.rep->count = ctx->count-1;
1138
state->ptr = ptr;
1139
RETURN_FAILURE;
1140
}
1141
1142
if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1143
ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1144
state->ptr != ctx->u.rep->last_ptr) {
1145
/* we may have enough matches, but if we can
1146
match another item, do so */
1147
ctx->u.rep->count = ctx->count;
1148
LASTMARK_SAVE();
1149
MARK_PUSH(ctx->lastmark);
1150
/* zero-width match protection */
1151
DATA_PUSH(&ctx->u.rep->last_ptr);
1152
ctx->u.rep->last_ptr = state->ptr;
1153
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1154
ctx->u.rep->pattern+3);
1155
DATA_POP(&ctx->u.rep->last_ptr);
1156
if (ret) {
1157
MARK_POP_DISCARD(ctx->lastmark);
1158
RETURN_ON_ERROR(ret);
1159
RETURN_SUCCESS;
1160
}
1161
MARK_POP(ctx->lastmark);
1162
LASTMARK_RESTORE();
1163
ctx->u.rep->count = ctx->count-1;
1164
state->ptr = ptr;
1165
}
1166
1167
/* cannot match more repeated items here. make sure the
1168
tail matches */
1169
state->repeat = ctx->u.rep->prev;
1170
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1171
state->repeat = ctx->u.rep; // restore repeat before return
1172
1173
RETURN_ON_SUCCESS(ret);
1174
state->ptr = ptr;
1175
RETURN_FAILURE;
1176
1177
TARGET(SRE_OP_MIN_UNTIL):
1178
/* minimizing repeat */
1179
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1180
1181
ctx->u.rep = state->repeat;
1182
if (!ctx->u.rep)
1183
RETURN_ERROR(SRE_ERROR_STATE);
1184
1185
state->ptr = ptr;
1186
1187
ctx->count = ctx->u.rep->count+1;
1188
1189
TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1190
ptr, ctx->count, ctx->u.rep->pattern));
1191
1192
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1193
/* not enough matches */
1194
ctx->u.rep->count = ctx->count;
1195
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1196
ctx->u.rep->pattern+3);
1197
if (ret) {
1198
RETURN_ON_ERROR(ret);
1199
RETURN_SUCCESS;
1200
}
1201
ctx->u.rep->count = ctx->count-1;
1202
state->ptr = ptr;
1203
RETURN_FAILURE;
1204
}
1205
1206
/* see if the tail matches */
1207
state->repeat = ctx->u.rep->prev;
1208
1209
LASTMARK_SAVE();
1210
if (state->repeat)
1211
MARK_PUSH(ctx->lastmark);
1212
1213
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1214
SRE_REPEAT *repeat_of_tail = state->repeat;
1215
state->repeat = ctx->u.rep; // restore repeat before return
1216
1217
if (ret) {
1218
if (repeat_of_tail)
1219
MARK_POP_DISCARD(ctx->lastmark);
1220
RETURN_ON_ERROR(ret);
1221
RETURN_SUCCESS;
1222
}
1223
if (repeat_of_tail)
1224
MARK_POP(ctx->lastmark);
1225
LASTMARK_RESTORE();
1226
1227
state->ptr = ptr;
1228
1229
if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1230
&& ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1231
state->ptr == ctx->u.rep->last_ptr)
1232
RETURN_FAILURE;
1233
1234
ctx->u.rep->count = ctx->count;
1235
/* zero-width match protection */
1236
DATA_PUSH(&ctx->u.rep->last_ptr);
1237
ctx->u.rep->last_ptr = state->ptr;
1238
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1239
ctx->u.rep->pattern+3);
1240
DATA_POP(&ctx->u.rep->last_ptr);
1241
if (ret) {
1242
RETURN_ON_ERROR(ret);
1243
RETURN_SUCCESS;
1244
}
1245
ctx->u.rep->count = ctx->count-1;
1246
state->ptr = ptr;
1247
RETURN_FAILURE;
1248
1249
TARGET(SRE_OP_POSSESSIVE_REPEAT):
1250
/* create possessive repeat contexts. */
1251
/* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1252
<SUCCESS> tail */
1253
TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1254
ptr, pattern[1], pattern[2]));
1255
1256
/* Set the global Input pointer to this context's Input
1257
pointer */
1258
state->ptr = ptr;
1259
1260
/* Initialize Count to 0 */
1261
ctx->count = 0;
1262
1263
/* Check for minimum required matches. */
1264
while (ctx->count < (Py_ssize_t)pattern[1]) {
1265
/* not enough matches */
1266
DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1267
&pattern[3]);
1268
if (ret) {
1269
RETURN_ON_ERROR(ret);
1270
ctx->count++;
1271
}
1272
else {
1273
state->ptr = ptr;
1274
RETURN_FAILURE;
1275
}
1276
}
1277
1278
/* Clear the context's Input stream pointer so that it
1279
doesn't match the global state so that the while loop can
1280
be entered. */
1281
ptr = NULL;
1282
1283
/* Keep trying to parse the <pattern> sub-pattern until the
1284
end is reached, creating a new context each time. */
1285
while ((ctx->count < (Py_ssize_t)pattern[2] ||
1286
(Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1287
state->ptr != ptr) {
1288
/* Save the Capture Group Marker state into the current
1289
Context and back up the current highest number
1290
Capture Group marker. */
1291
LASTMARK_SAVE();
1292
MARK_PUSH(ctx->lastmark);
1293
1294
/* zero-width match protection */
1295
/* Set the context's Input Stream pointer to be the
1296
current Input Stream pointer from the global
1297
state. When the loop reaches the next iteration,
1298
the context will then store the last known good
1299
position with the global state holding the Input
1300
Input Stream position that has been updated with
1301
the most recent match. Thus, if state's Input
1302
stream remains the same as the one stored in the
1303
current Context, we know we have successfully
1304
matched an empty string and that all subsequent
1305
matches will also be the empty string until the
1306
maximum number of matches are counted, and because
1307
of this, we could immediately stop at that point and
1308
consider this match successful. */
1309
ptr = state->ptr;
1310
1311
/* We have not reached the maximin matches, so try to
1312
match once more. */
1313
DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1314
&pattern[3]);
1315
1316
/* Check to see if the last attempted match
1317
succeeded. */
1318
if (ret) {
1319
/* Drop the saved highest number Capture Group
1320
marker saved above and use the newly updated
1321
value. */
1322
MARK_POP_DISCARD(ctx->lastmark);
1323
RETURN_ON_ERROR(ret);
1324
1325
/* Success, increment the count. */
1326
ctx->count++;
1327
}
1328
/* Last attempted match failed. */
1329
else {
1330
/* Restore the previously saved highest number
1331
Capture Group marker since the last iteration
1332
did not match, then restore that to the global
1333
state. */
1334
MARK_POP(ctx->lastmark);
1335
LASTMARK_RESTORE();
1336
1337
/* We have sufficient matches, so exit loop. */
1338
break;
1339
}
1340
}
1341
1342
/* Evaluate Tail */
1343
/* Jump to end of pattern indicated by skip, and then skip
1344
the SUCCESS op code that follows it. */
1345
pattern += pattern[0] + 1;
1346
ptr = state->ptr;
1347
DISPATCH;
1348
1349
TARGET(SRE_OP_ATOMIC_GROUP):
1350
/* Atomic Group Sub Pattern */
1351
/* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1352
TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1353
1354
/* Set the global Input pointer to this context's Input
1355
pointer */
1356
state->ptr = ptr;
1357
1358
/* Evaluate the Atomic Group in a new context, terminating
1359
when the end of the group, represented by a SUCCESS op
1360
code, is reached. */
1361
/* Group Pattern begins at an offset of 1 code. */
1362
DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1363
&pattern[1]);
1364
1365
/* Test Exit Condition */
1366
RETURN_ON_ERROR(ret);
1367
1368
if (ret == 0) {
1369
/* Atomic Group failed to Match. */
1370
state->ptr = ptr;
1371
RETURN_FAILURE;
1372
}
1373
1374
/* Evaluate Tail */
1375
/* Jump to end of pattern indicated by skip, and then skip
1376
the SUCCESS op code that follows it. */
1377
pattern += pattern[0];
1378
ptr = state->ptr;
1379
DISPATCH;
1380
1381
TARGET(SRE_OP_GROUPREF):
1382
/* match backreference */
1383
TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1384
ptr, pattern[0]));
1385
{
1386
int groupref = pattern[0] * 2;
1387
if (groupref >= state->lastmark) {
1388
RETURN_FAILURE;
1389
} else {
1390
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1391
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1392
if (!p || !e || e < p)
1393
RETURN_FAILURE;
1394
while (p < e) {
1395
if (ptr >= end || *ptr != *p)
1396
RETURN_FAILURE;
1397
p++;
1398
ptr++;
1399
}
1400
}
1401
}
1402
pattern++;
1403
DISPATCH;
1404
1405
TARGET(SRE_OP_GROUPREF_IGNORE):
1406
/* match backreference */
1407
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1408
ptr, pattern[0]));
1409
{
1410
int groupref = pattern[0] * 2;
1411
if (groupref >= state->lastmark) {
1412
RETURN_FAILURE;
1413
} else {
1414
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1415
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1416
if (!p || !e || e < p)
1417
RETURN_FAILURE;
1418
while (p < e) {
1419
if (ptr >= end ||
1420
sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1421
RETURN_FAILURE;
1422
p++;
1423
ptr++;
1424
}
1425
}
1426
}
1427
pattern++;
1428
DISPATCH;
1429
1430
TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1431
/* match backreference */
1432
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1433
ptr, pattern[0]));
1434
{
1435
int groupref = pattern[0] * 2;
1436
if (groupref >= state->lastmark) {
1437
RETURN_FAILURE;
1438
} else {
1439
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1440
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1441
if (!p || !e || e < p)
1442
RETURN_FAILURE;
1443
while (p < e) {
1444
if (ptr >= end ||
1445
sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1446
RETURN_FAILURE;
1447
p++;
1448
ptr++;
1449
}
1450
}
1451
}
1452
pattern++;
1453
DISPATCH;
1454
1455
TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1456
/* match backreference */
1457
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1458
ptr, pattern[0]));
1459
{
1460
int groupref = pattern[0] * 2;
1461
if (groupref >= state->lastmark) {
1462
RETURN_FAILURE;
1463
} else {
1464
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1465
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1466
if (!p || !e || e < p)
1467
RETURN_FAILURE;
1468
while (p < e) {
1469
if (ptr >= end ||
1470
sre_lower_locale(*ptr) != sre_lower_locale(*p))
1471
RETURN_FAILURE;
1472
p++;
1473
ptr++;
1474
}
1475
}
1476
}
1477
pattern++;
1478
DISPATCH;
1479
1480
TARGET(SRE_OP_GROUPREF_EXISTS):
1481
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1482
ptr, pattern[0]));
1483
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1484
{
1485
int groupref = pattern[0] * 2;
1486
if (groupref >= state->lastmark) {
1487
pattern += pattern[1];
1488
DISPATCH;
1489
} else {
1490
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1491
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1492
if (!p || !e || e < p) {
1493
pattern += pattern[1];
1494
DISPATCH;
1495
}
1496
}
1497
}
1498
pattern += 2;
1499
DISPATCH;
1500
1501
TARGET(SRE_OP_ASSERT):
1502
/* assert subpattern */
1503
/* <ASSERT> <skip> <back> <pattern> */
1504
TRACE(("|%p|%p|ASSERT %d\n", pattern,
1505
ptr, pattern[1]));
1506
if (ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)pattern[1])
1507
RETURN_FAILURE;
1508
state->ptr = ptr - pattern[1];
1509
DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1510
RETURN_ON_FAILURE(ret);
1511
pattern += pattern[0];
1512
DISPATCH;
1513
1514
TARGET(SRE_OP_ASSERT_NOT):
1515
/* assert not subpattern */
1516
/* <ASSERT_NOT> <skip> <back> <pattern> */
1517
TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1518
ptr, pattern[1]));
1519
if (ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)pattern[1]) {
1520
state->ptr = ptr - pattern[1];
1521
LASTMARK_SAVE();
1522
if (state->repeat)
1523
MARK_PUSH(ctx->lastmark);
1524
1525
DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1526
if (ret) {
1527
if (state->repeat)
1528
MARK_POP_DISCARD(ctx->lastmark);
1529
RETURN_ON_ERROR(ret);
1530
RETURN_FAILURE;
1531
}
1532
if (state->repeat)
1533
MARK_POP(ctx->lastmark);
1534
LASTMARK_RESTORE();
1535
}
1536
pattern += pattern[0];
1537
DISPATCH;
1538
1539
TARGET(SRE_OP_FAILURE):
1540
/* immediate failure */
1541
TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1542
RETURN_FAILURE;
1543
1544
#if !USE_COMPUTED_GOTOS
1545
default:
1546
#endif
1547
// Also any unused opcodes:
1548
TARGET(SRE_OP_RANGE_UNI_IGNORE):
1549
TARGET(SRE_OP_SUBPATTERN):
1550
TARGET(SRE_OP_RANGE):
1551
TARGET(SRE_OP_NEGATE):
1552
TARGET(SRE_OP_BIGCHARSET):
1553
TARGET(SRE_OP_CHARSET):
1554
TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1555
pattern[-1]));
1556
RETURN_ERROR(SRE_ERROR_ILLEGAL);
1557
1558
}
1559
1560
exit:
1561
ctx_pos = ctx->last_ctx_pos;
1562
jump = ctx->jump;
1563
DATA_POP_DISCARD(ctx);
1564
if (ctx_pos == -1)
1565
return ret;
1566
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1567
1568
switch (jump) {
1569
case JUMP_MAX_UNTIL_2:
1570
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1571
goto jump_max_until_2;
1572
case JUMP_MAX_UNTIL_3:
1573
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1574
goto jump_max_until_3;
1575
case JUMP_MIN_UNTIL_2:
1576
TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1577
goto jump_min_until_2;
1578
case JUMP_MIN_UNTIL_3:
1579
TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1580
goto jump_min_until_3;
1581
case JUMP_BRANCH:
1582
TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1583
goto jump_branch;
1584
case JUMP_MAX_UNTIL_1:
1585
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1586
goto jump_max_until_1;
1587
case JUMP_MIN_UNTIL_1:
1588
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1589
goto jump_min_until_1;
1590
case JUMP_POSS_REPEAT_1:
1591
TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1592
goto jump_poss_repeat_1;
1593
case JUMP_POSS_REPEAT_2:
1594
TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1595
goto jump_poss_repeat_2;
1596
case JUMP_REPEAT:
1597
TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1598
goto jump_repeat;
1599
case JUMP_REPEAT_ONE_1:
1600
TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1601
goto jump_repeat_one_1;
1602
case JUMP_REPEAT_ONE_2:
1603
TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1604
goto jump_repeat_one_2;
1605
case JUMP_MIN_REPEAT_ONE:
1606
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1607
goto jump_min_repeat_one;
1608
case JUMP_ATOMIC_GROUP:
1609
TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1610
goto jump_atomic_group;
1611
case JUMP_ASSERT:
1612
TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1613
goto jump_assert;
1614
case JUMP_ASSERT_NOT:
1615
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1616
goto jump_assert_not;
1617
case JUMP_NONE:
1618
TRACE(("|%p|%p|RETURN %zd\n", pattern,
1619
ptr, ret));
1620
break;
1621
}
1622
1623
return ret; /* should never get here */
1624
}
1625
1626
/* need to reset capturing groups between two SRE(match) callings in loops */
1627
#define RESET_CAPTURE_GROUP() \
1628
do { state->lastmark = state->lastindex = -1; } while (0)
1629
1630
LOCAL(Py_ssize_t)
1631
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1632
{
1633
SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1634
SRE_CHAR* end = (SRE_CHAR *)state->end;
1635
Py_ssize_t status = 0;
1636
Py_ssize_t prefix_len = 0;
1637
Py_ssize_t prefix_skip = 0;
1638
SRE_CODE* prefix = NULL;
1639
SRE_CODE* charset = NULL;
1640
SRE_CODE* overlap = NULL;
1641
int flags = 0;
1642
1643
if (ptr > end)
1644
return 0;
1645
1646
if (pattern[0] == SRE_OP_INFO) {
1647
/* optimization info block */
1648
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1649
1650
flags = pattern[2];
1651
1652
if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1653
TRACE(("reject (got %u chars, need %u)\n",
1654
(unsigned int)(end - ptr), pattern[3]));
1655
return 0;
1656
}
1657
if (pattern[3] > 1) {
1658
/* adjust end point (but make sure we leave at least one
1659
character in there, so literal search will work) */
1660
end -= pattern[3] - 1;
1661
if (end <= ptr)
1662
end = ptr;
1663
}
1664
1665
if (flags & SRE_INFO_PREFIX) {
1666
/* pattern starts with a known prefix */
1667
/* <length> <skip> <prefix data> <overlap data> */
1668
prefix_len = pattern[5];
1669
prefix_skip = pattern[6];
1670
prefix = pattern + 7;
1671
overlap = prefix + prefix_len - 1;
1672
} else if (flags & SRE_INFO_CHARSET)
1673
/* pattern starts with a character from a known set */
1674
/* <charset> */
1675
charset = pattern + 5;
1676
1677
pattern += 1 + pattern[1];
1678
}
1679
1680
TRACE(("prefix = %p %zd %zd\n",
1681
prefix, prefix_len, prefix_skip));
1682
TRACE(("charset = %p\n", charset));
1683
1684
if (prefix_len == 1) {
1685
/* pattern starts with a literal character */
1686
SRE_CHAR c = (SRE_CHAR) prefix[0];
1687
#if SIZEOF_SRE_CHAR < 4
1688
if ((SRE_CODE) c != prefix[0])
1689
return 0; /* literal can't match: doesn't fit in char width */
1690
#endif
1691
end = (SRE_CHAR *)state->end;
1692
state->must_advance = 0;
1693
while (ptr < end) {
1694
while (*ptr != c) {
1695
if (++ptr >= end)
1696
return 0;
1697
}
1698
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1699
state->start = ptr;
1700
state->ptr = ptr + prefix_skip;
1701
if (flags & SRE_INFO_LITERAL)
1702
return 1; /* we got all of it */
1703
status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1704
if (status != 0)
1705
return status;
1706
++ptr;
1707
RESET_CAPTURE_GROUP();
1708
}
1709
return 0;
1710
}
1711
1712
if (prefix_len > 1) {
1713
/* pattern starts with a known prefix. use the overlap
1714
table to skip forward as fast as we possibly can */
1715
Py_ssize_t i = 0;
1716
1717
end = (SRE_CHAR *)state->end;
1718
if (prefix_len > end - ptr)
1719
return 0;
1720
#if SIZEOF_SRE_CHAR < 4
1721
for (i = 0; i < prefix_len; i++)
1722
if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1723
return 0; /* literal can't match: doesn't fit in char width */
1724
#endif
1725
while (ptr < end) {
1726
SRE_CHAR c = (SRE_CHAR) prefix[0];
1727
while (*ptr++ != c) {
1728
if (ptr >= end)
1729
return 0;
1730
}
1731
if (ptr >= end)
1732
return 0;
1733
1734
i = 1;
1735
state->must_advance = 0;
1736
do {
1737
if (*ptr == (SRE_CHAR) prefix[i]) {
1738
if (++i != prefix_len) {
1739
if (++ptr >= end)
1740
return 0;
1741
continue;
1742
}
1743
/* found a potential match */
1744
TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1745
state->start = ptr - (prefix_len - 1);
1746
state->ptr = ptr - (prefix_len - prefix_skip - 1);
1747
if (flags & SRE_INFO_LITERAL)
1748
return 1; /* we got all of it */
1749
status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1750
if (status != 0)
1751
return status;
1752
/* close but no cigar -- try again */
1753
if (++ptr >= end)
1754
return 0;
1755
RESET_CAPTURE_GROUP();
1756
}
1757
i = overlap[i];
1758
} while (i != 0);
1759
}
1760
return 0;
1761
}
1762
1763
if (charset) {
1764
/* pattern starts with a character from a known set */
1765
end = (SRE_CHAR *)state->end;
1766
state->must_advance = 0;
1767
for (;;) {
1768
while (ptr < end && !SRE(charset)(state, charset, *ptr))
1769
ptr++;
1770
if (ptr >= end)
1771
return 0;
1772
TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1773
state->start = ptr;
1774
state->ptr = ptr;
1775
status = SRE(match)(state, pattern, 0);
1776
if (status != 0)
1777
break;
1778
ptr++;
1779
RESET_CAPTURE_GROUP();
1780
}
1781
} else {
1782
/* general case */
1783
assert(ptr <= end);
1784
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1785
state->start = state->ptr = ptr;
1786
status = SRE(match)(state, pattern, 1);
1787
state->must_advance = 0;
1788
if (status == 0 && pattern[0] == SRE_OP_AT &&
1789
(pattern[1] == SRE_AT_BEGINNING ||
1790
pattern[1] == SRE_AT_BEGINNING_STRING))
1791
{
1792
state->start = state->ptr = ptr = end;
1793
return 0;
1794
}
1795
while (status == 0 && ptr < end) {
1796
ptr++;
1797
RESET_CAPTURE_GROUP();
1798
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1799
state->start = state->ptr = ptr;
1800
status = SRE(match)(state, pattern, 0);
1801
}
1802
}
1803
1804
return status;
1805
}
1806
1807
#undef SRE_CHAR
1808
#undef SIZEOF_SRE_CHAR
1809
#undef SRE
1810
1811
/* vim:ts=4:sw=4:et
1812
*/
1813
1814