/* Source: GitHub repository allendowney/cpython, path Modules/_sre/sre.c */
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
38
/* SRE version and copyright banner string. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41
#include "Python.h"
42
#include "pycore_long.h" // _PyLong_GetZero()
43
#include "pycore_moduleobject.h" // _PyModule_GetState()
44
#include "structmember.h" // PyMemberDef
45
46
#include "sre.h"
47
48
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
50
#include <ctype.h>
51
52
/* defining this one enables tracing */
53
#undef VERBOSE
54
55
/* -------------------------------------------------------------------- */
56
57
/* LOCAL(type) declares a file-local inline helper returning `type`.
   Under MSVC it additionally requests the __fastcall calling convention. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
65
66
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
72
73
/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
   to nothing otherwise; note the double parentheses at call sites. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
78
79
/* -------------------------------------------------------------------- */
80
/* search engine state */
81
82
/* ASCII-only character predicates.  The leading range comparison rejects
   larger code points before the Py_IS* macro is evaluated.  Each macro
   evaluates `ch` more than once, so pass side-effect-free arguments. */
#define SRE_IS_DIGIT(ch)\
    ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_WORD(ch)\
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
90
91
/* Lowercase `ch` with Py_TOLOWER when it is below 128; larger values are
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}
95
96
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* Only values 0..255 are handed to isalnum(); anything larger is "not
   alphanumeric".  `ch` is evaluated more than once. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
101
102
/* Lowercase `ch` with the C library's tolower() (current locale) when it is
   below 256; larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((int)(ch)) : ch);
}
106
107
/* Uppercase `ch` with the C library's toupper() (current locale) when it is
   below 256; larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((int)(ch)) : ch);
}
111
112
/* unicode-specific character predicates */
113
114
/* Thin aliases over the Py_UNICODE_* classification macros; a "word"
   character is an alphanumeric or an underscore. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
119
120
/* Lowercase `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
124
125
/* Uppercase `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}
129
130
LOCAL(int)
131
sre_category(SRE_CODE category, unsigned int ch)
132
{
133
switch (category) {
134
135
case SRE_CATEGORY_DIGIT:
136
return SRE_IS_DIGIT(ch);
137
case SRE_CATEGORY_NOT_DIGIT:
138
return !SRE_IS_DIGIT(ch);
139
case SRE_CATEGORY_SPACE:
140
return SRE_IS_SPACE(ch);
141
case SRE_CATEGORY_NOT_SPACE:
142
return !SRE_IS_SPACE(ch);
143
case SRE_CATEGORY_WORD:
144
return SRE_IS_WORD(ch);
145
case SRE_CATEGORY_NOT_WORD:
146
return !SRE_IS_WORD(ch);
147
case SRE_CATEGORY_LINEBREAK:
148
return SRE_IS_LINEBREAK(ch);
149
case SRE_CATEGORY_NOT_LINEBREAK:
150
return !SRE_IS_LINEBREAK(ch);
151
152
case SRE_CATEGORY_LOC_WORD:
153
return SRE_LOC_IS_WORD(ch);
154
case SRE_CATEGORY_LOC_NOT_WORD:
155
return !SRE_LOC_IS_WORD(ch);
156
157
case SRE_CATEGORY_UNI_DIGIT:
158
return SRE_UNI_IS_DIGIT(ch);
159
case SRE_CATEGORY_UNI_NOT_DIGIT:
160
return !SRE_UNI_IS_DIGIT(ch);
161
case SRE_CATEGORY_UNI_SPACE:
162
return SRE_UNI_IS_SPACE(ch);
163
case SRE_CATEGORY_UNI_NOT_SPACE:
164
return !SRE_UNI_IS_SPACE(ch);
165
case SRE_CATEGORY_UNI_WORD:
166
return SRE_UNI_IS_WORD(ch);
167
case SRE_CATEGORY_UNI_NOT_WORD:
168
return !SRE_UNI_IS_WORD(ch);
169
case SRE_CATEGORY_UNI_LINEBREAK:
170
return SRE_UNI_IS_LINEBREAK(ch);
171
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
172
return !SRE_UNI_IS_LINEBREAK(ch);
173
}
174
return 0;
175
}
176
177
LOCAL(int)
178
char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
179
{
180
return ch == pattern
181
|| (SRE_CODE) sre_lower_locale(ch) == pattern
182
|| (SRE_CODE) sre_upper_locale(ch) == pattern;
183
}
184
185
186
/* helpers */
187
188
static void
189
data_stack_dealloc(SRE_STATE* state)
190
{
191
if (state->data_stack) {
192
PyMem_Free(state->data_stack);
193
state->data_stack = NULL;
194
}
195
state->data_stack_size = state->data_stack_base = 0;
196
}
197
198
static int
199
data_stack_grow(SRE_STATE* state, Py_ssize_t size)
200
{
201
Py_ssize_t minsize, cursize;
202
minsize = state->data_stack_base+size;
203
cursize = state->data_stack_size;
204
if (cursize < minsize) {
205
void* stack;
206
cursize = minsize+minsize/4+1024;
207
TRACE(("allocate/grow stack %zd\n", cursize));
208
stack = PyMem_Realloc(state->data_stack, cursize);
209
if (!stack) {
210
data_stack_dealloc(state);
211
return SRE_ERROR_MEMORY;
212
}
213
state->data_stack = (char *)stack;
214
state->data_stack_size = cursize;
215
}
216
return 0;
217
}
218
219
/* sre_lib.h is included three times, once per character width; SRE(F)
   gives each instantiation its own function-name prefix.
   NOTE(review): the macros are redefined without #undef here, so sre_lib.h
   presumably undefines them at its end -- confirm against that header. */

/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
#include "sre_lib.h"

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UCS2
#define SIZEOF_SRE_CHAR 2
#define SRE(F) sre_ucs2_##F
#include "sre_lib.h"

/* generate 32-bit unicode version */

#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
#include "sre_lib.h"
239
240
/* -------------------------------------------------------------------- */
241
/* factories and destructors */
242
243
/* module state */
244
typedef struct {
245
PyTypeObject *Pattern_Type;
246
PyTypeObject *Match_Type;
247
PyTypeObject *Scanner_Type;
248
PyTypeObject *Template_Type;
249
PyObject *compile_template; // reference to re._compile_template
250
} _sremodulestate;
251
252
static _sremodulestate *
253
get_sre_module_state(PyObject *m)
254
{
255
_sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
256
assert(state);
257
return state;
258
}
259
260
static struct PyModuleDef sremodule;
/* Look up the module state through the module that defines type `cls`. */
#define get_sre_module_state_by_class(cls) \
    (get_sre_module_state(PyType_GetModule(cls)))
263
264
/* see sre.h for object declarations */
265
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
266
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
267
268
/*[clinic input]
269
module _sre
270
class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
271
class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
272
class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
273
[clinic start generated code]*/
274
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
275
276
/*[clinic input]
277
_sre.getcodesize -> int
278
[clinic start generated code]*/
279
280
static int
281
_sre_getcodesize_impl(PyObject *module)
282
/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
283
{
284
return sizeof(SRE_CODE);
285
}
286
287
/*[clinic input]
288
_sre.ascii_iscased -> bool
289
290
character: int
291
/
292
293
[clinic start generated code]*/
294
295
static int
296
_sre_ascii_iscased_impl(PyObject *module, int character)
297
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
298
{
299
unsigned int ch = (unsigned int)character;
300
return ch < 128 && Py_ISALPHA(ch);
301
}
302
303
/*[clinic input]
304
_sre.unicode_iscased -> bool
305
306
character: int
307
/
308
309
[clinic start generated code]*/
310
311
static int
312
_sre_unicode_iscased_impl(PyObject *module, int character)
313
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
314
{
315
unsigned int ch = (unsigned int)character;
316
return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
317
}
318
319
/*[clinic input]
320
_sre.ascii_tolower -> int
321
322
character: int
323
/
324
325
[clinic start generated code]*/
326
327
static int
328
_sre_ascii_tolower_impl(PyObject *module, int character)
329
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
330
{
331
return sre_lower_ascii(character);
332
}
333
334
/*[clinic input]
335
_sre.unicode_tolower -> int
336
337
character: int
338
/
339
340
[clinic start generated code]*/
341
342
static int
343
_sre_unicode_tolower_impl(PyObject *module, int character)
344
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
345
{
346
return sre_lower_unicode(character);
347
}
348
349
LOCAL(void)
350
state_reset(SRE_STATE* state)
351
{
352
/* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
353
/*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
354
355
state->lastmark = -1;
356
state->lastindex = -1;
357
358
state->repeat = NULL;
359
360
data_stack_dealloc(state);
361
}
362
363
static const void*
364
getstring(PyObject* string, Py_ssize_t* p_length,
365
int* p_isbytes, int* p_charsize,
366
Py_buffer *view)
367
{
368
/* given a python object, return a data pointer, a length (in
369
characters), and a character size. return NULL if the object
370
is not a string (or not compatible) */
371
372
/* Unicode objects do not support the buffer API. So, get the data
373
directly instead. */
374
if (PyUnicode_Check(string)) {
375
*p_length = PyUnicode_GET_LENGTH(string);
376
*p_charsize = PyUnicode_KIND(string);
377
*p_isbytes = 0;
378
return PyUnicode_DATA(string);
379
}
380
381
/* get pointer to byte string buffer */
382
if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
383
PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
384
"object, got '%.200s'", Py_TYPE(string)->tp_name);
385
return NULL;
386
}
387
388
*p_length = view->len;
389
*p_charsize = 1;
390
*p_isbytes = 1;
391
392
if (view->buf == NULL) {
393
PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
394
PyBuffer_Release(view);
395
view->buf = NULL;
396
return NULL;
397
}
398
return view->buf;
399
}
400
401
/* Prepare `state` for matching `pattern` against `string` within
   [start, end) (character indices, clamped to the string bounds).

   Zeroes the state, allocates two mark slots per group, acquires the
   string data, checks that the pattern and string agree on str vs bytes,
   and takes a new reference to `string`.  Returns `string` (borrowed) on
   success; on failure returns NULL with an exception set, after freeing
   the marks and releasing any acquired buffer.  A successful call must be
   paired with state_fini(). */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           Py_ssize_t start, Py_ssize_t end)
{
    /* prepare state object */

    Py_ssize_t length;
    int isbytes, charsize;
    const void* ptr;

    memset(state, 0, sizeof(SRE_STATE));

    state->mark = PyMem_New(const void *, pattern->groups * 2);
    if (!state->mark) {
        PyErr_NoMemory();
        goto err;
    }
    state->lastmark = -1;
    state->lastindex = -1;

    state->buffer.buf = NULL;
    ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
    if (!ptr)
        goto err;

    if (isbytes && pattern->isbytes == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a string pattern on a bytes-like object");
        goto err;
    }
    if (!isbytes && pattern->isbytes > 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a bytes pattern on a string-like object");
        goto err;
    }

    /* adjust boundaries */
    if (start < 0)
        start = 0;
    else if (start > length)
        start = length;

    if (end < 0)
        end = 0;
    else if (end > length)
        end = length;

    state->isbytes = isbytes;
    state->charsize = charsize;
    state->match_all = 0;
    state->must_advance = 0;

    state->beginning = ptr;

    /* translate character indices into byte pointers */
    state->start = (void*) ((char*) ptr + start * state->charsize);
    state->end = (void*) ((char*) ptr + end * state->charsize);

    state->string = Py_NewRef(string);
    state->pos = start;
    state->endpos = end;

    return string;
  err:
    /* We add an explicit cast here because MSVC has a bug when
       compiling C code where it believes that `const void**` cannot be
       safely casted to `void*`, see bpo-39943 for details. */
    PyMem_Free((void*) state->mark);
    state->mark = NULL;
    if (state->buffer.buf)
        PyBuffer_Release(&state->buffer);
    return NULL;
}
473
474
LOCAL(void)
475
state_fini(SRE_STATE* state)
476
{
477
if (state->buffer.buf)
478
PyBuffer_Release(&state->buffer);
479
Py_XDECREF(state->string);
480
data_stack_dealloc(state);
481
/* See above PyMem_Del for why we explicitly cast here. */
482
PyMem_Free((void*) state->mark);
483
state->mark = NULL;
484
}
485
486
/* calculate offset from start of string: converts a pointer into the
   subject buffer to a character index by dividing the byte distance from
   (state)->beginning by (state)->charsize */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
489
490
/* Return string[start:end] as a new reference.
   For bytes-like input the result is a bytes object copied from `ptr`
   (or `string` itself when it is an exact bytes object and the slice
   covers all of it); otherwise PyUnicode_Substring() is used. */
LOCAL(PyObject*)
getslice(int isbytes, const void *ptr,
         PyObject* string, Py_ssize_t start, Py_ssize_t end)
{
    if (isbytes) {
        if (PyBytes_CheckExact(string) &&
            start == 0 && end == PyBytes_GET_SIZE(string)) {
            return Py_NewRef(string);
        }
        return PyBytes_FromStringAndSize(
                (const char *)ptr + start, end - start);
    }
    else {
        return PyUnicode_Substring(string, start, end);
    }
}
506
507
/* Return the text captured by group number `index` (1-based) as a new
   reference.  If the group did not participate in the match (or `string`
   is None), return an empty slice when `empty` is true, otherwise None.
   Returns NULL with SystemError set if the recorded span is inverted. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
    Py_ssize_t i, j;

    /* each group owns two consecutive mark slots: start and end */
    index = (index - 1) * 2;

    if (string == Py_None || index >= state->lastmark ||
        !state->mark[index] || !state->mark[index+1]) {
        if (empty)
            /* want empty string */
            i = j = 0;
        else {
            Py_RETURN_NONE;
        }
    } else {
        i = STATE_OFFSET(state, state->mark[index]);
        j = STATE_OFFSET(state, state->mark[index+1]);

        /* check wrong span */
        if (i > j) {
            PyErr_SetString(PyExc_SystemError,
                            "The span of capturing group is wrong,"
                            " please report a bug for the re module.");
            return NULL;
        }
    }

    return getslice(state->isbytes, state->beginning, string, i, j);
}
536
537
static void
538
pattern_error(Py_ssize_t status)
539
{
540
switch (status) {
541
case SRE_ERROR_RECURSION_LIMIT:
542
/* This error code seems to be unused. */
543
PyErr_SetString(
544
PyExc_RecursionError,
545
"maximum recursion limit exceeded"
546
);
547
break;
548
case SRE_ERROR_MEMORY:
549
PyErr_NoMemory();
550
break;
551
case SRE_ERROR_INTERRUPTED:
552
/* An exception has already been raised, so let it fly */
553
break;
554
default:
555
/* other error codes indicate compiler/engine bugs */
556
PyErr_SetString(
557
PyExc_RuntimeError,
558
"internal error in regular expression engine"
559
);
560
}
561
}
562
563
static int
564
pattern_traverse(PatternObject *self, visitproc visit, void *arg)
565
{
566
Py_VISIT(Py_TYPE(self));
567
Py_VISIT(self->groupindex);
568
Py_VISIT(self->indexgroup);
569
Py_VISIT(self->pattern);
570
return 0;
571
}
572
573
static int
574
pattern_clear(PatternObject *self)
575
{
576
Py_CLEAR(self->groupindex);
577
Py_CLEAR(self->indexgroup);
578
Py_CLEAR(self->pattern);
579
return 0;
580
}
581
582
static void
583
pattern_dealloc(PatternObject* self)
584
{
585
PyTypeObject *tp = Py_TYPE(self);
586
587
PyObject_GC_UnTrack(self);
588
if (self->weakreflist != NULL) {
589
PyObject_ClearWeakRefs((PyObject *) self);
590
}
591
(void)pattern_clear(self);
592
tp->tp_free(self);
593
Py_DECREF(tp);
594
}
595
596
LOCAL(Py_ssize_t)
597
sre_match(SRE_STATE* state, SRE_CODE* pattern)
598
{
599
if (state->charsize == 1)
600
return sre_ucs1_match(state, pattern, 1);
601
if (state->charsize == 2)
602
return sre_ucs2_match(state, pattern, 1);
603
assert(state->charsize == 4);
604
return sre_ucs4_match(state, pattern, 1);
605
}
606
607
LOCAL(Py_ssize_t)
608
sre_search(SRE_STATE* state, SRE_CODE* pattern)
609
{
610
if (state->charsize == 1)
611
return sre_ucs1_search(state, pattern);
612
if (state->charsize == 2)
613
return sre_ucs2_search(state, pattern);
614
assert(state->charsize == 4);
615
return sre_ucs4_search(state, pattern);
616
}
617
618
/*[clinic input]
619
_sre.SRE_Pattern.match
620
621
cls: defining_class
622
/
623
string: object
624
pos: Py_ssize_t = 0
625
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
626
627
Matches zero or more characters at the beginning of the string.
628
[clinic start generated code]*/
629
630
static PyObject *
631
_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
632
PyObject *string, Py_ssize_t pos,
633
Py_ssize_t endpos)
634
/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
635
{
636
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
637
SRE_STATE state;
638
Py_ssize_t status;
639
PyObject *match;
640
641
if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
642
return NULL;
643
644
state.ptr = state.start;
645
646
TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
647
648
status = sre_match(&state, PatternObject_GetCode(self));
649
650
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
651
if (PyErr_Occurred()) {
652
state_fini(&state);
653
return NULL;
654
}
655
656
match = pattern_new_match(module_state, self, &state, status);
657
state_fini(&state);
658
return match;
659
}
660
661
/*[clinic input]
662
_sre.SRE_Pattern.fullmatch
663
664
cls: defining_class
665
/
666
string: object
667
pos: Py_ssize_t = 0
668
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
669
670
Matches against all of the string.
671
[clinic start generated code]*/
672
673
static PyObject *
674
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
675
PyObject *string, Py_ssize_t pos,
676
Py_ssize_t endpos)
677
/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
678
{
679
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
680
SRE_STATE state;
681
Py_ssize_t status;
682
PyObject *match;
683
684
if (!state_init(&state, self, string, pos, endpos))
685
return NULL;
686
687
state.ptr = state.start;
688
689
TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
690
691
state.match_all = 1;
692
status = sre_match(&state, PatternObject_GetCode(self));
693
694
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
695
if (PyErr_Occurred()) {
696
state_fini(&state);
697
return NULL;
698
}
699
700
match = pattern_new_match(module_state, self, &state, status);
701
state_fini(&state);
702
return match;
703
}
704
705
/*[clinic input]
706
_sre.SRE_Pattern.search
707
708
cls: defining_class
709
/
710
string: object
711
pos: Py_ssize_t = 0
712
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
713
714
Scan through string looking for a match, and return a corresponding match object instance.
715
716
Return None if no position in the string matches.
717
[clinic start generated code]*/
718
719
static PyObject *
720
_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
721
PyObject *string, Py_ssize_t pos,
722
Py_ssize_t endpos)
723
/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
724
{
725
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
726
SRE_STATE state;
727
Py_ssize_t status;
728
PyObject *match;
729
730
if (!state_init(&state, self, string, pos, endpos))
731
return NULL;
732
733
TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
734
735
status = sre_search(&state, PatternObject_GetCode(self));
736
737
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
738
739
if (PyErr_Occurred()) {
740
state_fini(&state);
741
return NULL;
742
}
743
744
match = pattern_new_match(module_state, self, &state, status);
745
state_fini(&state);
746
return match;
747
}
748
749
/*[clinic input]
750
_sre.SRE_Pattern.findall
751
752
string: object
753
pos: Py_ssize_t = 0
754
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
755
756
Return a list of all non-overlapping matches of pattern in string.
757
[clinic start generated code]*/
758
759
static PyObject *
760
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
761
Py_ssize_t pos, Py_ssize_t endpos)
762
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
763
{
764
SRE_STATE state;
765
PyObject* list;
766
Py_ssize_t status;
767
Py_ssize_t i, b, e;
768
769
if (!state_init(&state, self, string, pos, endpos))
770
return NULL;
771
772
list = PyList_New(0);
773
if (!list) {
774
state_fini(&state);
775
return NULL;
776
}
777
778
while (state.start <= state.end) {
779
780
PyObject* item;
781
782
state_reset(&state);
783
784
state.ptr = state.start;
785
786
status = sre_search(&state, PatternObject_GetCode(self));
787
if (PyErr_Occurred())
788
goto error;
789
790
if (status <= 0) {
791
if (status == 0)
792
break;
793
pattern_error(status);
794
goto error;
795
}
796
797
/* don't bother to build a match object */
798
switch (self->groups) {
799
case 0:
800
b = STATE_OFFSET(&state, state.start);
801
e = STATE_OFFSET(&state, state.ptr);
802
item = getslice(state.isbytes, state.beginning,
803
string, b, e);
804
if (!item)
805
goto error;
806
break;
807
case 1:
808
item = state_getslice(&state, 1, string, 1);
809
if (!item)
810
goto error;
811
break;
812
default:
813
item = PyTuple_New(self->groups);
814
if (!item)
815
goto error;
816
for (i = 0; i < self->groups; i++) {
817
PyObject* o = state_getslice(&state, i+1, string, 1);
818
if (!o) {
819
Py_DECREF(item);
820
goto error;
821
}
822
PyTuple_SET_ITEM(item, i, o);
823
}
824
break;
825
}
826
827
status = PyList_Append(list, item);
828
Py_DECREF(item);
829
if (status < 0)
830
goto error;
831
832
state.must_advance = (state.ptr == state.start);
833
state.start = state.ptr;
834
}
835
836
state_fini(&state);
837
return list;
838
839
error:
840
Py_DECREF(list);
841
state_fini(&state);
842
return NULL;
843
844
}
845
846
/*[clinic input]
847
_sre.SRE_Pattern.finditer
848
849
cls: defining_class
850
/
851
string: object
852
pos: Py_ssize_t = 0
853
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
854
855
Return an iterator over all non-overlapping matches for the RE pattern in string.
856
857
For each match, the iterator returns a match object.
858
[clinic start generated code]*/
859
860
static PyObject *
861
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
862
PyObject *string, Py_ssize_t pos,
863
Py_ssize_t endpos)
864
/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
865
{
866
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
867
PyObject* scanner;
868
PyObject* search;
869
PyObject* iterator;
870
871
scanner = pattern_scanner(module_state, self, string, pos, endpos);
872
if (!scanner)
873
return NULL;
874
875
search = PyObject_GetAttrString(scanner, "search");
876
Py_DECREF(scanner);
877
if (!search)
878
return NULL;
879
880
iterator = PyCallIter_New(search, Py_None);
881
Py_DECREF(search);
882
883
return iterator;
884
}
885
886
/*[clinic input]
887
_sre.SRE_Pattern.scanner
888
889
cls: defining_class
890
/
891
string: object
892
pos: Py_ssize_t = 0
893
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
894
895
[clinic start generated code]*/
896
897
static PyObject *
898
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
899
PyObject *string, Py_ssize_t pos,
900
Py_ssize_t endpos)
901
/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
902
{
903
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
904
905
return pattern_scanner(module_state, self, string, pos, endpos);
906
}
907
908
/*[clinic input]
909
_sre.SRE_Pattern.split
910
911
string: object
912
maxsplit: Py_ssize_t = 0
913
914
Split string by the occurrences of pattern.
915
[clinic start generated code]*/
916
917
static PyObject *
918
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
919
Py_ssize_t maxsplit)
920
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
921
{
922
SRE_STATE state;
923
PyObject* list;
924
PyObject* item;
925
Py_ssize_t status;
926
Py_ssize_t n;
927
Py_ssize_t i;
928
const void* last;
929
930
assert(self->codesize != 0);
931
932
if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
933
return NULL;
934
935
list = PyList_New(0);
936
if (!list) {
937
state_fini(&state);
938
return NULL;
939
}
940
941
n = 0;
942
last = state.start;
943
944
while (!maxsplit || n < maxsplit) {
945
946
state_reset(&state);
947
948
state.ptr = state.start;
949
950
status = sre_search(&state, PatternObject_GetCode(self));
951
if (PyErr_Occurred())
952
goto error;
953
954
if (status <= 0) {
955
if (status == 0)
956
break;
957
pattern_error(status);
958
goto error;
959
}
960
961
/* get segment before this match */
962
item = getslice(state.isbytes, state.beginning,
963
string, STATE_OFFSET(&state, last),
964
STATE_OFFSET(&state, state.start)
965
);
966
if (!item)
967
goto error;
968
status = PyList_Append(list, item);
969
Py_DECREF(item);
970
if (status < 0)
971
goto error;
972
973
/* add groups (if any) */
974
for (i = 0; i < self->groups; i++) {
975
item = state_getslice(&state, i+1, string, 0);
976
if (!item)
977
goto error;
978
status = PyList_Append(list, item);
979
Py_DECREF(item);
980
if (status < 0)
981
goto error;
982
}
983
984
n = n + 1;
985
state.must_advance = (state.ptr == state.start);
986
last = state.start = state.ptr;
987
988
}
989
990
/* get segment following last match (even if empty) */
991
item = getslice(state.isbytes, state.beginning,
992
string, STATE_OFFSET(&state, last), state.endpos
993
);
994
if (!item)
995
goto error;
996
status = PyList_Append(list, item);
997
Py_DECREF(item);
998
if (status < 0)
999
goto error;
1000
1001
state_fini(&state);
1002
return list;
1003
1004
error:
1005
Py_DECREF(list);
1006
state_fini(&state);
1007
return NULL;
1008
1009
}
1010
1011
static PyObject *
1012
compile_template(_sremodulestate *module_state,
1013
PatternObject *pattern, PyObject *template)
1014
{
1015
/* delegate to Python code */
1016
PyObject *func = module_state->compile_template;
1017
if (func == NULL) {
1018
func = _PyImport_GetModuleAttrString("re", "_compile_template");
1019
if (func == NULL) {
1020
return NULL;
1021
}
1022
Py_XSETREF(module_state->compile_template, func);
1023
}
1024
1025
PyObject *args[] = {(PyObject *)pattern, template};
1026
PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
1027
1028
if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
1029
/* If the replacement string is unhashable (e.g. bytearray),
1030
* convert it to the basic type (str or bytes) and repeat. */
1031
if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
1032
PyErr_Clear();
1033
template = _PyUnicode_Copy(template);
1034
}
1035
else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
1036
PyErr_Clear();
1037
template = PyBytes_FromObject(template);
1038
}
1039
else {
1040
return NULL;
1041
}
1042
if (template == NULL) {
1043
return NULL;
1044
}
1045
args[1] = template;
1046
result = PyObject_Vectorcall(func, args, 2, NULL);
1047
Py_DECREF(template);
1048
}
1049
1050
if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
1051
PyErr_Format(PyExc_RuntimeError,
1052
"the result of compiling a replacement string is %.200s",
1053
Py_TYPE(result)->tp_name);
1054
Py_DECREF(result);
1055
return NULL;
1056
}
1057
return result;
1058
}
1059
1060
static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
1061
1062
static PyObject*
1063
pattern_subx(_sremodulestate* module_state,
1064
PatternObject* self,
1065
PyObject* ptemplate,
1066
PyObject* string,
1067
Py_ssize_t count,
1068
Py_ssize_t subn)
1069
{
1070
SRE_STATE state;
1071
PyObject* list;
1072
PyObject* joiner;
1073
PyObject* item;
1074
PyObject* filter;
1075
PyObject* match;
1076
const void* ptr;
1077
Py_ssize_t status;
1078
Py_ssize_t n;
1079
Py_ssize_t i, b, e;
1080
int isbytes, charsize;
1081
enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
1082
Py_buffer view;
1083
1084
if (PyCallable_Check(ptemplate)) {
1085
/* sub/subn takes either a function or a template */
1086
filter = Py_NewRef(ptemplate);
1087
filter_type = CALLABLE;
1088
} else {
1089
/* if not callable, check if it's a literal string */
1090
int literal;
1091
view.buf = NULL;
1092
ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1093
if (ptr) {
1094
if (charsize == 1)
1095
literal = memchr(ptr, '\\', n) == NULL;
1096
else
1097
literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1098
} else {
1099
PyErr_Clear();
1100
literal = 0;
1101
}
1102
if (view.buf)
1103
PyBuffer_Release(&view);
1104
if (literal) {
1105
filter = Py_NewRef(ptemplate);
1106
filter_type = LITERAL;
1107
} else {
1108
/* not a literal; hand it over to the template compiler */
1109
filter = compile_template(module_state, self, ptemplate);
1110
if (!filter)
1111
return NULL;
1112
1113
assert(Py_TYPE(filter) == module_state->Template_Type);
1114
if (Py_SIZE(filter) == 0) {
1115
Py_SETREF(filter,
1116
Py_NewRef(((TemplateObject *)filter)->literal));
1117
filter_type = LITERAL;
1118
}
1119
else {
1120
filter_type = TEMPLATE;
1121
}
1122
}
1123
}
1124
1125
if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1126
Py_DECREF(filter);
1127
return NULL;
1128
}
1129
1130
list = PyList_New(0);
1131
if (!list) {
1132
Py_DECREF(filter);
1133
state_fini(&state);
1134
return NULL;
1135
}
1136
1137
n = i = 0;
1138
1139
while (!count || n < count) {
1140
1141
state_reset(&state);
1142
1143
state.ptr = state.start;
1144
1145
status = sre_search(&state, PatternObject_GetCode(self));
1146
if (PyErr_Occurred())
1147
goto error;
1148
1149
if (status <= 0) {
1150
if (status == 0)
1151
break;
1152
pattern_error(status);
1153
goto error;
1154
}
1155
1156
b = STATE_OFFSET(&state, state.start);
1157
e = STATE_OFFSET(&state, state.ptr);
1158
1159
if (i < b) {
1160
/* get segment before this match */
1161
item = getslice(state.isbytes, state.beginning,
1162
string, i, b);
1163
if (!item)
1164
goto error;
1165
status = PyList_Append(list, item);
1166
Py_DECREF(item);
1167
if (status < 0)
1168
goto error;
1169
1170
}
1171
1172
if (filter_type != LITERAL) {
1173
/* pass match object through filter */
1174
match = pattern_new_match(module_state, self, &state, 1);
1175
if (!match)
1176
goto error;
1177
if (filter_type == TEMPLATE) {
1178
item = expand_template((TemplateObject *)filter,
1179
(MatchObject *)match);
1180
}
1181
else {
1182
assert(filter_type == CALLABLE);
1183
item = PyObject_CallOneArg(filter, match);
1184
}
1185
Py_DECREF(match);
1186
if (!item)
1187
goto error;
1188
} else {
1189
/* filter is literal string */
1190
item = Py_NewRef(filter);
1191
}
1192
1193
/* add to list */
1194
if (item != Py_None) {
1195
status = PyList_Append(list, item);
1196
Py_DECREF(item);
1197
if (status < 0)
1198
goto error;
1199
}
1200
1201
i = e;
1202
n = n + 1;
1203
state.must_advance = (state.ptr == state.start);
1204
state.start = state.ptr;
1205
}
1206
1207
/* get segment following last match */
1208
if (i < state.endpos) {
1209
item = getslice(state.isbytes, state.beginning,
1210
string, i, state.endpos);
1211
if (!item)
1212
goto error;
1213
status = PyList_Append(list, item);
1214
Py_DECREF(item);
1215
if (status < 0)
1216
goto error;
1217
}
1218
1219
state_fini(&state);
1220
1221
Py_DECREF(filter);
1222
1223
/* convert list to single string (also removes list) */
1224
joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1225
if (!joiner) {
1226
Py_DECREF(list);
1227
return NULL;
1228
}
1229
if (PyList_GET_SIZE(list) == 0) {
1230
Py_DECREF(list);
1231
item = joiner;
1232
}
1233
else {
1234
if (state.isbytes)
1235
item = _PyBytes_Join(joiner, list);
1236
else
1237
item = PyUnicode_Join(joiner, list);
1238
Py_DECREF(joiner);
1239
Py_DECREF(list);
1240
if (!item)
1241
return NULL;
1242
}
1243
1244
if (subn)
1245
return Py_BuildValue("Nn", item, n);
1246
1247
return item;
1248
1249
error:
1250
Py_DECREF(list);
1251
state_fini(&state);
1252
Py_DECREF(filter);
1253
return NULL;
1254
1255
}
1256
1257
/*[clinic input]
1258
_sre.SRE_Pattern.sub
1259
1260
cls: defining_class
1261
/
1262
repl: object
1263
string: object
1264
count: Py_ssize_t = 0
1265
1266
Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1267
[clinic start generated code]*/
1268
1269
static PyObject *
1270
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1271
PyObject *repl, PyObject *string, Py_ssize_t count)
1272
/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
1273
{
1274
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
1275
1276
return pattern_subx(module_state, self, repl, string, count, 0);
1277
}
1278
1279
/*[clinic input]
1280
_sre.SRE_Pattern.subn
1281
1282
cls: defining_class
1283
/
1284
repl: object
1285
string: object
1286
count: Py_ssize_t = 0
1287
1288
Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1289
[clinic start generated code]*/
1290
1291
static PyObject *
1292
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1293
PyObject *repl, PyObject *string,
1294
Py_ssize_t count)
1295
/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
1296
{
1297
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
1298
1299
return pattern_subx(module_state, self, repl, string, count, 1);
1300
}
1301
1302
/*[clinic input]
1303
_sre.SRE_Pattern.__copy__
1304
1305
[clinic start generated code]*/
1306
1307
static PyObject *
1308
_sre_SRE_Pattern___copy___impl(PatternObject *self)
1309
/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1310
{
1311
return Py_NewRef(self);
1312
}
1313
1314
/*[clinic input]
1315
_sre.SRE_Pattern.__deepcopy__
1316
1317
memo: object
1318
/
1319
1320
[clinic start generated code]*/
1321
1322
static PyObject *
1323
_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1324
/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1325
{
1326
return Py_NewRef(self);
1327
}
1328
1329
static PyObject *
1330
pattern_repr(PatternObject *obj)
1331
{
1332
static const struct {
1333
const char *name;
1334
int value;
1335
} flag_names[] = {
1336
{"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1337
{"re.LOCALE", SRE_FLAG_LOCALE},
1338
{"re.MULTILINE", SRE_FLAG_MULTILINE},
1339
{"re.DOTALL", SRE_FLAG_DOTALL},
1340
{"re.UNICODE", SRE_FLAG_UNICODE},
1341
{"re.VERBOSE", SRE_FLAG_VERBOSE},
1342
{"re.DEBUG", SRE_FLAG_DEBUG},
1343
{"re.ASCII", SRE_FLAG_ASCII},
1344
};
1345
PyObject *result = NULL;
1346
PyObject *flag_items;
1347
size_t i;
1348
int flags = obj->flags;
1349
1350
/* Omit re.UNICODE for valid string patterns. */
1351
if (obj->isbytes == 0 &&
1352
(flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1353
SRE_FLAG_UNICODE)
1354
flags &= ~SRE_FLAG_UNICODE;
1355
1356
flag_items = PyList_New(0);
1357
if (!flag_items)
1358
return NULL;
1359
1360
for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1361
if (flags & flag_names[i].value) {
1362
PyObject *item = PyUnicode_FromString(flag_names[i].name);
1363
if (!item)
1364
goto done;
1365
1366
if (PyList_Append(flag_items, item) < 0) {
1367
Py_DECREF(item);
1368
goto done;
1369
}
1370
Py_DECREF(item);
1371
flags &= ~flag_names[i].value;
1372
}
1373
}
1374
if (flags) {
1375
PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1376
if (!item)
1377
goto done;
1378
1379
if (PyList_Append(flag_items, item) < 0) {
1380
Py_DECREF(item);
1381
goto done;
1382
}
1383
Py_DECREF(item);
1384
}
1385
1386
if (PyList_Size(flag_items) > 0) {
1387
PyObject *flags_result;
1388
PyObject *sep = PyUnicode_FromString("|");
1389
if (!sep)
1390
goto done;
1391
flags_result = PyUnicode_Join(sep, flag_items);
1392
Py_DECREF(sep);
1393
if (!flags_result)
1394
goto done;
1395
result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1396
obj->pattern, flags_result);
1397
Py_DECREF(flags_result);
1398
}
1399
else {
1400
result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1401
}
1402
1403
done:
1404
Py_DECREF(flag_items);
1405
return result;
1406
}
1407
1408
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1409
1410
/* PatternObject's 'groupindex' method. */
1411
static PyObject *
1412
pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1413
{
1414
if (self->groupindex == NULL)
1415
return PyDict_New();
1416
return PyDictProxy_New(self->groupindex);
1417
}
1418
1419
static int _validate(PatternObject *self); /* Forward */
1420
1421
/*[clinic input]
1422
_sre.compile
1423
1424
pattern: object
1425
flags: int
1426
code: object(subclass_of='&PyList_Type')
1427
groups: Py_ssize_t
1428
groupindex: object(subclass_of='&PyDict_Type')
1429
indexgroup: object(subclass_of='&PyTuple_Type')
1430
1431
[clinic start generated code]*/
1432
1433
static PyObject *
1434
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1435
PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1436
PyObject *indexgroup)
1437
/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1438
{
1439
/* "compile" pattern descriptor to pattern object */
1440
1441
_sremodulestate *module_state = get_sre_module_state(module);
1442
PatternObject* self;
1443
Py_ssize_t i, n;
1444
1445
n = PyList_GET_SIZE(code);
1446
/* coverity[ampersand_in_size] */
1447
self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1448
if (!self)
1449
return NULL;
1450
self->weakreflist = NULL;
1451
self->pattern = NULL;
1452
self->groupindex = NULL;
1453
self->indexgroup = NULL;
1454
1455
self->codesize = n;
1456
1457
for (i = 0; i < n; i++) {
1458
PyObject *o = PyList_GET_ITEM(code, i);
1459
unsigned long value = PyLong_AsUnsignedLong(o);
1460
self->code[i] = (SRE_CODE) value;
1461
if ((unsigned long) self->code[i] != value) {
1462
PyErr_SetString(PyExc_OverflowError,
1463
"regular expression code size limit exceeded");
1464
break;
1465
}
1466
}
1467
PyObject_GC_Track(self);
1468
1469
if (PyErr_Occurred()) {
1470
Py_DECREF(self);
1471
return NULL;
1472
}
1473
1474
if (pattern == Py_None) {
1475
self->isbytes = -1;
1476
}
1477
else {
1478
Py_ssize_t p_length;
1479
int charsize;
1480
Py_buffer view;
1481
view.buf = NULL;
1482
if (!getstring(pattern, &p_length, &self->isbytes,
1483
&charsize, &view)) {
1484
Py_DECREF(self);
1485
return NULL;
1486
}
1487
if (view.buf)
1488
PyBuffer_Release(&view);
1489
}
1490
1491
self->pattern = Py_NewRef(pattern);
1492
1493
self->flags = flags;
1494
1495
self->groups = groups;
1496
1497
if (PyDict_GET_SIZE(groupindex) > 0) {
1498
self->groupindex = Py_NewRef(groupindex);
1499
if (PyTuple_GET_SIZE(indexgroup) > 0) {
1500
self->indexgroup = Py_NewRef(indexgroup);
1501
}
1502
}
1503
1504
if (!_validate(self)) {
1505
Py_DECREF(self);
1506
return NULL;
1507
}
1508
1509
return (PyObject*) self;
1510
}
1511
1512
/*[clinic input]
1513
_sre.template
1514
1515
pattern: object
1516
template: object(subclass_of="&PyList_Type")
1517
A list containing interleaved literal strings (str or bytes) and group
1518
indices (int), as returned by re._parser.parse_template():
1519
[literal1, group1, ..., literalN, groupN]
1520
/
1521
1522
[clinic start generated code]*/
1523
1524
static PyObject *
_sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
/*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
{
    /* template is a list containing interleaved literal strings (str or bytes)
     * and group indices (int), as returned by _parser.parse_template:
     *   [literal1, group1, literal2, ..., literalN].
     *
     * Returns a new TemplateObject, or NULL with an exception set
     * (TypeError "invalid template" for a malformed list).
     */
    _sremodulestate *module_state = get_sre_module_state(module);
    TemplateObject *self = NULL;
    Py_ssize_t n = PyList_GET_SIZE(template);
    /* The list must have odd length: one leading literal followed by
       (group, literal) pairs. */
    if ((n & 1) == 0 || n < 1) {
        goto bad_template;
    }
    n /= 2;
    self = PyObject_GC_NewVar(TemplateObject, module_state->Template_Type, n);
    if (!self)
        return NULL;
    self->chunks = 1 + 2*n;
    self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));
    for (Py_ssize_t i = 0; i < n; i++) {
        Py_ssize_t index = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*i+1));
        /* On failure only items[0..i-1] have been filled in; the rest of
           the freshly allocated array is uninitialised.  Shrink the
           recorded size before releasing the object so that nothing
           looks at the garbage entries.
           NOTE(review): assumes the type's clear/dealloc code walks
           Py_SIZE(self) entries of items[] - confirm. */
        if (index == -1 && PyErr_Occurred()) {
            Py_SET_SIZE(self, i);
            Py_DECREF(self);
            return NULL;
        }
        if (index < 0) {
            Py_SET_SIZE(self, i);
            goto bad_template;
        }
        self->items[i].index = index;

        PyObject *literal = PyList_GET_ITEM(template, 2*i+2);
        // Skip empty literals.
        if ((PyUnicode_Check(literal) && !PyUnicode_GET_LENGTH(literal)) ||
            (PyBytes_Check(literal) && !PyBytes_GET_SIZE(literal)))
        {
            literal = NULL;
            self->chunks--;
        }
        self->items[i].literal = Py_XNewRef(literal);
    }
    return (PyObject*) self;

bad_template:
    PyErr_SetString(PyExc_TypeError, "invalid template");
    Py_XDECREF(self);
    return NULL;
}
1572
1573
/* -------------------------------------------------------------------- */
1574
/* Code validation */
1575
1576
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1579
1580
The nice thing about the generated code is that it is position-independent:
1581
all jumps are relative jumps forward. Also, jumps don't cross each other:
1582
the target of a later jump is always earlier than the target of an earlier
1583
jump. IOW, this is okay:
1584
1585
J---------J-------T--------T
1586
\ \_____/ /
1587
\______________________/
1588
1589
but this is not:
1590
1591
J---------J-------T--------T
1592
\_________\_____/ /
1593
\____________/
1594
1595
It also helps that SRE_CODE is always an unsigned type.
1596
*/
1597
1598
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator; takes a parenthesized printf argument
   list, e.g. VTRACE(("x=%d\n", x)) */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns -1 from the enclosing function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.
   These macros read and advance the local `code` pointer, compare it
   against the local `end`, and store into the locals `op`, `arg` and
   `skip` respectively; they FAIL when the code array is exhausted. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless skip-adj stays within the remaining
   code.  `adj` is parenthesized so that an expression argument cannot
   change the meaning of the subtraction. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1638
1639
static int
1640
_validate_charset(SRE_CODE *code, SRE_CODE *end)
1641
{
1642
/* Some variables are manipulated by the macros above */
1643
SRE_CODE op;
1644
SRE_CODE arg;
1645
SRE_CODE offset;
1646
int i;
1647
1648
while (code < end) {
1649
GET_OP;
1650
switch (op) {
1651
1652
case SRE_OP_NEGATE:
1653
break;
1654
1655
case SRE_OP_LITERAL:
1656
GET_ARG;
1657
break;
1658
1659
case SRE_OP_RANGE:
1660
case SRE_OP_RANGE_UNI_IGNORE:
1661
GET_ARG;
1662
GET_ARG;
1663
break;
1664
1665
case SRE_OP_CHARSET:
1666
offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1667
if (offset > (uintptr_t)(end - code))
1668
FAIL;
1669
code += offset;
1670
break;
1671
1672
case SRE_OP_BIGCHARSET:
1673
GET_ARG; /* Number of blocks */
1674
offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1675
if (offset > (uintptr_t)(end - code))
1676
FAIL;
1677
/* Make sure that each byte points to a valid block */
1678
for (i = 0; i < 256; i++) {
1679
if (((unsigned char *)code)[i] >= arg)
1680
FAIL;
1681
}
1682
code += offset;
1683
offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1684
if (offset > (uintptr_t)(end - code))
1685
FAIL;
1686
code += offset;
1687
break;
1688
1689
case SRE_OP_CATEGORY:
1690
GET_ARG;
1691
switch (arg) {
1692
case SRE_CATEGORY_DIGIT:
1693
case SRE_CATEGORY_NOT_DIGIT:
1694
case SRE_CATEGORY_SPACE:
1695
case SRE_CATEGORY_NOT_SPACE:
1696
case SRE_CATEGORY_WORD:
1697
case SRE_CATEGORY_NOT_WORD:
1698
case SRE_CATEGORY_LINEBREAK:
1699
case SRE_CATEGORY_NOT_LINEBREAK:
1700
case SRE_CATEGORY_LOC_WORD:
1701
case SRE_CATEGORY_LOC_NOT_WORD:
1702
case SRE_CATEGORY_UNI_DIGIT:
1703
case SRE_CATEGORY_UNI_NOT_DIGIT:
1704
case SRE_CATEGORY_UNI_SPACE:
1705
case SRE_CATEGORY_UNI_NOT_SPACE:
1706
case SRE_CATEGORY_UNI_WORD:
1707
case SRE_CATEGORY_UNI_NOT_WORD:
1708
case SRE_CATEGORY_UNI_LINEBREAK:
1709
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1710
break;
1711
default:
1712
FAIL;
1713
}
1714
break;
1715
1716
default:
1717
FAIL;
1718
1719
}
1720
}
1721
1722
return 0;
1723
}
1724
1725
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1726
static int
1727
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1728
{
1729
/* Some variables are manipulated by the macros above */
1730
SRE_CODE op;
1731
SRE_CODE arg;
1732
SRE_CODE skip;
1733
1734
VTRACE(("code=%p, end=%p\n", code, end));
1735
1736
if (code > end)
1737
FAIL;
1738
1739
while (code < end) {
1740
GET_OP;
1741
switch (op) {
1742
1743
case SRE_OP_MARK:
1744
/* We don't check whether marks are properly nested; the
1745
sre_match() code is robust even if they don't, and the worst
1746
you can get is nonsensical match results. */
1747
GET_ARG;
1748
if (arg > 2 * (size_t)groups + 1) {
1749
VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1750
FAIL;
1751
}
1752
break;
1753
1754
case SRE_OP_LITERAL:
1755
case SRE_OP_NOT_LITERAL:
1756
case SRE_OP_LITERAL_IGNORE:
1757
case SRE_OP_NOT_LITERAL_IGNORE:
1758
case SRE_OP_LITERAL_UNI_IGNORE:
1759
case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1760
case SRE_OP_LITERAL_LOC_IGNORE:
1761
case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1762
GET_ARG;
1763
/* The arg is just a character, nothing to check */
1764
break;
1765
1766
case SRE_OP_SUCCESS:
1767
case SRE_OP_FAILURE:
1768
/* Nothing to check; these normally end the matching process */
1769
break;
1770
1771
case SRE_OP_AT:
1772
GET_ARG;
1773
switch (arg) {
1774
case SRE_AT_BEGINNING:
1775
case SRE_AT_BEGINNING_STRING:
1776
case SRE_AT_BEGINNING_LINE:
1777
case SRE_AT_END:
1778
case SRE_AT_END_LINE:
1779
case SRE_AT_END_STRING:
1780
case SRE_AT_BOUNDARY:
1781
case SRE_AT_NON_BOUNDARY:
1782
case SRE_AT_LOC_BOUNDARY:
1783
case SRE_AT_LOC_NON_BOUNDARY:
1784
case SRE_AT_UNI_BOUNDARY:
1785
case SRE_AT_UNI_NON_BOUNDARY:
1786
break;
1787
default:
1788
FAIL;
1789
}
1790
break;
1791
1792
case SRE_OP_ANY:
1793
case SRE_OP_ANY_ALL:
1794
/* These have no operands */
1795
break;
1796
1797
case SRE_OP_IN:
1798
case SRE_OP_IN_IGNORE:
1799
case SRE_OP_IN_UNI_IGNORE:
1800
case SRE_OP_IN_LOC_IGNORE:
1801
GET_SKIP;
1802
/* Stop 1 before the end; we check the FAILURE below */
1803
if (_validate_charset(code, code+skip-2))
1804
FAIL;
1805
if (code[skip-2] != SRE_OP_FAILURE)
1806
FAIL;
1807
code += skip-1;
1808
break;
1809
1810
case SRE_OP_INFO:
1811
{
1812
/* A minimal info field is
1813
<INFO> <1=skip> <2=flags> <3=min> <4=max>;
1814
If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1815
more follows. */
1816
SRE_CODE flags, i;
1817
SRE_CODE *newcode;
1818
GET_SKIP;
1819
newcode = code+skip-1;
1820
GET_ARG; flags = arg;
1821
GET_ARG;
1822
GET_ARG;
1823
/* Check that only valid flags are present */
1824
if ((flags & ~(SRE_INFO_PREFIX |
1825
SRE_INFO_LITERAL |
1826
SRE_INFO_CHARSET)) != 0)
1827
FAIL;
1828
/* PREFIX and CHARSET are mutually exclusive */
1829
if ((flags & SRE_INFO_PREFIX) &&
1830
(flags & SRE_INFO_CHARSET))
1831
FAIL;
1832
/* LITERAL implies PREFIX */
1833
if ((flags & SRE_INFO_LITERAL) &&
1834
!(flags & SRE_INFO_PREFIX))
1835
FAIL;
1836
/* Validate the prefix */
1837
if (flags & SRE_INFO_PREFIX) {
1838
SRE_CODE prefix_len;
1839
GET_ARG; prefix_len = arg;
1840
GET_ARG;
1841
/* Here comes the prefix string */
1842
if (prefix_len > (uintptr_t)(newcode - code))
1843
FAIL;
1844
code += prefix_len;
1845
/* And here comes the overlap table */
1846
if (prefix_len > (uintptr_t)(newcode - code))
1847
FAIL;
1848
/* Each overlap value should be < prefix_len */
1849
for (i = 0; i < prefix_len; i++) {
1850
if (code[i] >= prefix_len)
1851
FAIL;
1852
}
1853
code += prefix_len;
1854
}
1855
/* Validate the charset */
1856
if (flags & SRE_INFO_CHARSET) {
1857
if (_validate_charset(code, newcode-1))
1858
FAIL;
1859
if (newcode[-1] != SRE_OP_FAILURE)
1860
FAIL;
1861
code = newcode;
1862
}
1863
else if (code != newcode) {
1864
VTRACE(("code=%p, newcode=%p\n", code, newcode));
1865
FAIL;
1866
}
1867
}
1868
break;
1869
1870
case SRE_OP_BRANCH:
1871
{
1872
SRE_CODE *target = NULL;
1873
for (;;) {
1874
GET_SKIP;
1875
if (skip == 0)
1876
break;
1877
/* Stop 2 before the end; we check the JUMP below */
1878
if (_validate_inner(code, code+skip-3, groups))
1879
FAIL;
1880
code += skip-3;
1881
/* Check that it ends with a JUMP, and that each JUMP
1882
has the same target */
1883
GET_OP;
1884
if (op != SRE_OP_JUMP)
1885
FAIL;
1886
GET_SKIP;
1887
if (target == NULL)
1888
target = code+skip-1;
1889
else if (code+skip-1 != target)
1890
FAIL;
1891
}
1892
if (code != target)
1893
FAIL;
1894
}
1895
break;
1896
1897
case SRE_OP_REPEAT_ONE:
1898
case SRE_OP_MIN_REPEAT_ONE:
1899
case SRE_OP_POSSESSIVE_REPEAT_ONE:
1900
{
1901
SRE_CODE min, max;
1902
GET_SKIP;
1903
GET_ARG; min = arg;
1904
GET_ARG; max = arg;
1905
if (min > max)
1906
FAIL;
1907
if (max > SRE_MAXREPEAT)
1908
FAIL;
1909
if (_validate_inner(code, code+skip-4, groups))
1910
FAIL;
1911
code += skip-4;
1912
GET_OP;
1913
if (op != SRE_OP_SUCCESS)
1914
FAIL;
1915
}
1916
break;
1917
1918
case SRE_OP_REPEAT:
1919
case SRE_OP_POSSESSIVE_REPEAT:
1920
{
1921
SRE_CODE op1 = op, min, max;
1922
GET_SKIP;
1923
GET_ARG; min = arg;
1924
GET_ARG; max = arg;
1925
if (min > max)
1926
FAIL;
1927
if (max > SRE_MAXREPEAT)
1928
FAIL;
1929
if (_validate_inner(code, code+skip-3, groups))
1930
FAIL;
1931
code += skip-3;
1932
GET_OP;
1933
if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
1934
if (op != SRE_OP_SUCCESS)
1935
FAIL;
1936
}
1937
else {
1938
if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1939
FAIL;
1940
}
1941
}
1942
break;
1943
1944
case SRE_OP_ATOMIC_GROUP:
1945
{
1946
GET_SKIP;
1947
if (_validate_inner(code, code+skip-2, groups))
1948
FAIL;
1949
code += skip-2;
1950
GET_OP;
1951
if (op != SRE_OP_SUCCESS)
1952
FAIL;
1953
}
1954
break;
1955
1956
case SRE_OP_GROUPREF:
1957
case SRE_OP_GROUPREF_IGNORE:
1958
case SRE_OP_GROUPREF_UNI_IGNORE:
1959
case SRE_OP_GROUPREF_LOC_IGNORE:
1960
GET_ARG;
1961
if (arg >= (size_t)groups)
1962
FAIL;
1963
break;
1964
1965
case SRE_OP_GROUPREF_EXISTS:
1966
/* The regex syntax for this is: '(?(group)then|else)', where
1967
'group' is either an integer group number or a group name,
1968
'then' and 'else' are sub-regexes, and 'else' is optional. */
1969
GET_ARG;
1970
if (arg >= (size_t)groups)
1971
FAIL;
1972
GET_SKIP_ADJ(1);
1973
code--; /* The skip is relative to the first arg! */
1974
/* There are two possibilities here: if there is both a 'then'
1975
part and an 'else' part, the generated code looks like:
1976
1977
GROUPREF_EXISTS
1978
<group>
1979
<skipyes>
1980
...then part...
1981
JUMP
1982
<skipno>
1983
(<skipyes> jumps here)
1984
...else part...
1985
(<skipno> jumps here)
1986
1987
If there is only a 'then' part, it looks like:
1988
1989
GROUPREF_EXISTS
1990
<group>
1991
<skip>
1992
...then part...
1993
(<skip> jumps here)
1994
1995
There is no direct way to decide which it is, and we don't want
1996
to allow arbitrary jumps anywhere in the code; so we just look
1997
for a JUMP opcode preceding our skip target.
1998
*/
1999
VTRACE(("then part:\n"));
2000
int rc = _validate_inner(code+1, code+skip-1, groups);
2001
if (rc == 1) {
2002
VTRACE(("else part:\n"));
2003
code += skip-2; /* Position after JUMP, at <skipno> */
2004
GET_SKIP;
2005
rc = _validate_inner(code, code+skip-1, groups);
2006
}
2007
if (rc)
2008
FAIL;
2009
code += skip-1;
2010
break;
2011
2012
case SRE_OP_ASSERT:
2013
case SRE_OP_ASSERT_NOT:
2014
GET_SKIP;
2015
GET_ARG; /* 0 for lookahead, width for lookbehind */
2016
code--; /* Back up over arg to simplify math below */
2017
if (arg & 0x80000000)
2018
FAIL; /* Width too large */
2019
/* Stop 1 before the end; we check the SUCCESS below */
2020
if (_validate_inner(code+1, code+skip-2, groups))
2021
FAIL;
2022
code += skip-2;
2023
GET_OP;
2024
if (op != SRE_OP_SUCCESS)
2025
FAIL;
2026
break;
2027
2028
case SRE_OP_JUMP:
2029
if (code + 1 != end)
2030
FAIL;
2031
VTRACE(("JUMP: %d\n", __LINE__));
2032
return 1;
2033
2034
default:
2035
FAIL;
2036
2037
}
2038
}
2039
2040
VTRACE(("okay\n"));
2041
return 0;
2042
}
2043
2044
static int
2045
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2046
{
2047
if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
2048
code >= end || end[-1] != SRE_OP_SUCCESS)
2049
FAIL;
2050
return _validate_inner(code, end-1, groups);
2051
}
2052
2053
static int
2054
_validate(PatternObject *self)
2055
{
2056
if (_validate_outer(self->code, self->code+self->codesize, self->groups))
2057
{
2058
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
2059
return 0;
2060
}
2061
else
2062
VTRACE(("Success!\n"));
2063
return 1;
2064
}
2065
2066
/* -------------------------------------------------------------------- */
2067
/* match methods */
2068
2069
static int
2070
match_traverse(MatchObject *self, visitproc visit, void *arg)
2071
{
2072
Py_VISIT(Py_TYPE(self));
2073
Py_VISIT(self->string);
2074
Py_VISIT(self->regs);
2075
Py_VISIT(self->pattern);
2076
return 0;
2077
}
2078
2079
static int
2080
match_clear(MatchObject *self)
2081
{
2082
Py_CLEAR(self->string);
2083
Py_CLEAR(self->regs);
2084
Py_CLEAR(self->pattern);
2085
return 0;
2086
}
2087
2088
static void
2089
match_dealloc(MatchObject* self)
2090
{
2091
PyTypeObject *tp = Py_TYPE(self);
2092
2093
PyObject_GC_UnTrack(self);
2094
(void)match_clear(self);
2095
tp->tp_free(self);
2096
Py_DECREF(tp);
2097
}
2098
2099
static PyObject*
2100
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2101
{
2102
Py_ssize_t length;
2103
int isbytes, charsize;
2104
Py_buffer view;
2105
PyObject *result;
2106
const void* ptr;
2107
Py_ssize_t i, j;
2108
2109
assert(0 <= index && index < self->groups);
2110
index *= 2;
2111
2112
if (self->string == Py_None || self->mark[index] < 0) {
2113
/* return default value if the string or group is undefined */
2114
return Py_NewRef(def);
2115
}
2116
2117
ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2118
if (ptr == NULL)
2119
return NULL;
2120
2121
i = self->mark[index];
2122
j = self->mark[index+1];
2123
i = Py_MIN(i, length);
2124
j = Py_MIN(j, length);
2125
result = getslice(isbytes, ptr, self->string, i, j);
2126
if (isbytes && view.buf != NULL)
2127
PyBuffer_Release(&view);
2128
return result;
2129
}
2130
2131
static Py_ssize_t
2132
match_getindex(MatchObject* self, PyObject* index)
2133
{
2134
Py_ssize_t i;
2135
2136
if (index == NULL)
2137
/* Default value */
2138
return 0;
2139
2140
if (PyIndex_Check(index)) {
2141
i = PyNumber_AsSsize_t(index, NULL);
2142
}
2143
else {
2144
i = -1;
2145
2146
if (self->pattern->groupindex) {
2147
index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2148
if (index && PyLong_Check(index)) {
2149
i = PyLong_AsSsize_t(index);
2150
}
2151
}
2152
}
2153
if (i < 0 || i >= self->groups) {
2154
/* raise IndexError if we were given a bad group number */
2155
if (!PyErr_Occurred()) {
2156
PyErr_SetString(PyExc_IndexError, "no such group");
2157
}
2158
return -1;
2159
}
2160
2161
return i;
2162
}
2163
2164
static PyObject*
2165
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2166
{
2167
Py_ssize_t i = match_getindex(self, index);
2168
2169
if (i < 0) {
2170
return NULL;
2171
}
2172
2173
return match_getslice_by_index(self, i, def);
2174
}
2175
2176
/*[clinic input]
2177
_sre.SRE_Match.expand
2178
2179
template: object
2180
2181
Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2182
[clinic start generated code]*/
2183
2184
static PyObject *
2185
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2186
/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
2187
{
2188
_sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
2189
PyObject *filter = compile_template(module_state, self->pattern, template);
2190
if (filter == NULL) {
2191
return NULL;
2192
}
2193
PyObject *result = expand_template((TemplateObject *)filter, self);
2194
Py_DECREF(filter);
2195
return result;
2196
}
2197
2198
static PyObject*
2199
match_group(MatchObject* self, PyObject* args)
2200
{
2201
PyObject* result;
2202
Py_ssize_t i, size;
2203
2204
size = PyTuple_GET_SIZE(args);
2205
2206
switch (size) {
2207
case 0:
2208
result = match_getslice(self, _PyLong_GetZero(), Py_None);
2209
break;
2210
case 1:
2211
result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2212
break;
2213
default:
2214
/* fetch multiple items */
2215
result = PyTuple_New(size);
2216
if (!result)
2217
return NULL;
2218
for (i = 0; i < size; i++) {
2219
PyObject* item = match_getslice(
2220
self, PyTuple_GET_ITEM(args, i), Py_None
2221
);
2222
if (!item) {
2223
Py_DECREF(result);
2224
return NULL;
2225
}
2226
PyTuple_SET_ITEM(result, i, item);
2227
}
2228
break;
2229
}
2230
return result;
2231
}
2232
2233
static PyObject*
2234
match_getitem(MatchObject* self, PyObject* name)
2235
{
2236
return match_getslice(self, name, Py_None);
2237
}
2238
2239
/*[clinic input]
2240
_sre.SRE_Match.groups
2241
2242
default: object = None
2243
Is used for groups that did not participate in the match.
2244
2245
Return a tuple containing all the subgroups of the match, from 1.
2246
[clinic start generated code]*/
2247
2248
static PyObject *
2249
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2250
/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2251
{
2252
PyObject* result;
2253
Py_ssize_t index;
2254
2255
result = PyTuple_New(self->groups-1);
2256
if (!result)
2257
return NULL;
2258
2259
for (index = 1; index < self->groups; index++) {
2260
PyObject* item;
2261
item = match_getslice_by_index(self, index, default_value);
2262
if (!item) {
2263
Py_DECREF(result);
2264
return NULL;
2265
}
2266
PyTuple_SET_ITEM(result, index-1, item);
2267
}
2268
2269
return result;
2270
}
2271
2272
/*[clinic input]
2273
_sre.SRE_Match.groupdict
2274
2275
default: object = None
2276
Is used for groups that did not participate in the match.
2277
2278
Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2279
[clinic start generated code]*/
2280
2281
static PyObject *
2282
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2283
/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2284
{
2285
PyObject *result;
2286
PyObject *key;
2287
PyObject *value;
2288
Py_ssize_t pos = 0;
2289
Py_hash_t hash;
2290
2291
result = PyDict_New();
2292
if (!result || !self->pattern->groupindex)
2293
return result;
2294
2295
while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2296
int status;
2297
Py_INCREF(key);
2298
value = match_getslice(self, key, default_value);
2299
if (!value) {
2300
Py_DECREF(key);
2301
goto failed;
2302
}
2303
status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2304
Py_DECREF(value);
2305
Py_DECREF(key);
2306
if (status < 0)
2307
goto failed;
2308
}
2309
2310
return result;
2311
2312
failed:
2313
Py_DECREF(result);
2314
return NULL;
2315
}
2316
2317
/*[clinic input]
2318
_sre.SRE_Match.start -> Py_ssize_t
2319
2320
group: object(c_default="NULL") = 0
2321
/
2322
2323
Return index of the start of the substring matched by group.
2324
[clinic start generated code]*/
2325
2326
static Py_ssize_t
2327
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2328
/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2329
{
2330
Py_ssize_t index = match_getindex(self, group);
2331
2332
if (index < 0) {
2333
return -1;
2334
}
2335
2336
/* mark is -1 if group is undefined */
2337
return self->mark[index*2];
2338
}
2339
2340
/*[clinic input]
2341
_sre.SRE_Match.end -> Py_ssize_t
2342
2343
group: object(c_default="NULL") = 0
2344
/
2345
2346
Return index of the end of the substring matched by group.
2347
[clinic start generated code]*/
2348
2349
static Py_ssize_t
2350
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2351
/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2352
{
2353
Py_ssize_t index = match_getindex(self, group);
2354
2355
if (index < 0) {
2356
return -1;
2357
}
2358
2359
/* mark is -1 if group is undefined */
2360
return self->mark[index*2+1];
2361
}
2362
2363
LOCAL(PyObject*)
_pair(Py_ssize_t i1, Py_ssize_t i2)
{
    /* Return the new 2-tuple (i1, i2), or NULL if creating the tuple or
       either integer fails. */
    const Py_ssize_t values[2] = {i1, i2};

    PyObject *tuple = PyTuple_New(2);
    if (tuple == NULL) {
        return NULL;
    }

    for (Py_ssize_t slot = 0; slot < 2; slot++) {
        PyObject *number = PyLong_FromSsize_t(values[slot]);
        if (number == NULL) {
            Py_DECREF(tuple);
            return NULL;
        }
        /* The tuple takes over the reference to `number`. */
        PyTuple_SET_ITEM(tuple, slot, number);
    }

    return tuple;
}
2389
2390
/*[clinic input]
2391
_sre.SRE_Match.span
2392
2393
group: object(c_default="NULL") = 0
2394
/
2395
2396
For match object m, return the 2-tuple (m.start(group), m.end(group)).
2397
[clinic start generated code]*/
2398
2399
static PyObject *
2400
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2401
/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2402
{
2403
Py_ssize_t index = match_getindex(self, group);
2404
2405
if (index < 0) {
2406
return NULL;
2407
}
2408
2409
/* marks are -1 if group is undefined */
2410
return _pair(self->mark[index*2], self->mark[index*2+1]);
2411
}
2412
2413
static PyObject*
2414
match_regs(MatchObject* self)
2415
{
2416
PyObject* regs;
2417
PyObject* item;
2418
Py_ssize_t index;
2419
2420
regs = PyTuple_New(self->groups);
2421
if (!regs)
2422
return NULL;
2423
2424
for (index = 0; index < self->groups; index++) {
2425
item = _pair(self->mark[index*2], self->mark[index*2+1]);
2426
if (!item) {
2427
Py_DECREF(regs);
2428
return NULL;
2429
}
2430
PyTuple_SET_ITEM(regs, index, item);
2431
}
2432
2433
self->regs = Py_NewRef(regs);
2434
2435
return regs;
2436
}
2437
2438
/*[clinic input]
2439
_sre.SRE_Match.__copy__
2440
2441
[clinic start generated code]*/
2442
2443
static PyObject *
2444
_sre_SRE_Match___copy___impl(MatchObject *self)
2445
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2446
{
2447
return Py_NewRef(self);
2448
}
2449
2450
/*[clinic input]
2451
_sre.SRE_Match.__deepcopy__
2452
2453
memo: object
2454
/
2455
2456
[clinic start generated code]*/
2457
2458
static PyObject *
2459
_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2460
/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2461
{
2462
return Py_NewRef(self);
2463
}
2464
2465
PyDoc_STRVAR(match_doc,
2466
"The result of re.match() and re.search().\n\
2467
Match objects always have a boolean value of True.");
2468
2469
PyDoc_STRVAR(match_group_doc,
2470
"group([group1, ...]) -> str or tuple.\n\
2471
Return subgroup(s) of the match by indices or names.\n\
2472
For 0 returns the entire match.");
2473
2474
static PyObject *
2475
match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2476
{
2477
if (self->lastindex >= 0)
2478
return PyLong_FromSsize_t(self->lastindex);
2479
Py_RETURN_NONE;
2480
}
2481
2482
static PyObject *
2483
match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2484
{
2485
if (self->pattern->indexgroup &&
2486
self->lastindex >= 0 &&
2487
self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2488
{
2489
PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2490
self->lastindex);
2491
return Py_NewRef(result);
2492
}
2493
Py_RETURN_NONE;
2494
}
2495
2496
static PyObject *
2497
match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2498
{
2499
if (self->regs) {
2500
return Py_NewRef(self->regs);
2501
} else
2502
return match_regs(self);
2503
}
2504
2505
static PyObject *
2506
match_repr(MatchObject *self)
2507
{
2508
PyObject *result;
2509
PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2510
if (group0 == NULL)
2511
return NULL;
2512
result = PyUnicode_FromFormat(
2513
"<%s object; span=(%zd, %zd), match=%.50R>",
2514
Py_TYPE(self)->tp_name,
2515
self->mark[0], self->mark[1], group0);
2516
Py_DECREF(group0);
2517
return result;
2518
}
2519
2520
2521
static PyObject*
2522
pattern_new_match(_sremodulestate* module_state,
2523
PatternObject* pattern,
2524
SRE_STATE* state,
2525
Py_ssize_t status)
2526
{
2527
/* create match object (from state object) */
2528
2529
MatchObject* match;
2530
Py_ssize_t i, j;
2531
char* base;
2532
int n;
2533
2534
if (status > 0) {
2535
2536
/* create match object (with room for extra group marks) */
2537
/* coverity[ampersand_in_size] */
2538
match = PyObject_GC_NewVar(MatchObject,
2539
module_state->Match_Type,
2540
2*(pattern->groups+1));
2541
if (!match)
2542
return NULL;
2543
2544
match->pattern = (PatternObject*)Py_NewRef(pattern);
2545
2546
match->string = Py_NewRef(state->string);
2547
2548
match->regs = NULL;
2549
match->groups = pattern->groups+1;
2550
2551
/* fill in group slices */
2552
2553
base = (char*) state->beginning;
2554
n = state->charsize;
2555
2556
match->mark[0] = ((char*) state->start - base) / n;
2557
match->mark[1] = ((char*) state->ptr - base) / n;
2558
2559
for (i = j = 0; i < pattern->groups; i++, j+=2)
2560
if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2561
match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2562
match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2563
2564
/* check wrong span */
2565
if (match->mark[j+2] > match->mark[j+3]) {
2566
PyErr_SetString(PyExc_SystemError,
2567
"The span of capturing group is wrong,"
2568
" please report a bug for the re module.");
2569
Py_DECREF(match);
2570
return NULL;
2571
}
2572
} else
2573
match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2574
2575
match->pos = state->pos;
2576
match->endpos = state->endpos;
2577
2578
match->lastindex = state->lastindex;
2579
2580
PyObject_GC_Track(match);
2581
return (PyObject*) match;
2582
2583
} else if (status == 0) {
2584
2585
/* no match */
2586
Py_RETURN_NONE;
2587
2588
}
2589
2590
/* internal error */
2591
pattern_error(status);
2592
return NULL;
2593
}
2594
2595
2596
/* -------------------------------------------------------------------- */
2597
/* scanner methods (experimental) */
2598
2599
static int
2600
scanner_traverse(ScannerObject *self, visitproc visit, void *arg)
2601
{
2602
Py_VISIT(Py_TYPE(self));
2603
Py_VISIT(self->pattern);
2604
return 0;
2605
}
2606
2607
static int
2608
scanner_clear(ScannerObject *self)
2609
{
2610
Py_CLEAR(self->pattern);
2611
return 0;
2612
}
2613
2614
static void
2615
scanner_dealloc(ScannerObject* self)
2616
{
2617
PyTypeObject *tp = Py_TYPE(self);
2618
2619
PyObject_GC_UnTrack(self);
2620
state_fini(&self->state);
2621
(void)scanner_clear(self);
2622
tp->tp_free(self);
2623
Py_DECREF(tp);
2624
}
2625
2626
static int
2627
scanner_begin(ScannerObject* self)
2628
{
2629
if (self->executing) {
2630
PyErr_SetString(PyExc_ValueError,
2631
"regular expression scanner already executing");
2632
return 0;
2633
}
2634
self->executing = 1;
2635
return 1;
2636
}
2637
2638
static void
2639
scanner_end(ScannerObject* self)
2640
{
2641
assert(self->executing);
2642
self->executing = 0;
2643
}
2644
2645
/*[clinic input]
2646
_sre.SRE_Scanner.match
2647
2648
cls: defining_class
2649
/
2650
2651
[clinic start generated code]*/
2652
2653
static PyObject *
2654
_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
2655
/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
2656
{
2657
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
2658
SRE_STATE* state = &self->state;
2659
PyObject* match;
2660
Py_ssize_t status;
2661
2662
if (!scanner_begin(self)) {
2663
return NULL;
2664
}
2665
if (state->start == NULL) {
2666
scanner_end(self);
2667
Py_RETURN_NONE;
2668
}
2669
2670
state_reset(state);
2671
2672
state->ptr = state->start;
2673
2674
status = sre_match(state, PatternObject_GetCode(self->pattern));
2675
if (PyErr_Occurred()) {
2676
scanner_end(self);
2677
return NULL;
2678
}
2679
2680
match = pattern_new_match(module_state, (PatternObject*) self->pattern,
2681
state, status);
2682
2683
if (status == 0)
2684
state->start = NULL;
2685
else {
2686
state->must_advance = (state->ptr == state->start);
2687
state->start = state->ptr;
2688
}
2689
2690
scanner_end(self);
2691
return match;
2692
}
2693
2694
2695
/*[clinic input]
2696
_sre.SRE_Scanner.search
2697
2698
cls: defining_class
2699
/
2700
2701
[clinic start generated code]*/
2702
2703
static PyObject *
2704
_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2705
/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2706
{
2707
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
2708
SRE_STATE* state = &self->state;
2709
PyObject* match;
2710
Py_ssize_t status;
2711
2712
if (!scanner_begin(self)) {
2713
return NULL;
2714
}
2715
if (state->start == NULL) {
2716
scanner_end(self);
2717
Py_RETURN_NONE;
2718
}
2719
2720
state_reset(state);
2721
2722
state->ptr = state->start;
2723
2724
status = sre_search(state, PatternObject_GetCode(self->pattern));
2725
if (PyErr_Occurred()) {
2726
scanner_end(self);
2727
return NULL;
2728
}
2729
2730
match = pattern_new_match(module_state, (PatternObject*) self->pattern,
2731
state, status);
2732
2733
if (status == 0)
2734
state->start = NULL;
2735
else {
2736
state->must_advance = (state->ptr == state->start);
2737
state->start = state->ptr;
2738
}
2739
2740
scanner_end(self);
2741
return match;
2742
}
2743
2744
static PyObject *
2745
pattern_scanner(_sremodulestate *module_state,
2746
PatternObject *self,
2747
PyObject *string,
2748
Py_ssize_t pos,
2749
Py_ssize_t endpos)
2750
{
2751
ScannerObject* scanner;
2752
2753
/* create scanner object */
2754
scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2755
if (!scanner)
2756
return NULL;
2757
scanner->pattern = NULL;
2758
scanner->executing = 0;
2759
2760
/* create search state object */
2761
if (!state_init(&scanner->state, self, string, pos, endpos)) {
2762
Py_DECREF(scanner);
2763
return NULL;
2764
}
2765
2766
scanner->pattern = Py_NewRef(self);
2767
2768
PyObject_GC_Track(scanner);
2769
return (PyObject*) scanner;
2770
}
2771
2772
/* -------------------------------------------------------------------- */
2773
/* template methods */
2774
2775
static int
2776
template_traverse(TemplateObject *self, visitproc visit, void *arg)
2777
{
2778
Py_VISIT(Py_TYPE(self));
2779
Py_VISIT(self->literal);
2780
for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2781
Py_VISIT(self->items[i].literal);
2782
}
2783
return 0;
2784
}
2785
2786
static int
2787
template_clear(TemplateObject *self)
2788
{
2789
Py_CLEAR(self->literal);
2790
for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2791
Py_CLEAR(self->items[i].literal);
2792
}
2793
return 0;
2794
}
2795
2796
static void
2797
template_dealloc(TemplateObject *self)
2798
{
2799
PyTypeObject *tp = Py_TYPE(self);
2800
2801
PyObject_GC_UnTrack(self);
2802
(void)template_clear(self);
2803
tp->tp_free(self);
2804
Py_DECREF(tp);
2805
}
2806
2807
static PyObject *
2808
expand_template(TemplateObject *self, MatchObject *match)
2809
{
2810
if (Py_SIZE(self) == 0) {
2811
return Py_NewRef(self->literal);
2812
}
2813
2814
PyObject *result = NULL;
2815
Py_ssize_t count = 0; // the number of non-empty chunks
2816
/* For small number of strings use a buffer allocated on the stack,
2817
* otherwise use a list object. */
2818
PyObject *buffer[10];
2819
PyObject **out = buffer;
2820
PyObject *list = NULL;
2821
if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
2822
!PyUnicode_Check(self->literal))
2823
{
2824
list = PyList_New(self->chunks);
2825
if (!list) {
2826
return NULL;
2827
}
2828
out = &PyList_GET_ITEM(list, 0);
2829
}
2830
2831
out[count++] = Py_NewRef(self->literal);
2832
for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
2833
Py_ssize_t index = self->items[i].index;
2834
if (index >= match->groups) {
2835
PyErr_SetString(PyExc_IndexError, "no such group");
2836
goto cleanup;
2837
}
2838
PyObject *item = match_getslice_by_index(match, index, Py_None);
2839
if (item == NULL) {
2840
goto cleanup;
2841
}
2842
if (item != Py_None) {
2843
out[count++] = Py_NewRef(item);
2844
}
2845
Py_DECREF(item);
2846
2847
PyObject *literal = self->items[i].literal;
2848
if (literal != NULL) {
2849
out[count++] = Py_NewRef(literal);
2850
}
2851
}
2852
2853
if (PyUnicode_Check(self->literal)) {
2854
result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
2855
}
2856
else {
2857
Py_SET_SIZE(list, count);
2858
result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
2859
}
2860
2861
cleanup:
2862
if (list) {
2863
Py_DECREF(list);
2864
}
2865
else {
2866
for (Py_ssize_t i = 0; i < count; i++) {
2867
Py_DECREF(out[i]);
2868
}
2869
}
2870
return result;
2871
}
2872
2873
2874
static Py_hash_t
2875
pattern_hash(PatternObject *self)
2876
{
2877
Py_hash_t hash, hash2;
2878
2879
hash = PyObject_Hash(self->pattern);
2880
if (hash == -1) {
2881
return -1;
2882
}
2883
2884
hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2885
hash ^= hash2;
2886
2887
hash ^= self->flags;
2888
hash ^= self->isbytes;
2889
hash ^= self->codesize;
2890
2891
if (hash == -1) {
2892
hash = -2;
2893
}
2894
return hash;
2895
}
2896
2897
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    /* Rich comparison for pattern objects.  Only == and != are
       supported, and only against an object whose type is exactly the
       module's Pattern_Type; anything else yields NotImplemented. */
    _sremodulestate *module_state =
        get_sre_module_state_by_class(Py_TYPE(lefto));

    if (op != Py_EQ && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    const int want_equal = (op == Py_EQ);

    if (lefto == righto) {
        /* a pattern is equal to itself */
        return PyBool_FromLong(want_equal);
    }

    PatternObject *left = (PatternObject *)lefto;
    PatternObject *right = (PatternObject *)righto;

    /* Compare the code and the pattern because the same pattern can
       produce different codes depending on the locale used to compile the
       pattern when the re.LOCALE flag is used. Don't compare groups,
       indexgroup nor groupindex: they are derived from the pattern. */
    int equal = (left->flags == right->flags
                 && left->isbytes == right->isbytes
                 && left->codesize == right->codesize
                 && memcmp(left->code, right->code,
                           sizeof(left->code[0]) * left->codesize) == 0);
    if (equal) {
        equal = PyObject_RichCompareBool(left->pattern, right->pattern,
                                         Py_EQ);
        if (equal < 0) {
            return NULL;
        }
    }

    return PyBool_FromLong(want_equal ? equal : !equal);
}
2945
2946
#include "clinic/sre.c.h"
2947
2948
static PyMethodDef pattern_methods[] = {
2949
_SRE_SRE_PATTERN_MATCH_METHODDEF
2950
_SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2951
_SRE_SRE_PATTERN_SEARCH_METHODDEF
2952
_SRE_SRE_PATTERN_SUB_METHODDEF
2953
_SRE_SRE_PATTERN_SUBN_METHODDEF
2954
_SRE_SRE_PATTERN_FINDALL_METHODDEF
2955
_SRE_SRE_PATTERN_SPLIT_METHODDEF
2956
_SRE_SRE_PATTERN_FINDITER_METHODDEF
2957
_SRE_SRE_PATTERN_SCANNER_METHODDEF
2958
_SRE_SRE_PATTERN___COPY___METHODDEF
2959
_SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2960
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
2961
PyDoc_STR("See PEP 585")},
2962
{NULL, NULL}
2963
};
2964
2965
static PyGetSetDef pattern_getset[] = {
2966
{"groupindex", (getter)pattern_groupindex, (setter)NULL,
2967
"A dictionary mapping group names to group numbers."},
2968
{NULL} /* Sentinel */
2969
};
2970
2971
#define PAT_OFF(x) offsetof(PatternObject, x)
2972
static PyMemberDef pattern_members[] = {
2973
{"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2974
"The pattern string from which the RE object was compiled."},
2975
{"flags", T_INT, PAT_OFF(flags), READONLY,
2976
"The regex matching flags."},
2977
{"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2978
"The number of capturing groups in the pattern."},
2979
{"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY},
2980
{NULL} /* Sentinel */
2981
};
2982
2983
static PyType_Slot pattern_slots[] = {
2984
{Py_tp_dealloc, (destructor)pattern_dealloc},
2985
{Py_tp_repr, (reprfunc)pattern_repr},
2986
{Py_tp_hash, (hashfunc)pattern_hash},
2987
{Py_tp_doc, (void *)pattern_doc},
2988
{Py_tp_richcompare, pattern_richcompare},
2989
{Py_tp_methods, pattern_methods},
2990
{Py_tp_members, pattern_members},
2991
{Py_tp_getset, pattern_getset},
2992
{Py_tp_traverse, pattern_traverse},
2993
{Py_tp_clear, pattern_clear},
2994
{0, NULL},
2995
};
2996
2997
static PyType_Spec pattern_spec = {
2998
.name = "re.Pattern",
2999
.basicsize = sizeof(PatternObject),
3000
.itemsize = sizeof(SRE_CODE),
3001
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3002
Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3003
.slots = pattern_slots,
3004
};
3005
3006
static PyMethodDef match_methods[] = {
3007
{"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3008
_SRE_SRE_MATCH_START_METHODDEF
3009
_SRE_SRE_MATCH_END_METHODDEF
3010
_SRE_SRE_MATCH_SPAN_METHODDEF
3011
_SRE_SRE_MATCH_GROUPS_METHODDEF
3012
_SRE_SRE_MATCH_GROUPDICT_METHODDEF
3013
_SRE_SRE_MATCH_EXPAND_METHODDEF
3014
_SRE_SRE_MATCH___COPY___METHODDEF
3015
_SRE_SRE_MATCH___DEEPCOPY___METHODDEF
3016
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3017
PyDoc_STR("See PEP 585")},
3018
{NULL, NULL}
3019
};
3020
3021
static PyGetSetDef match_getset[] = {
3022
{"lastindex", (getter)match_lastindex_get, (setter)NULL,
3023
"The integer index of the last matched capturing group."},
3024
{"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
3025
"The name of the last matched capturing group."},
3026
{"regs", (getter)match_regs_get, (setter)NULL},
3027
{NULL}
3028
};
3029
3030
#define MATCH_OFF(x) offsetof(MatchObject, x)
3031
static PyMemberDef match_members[] = {
3032
{"string", T_OBJECT, MATCH_OFF(string), READONLY,
3033
"The string passed to match() or search()."},
3034
{"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
3035
"The regular expression object."},
3036
{"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
3037
"The index into the string at which the RE engine started looking for a match."},
3038
{"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
3039
"The index into the string beyond which the RE engine will not go."},
3040
{NULL}
3041
};
3042
3043
/* FIXME: implement setattr("string", None) as a special case (to
3044
detach the associated string, if any */
3045
static PyType_Slot match_slots[] = {
3046
{Py_tp_dealloc, match_dealloc},
3047
{Py_tp_repr, match_repr},
3048
{Py_tp_doc, (void *)match_doc},
3049
{Py_tp_methods, match_methods},
3050
{Py_tp_members, match_members},
3051
{Py_tp_getset, match_getset},
3052
{Py_tp_traverse, match_traverse},
3053
{Py_tp_clear, match_clear},
3054
3055
/* As mapping.
3056
*
3057
* Match objects do not support length or assignment, but do support
3058
* __getitem__.
3059
*/
3060
{Py_mp_subscript, match_getitem},
3061
3062
{0, NULL},
3063
};
3064
3065
static PyType_Spec match_spec = {
3066
.name = "re.Match",
3067
.basicsize = sizeof(MatchObject),
3068
.itemsize = sizeof(Py_ssize_t),
3069
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3070
Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3071
.slots = match_slots,
3072
};
3073
3074
static PyMethodDef scanner_methods[] = {
3075
_SRE_SRE_SCANNER_MATCH_METHODDEF
3076
_SRE_SRE_SCANNER_SEARCH_METHODDEF
3077
{NULL, NULL}
3078
};
3079
3080
#define SCAN_OFF(x) offsetof(ScannerObject, x)
3081
static PyMemberDef scanner_members[] = {
3082
{"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
3083
{NULL} /* Sentinel */
3084
};
3085
3086
static PyType_Slot scanner_slots[] = {
3087
{Py_tp_dealloc, scanner_dealloc},
3088
{Py_tp_methods, scanner_methods},
3089
{Py_tp_members, scanner_members},
3090
{Py_tp_traverse, scanner_traverse},
3091
{Py_tp_clear, scanner_clear},
3092
{0, NULL},
3093
};
3094
3095
static PyType_Spec scanner_spec = {
3096
.name = "_sre.SRE_Scanner",
3097
.basicsize = sizeof(ScannerObject),
3098
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3099
Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3100
.slots = scanner_slots,
3101
};
3102
3103
static PyType_Slot template_slots[] = {
3104
{Py_tp_dealloc, template_dealloc},
3105
{Py_tp_traverse, template_traverse},
3106
{Py_tp_clear, template_clear},
3107
{0, NULL},
3108
};
3109
3110
static PyType_Spec template_spec = {
3111
.name = "_sre.SRE_Template",
3112
.basicsize = sizeof(TemplateObject),
3113
.itemsize = sizeof(((TemplateObject *)0)->items[0]),
3114
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3115
Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3116
.slots = template_slots,
3117
};
3118
3119
static PyMethodDef _functions[] = {
3120
_SRE_COMPILE_METHODDEF
3121
_SRE_TEMPLATE_METHODDEF
3122
_SRE_GETCODESIZE_METHODDEF
3123
_SRE_ASCII_ISCASED_METHODDEF
3124
_SRE_UNICODE_ISCASED_METHODDEF
3125
_SRE_ASCII_TOLOWER_METHODDEF
3126
_SRE_UNICODE_TOLOWER_METHODDEF
3127
{NULL, NULL}
3128
};
3129
3130
static int
3131
sre_traverse(PyObject *module, visitproc visit, void *arg)
3132
{
3133
_sremodulestate *state = get_sre_module_state(module);
3134
3135
Py_VISIT(state->Pattern_Type);
3136
Py_VISIT(state->Match_Type);
3137
Py_VISIT(state->Scanner_Type);
3138
Py_VISIT(state->Template_Type);
3139
Py_VISIT(state->compile_template);
3140
3141
return 0;
3142
}
3143
3144
static int
3145
sre_clear(PyObject *module)
3146
{
3147
_sremodulestate *state = get_sre_module_state(module);
3148
3149
Py_CLEAR(state->Pattern_Type);
3150
Py_CLEAR(state->Match_Type);
3151
Py_CLEAR(state->Scanner_Type);
3152
Py_CLEAR(state->Template_Type);
3153
Py_CLEAR(state->compile_template);
3154
3155
return 0;
3156
}
3157
3158
static void
3159
sre_free(void *module)
3160
{
3161
sre_clear((PyObject *)module);
3162
}
3163
3164
/* Create a heap type from `spec`, bound to module `m`, and store it in
   `type`.  Jumps to the enclosing function's `error` label on failure. */
#define CREATE_TYPE(m, type, spec)                                        \
    do {                                                                  \
        type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL);   \
        if (type == NULL) {                                               \
            goto error;                                                   \
        }                                                                 \
    } while (0)

/* Add the unsigned long `value` to `module` as the int attribute `name`.
   Jumps to the enclosing function's `error` label on failure. */
#define ADD_ULONG_CONSTANT(module, name, value)                           \
    do {                                                                  \
        PyObject *boxed = PyLong_FromUnsignedLong(value);                 \
        if (boxed == NULL) {                                              \
            goto error;                                                   \
        }                                                                 \
        int rc = PyModule_AddObjectRef(module, name, boxed);              \
        Py_DECREF(boxed);                                                 \
        if (rc < 0) {                                                     \
            goto error;                                                   \
        }                                                                 \
    } while (0)
3183
3184
static int
3185
sre_exec(PyObject *m)
3186
{
3187
_sremodulestate *state;
3188
3189
/* Create heap types */
3190
state = get_sre_module_state(m);
3191
CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
3192
CREATE_TYPE(m, state->Match_Type, &match_spec);
3193
CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
3194
CREATE_TYPE(m, state->Template_Type, &template_spec);
3195
3196
if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3197
goto error;
3198
}
3199
3200
if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3201
goto error;
3202
}
3203
3204
ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3205
ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3206
3207
if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3208
goto error;
3209
}
3210
3211
return 0;
3212
3213
error:
3214
return -1;
3215
}
3216
3217
static PyModuleDef_Slot sre_slots[] = {
3218
{Py_mod_exec, sre_exec},
3219
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
3220
{0, NULL},
3221
};
3222
3223
static struct PyModuleDef sremodule = {
3224
.m_base = PyModuleDef_HEAD_INIT,
3225
.m_name = "_sre",
3226
.m_size = sizeof(_sremodulestate),
3227
.m_methods = _functions,
3228
.m_slots = sre_slots,
3229
.m_traverse = sre_traverse,
3230
.m_free = sre_free,
3231
.m_clear = sre_clear,
3232
};
3233
3234
PyMODINIT_FUNC
3235
PyInit__sre(void)
3236
{
3237
return PyModuleDef_Init(&sremodule);
3238
}
3239
3240
/* vim:ts=4:sw=4:et
3241
*/
3242
3243