Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_jit_compile.c
21790 views
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
This module by Zoltan Herczeg
10
Original API code Copyright (c) 1997-2012 University of Cambridge
11
New API code Copyright (c) 2016-2024 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
* Redistributions of source code must retain the above copyright notice,
18
this list of conditions and the following disclaimer.
19
20
* Redistributions in binary form must reproduce the above copyright
21
notice, this list of conditions and the following disclaimer in the
22
documentation and/or other materials provided with the distribution.
23
24
* Neither the name of the University of Cambridge nor the names of its
25
contributors may be used to endorse or promote products derived from
26
this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#if defined(__has_feature)
43
#if __has_feature(memory_sanitizer)
44
#include <sanitizer/msan_interface.h>
45
#endif /* __has_feature(memory_sanitizer) */
46
#endif /* defined(__has_feature) */
47
48
#include "pcre2_internal.h"
49
50
#ifdef SUPPORT_JIT
51
52
/* All-in-one: Since we use the JIT compiler only from here,
53
we just include it. This way we don't need to touch the build
54
system files. */
55
56
#define SLJIT_CONFIG_AUTO 1
57
#define SLJIT_CONFIG_STATIC 1
58
#define SLJIT_VERBOSE 0
59
60
#ifdef PCRE2_DEBUG
61
#define SLJIT_DEBUG 1
62
#else
63
#define SLJIT_DEBUG 0
64
#endif
65
66
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
67
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
68
69
static void * pcre2_jit_malloc(size_t size, void *allocator_data)
70
{
71
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
72
return allocator->malloc(size, allocator->memory_data);
73
}
74
75
static void pcre2_jit_free(void *ptr, void *allocator_data)
76
{
77
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
78
allocator->free(ptr, allocator->memory_data);
79
}
80
81
#include "../deps/sljit/sljit_src/sljitLir.c"
82
83
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
84
#error Unsupported architecture
85
#endif
86
87
/* Defines for debugging purposes. */
88
89
/* 1 - Use unoptimized capturing brackets.
90
2 - Enable capture_last_ptr (includes option 1). */
91
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
92
93
/* 1 - Always have a control head. */
94
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
95
96
/* Allocate memory for the regex stack on the real machine stack.
97
Fast, but limited size. */
98
#define MACHINE_STACK_SIZE 32768
99
100
/* Growth rate for stack allocated by the OS. Should be a multiple
101
of page size. */
102
#define STACK_GROWTH_RATE 8192
103
104
/* Enable to check that the allocation could destroy temporaries. */
105
#if defined SLJIT_DEBUG && SLJIT_DEBUG
106
#define DESTROY_REGISTERS 1
107
#endif
108
109
/*
110
Short summary of the backtracking mechanism employed by the JIT code generator:
111
112
The code generator follows the recursive nature of the PERL compatible regular
113
expressions. The basic blocks of regular expressions are condition checkers
114
which execute different commands depending on the result of the condition check.
115
The relationship between the operators can be horizontal (concatenation) and
116
vertical (sub-expression) (See struct backtrack_common for more details).
117
118
'ab' - 'a' and 'b' regexps are concatenated
119
'a+' - 'a' is the sub-expression of the '+' operator
120
121
The condition checkers are boolean (true/false) checkers. Machine code is generated
122
for the checker itself and for the actions depending on the result of the checker.
123
The 'true' case is called as the matching path (expected path), and the other is called as
124
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
125
branches on the matching path.
126
127
Greedy star operator (*) :
128
Matching path: match happens.
129
Backtrack path: match failed.
130
Non-greedy star operator (*?) :
131
Matching path: no need to perform a match.
132
Backtrack path: match is required.
133
134
The following example shows how the code is generated for a capturing bracket
135
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
136
we have the following regular expression:
137
138
A(B|C)D
139
140
The generated code will be the following:
141
142
A matching path
143
'(' matching path (pushing arguments to the stack)
144
B matching path
145
')' matching path (pushing arguments to the stack)
146
D matching path
147
return with successful match
148
149
D backtrack path
150
')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
151
B backtrack path
152
C expected path
153
jump to D matching path
154
C backtrack path
155
A backtrack path
156
157
Notice that the order of the backtrack code paths is the opposite of the fast
158
code paths. In this way the topmost value on the stack always belongs
159
to the current backtrack code path. The backtrack path must check
160
whether there is a next alternative. If so, it needs to jump back to
161
the matching path eventually. Otherwise it needs to clear out its own stack
162
frame and continue the execution on the backtrack code paths.
163
*/
164
165
/*
166
Saved stack frames:
167
168
Atomic blocks and asserts require reloading the values of private data
169
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
170
are not necessarily known at compile time, thus we need a dynamic restore
171
mechanism.
172
173
The stack frames are stored in a chain list, and have the following format:
174
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
175
176
Thus we can restore the private data to a particular point in the stack.
177
*/
178
179
typedef struct jit_arguments {
180
/* Pointers first. */
181
struct sljit_stack *stack;
182
PCRE2_SPTR str;
183
PCRE2_SPTR begin;
184
PCRE2_SPTR end;
185
pcre2_match_data *match_data;
186
PCRE2_SPTR startchar_ptr;
187
PCRE2_UCHAR *mark_ptr;
188
int (*callout)(pcre2_callout_block *, void *);
189
void *callout_data;
190
/* Everything else after. */
191
sljit_uw offset_limit;
192
sljit_u32 limit_match;
193
sljit_u32 oveccount;
194
sljit_u32 options;
195
} jit_arguments;
196
197
#define JIT_NUMBER_OF_COMPILE_MODES 3
198
199
typedef struct executable_functions {
200
void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
201
void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
202
sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
203
sljit_u32 top_bracket;
204
sljit_u32 limit_match;
205
} executable_functions;
206
207
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
211
212
/* Singly linked list node pairing a start jump with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
217
218
/* Negative sentinel values used in place of a real frame size. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
222
223
/* Item types of the control verb management chain. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
227
228
/* Kinds of early fail handling. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
233
234
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
235
236
/* The following structure is the key data type for the recursive
237
code generator. It is allocated by compile_matchingpath, and contains
238
the arguments for compile_backtrackingpath. Must be the first member
239
of its descendants. */
240
typedef struct backtrack_common {
241
/* Backtracking path of an opcode, which falls back
242
to our opcode, if it cannot resume matching. */
243
struct backtrack_common *prev;
244
/* Backtracks for opcodes without backtracking path.
245
These opcodes are between 'prev' and the current
246
opcode, and they never resume the match. */
247
jump_list *simple_backtracks;
248
/* Internal backtracking list for block constructs
249
which contains other opcodes, such as brackets,
250
asserts, conditionals, etc. */
251
struct backtrack_common *top;
252
/* Backtracks used internally by the opcode. For component
253
opcodes, this list is also used by those opcodes without
254
backtracking path which follows the 'top' backtrack. */
255
jump_list *own_backtracks;
256
/* Opcode pointer. */
257
PCRE2_SPTR cc;
258
} backtrack_common;
259
260
typedef struct assert_backtrack {
261
backtrack_common common;
262
jump_list *condfailed;
263
/* Less than 0 if a frame is not needed. */
264
int framesize;
265
/* Points to our private memory word on the stack. */
266
int private_data_ptr;
267
/* For iterators. */
268
struct sljit_label *matchingpath;
269
} assert_backtrack;
270
271
typedef struct bracket_backtrack {
272
backtrack_common common;
273
/* Where to coninue if an alternative is successfully matched. */
274
struct sljit_label *alternative_matchingpath;
275
/* For rmin and rmax iterators. */
276
struct sljit_label *recursive_matchingpath;
277
/* For greedy ? operator. */
278
struct sljit_label *zero_matchingpath;
279
/* Contains the branches of a failed condition. */
280
union {
281
/* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */
282
jump_list *no_capture;
283
assert_backtrack *assert;
284
/* For OP_ONCE. Less than 0 if not needed. */
285
int framesize;
286
} u;
287
/* For brackets with >3 alternatives. */
288
struct sljit_jump *matching_mov_addr;
289
/* Points to our private memory word on the stack. */
290
int private_data_ptr;
291
} bracket_backtrack;
292
293
typedef struct bracketpos_backtrack {
294
backtrack_common common;
295
/* Points to our private memory word on the stack. */
296
int private_data_ptr;
297
/* Reverting stack is needed. */
298
int framesize;
299
/* Allocated stack size. */
300
int stacksize;
301
} bracketpos_backtrack;
302
303
typedef struct braminzero_backtrack {
304
backtrack_common common;
305
struct sljit_label *matchingpath;
306
} braminzero_backtrack;
307
308
typedef struct char_iterator_backtrack {
309
backtrack_common common;
310
/* Next iteration. */
311
struct sljit_label *matchingpath;
312
/* Creating a range based on the next character. */
313
struct {
314
unsigned int othercasebit;
315
PCRE2_UCHAR chr;
316
BOOL charpos_enabled;
317
} charpos;
318
} char_iterator_backtrack;
319
320
typedef struct ref_iterator_backtrack {
321
backtrack_common common;
322
/* Next iteration. */
323
struct sljit_label *matchingpath;
324
} ref_iterator_backtrack;
325
326
typedef struct recurse_entry {
327
struct recurse_entry *next;
328
/* Contains the function entry label. */
329
struct sljit_label *entry_label;
330
/* Contains the function entry label. */
331
struct sljit_label *backtrack_label;
332
/* Collects the entry calls until the function is not created. */
333
jump_list *entry_calls;
334
/* Collects the backtrack calls until the function is not created. */
335
jump_list *backtrack_calls;
336
/* Points to the starting opcode. */
337
sljit_sw start;
338
/* Start of caller arguments. */
339
PCRE2_SPTR arg_start;
340
/* Size of caller arguments in bytes. */
341
sljit_uw arg_size;
342
} recurse_entry;
343
344
typedef struct recurse_backtrack {
345
backtrack_common common;
346
/* Return to the matching path. */
347
struct sljit_label *matchingpath;
348
/* Recursive pattern. */
349
recurse_entry *entry;
350
/* Pattern is inlined. */
351
BOOL inlined_pattern;
352
} recurse_backtrack;
353
354
typedef struct vreverse_backtrack {
355
backtrack_common common;
356
/* Return to the matching path. */
357
struct sljit_label *matchingpath;
358
} vreverse_backtrack;
359
360
#define OP_THEN_TRAP OP_TABLE_LENGTH
361
362
typedef struct then_trap_backtrack {
363
backtrack_common common;
364
/* If then_trap is not NULL, this structure contains the real
365
then_trap for the backtracking path. */
366
struct then_trap_backtrack *then_trap;
367
/* Points to the starting opcode. */
368
sljit_sw start;
369
/* Exit point for the then opcodes of this alternative. */
370
jump_list *quit;
371
/* Frame size of the current alternative. */
372
int framesize;
373
} then_trap_backtrack;
374
375
#define MAX_N_CHARS 12
376
#define MAX_DIFF_CHARS 5
377
378
typedef struct fast_forward_char_data {
379
/* Number of characters in the chars array, 255 for any character. */
380
sljit_u8 count;
381
/* Number of last UTF-8 characters in the chars array. */
382
sljit_u8 last_count;
383
/* Available characters in the current position. */
384
PCRE2_UCHAR chars[MAX_DIFF_CHARS];
385
} fast_forward_char_data;
386
387
#define MAX_CLASS_RANGE_SIZE 4
388
#define MAX_CLASS_CHARS_SIZE 3
389
390
typedef struct compiler_common {
391
/* The sljit ceneric compiler. */
392
struct sljit_compiler *compiler;
393
/* Compiled regular expression. */
394
pcre2_real_code *re;
395
/* First byte code. */
396
PCRE2_SPTR start;
397
/* Maps private data offset to each opcode. */
398
sljit_s32 *private_data_ptrs;
399
/* Chain list of read-only data ptrs. */
400
void *read_only_data_head;
401
/* Bitset which tells which capture brackets can be optimized. */
402
sljit_u8 *optimized_cbrackets;
403
/* Bitset for tracking capture bracket status. */
404
sljit_u8 *cbracket_bitset;
405
/* Tells whether the starting offset is a target of then. */
406
sljit_u8 *then_offsets;
407
/* Current position where a THEN must jump. */
408
then_trap_backtrack *then_trap;
409
/* Starting offset of private data for capturing brackets. */
410
sljit_s32 cbra_ptr;
411
#if defined SLJIT_DEBUG && SLJIT_DEBUG
412
/* End offset of locals for assertions. */
413
sljit_s32 locals_size;
414
#endif
415
/* Output vector starting point. Must be divisible by 2. */
416
sljit_s32 ovector_start;
417
/* Points to the starting character of the current match. */
418
sljit_s32 start_ptr;
419
/* Last known position of the requested byte. */
420
sljit_s32 req_char_ptr;
421
/* Head of the last recursion. */
422
sljit_s32 recursive_head_ptr;
423
/* First inspected character for partial matching.
424
(Needed for avoiding zero length partial matches.) */
425
sljit_s32 start_used_ptr;
426
/* Starting pointer for partial soft matches. */
427
sljit_s32 hit_start;
428
/* Pointer of the match end position. */
429
sljit_s32 match_end_ptr;
430
/* Points to the marked string. */
431
sljit_s32 mark_ptr;
432
/* Head of the recursive control verb management chain.
433
Each item must have a previous offset and type
434
(see control_types) values. See do_search_mark. */
435
sljit_s32 control_head_ptr;
436
/* The offset of the saved STR_END in the outermost
437
scan substring block. Since scan substring restores
438
STR_END after a match, it is enough to restore
439
STR_END inside a scan substring block. */
440
sljit_s32 restore_end_ptr;
441
/* Points to the last matched capture block index. */
442
sljit_s32 capture_last_ptr;
443
/* Fast forward skipping byte code pointer. */
444
PCRE2_SPTR fast_forward_bc_ptr;
445
/* Locals used by fast fail optimization. */
446
sljit_s32 early_fail_start_ptr;
447
sljit_s32 early_fail_end_ptr;
448
/* Byte length of optimized_cbrackets and cbracket_bitset. */
449
sljit_u32 cbracket_bitset_length;
450
/* Variables used by recursive call generator. */
451
sljit_s32 recurse_bitset_size;
452
uint8_t *recurse_bitset;
453
454
/* Flipped and lower case tables. */
455
const sljit_u8 *fcc;
456
sljit_sw lcc;
457
/* Mode can be PCRE2_JIT_COMPLETE and others. */
458
int mode;
459
/* TRUE, when empty match is accepted for partial matching. */
460
BOOL allow_empty_partial;
461
/* TRUE, when minlength is greater than 0. */
462
BOOL might_be_empty;
463
/* \K is found in the pattern. */
464
BOOL has_set_som;
465
/* (*SKIP:arg) is found in the pattern. */
466
BOOL has_skip_arg;
467
/* (*THEN) is found in the pattern. */
468
BOOL has_then;
469
/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
470
BOOL has_skip_in_assert_back;
471
/* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
472
BOOL local_quit_available;
473
/* Currently in a positive assertion. */
474
BOOL in_positive_assertion;
475
/* Newline control. */
476
int nltype;
477
sljit_u32 nlmax;
478
sljit_u32 nlmin;
479
int newline;
480
int bsr_nltype;
481
sljit_u32 bsr_nlmax;
482
sljit_u32 bsr_nlmin;
483
/* Dollar endonly. */
484
int endonly;
485
/* Tables. */
486
sljit_sw ctypes;
487
/* Named capturing brackets. */
488
PCRE2_SPTR name_table;
489
sljit_sw name_count;
490
sljit_sw name_entry_size;
491
492
/* Labels and jump lists. */
493
struct sljit_label *partialmatchlabel;
494
struct sljit_label *quit_label;
495
struct sljit_label *abort_label;
496
struct sljit_label *accept_label;
497
struct sljit_label *ff_newline_shortcut;
498
stub_list *stubs;
499
recurse_entry *entries;
500
recurse_entry *currententry;
501
jump_list *partialmatch;
502
jump_list *quit;
503
jump_list *positive_assertion_quit;
504
jump_list *abort;
505
jump_list *failed_match;
506
jump_list *accept;
507
jump_list *calllimit;
508
jump_list *stackalloc;
509
jump_list *revertframes;
510
jump_list *wordboundary;
511
jump_list *ucp_wordboundary;
512
jump_list *anynewline;
513
jump_list *hspace;
514
jump_list *vspace;
515
jump_list *casefulcmp;
516
jump_list *caselesscmp;
517
jump_list *reset_match;
518
/* Same as reset_match, but resets the STR_PTR as well. */
519
jump_list *restart_match;
520
BOOL unset_backref;
521
BOOL alt_circumflex;
522
#ifdef SUPPORT_UNICODE
523
BOOL utf;
524
BOOL invalid_utf;
525
BOOL ucp;
526
/* Points to saving area for iref. */
527
jump_list *getucd;
528
jump_list *getucdtype;
529
#if PCRE2_CODE_UNIT_WIDTH == 8
530
jump_list *utfreadchar;
531
jump_list *utfreadtype8;
532
jump_list *utfpeakcharback;
533
#endif
534
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
535
jump_list *utfreadchar_invalid;
536
jump_list *utfreadnewline_invalid;
537
jump_list *utfmoveback_invalid;
538
jump_list *utfpeakcharback_invalid;
539
#endif
540
#endif /* SUPPORT_UNICODE */
541
} compiler_common;
542
543
/* For byte_sequence_compare. */
544
545
/* Context record for byte_sequence_compare. When SLJIT_UNALIGNED is set it
additionally carries two unions (c and oc) that expose the same storage as
an int, a 16 bit value, or an array of code units. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
576
577
/* Undefine sljit macros. */
578
#undef CMP
579
580
/* Used for accessing the elements of the stack. */
581
#define STACK(i) ((i) * SSIZE_OF(sw))
582
583
#ifdef SLJIT_PREF_SHIFT_REG
584
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
585
/* Nothing. */
586
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
587
#define SHIFT_REG_IS_R3
588
#else
589
#error "Unsupported shift register"
590
#endif
591
#endif
592
593
#define TMP1 SLJIT_R0
594
#ifdef SHIFT_REG_IS_R3
595
#define TMP2 SLJIT_R3
596
#define TMP3 SLJIT_R2
597
#else
598
#define TMP2 SLJIT_R2
599
#define TMP3 SLJIT_R3
600
#endif
601
#define STR_PTR SLJIT_R1
602
#define STR_END SLJIT_S0
603
#define STACK_TOP SLJIT_S1
604
#define STACK_LIMIT SLJIT_S2
605
#define COUNT_MATCH SLJIT_S3
606
#define ARGUMENTS SLJIT_S4
607
#define RETURN_ADDR SLJIT_R4
608
609
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
610
#define HAS_VIRTUAL_REGISTERS 1
611
#else
612
#define HAS_VIRTUAL_REGISTERS 0
613
#endif
614
615
/* Local space layout. */
616
/* Max limit of recursions. */
617
#define LIMIT_MATCH (0 * sizeof(sljit_sw))
618
/* Local variables. Their number is computed by check_opcode_types. */
619
#define LOCAL0 (1 * sizeof(sljit_sw))
620
#define LOCAL1 (2 * sizeof(sljit_sw))
621
#define LOCAL2 (3 * sizeof(sljit_sw))
622
#define LOCAL3 (4 * sizeof(sljit_sw))
623
#define LOCAL4 (5 * sizeof(sljit_sw))
624
/* The output vector is stored on the stack, and contains pointers
625
to characters. The vector data is divided into two groups: the first
626
group contains the start / end character pointers, and the second is
627
the start pointers when the end of the capturing group has not yet been reached. */
628
#define OVECTOR_START (common->ovector_start)
629
#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))
630
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))
631
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
632
633
#if PCRE2_CODE_UNIT_WIDTH == 8
634
#define MOV_UCHAR SLJIT_MOV_U8
635
#define IN_UCHARS(x) (x)
636
#elif PCRE2_CODE_UNIT_WIDTH == 16
637
#define MOV_UCHAR SLJIT_MOV_U16
638
#define UCHAR_SHIFT (1)
639
#define IN_UCHARS(x) ((x) * 2)
640
#elif PCRE2_CODE_UNIT_WIDTH == 32
641
#define MOV_UCHAR SLJIT_MOV_U32
642
#define UCHAR_SHIFT (2)
643
#define IN_UCHARS(x) ((x) * 4)
644
#else
645
#error Unsupported compiling mode
646
#endif
647
648
/* Shortcuts. */
649
#define DEFINE_COMPILER \
650
struct sljit_compiler *compiler = common->compiler
651
#define OP1(op, dst, dstw, src, srcw) \
652
sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
653
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
654
sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
655
#define OP2U(op, src1, src1w, src2, src2w) \
656
sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
657
#define OP_SRC(op, src, srcw) \
658
sljit_emit_op_src(compiler, (op), (src), (srcw))
659
#define LABEL() \
660
sljit_emit_label(compiler)
661
#define JUMP(type) \
662
sljit_emit_jump(compiler, (type))
663
#define JUMPTO(type, label) \
664
sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
665
#define JUMPHERE(jump) \
666
sljit_set_label((jump), sljit_emit_label(compiler))
667
#define SET_LABEL(jump, label) \
668
sljit_set_label((jump), (label))
669
#define CMP(type, src1, src1w, src2, src2w) \
670
sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
671
#define CMPTO(type, src1, src1w, src2, src2w, label) \
672
sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
673
#define OP_FLAGS(op, dst, dstw, type) \
674
sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
675
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
676
sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
677
#define GET_LOCAL_BASE(dst, dstw, offset) \
678
sljit_get_local_base(compiler, (dst), (dstw), (offset))
679
680
#define READ_CHAR_MAX ((sljit_u32)0xffffffff)
681
682
/* Sentinel character values. The negative literal is parenthesized so that
it stays a single operand wherever the macro is expanded. */
#define INVALID_UTF_CHAR (-1)
#define UNASSIGNED_UTF_CHAR 888
684
685
#if defined SUPPORT_UNICODE
686
#if PCRE2_CODE_UNIT_WIDTH == 8
687
688
/* Decodes one UTF-8 character starting at ptr into c and advances ptr past
it. Continuation bytes are only read while they lie before end. When the
sequence is malformed, truncated, overlong, a surrogate, or above 0x10ffff,
invalid_action is executed instead. For overlong, surrogate and out-of-range
values ptr has already been advanced at that point; for the other failures
it is left unchanged. The contents of c are unspecified after a failure.
ptr and c are evaluated several times, so they must be side-effect free
lvalues. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] <= 0x7f) \
    (c) = *(ptr)++; \
  else if ((ptr) + 1 < (end) && (ptr)[1] >= 0x80 && (ptr)[1] < 0xc0) \
    { \
    (c) = (ptr)[1] - 0x80; \
    \
    if ((ptr)[0] >= 0xc2 && (ptr)[0] <= 0xdf) \
      { \
      (c) |= ((ptr)[0] - 0xc0) << 6; \
      (ptr) += 2; \
      } \
    else if ((ptr) + 2 < (end) && (ptr)[2] >= 0x80 && (ptr)[2] < 0xc0) \
      { \
      (c) = ((c) << 6) | ((ptr)[2] - 0x80); \
      \
      if ((ptr)[0] >= 0xe0 && (ptr)[0] <= 0xef) \
        { \
        (c) |= ((ptr)[0] - 0xe0) << 12; \
        (ptr) += 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) + 3 < (end) && (ptr)[3] >= 0x80 && (ptr)[3] < 0xc0) \
        { \
        (c) = ((c) << 6) | ((ptr)[3] - 0x80); \
        \
        if ((ptr)[0] >= 0xf0 && (ptr)[0] <= 0xf4) \
          { \
          (c) |= ((ptr)[0] - 0xf0) << 18; \
          (ptr) += 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
749
750
/* Decodes the UTF-8 character that ends just before ptr into c and moves
ptr back to its first byte. Bytes are only read while they lie at or after
start. When the sequence is malformed, truncated, overlong, a surrogate, or
above 0x10ffff, invalid_action is executed instead. For overlong, surrogate
and out-of-range values ptr has already been moved at that point; for the
other failures it is left unchanged. The contents of c are unspecified after
a failure.

Because the bytes are visited from last to first, each earlier byte carries
more significant bits and is OR-ed in at its own shift (6, 12, 18). Shifting
the accumulated value left instead, as a forward reader does, would put the
bits in reverse order.

ptr and c are evaluated several times, so they must be side-effect free
lvalues. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) <= 0x7f) \
    (ptr)--; \
  else if ((ptr) - 1 > (start) && (ptr)[-1] >= 0x80 && (ptr)[-1] < 0xc0) \
    { \
    (c) -= 0x80; \
    \
    if ((ptr)[-2] >= 0xc2 && (ptr)[-2] <= 0xdf) \
      { \
      (c) |= ((ptr)[-2] - 0xc0) << 6; \
      (ptr) -= 2; \
      } \
    else if ((ptr) - 2 > (start) && (ptr)[-2] >= 0x80 && (ptr)[-2] < 0xc0) \
      { \
      (c) |= ((ptr)[-2] - 0x80) << 6; \
      \
      if ((ptr)[-3] >= 0xe0 && (ptr)[-3] <= 0xef) \
        { \
        (c) |= ((ptr)[-3] - 0xe0) << 12; \
        (ptr) -= 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) - 3 > (start) && (ptr)[-3] >= 0x80 && (ptr)[-3] < 0xc0) \
        { \
        (c) |= ((ptr)[-3] - 0x80) << 12; \
        \
        if ((ptr)[-4] >= 0xf0 && (ptr)[-4] <= 0xf4) \
          { \
          (c) |= ((ptr)[-4] - 0xf0) << 18; \
          (ptr) -= 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
812
813
#elif PCRE2_CODE_UNIT_WIDTH == 16
814
815
/* Decodes one UTF-16 character starting at ptr into c and advances ptr past
it. A high surrogate is only combined with the following unit when that unit
lies before end and is a low surrogate. For an unpaired surrogate
invalid_action is executed and ptr is left unchanged. ptr and c are
evaluated several times, so they must be side-effect free lvalues. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || (ptr)[0] >= 0xe000) \
    (c) = *(ptr)++; \
  else if ((ptr)[0] < 0xdc00 && (ptr) + 1 < (end) && (ptr)[1] >= 0xdc00 && (ptr)[1] < 0xe000) \
    { \
    (c) = ((((ptr)[0] - 0xd800) << 10) | ((ptr)[1] - 0xdc00)) + 0x10000; \
    (ptr) += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
829
830
/* Decodes the UTF-16 character that ends just before ptr into c and moves
ptr back to its first unit. A low surrogate is only combined with the
preceding unit when that unit lies at or after start and is a high
surrogate. For an unpaired surrogate invalid_action is executed and ptr is
left unchanged (c then holds the raw unit). ptr and c are evaluated several
times, so they must be side-effect free lvalues. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) < 0xd800 || (c) >= 0xe000) \
    (ptr)--; \
  else if ((c) >= 0xdc00 && (ptr) - 1 > (start) && (ptr)[-2] >= 0xd800 && (ptr)[-2] < 0xdc00) \
    { \
    (c) = ((((ptr)[-2] - 0xd800) << 10) | ((c) - 0xdc00)) + 0x10000; \
    (ptr) -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
845
846
847
#elif PCRE2_CODE_UNIT_WIDTH == 32
848
849
/* Reads one UTF-32 character at ptr into c and advances ptr. Surrogate
values and values of 0x110000 or above trigger invalid_action, leaving ptr
and c unchanged. The end argument is accepted for signature compatibility
with the other code unit widths and is not used. ptr is evaluated several
times, so it must be a side-effect free lvalue. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || ((ptr)[0] >= 0xe000 && (ptr)[0] < 0x110000)) \
    (c) = *(ptr)++; \
  else \
    { \
    invalid_action; \
    } \
  }
858
859
/* Reads the UTF-32 character just before ptr into c and moves ptr back by
one unit. Surrogate values and values of 0x110000 or above trigger
invalid_action, leaving ptr unchanged (c then holds the raw unit). The start
argument is accepted for signature compatibility with the other code unit
widths and is not used. ptr is evaluated several times, so it must be a
side-effect free lvalue. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((ptr)[-1] < 0xd800 || ((ptr)[-1] >= 0xe000 && (ptr)[-1] < 0x110000)) \
    (ptr)--; \
  else \
    { \
    invalid_action; \
    } \
  }
869
870
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
871
#endif /* SUPPORT_UNICODE */
872
873
/* Returns the address of the first opcode after a bracketed group.
cc must point to the opening opcode of an assertion, once, bracket or
conditional group. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the link of every alternative until the closing ket is reached. */
do
  {
  cc += GET(cc, 1);
  }
while (*cc == OP_ALT);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);

/* Step over the ket opcode and its link field. */
return cc + 1 + LINK_SIZE;
}
881
882
/* Counts the alternatives of a bracketed group. cc must point to the
opening opcode of an assertion, once, bracket or conditional group; the
result is always at least 1. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first alternative always exists; every OP_ALT adds one more. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  alternatives++;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
895
896
/* Tells whether any alternative of a lookbehind assertion starts with
OP_VREVERSE. cc must point to the lookbehind opcode. */
static BOOL find_vreverse(PCRE2_SPTR cc)
{
SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);

for (;;)
  {
  /* The first opcode of an alternative follows the link field. */
  if (cc[1 + LINK_SIZE] == OP_VREVERSE)
    return TRUE;

  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    return FALSE;
  }
}
910
911
/* Functions which might need modification for every newly supported opcode:
912
next_opcode
913
check_opcode_types
914
set_private_data_ptrs
915
get_framesize
916
init_frame
917
get_recurse_data_length
918
copy_recurse_data
919
compile_matchingpath
920
compile_backtrackingpath
921
*/
922
923
/* Returns the address of the opcode that follows the one at cc, or NULL
when the opcode is not handled here (OP_ANYBYTE in UTF mode, or an opcode
missing from the switch). common is only consulted for its utf flag. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes whose length is fully described by the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE: case OP_VREVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ASSERT_SCS:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a literal character: in UTF mode the character
  may occupy extra code units beyond the table length. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. The result is one code unit short of the table length. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_ECLASS:
  case OP_XCLASS:
  SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order);
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] is added to a fixed part of 1 + 2. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1129
1130
/* Returns the size (in bytes) of the local area needed once the back
   reference opcode at cc is taken into account: the larger of
   current_locals_size and the requirement of this reference.
   The sizes used here depend on do_casefulcmp(), do_caselesscmp(),
   and compile_ref_matchingpath(). */
static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size)
{
  int needed = 2 * SSIZE_OF(sw);
  PCRE2_SPTR after_ref = cc + PRIV(OP_lengths)[*cc];

  SLJIT_UNUSED_ARG(common);

#ifdef SUPPORT_UNICODE
  /* Caseless references take a third word in UTF or UCP mode. */
  if ((*cc == OP_REFI || *cc == OP_DNREFI) && (common->utf || common->ucp))
    needed = 3 * SSIZE_OF(sw);
#endif

  /* Although do_casefulcmp() uses only one local, the allocate_stack()
     calls made during a repeat destroy the LOCAL1 variables, so a
     reference followed by a repeat opcode (OP_CRSTAR..OP_CRPOSRANGE)
     gets two more words. */
  if (*after_ref >= OP_CRSTAR && *after_ref <= OP_CRPOSRANGE)
    needed += 2 * SSIZE_OF(sw);

  if (current_locals_size >= needed)
    return current_locals_size;
  return needed;
}
1149
1150
/* Tests the bit that belongs to capture_index in the
   common->optimized_cbrackets bit set (eight capture indices per byte).
   Returns TRUE when the bit is set. */
static SLJIT_INLINE BOOL is_optimized_cbracket(compiler_common *common, sljit_s32 capture_index)
{
  sljit_s32 byte_index = capture_index >> 3;
  sljit_u8 mask = (sljit_u8)(1 << (capture_index & 0x7));

  return (common->optimized_cbrackets[byte_index] & mask) != 0;
}
1155
1156
/* Clears the bit that belongs to capture_index in the
   common->optimized_cbrackets bit set; all other bits are kept. */
static SLJIT_INLINE void clear_optimized_cbracket(compiler_common *common, sljit_s32 capture_index)
{
  sljit_s32 byte_index = capture_index >> 3;
  sljit_u8 keep_mask = (sljit_u8)~(1 << (capture_index & 0x7));

  common->optimized_cbrackets[byte_index] &= keep_mask;
}
1161
1162
/* Scans the byte code in [cc, ccend) once and
   - records pattern wide properties in common (has_set_som,
     might_be_empty, has_then, has_skip_arg, has_skip_in_assert_back,
     control_head_ptr),
   - clears the optimized_cbrackets bit of every capture bracket that is
     referenced (back references, conditions, recursion arguments) or
     that is a possessive capture bracket,
   - computes the size of the local area and reserves it, together with
     the optional mark, recursive head and last capture slots, by
     advancing common->ovector_start.

   Returns FALSE when the pattern contains something that is not
   supported (a callout as the condition of a conditional bracket, an
   OP_ASSERT_ACCEPT inside a non-atomic assertion / scan substring
   bracket, or an opcode rejected by next_opcode()), TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
  const sljit_s32 three_words = 3 * SSIZE_OF(sw);
  int name_count;
  PCRE2_SPTR name_slot;
  PCRE2_SPTR bracket_end;
  PCRE2_SPTR assert_back_end = cc - 1;
  PCRE2_SPTR assert_na_end = cc - 1;
  sljit_s32 locals_size = 2 * SSIZE_OF(sw);
  BOOL set_recursive_head = FALSE;
  BOOL set_capture_last = FALSE;
  BOOL set_mark = FALSE;

  /* Calculate important variables (like stack size) and check whether
     all opcodes are supported. */
  while (cc < ccend) {
    switch (*cc) {
      case OP_SET_SOM:
        common->has_set_som = TRUE;
        common->might_be_empty = TRUE;
        cc += 1;
        break;

      case OP_TYPEUPTO:
      case OP_TYPEEXACT:
        if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size < three_words)
          locals_size = three_words;
        /* Stop on the type opcode, it is processed by the next iteration. */
        cc += (2 + IMM2_SIZE) - 1;
        break;

      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      case OP_TYPEPOSQUERY:
        if (cc[1] == OP_EXTUNI && locals_size < three_words)
          locals_size = three_words;
        cc += 2 - 1;
        break;

      case OP_TYPEPOSUPTO:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
        if (common->utf && locals_size < three_words)
          locals_size = three_words;
#endif
        if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size < three_words)
          locals_size = three_words;
        cc += (2 + IMM2_SIZE) - 1;
        break;

      case OP_REFI:
      case OP_REF:
        locals_size = ref_update_local_size(common, cc, locals_size);
        clear_optimized_cbracket(common, GET2(cc, 1));
        cc += PRIV(OP_lengths)[*cc];
        break;

      case OP_ASSERT_NA:
      case OP_ASSERTBACK_NA:
      case OP_ASSERT_SCS:
        /* Remember the furthest end of these brackets. */
        bracket_end = bracketend(cc);
        if (bracket_end > assert_na_end)
          assert_na_end = bracket_end;
        cc += 1 + LINK_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        clear_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE));
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
      case OP_SCOND:
        /* Only AUTO_CALLOUT can insert this opcode. We do
           not intend to support this case. */
        if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
          return FALSE;
        cc += 1 + LINK_SIZE;
        break;

      case OP_CREF:
        clear_optimized_cbracket(common, GET2(cc, 1));
        cc += 1 + IMM2_SIZE;
        break;

      case OP_DNREFI:
      case OP_DNREF:
        locals_size = ref_update_local_size(common, cc, locals_size);
        PCRE2_FALLTHROUGH /* Fall through */
      case OP_DNCREF:
        /* Every capture bracket sharing the name is referenced. */
        name_slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
        for (name_count = GET2(cc, 1 + IMM2_SIZE); name_count > 0; name_count--) {
          clear_optimized_cbracket(common, GET2(name_slot, 0));
          name_slot += common->name_entry_size;
        }
        cc += PRIV(OP_lengths)[*cc];
        break;

      case OP_RECURSE:
        /* The slot itself is allocated only once, after the scan. */
        set_recursive_head = TRUE;
        cc += 1 + LINK_SIZE;
        while (*cc == OP_CREF) {
          clear_optimized_cbracket(common, GET2(cc, 1));
          cc += 1 + IMM2_SIZE;
        }
        break;

      case OP_CALLOUT:
      case OP_CALLOUT_STR:
        set_capture_last = TRUE;
        cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
        break;

      case OP_ASSERTBACK:
        bracket_end = bracketend(cc);
        if (bracket_end > assert_back_end)
          assert_back_end = bracket_end;
        cc += 1 + LINK_SIZE;
        break;

      case OP_THEN_ARG:
        common->has_then = TRUE;
        common->control_head_ptr = 1;
        PCRE2_FALLTHROUGH /* Fall through */

      case OP_COMMIT_ARG:
      case OP_PRUNE_ARG:
      case OP_MARK:
        set_mark = TRUE;
        cc += 1 + 2 + cc[1];
        break;

      case OP_THEN:
        common->has_then = TRUE;
        common->control_head_ptr = 1;
        cc += 1;
        break;

      case OP_SKIP:
        if (cc < assert_back_end)
          common->has_skip_in_assert_back = TRUE;
        cc += 1;
        break;

      case OP_SKIP_ARG:
        common->control_head_ptr = 1;
        common->has_skip_arg = TRUE;
        if (cc < assert_back_end)
          common->has_skip_in_assert_back = TRUE;
        cc += 1 + 2 + cc[1];
        break;

      case OP_ASSERT_ACCEPT:
        /* Not supported inside the brackets tracked by assert_na_end. */
        if (cc < assert_na_end)
          return FALSE;
        cc++;
        break;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
      case OP_CRPOSRANGE:
        /* The second value can be 0 for infinite repeats. */
        if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size < three_words)
          locals_size = three_words;
        cc += 1 + 2 * IMM2_SIZE;
        break;

      case OP_POSUPTO:
      case OP_POSUPTOI:
      case OP_NOTPOSUPTO:
      case OP_NOTPOSUPTOI:
        if (common->utf && locals_size < three_words)
          locals_size = three_words;
#endif
        PCRE2_FALLTHROUGH /* Fall through */
      default:
        cc = next_opcode(common, cc);
        if (cc == NULL)
          return FALSE;
        break;
    }
  }

  SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0);
#if defined SLJIT_DEBUG && SLJIT_DEBUG
  common->locals_size = locals_size;
#endif

  /* Reserve the local area, then one word for each optional slot. */
  if (locals_size > 0)
    common->ovector_start += locals_size;

  if (set_mark) {
    SLJIT_ASSERT(common->mark_ptr == 0);
    common->mark_ptr = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
  }

  if (set_recursive_head) {
    SLJIT_ASSERT(common->recursive_head_ptr == 0);
    common->recursive_head_ptr = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
  }

  if (set_capture_last) {
    SLJIT_ASSERT(common->capture_last_ptr == 0);
    common->capture_last_ptr = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
  }

  return TRUE;
}
1377
1378
/* Upper bound of the early fail state used by detect_early_fail(): the
   scan of an alternative stops as soon as its state reaches this value,
   and the same value is returned when no further enhancement is possible. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1379
1380
/*
1381
Start represent the number of allowed early fail enhancements
1382
1383
The 0-2 values has a special meaning:
1384
0 - skip is allowed for all iterators
1385
1 - fail is allowed for all iterators
1386
2 - fail is allowed for greedy iterators
1387
3 - only ranged early fail is allowed
1388
>3 - (start - 3) number of remaining ranged early fails allowed
1389
1390
return: the updated value of start
1391
*/
1392
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
1393
int *private_data_start, sljit_s32 depth, int start)
1394
{
1395
PCRE2_SPTR begin = cc;
1396
PCRE2_SPTR next_alt;
1397
PCRE2_SPTR end;
1398
PCRE2_SPTR accelerated_start;
1399
int result = 0;
1400
int count, prev_count;
1401
1402
SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1403
SLJIT_ASSERT(*cc != OP_CBRA || is_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE)));
1404
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1405
1406
next_alt = cc + GET(cc, 1);
1407
if (*next_alt == OP_ALT && start < 1)
1408
start = 1;
1409
1410
do
1411
{
1412
count = start;
1413
cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1414
1415
while (TRUE)
1416
{
1417
accelerated_start = NULL;
1418
1419
switch(*cc)
1420
{
1421
case OP_SOD:
1422
case OP_SOM:
1423
case OP_SET_SOM:
1424
case OP_NOT_WORD_BOUNDARY:
1425
case OP_WORD_BOUNDARY:
1426
case OP_EODN:
1427
case OP_EOD:
1428
case OP_CIRC:
1429
case OP_CIRCM:
1430
case OP_DOLL:
1431
case OP_DOLLM:
1432
case OP_NOT_UCP_WORD_BOUNDARY:
1433
case OP_UCP_WORD_BOUNDARY:
1434
/* Zero width assertions. */
1435
cc++;
1436
continue;
1437
1438
case OP_NOT_DIGIT:
1439
case OP_DIGIT:
1440
case OP_NOT_WHITESPACE:
1441
case OP_WHITESPACE:
1442
case OP_NOT_WORDCHAR:
1443
case OP_WORDCHAR:
1444
case OP_ANY:
1445
case OP_ALLANY:
1446
case OP_ANYBYTE:
1447
case OP_NOT_HSPACE:
1448
case OP_HSPACE:
1449
case OP_NOT_VSPACE:
1450
case OP_VSPACE:
1451
if (count < 1)
1452
count = 1;
1453
cc++;
1454
continue;
1455
1456
case OP_ANYNL:
1457
case OP_EXTUNI:
1458
if (count < 3)
1459
count = 3;
1460
cc++;
1461
continue;
1462
1463
case OP_NOTPROP:
1464
case OP_PROP:
1465
if (count < 1)
1466
count = 1;
1467
cc += 1 + 2;
1468
continue;
1469
1470
case OP_CHAR:
1471
case OP_CHARI:
1472
case OP_NOT:
1473
case OP_NOTI:
1474
if (count < 1)
1475
count = 1;
1476
cc += 2;
1477
#ifdef SUPPORT_UNICODE
1478
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1479
#endif
1480
continue;
1481
1482
case OP_TYPEMINSTAR:
1483
case OP_TYPEMINPLUS:
1484
if (count == 2)
1485
count = 3;
1486
PCRE2_FALLTHROUGH /* Fall through */
1487
1488
case OP_TYPESTAR:
1489
case OP_TYPEPLUS:
1490
case OP_TYPEPOSSTAR:
1491
case OP_TYPEPOSPLUS:
1492
/* The type or prop opcode is skipped in the next iteration. */
1493
cc += 1;
1494
1495
if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1496
{
1497
accelerated_start = cc - 1;
1498
break;
1499
}
1500
1501
if (count < 3)
1502
count = 3;
1503
continue;
1504
1505
case OP_TYPEEXACT:
1506
if (count < 1)
1507
count = 1;
1508
cc += 1 + IMM2_SIZE;
1509
continue;
1510
1511
case OP_TYPEUPTO:
1512
case OP_TYPEMINUPTO:
1513
case OP_TYPEPOSUPTO:
1514
cc += IMM2_SIZE;
1515
PCRE2_FALLTHROUGH /* Fall through */
1516
1517
case OP_TYPEQUERY:
1518
case OP_TYPEMINQUERY:
1519
case OP_TYPEPOSQUERY:
1520
/* The type or prop opcode is skipped in the next iteration. */
1521
if (count < 3)
1522
count = 3;
1523
cc += 1;
1524
continue;
1525
1526
case OP_MINSTAR:
1527
case OP_MINPLUS:
1528
case OP_MINSTARI:
1529
case OP_MINPLUSI:
1530
case OP_NOTMINSTAR:
1531
case OP_NOTMINPLUS:
1532
case OP_NOTMINSTARI:
1533
case OP_NOTMINPLUSI:
1534
if (count == 2)
1535
count = 3;
1536
PCRE2_FALLTHROUGH /* Fall through */
1537
1538
case OP_STAR:
1539
case OP_PLUS:
1540
case OP_POSSTAR:
1541
case OP_POSPLUS:
1542
1543
case OP_STARI:
1544
case OP_PLUSI:
1545
case OP_POSSTARI:
1546
case OP_POSPLUSI:
1547
1548
case OP_NOTSTAR:
1549
case OP_NOTPLUS:
1550
case OP_NOTPOSSTAR:
1551
case OP_NOTPOSPLUS:
1552
1553
case OP_NOTSTARI:
1554
case OP_NOTPLUSI:
1555
case OP_NOTPOSSTARI:
1556
case OP_NOTPOSPLUSI:
1557
accelerated_start = cc;
1558
cc += 2;
1559
#ifdef SUPPORT_UNICODE
1560
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1561
#endif
1562
break;
1563
1564
case OP_EXACT:
1565
if (count < 1)
1566
count = 1;
1567
cc += 2 + IMM2_SIZE;
1568
#ifdef SUPPORT_UNICODE
1569
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1570
#endif
1571
continue;
1572
1573
case OP_UPTO:
1574
case OP_MINUPTO:
1575
case OP_POSUPTO:
1576
case OP_UPTOI:
1577
case OP_MINUPTOI:
1578
case OP_EXACTI:
1579
case OP_POSUPTOI:
1580
case OP_NOTUPTO:
1581
case OP_NOTMINUPTO:
1582
case OP_NOTEXACT:
1583
case OP_NOTPOSUPTO:
1584
case OP_NOTUPTOI:
1585
case OP_NOTMINUPTOI:
1586
case OP_NOTEXACTI:
1587
case OP_NOTPOSUPTOI:
1588
cc += IMM2_SIZE;
1589
PCRE2_FALLTHROUGH /* Fall through */
1590
1591
case OP_QUERY:
1592
case OP_MINQUERY:
1593
case OP_POSQUERY:
1594
case OP_QUERYI:
1595
case OP_MINQUERYI:
1596
case OP_POSQUERYI:
1597
case OP_NOTQUERY:
1598
case OP_NOTMINQUERY:
1599
case OP_NOTPOSQUERY:
1600
case OP_NOTQUERYI:
1601
case OP_NOTMINQUERYI:
1602
case OP_NOTPOSQUERYI:
1603
if (count < 3)
1604
count = 3;
1605
cc += 2;
1606
#ifdef SUPPORT_UNICODE
1607
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1608
#endif
1609
continue;
1610
1611
case OP_CLASS:
1612
case OP_NCLASS:
1613
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1614
case OP_XCLASS:
1615
case OP_ECLASS:
1616
accelerated_start = cc;
1617
cc += (*cc >= OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)));
1618
#else
1619
accelerated_start = cc;
1620
cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1621
#endif
1622
1623
switch (*cc)
1624
{
1625
case OP_CRMINSTAR:
1626
case OP_CRMINPLUS:
1627
if (count == 2)
1628
count = 3;
1629
PCRE2_FALLTHROUGH /* Fall through */
1630
1631
case OP_CRSTAR:
1632
case OP_CRPLUS:
1633
case OP_CRPOSSTAR:
1634
case OP_CRPOSPLUS:
1635
cc++;
1636
break;
1637
1638
case OP_CRRANGE:
1639
case OP_CRMINRANGE:
1640
case OP_CRPOSRANGE:
1641
if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
1642
{
1643
/* Exact repeat. */
1644
cc += 1 + 2 * IMM2_SIZE;
1645
if (count < 1)
1646
count = 1;
1647
continue;
1648
}
1649
1650
cc += 2 * IMM2_SIZE;
1651
PCRE2_FALLTHROUGH /* Fall through */
1652
case OP_CRQUERY:
1653
case OP_CRMINQUERY:
1654
case OP_CRPOSQUERY:
1655
cc++;
1656
if (count < 3)
1657
count = 3;
1658
continue;
1659
1660
default:
1661
/* No repeat. */
1662
if (count < 1)
1663
count = 1;
1664
continue;
1665
}
1666
break;
1667
1668
case OP_BRA:
1669
case OP_CBRA:
1670
prev_count = count;
1671
if (count < 1)
1672
count = 1;
1673
1674
if (depth >= 4)
1675
break;
1676
1677
if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
1678
count = 3;
1679
1680
end = bracketend(cc);
1681
if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && !is_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE))))
1682
break;
1683
1684
prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);
1685
1686
if (prev_count > count)
1687
count = prev_count;
1688
1689
if (PRIVATE_DATA(cc) != 0)
1690
common->private_data_ptrs[begin - common->start] = 1;
1691
1692
if (count < EARLY_FAIL_ENHANCE_MAX)
1693
{
1694
cc = end;
1695
continue;
1696
}
1697
break;
1698
1699
case OP_KET:
1700
SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1701
if (cc >= next_alt)
1702
break;
1703
cc += 1 + LINK_SIZE;
1704
continue;
1705
}
1706
1707
if (accelerated_start == NULL)
1708
break;
1709
1710
if (count == 0)
1711
{
1712
common->fast_forward_bc_ptr = accelerated_start;
1713
common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1714
*private_data_start += sizeof(sljit_sw);
1715
count = 4;
1716
}
1717
else if (count < 3)
1718
{
1719
common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1720
1721
if (common->early_fail_start_ptr == 0)
1722
common->early_fail_start_ptr = *private_data_start;
1723
1724
*private_data_start += sizeof(sljit_sw);
1725
common->early_fail_end_ptr = *private_data_start;
1726
1727
if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1728
return EARLY_FAIL_ENHANCE_MAX;
1729
1730
count = 4;
1731
}
1732
else
1733
{
1734
common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1735
1736
if (common->early_fail_start_ptr == 0)
1737
common->early_fail_start_ptr = *private_data_start;
1738
1739
*private_data_start += 2 * sizeof(sljit_sw);
1740
common->early_fail_end_ptr = *private_data_start;
1741
1742
if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1743
return EARLY_FAIL_ENHANCE_MAX;
1744
1745
count++;
1746
}
1747
1748
/* Cannot be part of a repeat. */
1749
common->private_data_ptrs[begin - common->start] = 1;
1750
1751
if (count >= EARLY_FAIL_ENHANCE_MAX)
1752
break;
1753
}
1754
1755
if (*cc != OP_ALT && *cc != OP_KET)
1756
result = EARLY_FAIL_ENHANCE_MAX;
1757
else if (result < count)
1758
result = count;
1759
1760
cc = next_alt;
1761
next_alt = cc + GET(cc, 1);
1762
}
1763
while (*cc == OP_ALT);
1764
1765
return result;
1766
}
1767
1768
/* Returns the number of private data words (0, 1 or 2) used for the
   repeat opcode at cc, which follows a character class. Opcodes other
   than the greedy / minimizing star, plus, query and range repeats
   yield 0, and so does a range whose minimum equals its maximum. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
  sljit_u32 lower;
  sljit_u32 upper;
  sljit_u32 span;
  BOOL greedy;

  switch (*cc) {
    case OP_CRSTAR:
    case OP_CRPLUS:
      return 2;

    case OP_CRMINSTAR:
    case OP_CRMINPLUS:
    case OP_CRQUERY:
    case OP_CRMINQUERY:
      return 1;

    case OP_CRRANGE:
    case OP_CRMINRANGE:
      greedy = (*cc == OP_CRRANGE);
      lower = GET2(cc, 1);
      upper = GET2(cc, 1 + IMM2_SIZE);

      /* A zero maximum is handled like star / plus. */
      if (upper == 0)
        return greedy ? 2 : 1;

      span = upper - lower;
      if (span > (sljit_u32)(greedy ? 0 : 1))
        return 2;
      return (int)span;

    default:
      return 0;
  }
}
1799
1800
/* Checks whether the bracket at begin is followed by identical copies
   of itself, which is how counted repeats appear in the byte code.

   When such a sequence is found, three values are stored in
   common->private_data_ptrs, in the slots right after the OP_KET which
   closes the bracket before the repeated part: the length of the byte
   code covered by the repeat, the repeat type (OP_EXACT, OP_UPTO or
   OP_MINUPTO) and the repeat count.

   Returns TRUE when the bracket belongs to a detected repeat. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
  PCRE2_SPTR end = bracketend(begin);
  PCRE2_SPTR cursor;
  PCRE2_SPTR copy_end;
  PCRE2_SPTR max_end;
  PCRE2_UCHAR zero_op;
  sljit_sw length = end - begin;
  sljit_sw info;
  sljit_s32 min, max, i;

  /* Detect fixed iterations first. */
  if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
    return FALSE;

  /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
     Skip the check of the second part. */
  if (PRIVATE_DATA(end - LINK_SIZE) != 0)
    return TRUE;

  /* Count the consecutive brackets which are equal to the first one. */
  min = 1;
  for (cursor = end; *cursor == *begin; cursor = copy_end) {
    copy_end = bracketend(cursor);
    if (copy_end - cursor != length || memcmp(begin, cursor, IN_UCHARS(length)) != 0)
      break;
    min++;
  }

  if (min == 2)
    return FALSE;

  max = 0;
  max_end = cursor;
  if (*cursor == OP_BRAZERO || *cursor == OP_BRAMINZERO) {
    zero_op = *cursor;

    /* Optional copies which are nested: each one is an OP_BRA that
       contains the copy and the rest of the optional part. */
    while (cursor[0] == zero_op && cursor[1] == OP_BRA && cursor[2 + LINK_SIZE] == *begin) {
      copy_end = bracketend(cursor + 2 + LINK_SIZE);
      if (copy_end - cursor != (length + 2 + LINK_SIZE) || memcmp(begin, cursor + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
        break;
      cursor = copy_end;
      max++;
    }

    /* The innermost optional copy has no enclosing OP_BRA. */
    if (cursor[0] == zero_op && cursor[1] == *begin && max >= 1) {
      copy_end = bracketend(cursor + 1);
      if (copy_end - cursor == (length + 1) && memcmp(begin, cursor + 1, IN_UCHARS(length)) == 0) {
        /* Each enclosing OP_BRA must be closed by a plain OP_KET. */
        for (i = 0; i < max; i++, copy_end += 1 + LINK_SIZE)
          if (*copy_end != OP_KET)
            break;

        if (i == max) {
          info = max_end - common->start - LINK_SIZE;
          /* Patterns must fit into an int32 even for link-size=4. */
          common->private_data_ptrs[info] = (sljit_s32)(copy_end - max_end);
          common->private_data_ptrs[info + 1] = (zero_op == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
          /* +2 the original and the last. */
          common->private_data_ptrs[info + 2] = max + 2;
          if (min == 1)
            return TRUE;
          /* The last fixed copy now belongs to the optional part. */
          min--;
          max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

  if (min < 3)
    return FALSE;

  info = end - common->start - LINK_SIZE;
  common->private_data_ptrs[info] = (sljit_s32)(max_end - end);
  common->private_data_ptrs[info + 1] = OP_EXACT;
  common->private_data_ptrs[info + 2] = min;
  return TRUE;
}
1886
1887
/* Case label lists for the switches which process single character
   iterators. set_private_data_ptrs() gives one private data word to the
   opcodes of the ..._1 lists and two words to the ..._2A / ..._2B lists
   (the ..._2B opcodes also carry an IMM2_SIZE repeat count). Each line
   below holds one opcode family: plain, caseless (I), negated (NOT) and
   negated caseless. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* The same lists for the iterators of character types (OP_TYPE...). */

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1938
1939
/* Walks the byte code from common->start up to ccend and assigns private
   data offsets (stored in common->private_data_ptrs, indexed by byte
   code position) to brackets, assertions and single character
   iterators. Brackets are first checked by detect_repeat().

   The allocation starts at *private_data_start and the first unused
   offset is stored back there. The walk stops early when the offset
   grows above SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
  PCRE2_SPTR cc = common->start;
  PCRE2_SPTR alternative;
  /* Iterators before this position get no private slot. */
  PCRE2_SPTR no_private_end = NULL;
  int private_data_ptr = *private_data_start;
  /* space: words needed by an iterator; size: length of the opcode to
     skip (negative when a character, which may be multi-unit in UTF
     mode, follows); bracketlen: length of a bracket header to skip. */
  int space, size, bracketlen;
  BOOL repeat_check = TRUE;

  while (cc < ccend) {
    space = 0;
    size = 0;
    bracketlen = 0;
    if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
      break;

    /* When the bracket is prefixed by a zero iteration, skip the repeat
       check (at this point). Brackets converted to repeats do not allow
       global based single character repeats inside them. */
    if (repeat_check
        && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
        && detect_repeat(common, cc)
        && cc >= no_private_end)
      no_private_end = bracketend(cc);
    repeat_check = TRUE;

    switch (*cc) {
      case OP_KET:
        /* A non-zero value after OP_KET is the length of a detected
           repeat: allocate its slot and jump over the repeated code. */
        if (common->private_data_ptrs[cc + 1 - common->start] != 0) {
          common->private_data_ptrs[cc - common->start] = private_data_ptr;
          private_data_ptr += sizeof(sljit_sw);
          cc += common->private_data_ptrs[cc + 1 - common->start];
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ASSERT_NA:
      case OP_ONCE:
      case OP_SCRIPT_RUN:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_ASSERTBACK_NA:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += sizeof(sljit_sw);

        /* One more word when find_vreverse() reports a match. */
        if (find_vreverse(cc)) {
          common->private_data_ptrs[cc + 1 - common->start] = 1;
          private_data_ptr += sizeof(sljit_sw);
        }

        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_ASSERT_SCS:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += 2 * sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND. */
        common->private_data_ptrs[cc - common->start] = 0;
        alternative = cc + GET(cc, 1);
        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) {
          common->private_data_ptrs[cc - common->start] = private_data_ptr;
          private_data_ptr += sizeof(sljit_sw);
        }
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_BRA:
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_BRAZERO:
      case OP_BRAMINZERO:
      case OP_BRAPOSZERO:
        size = 1;
        repeat_check = FALSE;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        size = -2;
        space = 1;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        size = -2;
        space = 2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        size = -(2 + IMM2_SIZE);
        space = 2;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        size = 1;
        space = 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        size = 1;
        if (cc[1] != OP_EXTUNI)
          space = 2;
        break;

      case OP_TYPEUPTO:
        size = 1 + IMM2_SIZE;
        if (cc[1 + IMM2_SIZE] != OP_EXTUNI)
          space = 2;
        break;

      case OP_TYPEMINUPTO:
        size = 1 + IMM2_SIZE;
        space = 2;
        break;

      case OP_CLASS:
      case OP_NCLASS:
        size = 1 + 32 / sizeof(PCRE2_UCHAR);
        space = get_class_iterator_size(cc + size);
        break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      case OP_ECLASS:
        size = GET(cc, 1);
        space = get_class_iterator_size(cc + size);
        break;
#endif

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

    /* Character iterators, which are not inside a repeated bracket,
       get a private slot instead of allocating it on the stack. */
    if (space > 0 && cc >= no_private_end) {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw) * space;
    }

    if (size < 0) {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    } else if (size > 0) {
      cc += size;
    }

    if (bracketlen > 0) {
      if (cc >= no_private_end) {
        /* Only brackets which are not closed by a plain OP_KET
           restrict the private slots of the iterators inside them. */
        no_private_end = bracketend(cc);
        if (no_private_end[-1 - LINK_SIZE] == OP_KET)
          no_private_end = NULL;
      }
      cc += bracketlen;
    }
  }

  *private_data_start = private_data_ptr;
}
2142
2143
/* Marks capture_index as processed in the common->cbracket_bitset bit
   set (eight capture indices per byte). Returns TRUE if it was already
   marked before this call, FALSE otherwise. */
static SLJIT_INLINE BOOL is_cbracket_processed(compiler_common *common, sljit_s32 capture_index)
{
  sljit_u8 *entry = common->cbracket_bitset + (capture_index >> 3);
  sljit_u8 mask = (sljit_u8)(1 << (capture_index & 0x7));
  BOOL was_marked = (*entry & mask) != 0;

  *entry |= mask;
  return was_marked;
}
2152
2153
/* Returns with a frame_types (always < 0) if no need for frame. */
2154
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2155
{
2156
int length = 0;
2157
int possessive = 0;
2158
int offset;
2159
BOOL stack_restore = FALSE;
2160
BOOL setsom_found = recursive;
2161
BOOL setmark_found = recursive;
2162
/* The last capture is a local variable even for recursions. */
2163
BOOL capture_last_found = FALSE;
2164
2165
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2166
SLJIT_ASSERT(common->control_head_ptr != 0);
2167
*needs_control_head = TRUE;
2168
#else
2169
*needs_control_head = FALSE;
2170
#endif
2171
2172
memset(common->cbracket_bitset, 0, common->cbracket_bitset_length);
2173
2174
if (ccend == NULL)
2175
{
2176
ccend = bracketend(cc) - (1 + LINK_SIZE);
2177
if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2178
{
2179
possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2180
/* This is correct regardless of common->capture_last_ptr. */
2181
capture_last_found = TRUE;
2182
}
2183
cc = next_opcode(common, cc);
2184
}
2185
2186
SLJIT_ASSERT(cc != NULL);
2187
while (cc < ccend)
2188
switch(*cc)
2189
{
2190
case OP_SET_SOM:
2191
SLJIT_ASSERT(common->has_set_som);
2192
stack_restore = TRUE;
2193
if (!setsom_found)
2194
{
2195
length += 2;
2196
setsom_found = TRUE;
2197
}
2198
cc += 1;
2199
break;
2200
2201
case OP_MARK:
2202
case OP_COMMIT_ARG:
2203
case OP_PRUNE_ARG:
2204
case OP_THEN_ARG:
2205
SLJIT_ASSERT(common->mark_ptr != 0);
2206
stack_restore = TRUE;
2207
if (!setmark_found)
2208
{
2209
length += 2;
2210
setmark_found = TRUE;
2211
}
2212
if (common->control_head_ptr != 0)
2213
*needs_control_head = TRUE;
2214
cc += 1 + 2 + cc[1];
2215
break;
2216
2217
case OP_RECURSE:
2218
stack_restore = TRUE;
2219
if (common->has_set_som && !setsom_found)
2220
{
2221
length += 2;
2222
setsom_found = TRUE;
2223
}
2224
if (common->mark_ptr != 0 && !setmark_found)
2225
{
2226
length += 2;
2227
setmark_found = TRUE;
2228
}
2229
if (common->capture_last_ptr != 0 && !capture_last_found)
2230
{
2231
length += 2;
2232
capture_last_found = TRUE;
2233
}
2234
2235
cc += 1 + LINK_SIZE;
2236
while (*cc == OP_CREF)
2237
{
2238
offset = GET2(cc, 1);
2239
if (!is_cbracket_processed(common, offset))
2240
length += 3;
2241
cc += 1 + IMM2_SIZE;
2242
}
2243
break;
2244
2245
case OP_CBRA:
2246
case OP_CBRAPOS:
2247
case OP_SCBRA:
2248
case OP_SCBRAPOS:
2249
stack_restore = TRUE;
2250
if (common->capture_last_ptr != 0 && !capture_last_found)
2251
{
2252
length += 2;
2253
capture_last_found = TRUE;
2254
}
2255
2256
offset = GET2(cc, 1 + LINK_SIZE);
2257
if (!is_cbracket_processed(common, offset))
2258
length += 3;
2259
cc += 1 + LINK_SIZE + IMM2_SIZE;
2260
break;
2261
2262
case OP_THEN:
2263
stack_restore = TRUE;
2264
if (common->control_head_ptr != 0)
2265
*needs_control_head = TRUE;
2266
cc ++;
2267
break;
2268
2269
default:
2270
stack_restore = TRUE;
2271
PCRE2_FALLTHROUGH /* Fall through */
2272
2273
case OP_NOT_WORD_BOUNDARY:
2274
case OP_WORD_BOUNDARY:
2275
case OP_NOT_DIGIT:
2276
case OP_DIGIT:
2277
case OP_NOT_WHITESPACE:
2278
case OP_WHITESPACE:
2279
case OP_NOT_WORDCHAR:
2280
case OP_WORDCHAR:
2281
case OP_ANY:
2282
case OP_ALLANY:
2283
case OP_ANYBYTE:
2284
case OP_NOTPROP:
2285
case OP_PROP:
2286
case OP_ANYNL:
2287
case OP_NOT_HSPACE:
2288
case OP_HSPACE:
2289
case OP_NOT_VSPACE:
2290
case OP_VSPACE:
2291
case OP_EXTUNI:
2292
case OP_EODN:
2293
case OP_EOD:
2294
case OP_CIRC:
2295
case OP_CIRCM:
2296
case OP_DOLL:
2297
case OP_DOLLM:
2298
case OP_CHAR:
2299
case OP_CHARI:
2300
case OP_NOT:
2301
case OP_NOTI:
2302
2303
case OP_EXACT:
2304
case OP_POSSTAR:
2305
case OP_POSPLUS:
2306
case OP_POSQUERY:
2307
case OP_POSUPTO:
2308
2309
case OP_EXACTI:
2310
case OP_POSSTARI:
2311
case OP_POSPLUSI:
2312
case OP_POSQUERYI:
2313
case OP_POSUPTOI:
2314
2315
case OP_NOTEXACT:
2316
case OP_NOTPOSSTAR:
2317
case OP_NOTPOSPLUS:
2318
case OP_NOTPOSQUERY:
2319
case OP_NOTPOSUPTO:
2320
2321
case OP_NOTEXACTI:
2322
case OP_NOTPOSSTARI:
2323
case OP_NOTPOSPLUSI:
2324
case OP_NOTPOSQUERYI:
2325
case OP_NOTPOSUPTOI:
2326
2327
case OP_TYPEEXACT:
2328
case OP_TYPEPOSSTAR:
2329
case OP_TYPEPOSPLUS:
2330
case OP_TYPEPOSQUERY:
2331
case OP_TYPEPOSUPTO:
2332
2333
case OP_CLASS:
2334
case OP_NCLASS:
2335
case OP_XCLASS:
2336
case OP_ECLASS:
2337
2338
case OP_CALLOUT:
2339
case OP_CALLOUT_STR:
2340
2341
case OP_NOT_UCP_WORD_BOUNDARY:
2342
case OP_UCP_WORD_BOUNDARY:
2343
2344
cc = next_opcode(common, cc);
2345
SLJIT_ASSERT(cc != NULL);
2346
break;
2347
}
2348
2349
/* Possessive quantifiers can use a special case. */
2350
if (SLJIT_UNLIKELY(possessive == length))
2351
return stack_restore ? no_frame : no_stack;
2352
2353
if (length > 0)
2354
return length + 1;
2355
return stack_restore ? no_frame : no_stack;
2356
}
2357
2358
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2359
{
2360
DEFINE_COMPILER;
2361
BOOL setsom_found = FALSE;
2362
BOOL setmark_found = FALSE;
2363
/* The last capture is a local variable even for recursions. */
2364
BOOL capture_last_found = FALSE;
2365
int offset;
2366
2367
/* >= 1 + shortest item size (2) */
2368
SLJIT_UNUSED_ARG(stacktop);
2369
SLJIT_ASSERT(stackpos >= stacktop + 2);
2370
2371
memset(common->cbracket_bitset, 0, common->cbracket_bitset_length);
2372
2373
stackpos = STACK(stackpos);
2374
if (ccend == NULL)
2375
{
2376
ccend = bracketend(cc) - (1 + LINK_SIZE);
2377
if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2378
cc = next_opcode(common, cc);
2379
}
2380
2381
/* The data is restored by do_revertframes(). */
2382
SLJIT_ASSERT(cc != NULL);
2383
while (cc < ccend)
2384
switch(*cc)
2385
{
2386
case OP_SET_SOM:
2387
SLJIT_ASSERT(common->has_set_som);
2388
if (!setsom_found)
2389
{
2390
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2391
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2392
stackpos -= SSIZE_OF(sw);
2393
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2394
stackpos -= SSIZE_OF(sw);
2395
setsom_found = TRUE;
2396
}
2397
cc += 1;
2398
break;
2399
2400
case OP_MARK:
2401
case OP_COMMIT_ARG:
2402
case OP_PRUNE_ARG:
2403
case OP_THEN_ARG:
2404
SLJIT_ASSERT(common->mark_ptr != 0);
2405
if (!setmark_found)
2406
{
2407
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2408
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2409
stackpos -= SSIZE_OF(sw);
2410
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2411
stackpos -= SSIZE_OF(sw);
2412
setmark_found = TRUE;
2413
}
2414
cc += 1 + 2 + cc[1];
2415
break;
2416
2417
case OP_RECURSE:
2418
if (common->has_set_som && !setsom_found)
2419
{
2420
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2421
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2422
stackpos -= SSIZE_OF(sw);
2423
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2424
stackpos -= SSIZE_OF(sw);
2425
setsom_found = TRUE;
2426
}
2427
if (common->mark_ptr != 0 && !setmark_found)
2428
{
2429
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2430
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2431
stackpos -= SSIZE_OF(sw);
2432
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2433
stackpos -= SSIZE_OF(sw);
2434
setmark_found = TRUE;
2435
}
2436
if (common->capture_last_ptr != 0 && !capture_last_found)
2437
{
2438
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2439
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2440
stackpos -= SSIZE_OF(sw);
2441
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2442
stackpos -= SSIZE_OF(sw);
2443
capture_last_found = TRUE;
2444
}
2445
cc += 1 + LINK_SIZE;
2446
while (*cc == OP_CREF)
2447
{
2448
offset = GET2(cc, 1);
2449
if (!is_cbracket_processed(common, offset))
2450
{
2451
offset <<= 1;
2452
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2453
stackpos -= SSIZE_OF(sw);
2454
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2455
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2456
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2457
stackpos -= SSIZE_OF(sw);
2458
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2459
stackpos -= SSIZE_OF(sw);
2460
}
2461
cc += 1 + IMM2_SIZE;
2462
}
2463
break;
2464
2465
case OP_CBRA:
2466
case OP_CBRAPOS:
2467
case OP_SCBRA:
2468
case OP_SCBRAPOS:
2469
if (common->capture_last_ptr != 0 && !capture_last_found)
2470
{
2471
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2472
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2473
stackpos -= SSIZE_OF(sw);
2474
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2475
stackpos -= SSIZE_OF(sw);
2476
capture_last_found = TRUE;
2477
}
2478
2479
offset = GET2(cc, 1 + LINK_SIZE);
2480
if (!is_cbracket_processed(common, offset))
2481
{
2482
offset <<= 1;
2483
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2484
stackpos -= SSIZE_OF(sw);
2485
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2486
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2487
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2488
stackpos -= SSIZE_OF(sw);
2489
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2490
stackpos -= SSIZE_OF(sw);
2491
}
2492
2493
cc += 1 + LINK_SIZE + IMM2_SIZE;
2494
break;
2495
2496
default:
2497
cc = next_opcode(common, cc);
2498
SLJIT_ASSERT(cc != NULL);
2499
break;
2500
}
2501
2502
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2503
SLJIT_ASSERT(stackpos == STACK(stacktop));
2504
}
2505
2506
#define RECURSE_TMP_REG_COUNT 3
2507
2508
typedef struct delayed_mem_copy_status {
2509
struct sljit_compiler *compiler;
2510
int store_bases[RECURSE_TMP_REG_COUNT];
2511
sljit_s32 store_offsets[RECURSE_TMP_REG_COUNT];
2512
int tmp_regs[RECURSE_TMP_REG_COUNT];
2513
int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2514
int next_tmp_reg;
2515
} delayed_mem_copy_status;
2516
2517
/* Prepares 'status' for a new copy sequence. The caller must have filled in
tmp_regs[] and saved_tmp_regs[] beforehand; this function only validates them
(in debug builds), marks every slot as having no pending store, resets the
rotation index and records the compiler taken from 'common'. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int i;

for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  {
  SLJIT_ASSERT(status->tmp_regs[i] >= 0);
  /* A different save location is only allowed when the saved register has
  no machine register index (a virtual register). */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);

  status->store_bases[i] = -1;
  }

status->next_tmp_reg = 0;
status->compiler = common->compiler;
}
2531
2532
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted right away into the next temporary register of the
rotation; the store is only recorded. Before the register is reused, the store
recorded for it by an earlier call is emitted. On the first use of a slot, the
register content is preserved in saved_tmp_regs[] when that entry is a virtual
register. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_s32 store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int next_tmp_reg = status->next_tmp_reg;
int tmp_reg = status->tmp_regs[next_tmp_reg];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[next_tmp_reg] == -1)
  {
  /* Preserve virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0)
    OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
  }
else
  {
  /* Flush the store that is still pending for this register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
  }

OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_bases[next_tmp_reg] = store_base;
status->store_offsets[next_tmp_reg] = store_offset;

status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
}
2556
2557
/* Ends a copy sequence: emits every store that is still pending, visiting the
slots in rotation order starting at next_tmp_reg (the slot whose pending store
was queued first), and restores the temporary registers whose original content
was preserved in a virtual register. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int next_tmp_reg = status->next_tmp_reg;
int tmp_reg, saved_tmp_reg, i;

for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  {
  if (status->store_bases[next_tmp_reg] != -1)
    {
    tmp_reg = status->tmp_regs[next_tmp_reg];
    saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0)
      OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
    }

  next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2580
2581
/* The rotation size is only meaningful to the delayed_mem_copy_* definitions
that precede this line. */
#undef RECURSE_TMP_REG_COUNT
2582
2583
/* Test-and-set on common->recurse_bitset.

'bit_index' is a word-aligned byte offset; it is converted to a word index
and the corresponding bit is examined.

Returns:  TRUE   the bit was clear (first visit); it is set now
          FALSE  the bit was already set; nothing is changed */
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
uint8_t *byte;
uint8_t mask;

SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);

/* Byte offset -> machine word index. */
bit_index >>= SLJIT_WORD_SHIFT;

SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);

mask = (uint8_t)(1 << (bit_index & 0x7));
byte = common->recurse_bitset + (bit_index >> 3);

if (*byte & mask)
  return FALSE;

*byte |= mask;
return TRUE;
}
2603
2604
/* Bit flags collected by get_recurse_data_length() and consumed by
copy_recurse_data(). */
enum get_recurse_flags {
  /* A PRUNE / SKIP / COMMIT / THEN verb (with or without argument) is present. */
  recurse_flag_quit_found = (1 << 0),
  /* An OP_ACCEPT or OP_ASSERT_ACCEPT is present. */
  recurse_flag_accept_found = (1 << 1),
  /* The start-of-match may be changed (OP_SET_SOM, or OP_RECURSE while
  common->has_set_som is set). */
  recurse_flag_setsom_found = (1 << 2),
  /* The mark may be changed (a verb with an argument, or OP_RECURSE while
  common->mark_ptr is in use). */
  recurse_flag_setmark_found = (1 << 3),
  /* The control head local is part of the recursion data. */
  recurse_flag_control_head_found = (1 << 4),
  /* Transient scanning state: the OP_CREF items being visited belong to the
  preceding OP_RECURSE. */
  recurse_flag_recurse_arg = (1 << 5),
};
2612
2613
/* Computes how many machine words are needed to hold the data that belongs
to the opcodes in [cc, ccend) of a recursion, and collects the recurse flags
of that range.

Every local is counted only once: recurse_check_bit() records the ones that
were already seen. The walk over the opcodes must visit exactly the same
locals as copy_recurse_data(), since that function asserts that it consumed
the number of words returned here.

Arguments:
  common         compiler state; common->recurse_bitset is cleared and reused
  cc             first opcode of the range
  ccend          end of the range
  result_flags   receives the collected get_recurse_flags bits

Returns:  the number of machine words */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
/* One word is always present; copy_recurse_data() uses its first slot for
common->recursive_head_ptr. */
int length = 1;
int size, offset;
PCRE2_SPTR alternative, cref;
uint32_t recurse_flags = 0;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

/* Capture groups listed as arguments of the current entry are marked as
already seen, so they are never counted below. */
if (common->currententry->arg_size > 0)
  {
  cref = common->currententry->arg_start;

  do
    {
    offset = GET2(cref, 1);
    recurse_check_bit(common, OVECTOR(offset << 1));
    cref += 1 + IMM2_SIZE;
    }
  while (*cref == OP_CREF);
  }

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    recurse_flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    if (common->has_set_som)
      recurse_flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      recurse_flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE;
    /* The following OP_CREF items (if any) are arguments of this recursion. */
    if (*cc == OP_CREF)
      recurse_flags |= recurse_flag_recurse_arg;
    break;

    case OP_KET:
    offset = PRIVATE_DATA(cc);
    if (offset != 0)
      {
      if (recurse_check_bit(common, offset))
        length++;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      /* Skip the repeat data that follows the ket. */
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    /* Only the OP_CREF items that directly follow an OP_RECURSE are counted. */
    if ((recurse_flags & recurse_flag_recurse_arg) != 0)
      {
      offset = GET2(cc, 1);
      if (recurse_check_bit(common, OVECTOR(offset << 1)))
        {
        SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
        length += 2;
        }

      if (cc[1 + IMM2_SIZE] != OP_CREF)
        recurse_flags &= ~(uint32_t)recurse_flag_recurse_arg;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_SCS:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length += 2;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (!is_optimized_cbracket(common, offset) && recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    size = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The iterator that follows the class decides the number of words. */
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length += get_class_iterator_size(cc + size);
    cc += size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    recurse_flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      recurse_flags |= recurse_flag_control_head_found;
    if (*cc != OP_MARK)
      recurse_flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    recurse_flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    recurse_flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    recurse_flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* Locals that are counted once for the whole range. The start-of-match and
the mark only need a word when a quit-type verb is present. */
if (recurse_flags & recurse_flag_control_head_found)
  length++;
if (recurse_flags & recurse_flag_quit_found)
  {
  if (recurse_flags & recurse_flag_setsom_found)
    length++;
  if (recurse_flags & recurse_flag_setmark_found)
    length++;
  }

*result_flags = recurse_flags;
return length;
}
2895
2896
/* Operation selector of copy_recurse_data(). The data of a recursion is split
into three categories (private, shared, kept shared); the selector decides the
copy direction and which categories are transferred. */
enum copy_recurse_data_types {
  /* Local variables -> stack; all three categories. */
  recurse_copy_from_global,
  /* Stack -> local variables; private data only. */
  recurse_copy_private_to_global,
  /* Stack -> local variables; shared and kept shared data. */
  recurse_copy_shared_to_global,
  /* Stack -> local variables; kept shared data only. */
  recurse_copy_kept_shared_to_global,
  /* Exchanges the stack content (addressed through TMP2) with the local
  variables; private and shared data. */
  recurse_swap_global
};
2903
2904
/* Emits the code that transfers the data of a recursion between the local
variables (addressed through SLJIT_SP) and a block of the machine stack.

The opcodes in [cc, ccend) are walked in exactly the same way as in
get_recurse_data_length(), so every local gets the same slot in the block
whatever the value of 'type' is. Slots that are not selected by 'type' are
skipped, but still occupy their position.

Arguments:
  common          compiler state; common->recurse_bitset is cleared and reused
  cc              first opcode of the range
  ccend           end of the range
  type            one of copy_recurse_data_types
  stackptr        index of the first slot of the block (converted by STACK())
  stacktop        index of the end of the block; the walk must end here
  recurse_flags   flags computed by get_recurse_data_length()

All copies are emitted through the delayed_mem_copy_* functions. */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative, cref;
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

/* Capture groups listed as arguments of the current entry are marked as
already seen, so they are never transferred below. */
if (common->currententry->arg_size > 0)
  {
  cref = common->currententry->arg_start;

  do
    {
    offset = GET2(cref, 1);
    recurse_check_bit(common, OVECTOR(offset << 1));
    cref += 1 + IMM2_SIZE;
    }
  while (*cref == OP_CREF);
  }

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_check_bit(common, common->control_head_ptr);
#endif

/* Direction and base register of the stack block. */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Select the three temporary registers. TMP2 cannot be used as a temporary
when it is the base register of the block. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (HAS_VIRTUAL_REGISTERS)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The first slot belongs to the recursive head. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
if (type != recurse_copy_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  /* Each opcode lists the locals it introduces in three categories; the
  transfer itself is done after the switch. */
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    if (recurse_flags & recurse_flag_quit_found)
      {
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        }
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        }
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      }

    cc += 1 + LINK_SIZE;
    /* The following OP_CREF items (if any) are arguments of this recursion. */
    if (*cc == OP_CREF)
      recurse_flags |= recurse_flag_recurse_arg;
    break;

    case OP_KET:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0)
      {
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      /* Skip the repeat data that follows the ket. */
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    /* Only the OP_CREF items that directly follow an OP_RECURSE are used. */
    if ((recurse_flags & recurse_flag_recurse_arg) != 0)
      {
      offset = GET2(cc, 1);
      shared_srcw[0] = OVECTOR(offset << 1);
      if (recurse_check_bit(common, shared_srcw[0]))
        {
        shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
        SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
        shared_count = 2;
        }

      if (cc[1 + IMM2_SIZE] != OP_CREF)
        recurse_flags &= ~(uint32_t)recurse_flag_recurse_arg;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_SCS:
    private_srcw[0] = PRIVATE_DATA(cc);
    private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 2;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    if (!is_optimized_cbracket(common, offset))
      {
      private_srcw[0] = OVECTOR_PRIV(offset);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;

    offset = OVECTOR_PRIV(offset);
    if (recurse_check_bit(common, offset))
      {
      private_srcw[private_count] = offset;
      private_count++;
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_srcw[0] = PRIVATE_DATA(cc);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* NOTE(review): get_recurse_data_length() counts the words of a class
    iterator only when recurse_check_bit() succeeds. Here the one-word case
    does not consult the bitset, and a failed check in the two-word case still
    leaves one word. The two walks only agree if the private data of a class
    iterator is never seen twice - confirm. */
    if (PRIVATE_DATA(cc) != 0)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        break;

        case 2:
        if (recurse_check_bit(common, private_srcw[0]))
          {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
          }
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
      }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      }
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc++;
    break;

    default:
    /* No data belongs to this opcode: nothing to transfer. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    continue;
    }

  /* Private data. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  /* Shared data. */
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  /* Kept shared data. */
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)kept_shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* Exactly the words counted by get_recurse_data_length() were consumed. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

delayed_mem_copy_finish(&status);
}
3380
3381
/* Walks the bracket that starts at 'cc' and, for each *THEN verb found in it,
sets the byte of common->then_offsets that corresponds to the alternative in
which the verb is located. Nested brackets are processed recursively.

Arguments:
  common           compiler state
  cc               the opening opcode of the bracket
  current_offset   the then_offsets entry inherited from the enclosing
                     bracket, or NULL if there is none

Returns:  pointer to the code that follows the bracket (including its repeat
          data, if any) */
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
PCRE2_SPTR end = bracketend(cc);
BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;

/* Assert captures *THEN verb even if it has no alternatives. */
if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
  current_offset = NULL;
else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS)
  has_alternatives = TRUE;
/* Conditional block does never capture. */
else if (*cc == OP_COND || *cc == OP_SCOND)
  has_alternatives = FALSE;

cc = next_opcode(common, cc);

if (has_alternatives)
  {
  /* Skip the item that may lead the first alternative, so that the entry
  refers to the code after it. */
  switch (*cc)
    {
    case OP_REVERSE:
    case OP_CREF:
    cc += 1 + IMM2_SIZE;
    break;

    case OP_VREVERSE:
    case OP_DNCREF:
    cc += 1 + 2 * IMM2_SIZE;
    break;

    default:
    break;
    }

  current_offset = common->then_offsets + (cc - common->start);
  }

while (cc < end)
  {
  if (*cc >= OP_ASSERT && *cc <= OP_SCOND)
    {
    /* Nested bracket: the recursive call returns the code after it. */
    cc = set_then_offsets(common, cc, current_offset);
    continue;
    }

  if (*cc == OP_ALT && has_alternatives)
    {
    /* A new alternative starts: switch to its own entry. */
    cc += 1 + LINK_SIZE;

    if (*cc == OP_REVERSE)
      cc += 1 + IMM2_SIZE;
    else if (*cc == OP_VREVERSE)
      cc += 1 + 2 * IMM2_SIZE;

    current_offset = common->then_offsets + (cc - common->start);
    continue;
    }

  if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
    *current_offset = 1;
  cc = next_opcode(common, cc);
  }

cc = end - 1 - LINK_SIZE;

/* Ignore repeats. */
if (*cc == OP_KET && PRIVATE_DATA(cc) != 0)
  end += PRIVATE_DATA(cc + 1);

return end;
}
3448
3449
#undef CASE_ITERATOR_PRIVATE_DATA_1
3450
#undef CASE_ITERATOR_PRIVATE_DATA_2A
3451
#undef CASE_ITERATOR_PRIVATE_DATA_2B
3452
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3453
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3454
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3455
3456
/* Returns TRUE when at most one bit of value is set. Note that this also
holds for zero, so callers that need a true power of two must pass a
non-zero value. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clearing the lowest set bit leaves nothing behind for a single-bit value. */
unsigned int remaining_bits = value & (value - 1);

return remaining_bits == 0;
}
3460
3461
/* Binds every jump stored in list to label. According to the original
note, sljit_set_label does nothing when either the jump or the label is
NULL, so entries are not checked here. */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
jump_list *item;

for (item = list; item != NULL; item = item->next)
  SET_LABEL(item->jump, label);
}
3471
3472
/* Pushes jump onto the front of the jump list *list. The node comes from
sljit_alloc_memory; when that returns NULL the list is left unchanged and
the jump is not recorded. */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

if (node == NULL)
  return;

node->jump = jump;
node->next = *list;
*list = node;
}
3482
3483
/* Registers a stub on common->stubs. start is the jump that will later be
bound to the stub code (see flush_stubs); a label emitted here marks the
point the stub jumps back to. Nothing is emitted or recorded when the node
cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3496
3497
/* Emits the code of every pending stub and empties common->stubs. Each stub
binds its start jump to the current position, performs a fast call that is
queued on common->stackalloc, and then jumps back to its quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3511
3512
/* Emits code that decrements COUNT_MATCH by one and, when the result is
zero, takes a jump queued on common->calllimit. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
3519
3520
/* Emits code that reserves size machine words by lowering STACK_TOP, then
registers a stub that is entered when STACK_TOP drops below STACK_LIMIT.
May destroy all locals and registers except TMP2. size must be positive. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, sljit_s32 size)
{
DEFINE_COMPILER;
struct sljit_jump *below_limit;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));

#ifdef DESTROY_REGISTERS
/* Debug aid: overwrite everything this call is allowed to clobber. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#if defined SLJIT_DEBUG && SLJIT_DEBUG
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
/* These two are also used by the stackalloc calls. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0);
#endif
#endif

below_limit = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, below_limit);
}
3540
3541
/* Emits code that releases size machine words by raising STACK_TOP; the
counterpart of allocate_stack. size must be positive. */
static SLJIT_INLINE void free_stack(compiler_common *common, sljit_s32 size)
{
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0,
  SLJIT_IMM, size * SSIZE_OF(sw));
}
3548
3549
/* Allocates size bytes of data that is tracked through
common->read_only_data_head.

One extra word is reserved in front of the returned area; it stores the
previous list head, so all chunks form a singly linked list.

Returns a pointer just past that link word, or NULL when the compiler is
already in an error state or the allocation fails. In the latter case the
compiler's memory error is set. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new chunk in front of the existing ones. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
3568
3569
/* Emits code that stores "begin - 1 character" into the ovector slots
1 .. length - 1 of the local frame. SLJIT_R0 holds that value afterwards.
All temporary registers may be used at this point. length must exceed 1. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *fill_loop;
sljit_s32 slot;
BOOL has_pre;

SLJIT_ASSERT(length > 1);

/* SLJIT_R0 = jit_arguments.begin - 1 character. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

if (length < 8)
  {
  /* Few slots: emit one store per slot. */
  for (slot = 1; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), SLJIT_R0, 0);
  return;
  }

/* Probe whether a store with pre-update addressing is available. */
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* With pre-update the pointer starts one word earlier. */
GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + (has_pre ? 0 : sizeof(sljit_sw)));
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
fill_loop = LABEL();

if (has_pre)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* SLJIT_R2 counts the remaining slots. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, fill_loop);
}
3607
3608
/* Emits code that zeroes the local frame words in the range
[common->early_fail_start_ptr, common->early_fail_end_ptr).

A single word is cleared with one immediate store, up to six words with
individual stores, and larger areas with a loop clearing three words per
iteration followed by up to two trailing stores. TMP1, TMP2 and possibly
TMP3 are overwritten. */
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
DEFINE_COMPILER;
sljit_u32 area_size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 tail_size;
sljit_s32 zero_src = SLJIT_IMM;
sljit_s32 offset;
struct sljit_label *clear_loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

if (area_size == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

/* Keep the zero in TMP3 when that is a real register and the CPU has no
zero register; otherwise store the immediate directly. */
if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  zero_src = TMP3;
  }

if (area_size <= 6 * sizeof(sljit_sw))
  {
  for (offset = common->early_fail_start_ptr; offset < common->early_fail_end_ptr; offset += sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), offset, zero_src, 0);
  return;
  }

GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

/* Bytes left over after the three-words-per-iteration loop. */
tail_size = ((area_size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);

/* TMP2 = address where the loop has to stop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, area_size - tail_size);

clear_loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), zero_src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), zero_src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, clear_loop);

if (tail_size >= sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);

  if (tail_size >= 2 * sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), zero_src, 0);
  }
}
3657
3658
/* Emits code that restores the matching state:
  - ovector slots 2 .. length - 1 receive the value kept in OVECTOR(1),
  - the mark and control head locals (when present) are cleared,
  - TMP1 is loaded from the start_ptr local,
  - STACK_TOP is reloaded from the end field of the sljit_stack referenced
    by jit_arguments.stack.
TMP2 is used as a scratch pointer for long vectors. length must exceed 1. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *fill_loop;
int slot;
BOOL has_pre;

SLJIT_ASSERT(length > 1);

/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

if (length >= 8)
  {
  /* Probe whether a store with pre-update addressing is available. */
  has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS;

  /* With pre-update the pointer starts one word earlier. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + (has_pre ? 1 : 2) * sizeof(sljit_sw));
  /* STACK_TOP is free here; it serves as the loop counter. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  fill_loop = LABEL();

  if (has_pre)
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }
else
  {
  /* Few slots: emit one store per slot. */
  for (slot = 2; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
  }

/* With virtual registers the arguments pointer is fetched first and
dereferenced only after the immediate stores below. */
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
else
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3711
3712
/* Searches a chain of control frames for a mark whose name equals skip_arg.

Frame layout as used here: word 0 links to the next frame (0 ends the
chain), word 1 is the frame type. For type_mark frames word 2 points to the
mark name and word 3 is the value returned on a match. type_then_trap
frames are skipped; any other type is unexpected.

Returns word 3 of the first matching mark frame, or 0 when none matches. */
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
sljit_sw *frame;

for (frame = current; frame != NULL; frame = (sljit_sw*)frame[0])
  {
  if (frame[1] == type_mark)
    {
    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)frame[2]) == 0)
      return frame[3];
    }
  else if (frame[1] != type_then_trap)
    {
    SLJIT_UNREACHABLE();
    }

  /* Every link must lead to a higher address (or terminate the chain). */
  SLJIT_ASSERT(frame[0] == 0 || frame < (sljit_sw*)frame[0]);
  }

return 0;
}
3735
3736
/* Emits the code run after a successful match:
  - OVECTOR(1) is replaced by STR_PTR (its old value is kept in SLJIT_S2),
  - startchar_ptr and, when marks are used, mark_ptr of jit_arguments are
    updated from the local frame,
  - oveccount entries of the local ovector are converted to code unit
    offsets relative to jit_arguments.begin and stored into the ovector of
    the match data,
  - SLJIT_RETURN_REG receives 1 when topbracket <= 1, otherwise a count
    obtained by scanning the even ovector slots downwards from topbracket
    until one differs from the saved OVECTOR(1) value.
All registers may be used freely at this point. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;
BOOL has_mark = common->mark_ptr != 0;

/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  /* The arguments pointer is copied to SLJIT_R0 and accessed through it. */
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* SLJIT_R2 now points one entry before the output ovector and SLJIT_R1
holds the number of entries to copy. */

/* Probe whether a load with pre-update addressing is available. */
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

loop = LABEL();

if (!has_pre)
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }
else
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
/* Turn the pointer into a byte distance from the subject start ... */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* ... and, for wide code units, into a code unit count. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

/* Copy the integer value to the output buffer */
SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket <= 1)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  return;
  }

/* Probe for a pre-update load that steps two words downwards. */
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS;

/* With pre-update the pointer starts one pair above the first slot read. */
GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (has_pre ? topbracket : topbracket - 1) * 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

/* OVECTOR(0) is never equal to SLJIT_S2. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
else
  {
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }

OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
3832
3833
/* Emits the exit sequence for a partial match: SLJIT_RETURN_REG is set to
PCRE2_ERROR_PARTIAL, startchar_ptr of jit_arguments is updated, and the
first two ovector entries of the match data receive the code unit offsets
of the partial match start and of STR_END. Execution then continues at
quit.

The start is taken from the hit_start local in PCRE2_JIT_PARTIAL_SOFT mode
and from the start_ptr local otherwise. */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
sljit_s32 size_store_op = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? SLJIT_R1 : ARGUMENTS;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (args_reg != ARGUMENTS)
  OP1(SLJIT_MOV, args_reg, 0, ARGUMENTS, 0);

/* SLJIT_R2 = start of the partial match. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* First entry: offset of the match start. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(size_store_op, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* Second entry: offset of the subject end. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(size_store_op, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3870
3871
/* In the partial matching modes, emits code that stores STR_PTR into the
start_used_ptr local unless the comparison described below skips the store.
Emits nothing in other modes. May destroy TMP1. */
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *keep_value;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* The value of -1 must be kept for start_used_ptr! Comparing
  start_used_ptr + 1 achieves this: the jump is taken if
  start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  keep_value = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  }
else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  keep_value = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else
  return;

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(keep_value);
}
3894
3895
/* Tells whether the character at cc has an other case that differs from
the character itself.

With UTF or UCP enabled, characters above 127 are looked up with
UCD_OTHERCASE and the rest in common->fcc. Otherwise only characters
accepted by MAX_255 are looked up in common->fcc; others have no other
case. */
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int c;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  return (c > 127) ? (c != UCD_OTHERCASE(c)) : (common->fcc[c] != c);
  }
#endif

c = *cc;
if (!MAX_255(c))
  return FALSE;

return common->fcc[c] != c;
}
3920
3921
/* Returns the other case of c: UCD_OTHERCASE(c) for characters above 127
when UTF or UCP is enabled, otherwise the result of the common->fcc table
lookup, which falls back to c itself. */
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
#ifdef SUPPORT_UNICODE
if (c > 127 && (common->utf || common->ucp))
  return UCD_OTHERCASE(c);
#endif

return TABLE_GET(c, common->fcc, c);
}
3930
3931
/* Detects whether the character at cc and its other case differ in exactly
one bit. The character must have a different other case.

Returns 0 when more than one bit differs. Otherwise the low 8 bits of the
result hold the differing bit as a mask within one byte and the bits above
hold an index; presumably the index locates the byte / code unit that
contains the bit (verify against the callers). */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int unit_index;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  oc = (c > 127) ? UCD_OTHERCASE(c) : common->fcc[c];
  }
else
#endif
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }

SLJIT_ASSERT(c != oc);

bit = c ^ oc;

#ifndef EBCDIC
/* Optimized for English alphabet. */
if (bit == 0x20 && c <= 127)
  return 0x20;
#endif

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Each unit that is stepped over accounts for six bits of the value. */
  unit_index = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    unit_index--;
    bit >>= 6;
    }
  return (unit_index << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in the low ten bits is reported with index 2 or 3. */
  if (bit < (1u << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));

  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? bit : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
4009
4010
/* Emits the check for a partial match. Does not modify registers and emits
nothing in PCRE2_JIT_COMPLETE mode.

When force is FALSE and empty partial matches are not allowed, the action
is skipped if start_used_ptr >= STR_PTR. Otherwise, in soft mode, it is
skipped if start_used_ptr is -1. The action itself clears the hit_start
local in soft mode and jumps to the partial match handler in hard mode.
force must not be set in complete mode. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (!(force || common->allow_empty_partial))
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (soft)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (soft)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip != NULL)
  JUMPHERE(skip);
}
4039
4040
/* Emits an end-of-subject check; jumps taken when the end is reached are
collected in end_reached. Does not affect registers. Usually used in a
tight spot.

In complete mode this is a single STR_PTR >= STR_END test. In the partial
modes the end is reported through end_reached when
start_used_ptr >= STR_PTR; otherwise soft mode clears the hit_start local
before reporting it, while hard mode jumps to the partial match handler. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

JUMPHERE(not_at_end);
}
4069
4070
/* Emits an end-of-subject check whose failure paths are collected in
backtracks.

In complete mode this is a single STR_PTR >= STR_END test. In the partial
modes, once the end is reached: if empty partial matches are not allowed
and start_used_ptr >= STR_PTR, or (when they are allowed, in soft mode)
start_used_ptr is -1, a plain backtrack is taken. Otherwise soft mode
clears the hit_start local and backtracks, while hard mode jumps to the
partial match handler. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
BOOL soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (!common->allow_empty_partial)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (soft)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));

if (soft)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

JUMPHERE(not_at_end);
}
4102
4103
/* Emits the partial match bookkeeping for a position where the subject end
has been reached. In soft mode the hit_start local is cleared unless
start_used_ptr >= STR_PTR. In hard mode control goes to the partial match
handler when start_used_ptr < STR_PTR. Emits nothing in other modes. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* Use the handler label directly when it already exists. */
if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
4123
4124
/* Emits a jump to label while STR_PTR is still below STR_END; the code that
follows the jump (reached at the subject end) performs the partial match
processing of process_partial_match. */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;

/* More input available: continue at label. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

/* End of subject reached. */
process_partial_match(common);
}
4131
4132
/* Reads the character at STR_PTR into TMP1 and keeps STR_PTR. Does not
check STR_END. TMP2, dst and RETURN_ADDR are destroyed.

  max         when max is below the first value that needs decoding, only
              the first code unit is loaded
  dst, dstw   operand used to save STR_PTR around the UTF helper call
  backtracks  when not NULL and invalid UTF is handled, receives a jump
              taken for invalid characters */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

/* Values below 0x80 are complete; the rest is decoded by a helper that
expects STR_PTR to be advanced past the first unit. */
single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Undo the move: this is only a peek. */
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
if (backtracks && common->invalid_utf)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (!common->invalid_utf)
  {
  /* TMP2 contains the high surrogate. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Anything in the surrogate range goes through the checking helper. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks == NULL)
  {
  /* No failure path: replace out-of-range values and surrogates. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  }
else
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4215
4216
/* Reads the character that precedes STR_PTR into TMP1 without moving
STR_PTR. TMP2 must contain the start of the subject buffer. Affects TMP1,
TMP2 and RETURN_ADDR.

  max         in the 8 and 16 bit UTF paths, a max below the first value
              that needs decoding limits the read to one code unit
  backtracks  receives the jumps taken for invalid characters when invalid
              UTF is handled; the 8 and 16 bit paths accept NULL here */
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

/* Values below 0x80 are complete; the rest is decoded by a helper. */
single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
if (!common->invalid_utf)
  add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
else
  {
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

if (!common->invalid_utf)
  {
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
  /* TMP2 contains the low surrogate. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Values below 0xd800 are complete; the rest goes to the helper. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf)
  return;

/* Fail on values past 0x10ffff and on surrogates. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4283
4284
/* Option bits for the options argument of read_char. The notes describe
what the 8 bit UTF path of read_char does with them. */

/* STR_PTR has to be moved past the whole character: the early exit for a
small max is disabled and STR_PTR is advanced by the decoded length. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* With invalid UTF handling, the newline variant of the helper is used. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* The invalid UTF helper is bypassed even when common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
4288
4289
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
4290
jump_list **backtracks, sljit_u32 options)
4291
{
4292
/* Reads the precise value of a character into TMP1, if the character is
4293
between min and max (c >= min && c <= max). Otherwise it returns with a value
4294
outside the range. Does not check STR_END. */
4295
DEFINE_COMPILER;
4296
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4297
struct sljit_jump *jump;
4298
#endif
4299
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4300
struct sljit_jump *jump2;
4301
#endif
4302
4303
SLJIT_UNUSED_ARG(min);
4304
SLJIT_UNUSED_ARG(max);
4305
SLJIT_UNUSED_ARG(backtracks);
4306
SLJIT_UNUSED_ARG(options);
4307
SLJIT_ASSERT(min <= max);
4308
4309
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4310
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4311
4312
#ifdef SUPPORT_UNICODE
4313
#if PCRE2_CODE_UNIT_WIDTH == 8
4314
if (common->utf)
4315
{
4316
if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4317
4318
if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4319
{
4320
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4321
4322
if (options & READ_CHAR_UTF8_NEWLINE)
4323
add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4324
else
4325
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4326
4327
if (backtracks != NULL)
4328
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4329
JUMPHERE(jump);
4330
return;
4331
}
4332
4333
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4334
if (min >= 0x10000)
4335
{
4336
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4337
if (options & READ_CHAR_UPDATE_STR_PTR)
4338
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4339
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4340
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
4341
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4342
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4343
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4344
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4345
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4346
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4347
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4348
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4349
if (!(options & READ_CHAR_UPDATE_STR_PTR))
4350
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4351
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4352
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4353
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4354
JUMPHERE(jump2);
4355
if (options & READ_CHAR_UPDATE_STR_PTR)
4356
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4357
}
4358
else if (min >= 0x800 && max <= 0xffff)
4359
{
4360
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4361
if (options & READ_CHAR_UPDATE_STR_PTR)
4362
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4363
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4364
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
4365
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4366
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4367
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4368
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4369
if (!(options & READ_CHAR_UPDATE_STR_PTR))
4370
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4371
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4372
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4373
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4374
JUMPHERE(jump2);
4375
if (options & READ_CHAR_UPDATE_STR_PTR)
4376
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4377
}
4378
else if (max >= 0x800)
4379
{
4380
add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
4381
}
4382
else if (max < 128)
4383
{
4384
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4385
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4386
}
4387
else
4388
{
4389
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4390
if (!(options & READ_CHAR_UPDATE_STR_PTR))
4391
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4392
else
4393
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4394
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4395
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4396
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4397
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4398
if (options & READ_CHAR_UPDATE_STR_PTR)
4399
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4400
}
4401
JUMPHERE(jump);
4402
}
4403
#elif PCRE2_CODE_UNIT_WIDTH == 16
4404
if (common->utf)
4405
{
4406
if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4407
4408
if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4409
{
4410
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4411
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4412
4413
if (options & READ_CHAR_UTF8_NEWLINE)
4414
add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4415
else
4416
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4417
4418
if (backtracks != NULL)
4419
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4420
JUMPHERE(jump);
4421
return;
4422
}
4423
4424
if (max >= 0x10000)
4425
{
4426
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4427
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4428
/* TMP2 contains the high surrogate. */
4429
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4430
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4431
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4432
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4433
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4434
JUMPHERE(jump);
4435
return;
4436
}
4437
4438
/* Skip low surrogate if necessary. */
4439
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4440
4441
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4442
{
4443
if (options & READ_CHAR_UPDATE_STR_PTR)
4444
OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4445
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4446
if (options & READ_CHAR_UPDATE_STR_PTR)
4447
SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
4448
if (max >= 0xd800)
4449
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
4450
}
4451
else
4452
{
4453
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4454
if (options & READ_CHAR_UPDATE_STR_PTR)
4455
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4456
if (max >= 0xd800)
4457
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4458
JUMPHERE(jump);
4459
}
4460
}
4461
#elif PCRE2_CODE_UNIT_WIDTH == 32
4462
if (common->invalid_utf)
4463
{
4464
if (backtracks != NULL)
4465
{
4466
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4467
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4468
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4469
}
4470
else
4471
{
4472
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4473
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4474
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4475
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4476
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4477
}
4478
}
4479
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4480
#endif /* SUPPORT_UNICODE */
4481
}
4482
4483
static void skip_valid_char(compiler_common *common)
{
/* Generates code that moves STR_PTR over one character without decoding it.
No validation and no STR_END check is emitted. In UTF-8/16 mode TMP1 is
overwritten. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* For lead bytes >= 0xc0 the utf8_table4 entry is added to STR_PTR. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* TMP1 becomes 1 for a high surrogate (0xd800..0xdbff), 0 otherwise; it
  is then scaled to the size of a code unit and added to STR_PTR. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  JUMPHERE(single_unit);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

/* Every character is exactly one code unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4513
4514
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4515
4516
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Checks whether looking at character codes below 128 is sufficient to
decide a match. The bitset is 32 bytes long; this holds when each of its
bytes from index 16 to 31 is uniform: 0xff when nclass is set, 0 otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
4532
4533
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Generates code that loads the ctypes entry of the next character into TMP1
when the character is less than 128; for any other character TMP1 ends up as
zero. Only valid in UTF mode. Does not check STR_END. When negated is FALSE
only the first byte is stepped over. When negated is TRUE, STR_PTR is moved
over the remaining bytes of a multi-byte character as well, and in invalid
UTF mode a malformed sequence jumps to backtracks. */
DEFINE_COMPILER;
struct sljit_jump *is_ascii;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

is_ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (common->invalid_utf)
  {
  /* The validating helper takes the first byte in TMP1. Its result is
  needed only for the validity check, the type is then forced to zero. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }
else
  {
  /* Add the utf8_table4 entry of the first byte to STR_PTR. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }

JUMPHERE(is_ascii);
}
4568
4569
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4570
4571
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Generates code that reads the next character, loads its ctypes entry into
TMP1 (zero for characters above 255) and updates STR_PTR. Does not check
STR_END, except where the invalid UTF paths must read a further code unit.

  backtracks  list which receives the jumps taken for malformed input
              (used only in invalid UTF mode)
  negated     TRUE when characters above 255 can still match, so STR_PTR must
              be moved over the whole character; FALSE when only characters
              below 256 are of interest */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode a character below 256. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    /* If TMP1 is in 0x80-0xbf range, TMP2 is also increased by (0x2 << 6),
    so TMP2 becomes the character value. */
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be a continuation byte (0x80-0xbf). The check
    has to be done on TMP1 (second byte - 0x80): TMP2 already holds the
    combined value, which is at least 0x80 for every well formed sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    /* The ctypes array contains only 256 values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* NOTE(review): the header of do_utfreadchar_invalid() says that TMP1
    holds the first byte on entry, and read_char7_type() copies TMP2 to TMP1
    before the same call; here TMP1 holds the ctypes value loaded above.
    Confirm whether a move of TMP2 to TMP1 is missing. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    /* The ctypes array contains only 256 values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a code unit in 0xd800-0xdfff must be a high surrogate
  which is followed by a low surrogate inside the subject. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4689
4690
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Generates code that moves STR_PTR one character backwards. STR_PTR and TMP1
are affected. TMP2 is left alone when must_be_valid is TRUE; otherwise TMP2
has to hold the start of the subject buffer on entry and its value is lost.
STR_PTR is not modified for invalid character sequences. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *step_back;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Validating variant: anything except a plain ASCII byte is handled by
    the shared helper, which leaves zero in TMP1 on failure. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Keep stepping back while the byte just passed is a continuation byte
  (10xxxxxx). */
  step_back = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_back);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Validating variant: code units in 0xd800..0xdfff are handled by the
    shared helper, which leaves zero in TMP1 on failure. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Skip low surrogate if necessary: TMP1 becomes 1 for a low surrogate
  (0xdc00..0xdfff) and 0 otherwise, scaled to the size of a code unit. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values >= 0x110000 are rejected before STR_PTR is changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: TMP1 becomes 1 for an acceptable value and 0
  otherwise, and STR_PTR is moved back by that many code units. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Every character is exactly one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4776
4777
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Generates a newline test for the character passed in TMP1. A jump is added
to backtracks when the character is a newline (jumpifmatch is TRUE) or when
it is not a newline (jumpifmatch is FALSE). TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A fixed, single code unit newline. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the character is compared to CR and NL. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

/* Jump to backtracks only when it is neither CR nor NL. */
is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
4809
4810
#ifdef SUPPORT_UNICODE
4811
4812
#if PCRE2_CODE_UNIT_WIDTH == 8
4813
static void do_utfreadchar(compiler_common *common)
{
/* Shared fast-call routine which decodes a UTF-8 character without any
validation. On entry TMP1 holds the first byte of the character (>= 0xc0)
and the following bytes are read starting at STR_PTR. On return TMP1 holds
the character value and STR_PTR has been moved past the bytes consumed.
TMP2 is overwritten. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Append the six payload bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length is found by searching for the first zero bit of the first
byte, which has been shifted to the 0x800 position by now. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: remove the shifted 110 prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Append the six payload bits of the third byte. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence: remove the shifted 1110 prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: the shifted 11110 prefix is removed before the
payload bits of the fourth byte are appended. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4857
4858
static void do_utfreadtype8(compiler_common *common)
{
/* Shared fast-call routine which computes the character type of a UTF-8
character without validation. On entry TMP2 holds the first byte of the
character (>= 0xc0). On return TMP1 holds the ctypes entry of the character,
or zero when the character is above 255, and STR_PTR has been moved past the
character. TMP2 is overwritten. */
DEFINE_COMPILER;
struct sljit_jump *three_or_more;
struct sljit_jump *above_255;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* A set 0x20 bit in the first byte means more than two bytes. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
three_or_more = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point: more than 0x3 there gives a
character value of 256 or more. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte sequence, but no type is stored for this character. */
JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256, so a longer sequence is
just skipped using the utf8_table4 entry of its first byte. */
JUMPHERE(three_or_more);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4893
4894
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Shared fast-call routine which decodes a UTF-8 character and validates it
at the same time (slow path). On entry TMP1 holds the first byte of the
character (>= 0xc0). On return TMP1 holds the character value, or
INVALID_UTF_CHAR for a malformed sequence. STR_PTR is undefined for invalid
characters. TMP2 is overwritten. */
DEFINE_COMPILER;
sljit_s32 n;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *longer;
struct sljit_jump *near_end;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid_label;
/* Jumps to the common failure exit. An entry is NULL when the check was
done with a conditional select instead of a branch. */
struct sljit_jump *invalid[11];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer, so
STR_PTR is moved forward first and the bytes are read at negative offsets. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Fewer than three bytes are left: use the careful path further below. */
near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only one byte was consumed after the first one. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
  invalid[2] = NULL;
  }
else
  invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* The careful path below joins here as well. */
three_byte_tail = LABEL();

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
  invalid[3] = NULL;
  }
else
  invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  invalid[4] = NULL;
  }
else
  invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
  invalid[5] = NULL;
  }
else
  invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  invalid[6] = NULL;
  }
else
  invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Careful path, taken close to the end of the subject: every read is
preceded by its own STR_END check. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(longer);
invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  invalid[10] = NULL;
  }
else
  invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later (after three_byte_tail). */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible here, so anything which is not a
three byte value falls through to the failure exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common failure exit: bind all the recorded jumps to it. */
invalid_label = LABEL();
for (n = 0; n < 11; n++)
  sljit_set_label(invalid[n], invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5052
5053
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Shared fast-call routine: slow decoding of a UTF-8 character, specialized
for newline detection. On entry TMP1 holds the first byte of the character
(>= 0xc0). On return TMP1 holds 0x85, 0x2028 or 0x2029 when one of these
newlines was recognised, and INVALID_UTF_CHAR in every other case. TMP2 is
overwritten. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *subject_end;
struct sljit_jump *lead_c2;
struct sljit_jump *lead_e2;
struct sljit_jump *not_continuation;
struct sljit_jump *subject_end_in_loop;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  subject_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  /* The first call only asks whether a post-update load is supported, the
  second one emits it. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last byte read does not belong to this character. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(subject_end);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* NLTYPE_ANY: read the second byte, then select by the first byte. */
subject_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

lead_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
lead_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the byte read last. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
not_continuation = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
subject_end_in_loop = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last byte read does not belong to this character. */
JUMPHERE(not_continuation);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

three_byte_exit = LABEL();
JUMPHERE(subject_end);
JUMPHERE(subject_end_in_loop);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(lead_c2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(lead_e2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte has to be a continuation byte. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* The result is 0x2000 combined with the low six bits of the third byte. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5143
5144
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back (UTF-8, invalid UTF allowed).

Emits a fast-call helper. As used below:
  - TMP1 on entry is a code unit value; anything >= 0xc0 is rejected
    immediately (presumably the byte the caller has just stepped over -
    confirm against the call site).
  - TMP2 is used as the lowest address STR_PTR may reach (presumably the
    start of the subject - confirm against the call site).

Result: TMP1 is 1 when a lead byte of a two, three or four byte sequence
was found, and STR_PTR then points to that lead byte. TMP1 is 0 when the
preceding bytes do not form such a sequence; every invalid exit leaves
STR_PTR one code unit above its value on entry. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Step back three code units at once; the bytes are then read at
offsets 2, 1 and 0. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* Fewer than three units are available: use the bounded path below. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* After the subtraction of 0xc0 only 0x80-0xbf map to -0x40..-1;
everything else compares as less (unsigned) and is rejected. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Bounded path: every read is preceded by a check against TMP2. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Invalid exit for the bounded path: STR_PTR is two units below its
entry value here, so add three. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Common invalid exit: STR_PTR is three units below its entry value
here, so add four. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5239
5240
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits a fast-call helper that decodes the two, three or four byte UTF-8
sequence ending just before STR_PTR and leaves the result in TMP1. No
validation is performed and TMP2 is clobbered. The three stages below
fall through into each other: each one shifts the value accumulated so
far left by six bits and merges the next byte. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Byte at -2 in 0xc0-0xdf: lead byte of a two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Byte at -3 in 0xe0-0xef: lead byte of a three byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Otherwise a four byte sequence: the byte at -3 is treated as a
continuation byte and the lead byte is read from -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5276
5277
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

UTF-8 variant that tolerates invalid input. As used below:
  - TMP1 on entry is a code unit value; anything >= 0xc0 is rejected
    (presumably the byte at STR_PTR[-1] - confirm against the call site).
  - TMP2 on entry is a lower address bound (presumably the start of the
    subject - confirm against the call site); it is clobbered.

On return TMP1 holds the decoded character, or INVALID_UTF_CHAR when the
preceding bytes are not a well-formed sequence.

Three copies of the decoder are emitted, selected by how many code units
lie between the lower bound and STR_PTR, so that no byte below the bound
is ever read. The bounded copies jump into the unbounded one through
two_byte_entry / three_byte_entry. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
/* Taken when fewer than four code units are available. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject 0xd800-0xdfff. With conditional move the value is replaced so
that the second range test below turns it into INVALID_UTF_CHAR;
exit_invalid[2] is then left NULL. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject values below 0x800 (too small for a three byte sequence). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four code units available: try at most three. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three code units available: try at most two. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Some entries of exit_invalid[] are NULL when conditional moves are
used; all eight are passed to sljit_set_label() regardless. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5408
5409
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5410
5411
#if PCRE2_CODE_UNIT_WIDTH == 16
5412
5413
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character. TMP1 contains the first half
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

TMP2 is shifted left by ten bits and 0x10000 is added to it before it
is combined with the second half, so on entry it presumably holds the
first half already reduced by 0xd800 - confirm against the call site.
TMP1 is INVALID_UTF_CHAR on return when the first half is >= 0xdc00,
when STR_PTR has reached STR_END, or when the second half is outside
0xdc00-0xdfff. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5444
5445
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800). Return
char value in TMP1.

The pair is not actually decoded: when the next code unit is in
0xdc00-0xdfff, STR_PTR is advanced past it and TMP1 is set to the
constant 0x10000; when it is not, STR_PTR is left unchanged and TMP1
is still 0x10000. TMP1 is INVALID_UTF_CHAR when STR_PTR has reached
STR_END or the first half is >= 0xdc00. TMP2 is clobbered. */

DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the next unit is in 0xdc00-0xdfff, otherwise 0;
it is then scaled to a byte offset and added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5476
5477
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back (UTF-16, invalid UTF allowed).

As used below: TMP1 on entry is rejected when it is below 0x400
(presumably a code unit already reduced by some base - confirm against
the call site), and TMP2 is a lower address bound for STR_PTR.

On success the code unit at STR_PTR[-1] is in 0xd800-0xdbff, STR_PTR is
moved back by one code unit and TMP1 is 1. Otherwise STR_PTR is moved
forward by one code unit and TMP1 is 0. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5504
5505
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

UTF-16 variant that tolerates invalid input. TMP1 on entry is a code
unit value (presumably the one at STR_PTR[-1] - confirm against the
call site) and TMP2 is a lower address bound; TMP2 is clobbered.

  - TMP1 >= 0xe000: returned unchanged.
  - TMP1 in 0xdc00-0xdfff with a unit in 0xd800-0xdbff at STR_PTR[-2]
    (and at least two units above the bound): the pair is combined
    into a single value in TMP1.
  - anything else: TMP1 is INVALID_UTF_CHAR. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5536
5537
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5538
5539
/* UCD_BLOCK_SIZE must be 128 (see the assert below).
UCD_BLOCK_MASK extracts the offset of a character inside its block and
UCD_BLOCK_SHIFT is log2 of the block size. */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7
5542
5543
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character that comes in TMP1.
Returns the value read from the ucd_stage2 table (the UCD record
index) in TMP2. TMP1 is clobbered: on return it holds the index that
was used for the ucd_stage2 lookup, not a character type.

In 32 bit non-UTF mode, characters above MAX_UTF_CODE_POINT are looked
up as UNASSIGNED_UTF_CHAR. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: the block number (TMP1 >> UCD_BLOCK_SHIFT) indexes the
table of 16 bit entries, hence the shift left by one. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: the offset inside the block is combined with the stage 1
value shifted by UCD_BLOCK_SHIFT. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
sljit_emit_op2_shift(compiler, SLJIT_ADD | SLJIT_SHL_IMM | SLJIT_SRC2_UNDEFINED, TMP1, 0, TMP1, 0, TMP2, 0, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5581
5582
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character that comes in TMP1.
Returns chartype in TMP1. TMP2 is left holding the UCD record index
after the first step of the multiplication by 12 described below
(i.e. not the plain index).

In 32 bit non-UTF mode, characters above MAX_UTF_CODE_POINT are looked
up as UNASSIGNED_UTF_CHAR. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Two stage table lookup: ucd_stage1 is indexed by the block number,
ucd_stage2 by the stage 1 value combined with the offset in the block. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
sljit_emit_op2_shift(compiler, SLJIT_ADD | SLJIT_SHL_IMM | SLJIT_SRC2_UNDEFINED, TMP1, 0, TMP1, 0, TMP2, 0, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 + (TMP2 << 1)) << 2.
The final shift by two is folded into the memory operand below. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
sljit_emit_op2_shift(compiler, SLJIT_ADD | SLJIT_SHL_IMM | SLJIT_SRC2_UNDEFINED, TMP2, 0, TMP2, 0, TMP2, 0, 1);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 2);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5626
5627
#endif /* SUPPORT_UNICODE */
5628
5629
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the prologue of the main matching loop and the code that
advances STR_PTR to the next start position.

The emitted code consists of:
  1. An optional setup section. With PCRE2_FIRSTLINE the end of the
     first line is searched and stored at common->match_end_ptr;
     otherwise, with PCRE2_USE_OFFSET_LIMIT, the offset limit is
     converted to an address (capped at STR_END) and stored there, and
     a jump to common->abort with PCRE2_ERROR_NOMATCH is added for the
     case when the limit is below STR_PTR.
  2. An unconditional jump (start) over the advance code, so the first
     entry does not move STR_PTR.
  3. The advance code, beginning at the returned label: it moves
     STR_PTR forward by one character, with special handling for two
     code unit newlines and for UTF-8 / UTF-16 sequences.

Returns the label at the beginning of the advance code. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is saved in TMP3 and restored after the search. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two code unit newline: compare adjacent unit pairs. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* When the limit is PCRE2_UNSET, STR_END is stored as the limit. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + limit, capped at STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* The first entry jumps over the advance code below. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached when the current unit equals the first unit of the
  newline: skip it, and skip the next unit as well when it equals the
  second unit of the newline. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (newlinecheck) readuchar = TRUE;

if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  /* The unit just read is not a continuation unit: step back to it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* For lead bytes >= 0xc0 the number of additional bytes is read
  from utf8_table4. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  /* The unit just read is not in 0xdc00-0xdfff: step back to it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* Skip one more unit when the unit just passed is in
  0xd800-0xdbff. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
    }
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
JUMPHERE(start);

if (newlinecheck)
  {
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
5819
5820
5821
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
/* Adds chr to the set of characters stored in chars.

chars->count is the number of distinct characters collected so far;
the value 255 marks a set which accepts any character and is never
changed again. Adding a character which is already present is a no-op.
When the set already holds MAX_DIFF_CHARS characters, it is turned
into "any" instead of growing. chars->last_count is updated only when
a new character is actually stored and last is set. */
sljit_u32 i, count = chars->count;

if (count == 255)
  return;

if (count == 0)
  {
  chars->count = 1;
  chars->chars[0] = chr;

  if (last)
    chars->last_count = 1;
  return;
  }

for (i = 0; i < count; i++)
  if (chars->chars[i] == chr)
    return;

if (count >= MAX_DIFF_CHARS)
  {
  chars->count = 255;
  return;
  }

chars->chars[count] = chr;
chars->count = count + 1;

if (last)
  chars->last_count++;
}
5854
5855
/* Number of entries in the stacks used by scan_prefix().

Value can be increased if needed. Patterns
such as /(a|){33}b/ can exhaust the stack.

Note: /(a|){29}b/ already stops scan_prefix()
because it reaches the maximum step_count. */
#define SCAN_PREFIX_STACK_END 32
5861
5862
/*
Scan prefix stores the prefix string in the chars array.
Each element of the chars array is either a small character
set or "any" (count is set to 255).

Examples (the chars array is represented by a simple regex):

/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3)
/a[a-z]b+c/ prefix: a.b (length: 3)
/ab?cd/ prefix: a[bc][cd] (length: 3)
/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2)

The length is returned by scan_prefix(). The length is
less than or equal to the minimum length of the pattern.
*/
5877
5878
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars)
5879
{
5880
fast_forward_char_data *chars_start = chars;
5881
fast_forward_char_data *chars_end = chars + MAX_N_CHARS;
5882
PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END];
5883
fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END];
5884
sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END];
5885
BOOL last, any, class, caseless;
5886
int stack_ptr, step_count, repeat, len, len_save;
5887
sljit_u32 chr; /* Any unicode character. */
5888
sljit_u8 *bytes, *bytes_end, byte;
5889
PCRE2_SPTR alternative, cc_save, oc;
5890
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5891
PCRE2_UCHAR othercase[4];
5892
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5893
PCRE2_UCHAR othercase[2];
5894
#else
5895
PCRE2_UCHAR othercase[1];
5896
#endif
5897
5898
repeat = 1;
5899
stack_ptr = 0;
5900
step_count = 10000;
5901
while (TRUE)
5902
{
5903
if (--step_count == 0)
5904
return 0;
5905
5906
SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS);
5907
5908
if (chars >= chars_end)
5909
{
5910
if (stack_ptr == 0)
5911
return (int)(chars_end - chars_start);
5912
5913
--stack_ptr;
5914
cc = cc_stack[stack_ptr];
5915
chars = chars_stack[stack_ptr];
5916
5917
if (chars >= chars_end)
5918
continue;
5919
5920
if (next_alternative_stack[stack_ptr] != 0)
5921
{
5922
/* When an alternative is processed, the
5923
next alternative is pushed onto the stack. */
5924
SLJIT_ASSERT(*cc == OP_ALT);
5925
alternative = cc + GET(cc, 1);
5926
if (*alternative == OP_ALT)
5927
{
5928
SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END);
5929
SLJIT_ASSERT(chars_stack[stack_ptr] == chars);
5930
SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1);
5931
cc_stack[stack_ptr] = alternative;
5932
stack_ptr++;
5933
}
5934
cc += 1 + LINK_SIZE;
5935
}
5936
}
5937
5938
last = TRUE;
5939
any = FALSE;
5940
class = FALSE;
5941
caseless = FALSE;
5942
5943
switch (*cc)
5944
{
5945
case OP_CHARI:
5946
caseless = TRUE;
5947
PCRE2_FALLTHROUGH /* Fall through */
5948
case OP_CHAR:
5949
last = FALSE;
5950
cc++;
5951
break;
5952
5953
case OP_SOD:
5954
case OP_SOM:
5955
case OP_SET_SOM:
5956
case OP_NOT_WORD_BOUNDARY:
5957
case OP_WORD_BOUNDARY:
5958
case OP_EODN:
5959
case OP_EOD:
5960
case OP_CIRC:
5961
case OP_CIRCM:
5962
case OP_DOLL:
5963
case OP_DOLLM:
5964
case OP_NOT_UCP_WORD_BOUNDARY:
5965
case OP_UCP_WORD_BOUNDARY:
5966
/* Zero width assertions. */
5967
cc++;
5968
continue;
5969
5970
case OP_ASSERT:
5971
case OP_ASSERT_NOT:
5972
case OP_ASSERTBACK:
5973
case OP_ASSERTBACK_NOT:
5974
case OP_ASSERT_NA:
5975
case OP_ASSERTBACK_NA:
5976
case OP_ASSERT_SCS:
5977
cc = bracketend(cc);
5978
continue;
5979
5980
case OP_PLUSI:
5981
case OP_MINPLUSI:
5982
case OP_POSPLUSI:
5983
caseless = TRUE;
5984
PCRE2_FALLTHROUGH /* Fall through */
5985
case OP_PLUS:
5986
case OP_MINPLUS:
5987
case OP_POSPLUS:
5988
cc++;
5989
break;
5990
5991
case OP_EXACTI:
5992
caseless = TRUE;
5993
PCRE2_FALLTHROUGH /* Fall through */
5994
case OP_EXACT:
5995
repeat = GET2(cc, 1);
5996
last = FALSE;
5997
cc += 1 + IMM2_SIZE;
5998
break;
5999
6000
case OP_QUERYI:
6001
case OP_MINQUERYI:
6002
case OP_POSQUERYI:
6003
caseless = TRUE;
6004
PCRE2_FALLTHROUGH /* Fall through */
6005
case OP_QUERY:
6006
case OP_MINQUERY:
6007
case OP_POSQUERY:
6008
len = 1;
6009
cc++;
6010
#ifdef SUPPORT_UNICODE
6011
if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
6012
#endif
6013
if (stack_ptr >= SCAN_PREFIX_STACK_END)
6014
{
6015
chars_end = chars;
6016
continue;
6017
}
6018
6019
cc_stack[stack_ptr] = cc + len;
6020
chars_stack[stack_ptr] = chars;
6021
next_alternative_stack[stack_ptr] = 0;
6022
stack_ptr++;
6023
6024
last = FALSE;
6025
break;
6026
6027
case OP_KET:
6028
cc += 1 + LINK_SIZE;
6029
continue;
6030
6031
case OP_ALT:
6032
cc += GET(cc, 1);
6033
continue;
6034
6035
case OP_ONCE:
6036
case OP_BRA:
6037
case OP_BRAPOS:
6038
case OP_CBRA:
6039
case OP_CBRAPOS:
6040
alternative = cc + GET(cc, 1);
6041
if (*alternative == OP_ALT)
6042
{
6043
if (stack_ptr >= SCAN_PREFIX_STACK_END)
6044
{
6045
chars_end = chars;
6046
continue;
6047
}
6048
6049
cc_stack[stack_ptr] = alternative;
6050
chars_stack[stack_ptr] = chars;
6051
next_alternative_stack[stack_ptr] = 1;
6052
stack_ptr++;
6053
}
6054
6055
if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
6056
cc += IMM2_SIZE;
6057
cc += 1 + LINK_SIZE;
6058
continue;
6059
6060
case OP_CLASS:
6061
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6062
if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
6063
{
6064
chars_end = chars;
6065
continue;
6066
}
6067
#endif
6068
class = TRUE;
6069
break;
6070
6071
case OP_NCLASS:
6072
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6073
if (common->utf)
6074
{
6075
chars_end = chars;
6076
continue;
6077
}
6078
#endif
6079
class = TRUE;
6080
break;
6081
6082
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
6083
case OP_XCLASS:
6084
case OP_ECLASS:
6085
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6086
if (common->utf)
6087
{
6088
chars_end = chars;
6089
continue;
6090
}
6091
#endif
6092
any = TRUE;
6093
cc += GET(cc, 1);
6094
break;
6095
#endif
6096
6097
case OP_DIGIT:
6098
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6099
if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6100
{
6101
chars_end = chars;
6102
continue;
6103
}
6104
#endif
6105
any = TRUE;
6106
cc++;
6107
break;
6108
6109
case OP_WHITESPACE:
6110
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6111
if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6112
{
6113
chars_end = chars;
6114
continue;
6115
}
6116
#endif
6117
any = TRUE;
6118
cc++;
6119
break;
6120
6121
case OP_WORDCHAR:
6122
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6123
if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6124
{
6125
chars_end = chars;
6126
continue;
6127
}
6128
#endif
6129
any = TRUE;
6130
cc++;
6131
break;
6132
6133
case OP_NOT:
6134
case OP_NOTI:
6135
cc++;
6136
PCRE2_FALLTHROUGH /* Fall through */
6137
case OP_NOT_DIGIT:
6138
case OP_NOT_WHITESPACE:
6139
case OP_NOT_WORDCHAR:
6140
case OP_ANY:
6141
case OP_ALLANY:
6142
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6143
if (common->utf)
6144
{
6145
chars_end = chars;
6146
continue;
6147
}
6148
#endif
6149
any = TRUE;
6150
cc++;
6151
break;
6152
6153
#ifdef SUPPORT_UNICODE
6154
case OP_NOTPROP:
6155
case OP_PROP:
6156
#if PCRE2_CODE_UNIT_WIDTH != 32
6157
if (common->utf)
6158
{
6159
chars_end = chars;
6160
continue;
6161
}
6162
#endif
6163
any = TRUE;
6164
cc += 1 + 2;
6165
break;
6166
#endif
6167
6168
case OP_TYPEEXACT:
6169
repeat = GET2(cc, 1);
6170
cc += 1 + IMM2_SIZE;
6171
continue;
6172
6173
case OP_NOTEXACT:
6174
case OP_NOTEXACTI:
6175
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6176
if (common->utf)
6177
{
6178
chars_end = chars;
6179
continue;
6180
}
6181
#endif
6182
any = TRUE;
6183
repeat = GET2(cc, 1);
6184
cc += 1 + IMM2_SIZE + 1;
6185
break;
6186
6187
default:
6188
chars_end = chars;
6189
continue;
6190
}
6191
6192
SLJIT_ASSERT(chars < chars_end);
6193
6194
if (any)
6195
{
6196
do
6197
{
6198
chars->count = 255;
6199
chars++;
6200
}
6201
while (--repeat > 0 && chars < chars_end);
6202
6203
repeat = 1;
6204
continue;
6205
}
6206
6207
if (class)
6208
{
6209
bytes = (sljit_u8*) (cc + 1);
6210
cc += 1 + 32 / sizeof(PCRE2_UCHAR);
6211
6212
SLJIT_ASSERT(last == TRUE && repeat == 1);
6213
switch (*cc)
6214
{
6215
case OP_CRQUERY:
6216
case OP_CRMINQUERY:
6217
case OP_CRPOSQUERY:
6218
last = FALSE;
6219
PCRE2_FALLTHROUGH /* Fall through */
6220
case OP_CRSTAR:
6221
case OP_CRMINSTAR:
6222
case OP_CRPOSSTAR:
6223
if (stack_ptr >= SCAN_PREFIX_STACK_END)
6224
{
6225
chars_end = chars;
6226
continue;
6227
}
6228
6229
cc_stack[stack_ptr] = ++cc;
6230
chars_stack[stack_ptr] = chars;
6231
next_alternative_stack[stack_ptr] = 0;
6232
stack_ptr++;
6233
break;
6234
6235
default:
6236
case OP_CRPLUS:
6237
case OP_CRMINPLUS:
6238
case OP_CRPOSPLUS:
6239
break;
6240
6241
case OP_CRRANGE:
6242
case OP_CRMINRANGE:
6243
case OP_CRPOSRANGE:
6244
repeat = GET2(cc, 1);
6245
if (repeat <= 0)
6246
{
6247
chars_end = chars;
6248
continue;
6249
}
6250
6251
last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE));
6252
cc += 1 + 2 * IMM2_SIZE;
6253
break;
6254
}
6255
6256
do
6257
{
6258
if (bytes[31] & 0x80)
6259
chars->count = 255;
6260
else if (chars->count != 255)
6261
{
6262
bytes_end = bytes + 32;
6263
chr = 0;
6264
do
6265
{
6266
byte = *bytes++;
6267
SLJIT_ASSERT((chr & 0x7) == 0);
6268
if (byte == 0)
6269
chr += 8;
6270
else
6271
{
6272
do
6273
{
6274
if ((byte & 0x1) != 0)
6275
add_prefix_char(chr, chars, TRUE);
6276
byte >>= 1;
6277
chr++;
6278
}
6279
while (byte != 0);
6280
chr = (chr + 7) & (sljit_u32)(~7);
6281
}
6282
}
6283
while (chars->count != 255 && bytes < bytes_end);
6284
bytes = bytes_end - 32;
6285
}
6286
6287
chars++;
6288
}
6289
while (--repeat > 0 && chars < chars_end);
6290
6291
repeat = 1;
6292
if (last)
6293
chars_end = chars;
6294
continue;
6295
}
6296
6297
len = 1;
6298
#ifdef SUPPORT_UNICODE
6299
if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
6300
#endif
6301
6302
if (caseless && char_has_othercase(common, cc))
6303
{
6304
#ifdef SUPPORT_UNICODE
6305
if (common->utf)
6306
{
6307
GETCHAR(chr, cc);
6308
if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
6309
{
6310
chars_end = chars;
6311
continue;
6312
}
6313
}
6314
else
6315
#endif
6316
{
6317
chr = *cc;
6318
#ifdef SUPPORT_UNICODE
6319
if (common->ucp && chr > 127)
6320
{
6321
chr = UCD_OTHERCASE(chr);
6322
othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
6323
}
6324
else
6325
#endif
6326
othercase[0] = TABLE_GET(chr, common->fcc, chr);
6327
}
6328
}
6329
else
6330
{
6331
caseless = FALSE;
6332
othercase[0] = 0; /* Stops compiler warning - PH */
6333
}
6334
6335
len_save = len;
6336
cc_save = cc;
6337
while (TRUE)
6338
{
6339
oc = othercase;
6340
do
6341
{
6342
len--;
6343
6344
chr = *cc;
6345
add_prefix_char(*cc, chars, len == 0);
6346
6347
if (caseless)
6348
add_prefix_char(*oc, chars, len == 0);
6349
6350
chars++;
6351
cc++;
6352
oc++;
6353
}
6354
while (len > 0 && chars < chars_end);
6355
6356
if (--repeat == 0 || chars >= chars_end)
6357
break;
6358
6359
len = len_save;
6360
cc = cc_save;
6361
}
6362
6363
repeat = 1;
6364
if (last)
6365
chars_end = chars;
6366
}
6367
}
6368
6369
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6370
/* Emits code that jumps to "label" when the code unit held in "reg" is not
the first code unit of a UTF character:

  8 bit code units:   (unit & 0xc0) == 0x80      (UTF-8 continuation byte)
  16 bit code units:  (unit & 0xfc00) == 0xdc00  (UTF-16 low surrogate)

The masking is done in place, so the original contents of "reg" are lost.
Any other code unit width is rejected at compile time. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
#else
#error "Unknown code width"
#endif
}
6382
#endif
6383
6384
#include "pcre2_jit_simd_inc.h"
6385
6386
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6387
6388
/* Tries to select two prefix positions that can be searched for together by
the SIMD character pair fast forward.

  common  compiler state, passed through to fast_forward_char_pair_simd()
  chars   per-position prefix data; last_count is read as a priority value
          here (the caller converts it before calling this function)
  max     number of valid entries in chars

A candidate pair consists of a later position i and an earlier position j,
at most max_fast_forward_char_pair_offset() apart, both with a priority above
2 and with no character in common. The pair with the largest priority sum is
used.

Returns TRUE when a pair was found and the search code was emitted, FALSE
when no suitable pair exists (nothing is emitted in that case). */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 i, j, max_i = 0, max_j = 0;
sljit_u32 max_pri = 0;
sljit_s32 max_offset = max_fast_forward_char_pair_offset();
PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;

/* The later position is scanned from the end of the prefix backwards. */
for (i = max - 1; i >= 1; i--)
  {
  if (chars[i].last_count > 2)
    {
    a1 = chars[i].chars[0];
    a2 = chars[i].chars[1];
    a_pri = chars[i].last_count;

    /* The earlier position must not be more than max_offset before i. */
    j = i - max_offset;
    if (j < 0)
      j = 0;

    while (j < i)
      {
      b_pri = chars[j].last_count;
      /* Because of ">=", a candidate found later with an equal priority sum
      replaces the current best one. */
      if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
        {
        b1 = chars[j].chars[0];
        b2 = chars[j].chars[1];

        /* The two character sets must be disjoint. */
        if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
          {
          max_pri = a_pri + b_pri;
          max_i = i;
          max_j = j;
          }
        }
      j++;
      }
    }
  }

/* max_pri is still zero when no pair was accepted. */
if (max_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
return TRUE;
}
6433
6434
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6435
6436
/* Emits a search loop which advances STR_PTR until the code unit at
STR_PTR + offset is char1 or char2. Pass the same value twice to search for
a single code unit.

  common  compiler state
  char1   first accepted code unit
  char2   second accepted code unit
  offset  distance (in code units) between the match start and the tested
          position; must be 0 unless the mode is PCRE2_JIT_COMPLETE

When the end of the searched area is reached, PCRE2_JIT_COMPLETE mode jumps
to common->failed_match, while the other modes just leave the loop. If
common->match_end_ptr is set, STR_END is saved in TMP3, replaced by a limit
computed from the stored value for the duration of the search, and restored
before the emitted code continues. TMP1 is used as a scratch register. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here STR_PTR points to the tested position, not to the match start. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* NOTE(review): this looks like STR_END = min(STR_END, TMP1) - confirm
  against the SELECT macro definition. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the offset adjustment and the STR_END replacement. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The characters differ in a single bit: force that bit to one and
    do a single comparison. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
    }
  else
    {
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
    JUMPHERE(match);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* STR_PTR was already advanced by one, so -(offset + 1) addresses the
  match start, which must be the first code unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
  }
#endif

/* Move STR_PTR back to the match start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6521
6522
/* Emits a fast forward search based on the character sets collected by
scan_prefix() for the first (at most MAX_N_CHARS) positions of the pattern.

Three strategies are tried:
  1. a SIMD character pair search, when available and a suitable pair exists;
  2. a 256 entry skip table built from the longest run of at least four
     consecutive positions which all have a limited character set, optionally
     combined with a check of one more position ("offset");
  3. a one or two character search at a single position, done by
     fast_forward_first_char2().

Returns TRUE when search code was emitted, FALSE when the prefix offers
nothing usable. TRUE is also returned when the skip table cannot be
allocated; no search code is emitted in that case. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

max = scan_prefix(common, common->start, chars);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].last_count <= chars[i].count);

  switch (chars[i].count)
    {
    case 0:
    /* Nothing was recorded: handled like an "any character" position. */
    chars[i].count = 255;
    chars[i].last_count = 0;
    break;

    case 1:
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    chars[i].chars[1] = chars[i].chars[0];
    break;

    case 2:
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    /* Two characters which differ in one bit can be checked by a single
    comparison, so they get a higher priority. */
    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    break;

    default:
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
    break;
    }
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of consecutive positions with count < 255. The
position right after the run closes it, hence the "i <= max" bound. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  /* The table is indexed by the low 8 bits of a code unit and holds the
  distance (in bytes) by which STR_PTR is advanced. Units which do not
  occur in the run skip the whole run. */
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    /* A unit allowed i positions before the right end of the run can
    advance by at most i code units; the smallest distance wins. */
    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Select the position (other than range_right) with the highest priority,
provided that it has at most two characters (priority >= 2). */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* STR_END is lowered by max code units while the search runs, so the loads
below at STR_PTR + range_right / offset stay inside the subject. Subjects
shorter than max code units fail immediately. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load the low byte of the code unit at position range_right. On big endian
targets with wider code units this is the last byte of the unit. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance by the table value, and retry until the value is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Additional check of the selected one or two character position. Note
  that STR_PTR is incremented here, so a failed check restarts the search
  from the next position. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* The match start must be the first code unit of a character. When
  offset >= 0, STR_PTR has already been incremented, so the start is at -1;
  otherwise the increment is done (and undone) here. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6750
6751
/* Emits a fast forward search for the first code unit recorded in the
compiled pattern (common->re->first_codeunit). When the pattern has the
PCRE2_FIRSTCASELESS flag, the other case of the code unit is accepted as
well. The search itself is emitted by fast_forward_first_char2() with a
zero offset. */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
PCRE2_UCHAR oc;

/* Without caseless matching both searched characters are the same. */
oc = first_char;
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
  {
  oc = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UNICODE
  /* Above 127 the Unicode other case is used in UTF or UCP mode instead
  of the value taken from the case table. */
  if (first_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(first_char);
#endif
  }

fast_forward_first_char2(common, first_char, oc, 0);
}
6768
6769
/* Emits code which moves STR_PTR forward to the position following the next
newline, unless STR_PTR is at (or before) the "str" position stored in the
jit_arguments structure, in which case nothing is done.

Two cases are handled separately:
  - a fixed newline with a value above 255: (common->newline >> 8) & 0xff
    must be followed by common->newline & 0xff;
  - every other newline type, where the search loop label is also stored in
    common->ff_newline_shortcut.

When common->match_end_ptr is set, STR_END is saved in TMP3, replaced by the
stored value during the search, and restored at the end. TMP1 and TMP2 are
used as scratch registers. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar = NULL;
struct sljit_jump *firstchar;
struct sljit_jump *quit = NULL;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
    {
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit, then one more unless that position is
    already "begin". */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
    /* Skip the two code units of the newline. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
  else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
    {
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit when there are at least two code units
    between "begin" and STR_PTR. */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance until the two code units before STR_PTR form the newline. */
    loop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

    JUMPHERE(quit);
    JUMPHERE(lastchar);
    }

  JUMPHERE(firstchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

/* Step back to the previous character before the search starts. */
if (common->nltype == NLTYPE_ANY)
  move_back(common, NULL, FALSE);
else
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

loop = LABEL();
common->ff_newline_shortcut = loop;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  {
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
    if (common->mode != PCRE2_JIT_COMPLETE)
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    /* Skip the character found. Only a CR continues to the CR LF check
    below ("quit" is taken for anything else). */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    }
  else
    {
    fast_forward_char_simd(common, common->newline, common->newline, 0);

    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      /* NOTE(review): appears to limit STR_PTR to STR_END after the
      increment - confirm against the SELECT macro definition. */
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
      }
    }
  }
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  {
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &newline, FALSE);
  /* The jumps collected by check_newlinechar() continue the search. */
  set_jumps(newline, loop);
  }

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  if (quit == NULL)
    {
    quit = JUMP(SLJIT_JUMP);
    JUMPHERE(foundcr);
    }

  /* A CR was found: when the next code unit is CHAR_NL, skip that too. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }

if (lastchar)
  JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6941
6942
/* Forward declaration: fast_forward_start_bits() calls this function, and its
definition is not in this part of the file. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6943
6944
/* Emits a search loop which advances STR_PTR until the current code unit is
accepted by the start bitmap of the compiled pattern
(common->re->start_bitmap, 32 bytes / 256 bits).

The test is generated by optimize_class() when it can handle the bitmap,
otherwise an explicit bit test is emitted. When the end of the searched area
is reached, PCRE2_JIT_COMPLETE mode jumps to common->failed_match, while the
other modes just leave the loop. If common->match_end_ptr is set, STR_END is
saved in RETURN_ADDR, replaced by a limit computed from the stored value
during the search, and restored at the end. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* The bitmap cannot be indexed by code units >= 255: they are handled
  according to the last bit (255) of the bitmap. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* Test bit (unit & 0x7) of byte (unit >> 3) of the bitmap. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    }
  JUMPTO(SLJIT_ZERO, start);
  }
else
  /* NOTE(review): despite its name, the list is passed as the "backtracks"
  argument and its jumps continue the search - presumably they are taken
  for code units which are not in the bitmap; confirm in optimize_class(). */
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Move STR_PTR back to the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
7015
7016
/* Emits code which checks that a required code unit is present in the
subject at or after the current position.

  common         compiler state; common->req_char_ptr must be non-zero
  req_char       the required code unit
  caseless       when TRUE, the other case of req_char is accepted as well
  has_firstchar  when TRUE, the search starts one code unit after STR_PTR

The position where the code unit was found is stored in the local variable
at common->req_char_ptr. The search is skipped when STR_PTR is still below
the stored position, and also when STR_PTR + IN_UCHARS(REQ_CU_MAX) * 100 is
below STR_END. TMP1 and TMP2 are used as scratch registers.

Returns the list of jumps taken when the code unit is not found; the caller
decides where they go. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
struct sljit_jump *already_found;
struct sljit_jump *found;
struct sljit_jump *found_oc = NULL;
jump_list *not_found = NULL;
sljit_u32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

/* TMP1 is the search position from here. */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

oc = req_char;
if (caseless)
  {
  oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE
  if (req_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(req_char);
#endif
  }

#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  {
  not_found = fast_requested_char_simd(common, req_char, oc);
  }
else
#endif
  {
  loop = LABEL();
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);

  if (req_char == oc)
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  else
    {
    bit = req_char ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in a single bit: force that bit to one and
      do a single comparison. */
      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
      }
    else
      {
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
      }
    }
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, loop);

  JUMPHERE(found);
  if (found_oc)
    JUMPHERE(found_oc);
  }

/* Remember where the code unit was found. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);

JUMPHERE(already_found);
JUMPHERE(toolong);
return not_found;
}
7091
7092
/* Emits a helper routine (entered with SLJIT_FAST_ENTER, left with
SLJIT_FAST_RETURN) which pops frame entries from the top of the stack and
writes the saved values back to local variables.

The word right below STACK_TOP selects the entry type:
  positive  offset of a local variable relative to the local base; the next
            two words are stored at that offset and at the following word,
            and three words are popped;
  negative  negated offset of a local variable; the next word is stored at
            that offset, and two words are popped;
  zero      end of the entries, the routine returns.

RETURN_ADDR holds the return address; TMP1, TMP2 and TMP3 are used as
scratch registers. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive entry: two words are restored. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  /* TMP1 is needed as a temporary here, so the local base is computed
  again before the loop continues. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* The flags of the comparison above are used again to separate the zero
entry from the negative ones. */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative entry: one word is restored. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
7146
7147
#ifdef SUPPORT_UNICODE
/* Bit-mask helpers over Unicode general category numbers (the ucp_* values).
Category number N is represented by bit N of an int. */

/* Mask with only the given category bit set. */
#define UCPCAT(bit) (1 << (bit))
/* Union of two / three single-category masks. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
/* Mask covering every category from start to end, both inclusive. */
#define UCPCAT_RANGE(start, end) ((UCPCAT((end) + 1) - 1) - (UCPCAT(start) - 1))
/* Categories ucp_Ll..ucp_Lu and ucp_Nd..ucp_No respectively. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
/* Every category bit from 0 up to and including ucp_Zs. */
#define UCPCAT_ALL (UCPCAT(ucp_Zs + 1) - 1)
#endif
7156
7157
static void check_wordboundary(compiler_common *common, BOOL ucp)
{
/* Emits the shared fast-call routine used by the word boundary assertions.

The generated code classifies the character before STR_PTR as word (1) or
non-word (0) in TMP3, then does the same for the character at STR_PTR in
TMP2. TMP3 stays 0 when STR_PTR is not above the subject start, and TMP2
stays 0 when the jumps collected by check_str_end() are taken. The routine
returns with TMP2 = TMP2 ^ TMP3 and the zero flag set from that XOR.

When ucp is TRUE (only meaningful with SUPPORT_UNICODE) the class comes from
the category produced by the getucdtype routine; otherwise it is the
ctype_word bit of the common->ctypes table, and the lookup is skipped
(class 0) for characters above 255.

With common->invalid_utf there are two extra exits: TMP2 = -1 when the
backward read failed and the forward read yields INVALID_UTF_CHAR, and
TMP2 = TMP3 when the forward read on the normal path fails.

The return address is kept in LOCAL0; LOCAL1 is handed to peek_char(). */
DEFINE_COMPILER;
const BOOL partial_mode = (common->mode != PCRE2_JIT_COMPLETE);
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
/* Categories that count as "word" in ucp mode. */
const sljit_sw word_categories = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(ucp);
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);

/* Previous character: its class ends up in TMP3 (0 if there is none). */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (partial_mode)
    {
    /* Keep the character (TMP1) and STR_PTR safe around the step back. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (partial_mode)
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  else
    peek_char_back(common, READ_CHAR_MAX, NULL);
  }

/* Classify the previous character. */
#ifdef SUPPORT_UNICODE
if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  /* TMP2 = 1 << category, then test it against the word categories. */
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, word_categories);
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* TMP3 is already zero here, so skipping the lookup means "non-word". */
  jump = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Following character: its class ends up in TMP2 (0 if there is none). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2);

/* Classify the following character (same test as above, other target). */
#ifdef SUPPORT_UNICODE

valid_utf = LABEL();

if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, word_categories);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal exit: a boundary exists iff the two classes differ. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The backward read failed: look at the following character. If it is
  readable, continue at the normal classification with TMP3 still zero. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  /* Both reads failed: report -1. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* Only the forward read failed: the result is the previous class. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
7306
7307
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to test the character in TMP1 against a 256-bit class bitmap with a
few range comparisons instead of a table lookup. May destroy TMP1.

The bitmap is scanned for positions where the bit value changes ("edges").
Depending on nclass and the last run, 256 may be appended as a final edge.
If more than four edges are needed nothing is emitted and FALSE is returned;
otherwise compare-and-jump code targeting *backtracks is emitted and TRUE is
returned. invert flips the sense of the emitted tests. */
DEFINE_COMPILER;
int edges[MAX_CLASS_RANGE_SIZE];
sljit_u8 run_bit, next_bit, run_fill;
int pos, base, gap, count = 0;
int low_accepted;

run_bit = bits[0] & 0x1;
/* Byte value of eight copies of run_bit (0x00 or 0xff). */
run_fill = (sljit_u8)-run_bit;

pos = 0;
while (pos < 256)
  {
  const sljit_u8 chunk = bits[pos >> 3];

  /* A byte-aligned position whose whole byte continues the current run
  cannot contain an edge: skip eight characters at once. */
  if ((pos & 0x7) == 0 && chunk == run_fill)
    {
    pos += 8;
    continue;
    }

  next_bit = (chunk >> (pos & 0x7)) & 0x1;
  if (next_bit != run_bit)
    {
    if (count >= MAX_CLASS_RANGE_SIZE)
      return FALSE;
    edges[count++] = pos;
    run_bit = next_bit;
    run_fill = (sljit_u8)-next_bit;
    }
  pos++;
  }

/* Close the last run at 256 when it differs from the nclass setting. */
if (nclass ? (run_bit == 0) : (run_bit == 1))
  {
  if (count >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  edges[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

/* Bit 0 of the bitmap, flipped by invert: non-zero means the characters
below the first edge pass the test. */
low_accepted = (bits[0] & 0x1) != 0;
if (invert)
  low_accepted = !low_accepted;

switch(count)
  {
  case 0:
  /* No edge at all: either nothing or everything is accepted. */
  if (!low_accepted)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(low_accepted ? SLJIT_GREATER_EQUAL : SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
  return TRUE;

  case 2:
  if (edges[0] + 1 == edges[1])
    /* The range is a single character. */
    add_jump(compiler, backtracks, CMP(low_accepted ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
    add_jump(compiler, backtracks, CMP(low_accepted ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
    }
  return TRUE;

  case 3:
  if (low_accepted)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      }
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;

  case 4:
  gap = edges[2] - edges[0];

  /* Two equally long ranges whose start values differ in exactly one bit
  can be merged by forcing that bit on and testing only the upper range. */
  if ((edges[1] - edges[0]) == (edges[3] - edges[2])
      && (edges[0] | gap) == edges[2]
      && (edges[1] & gap) == 0
      && is_powerof2(gap))
    {
    SLJIT_ASSERT((edges[0] & gap) == 0 && (edges[2] & edges[3] & gap) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, gap);
    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(low_accepted ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2]);
      add_jump(compiler, backtracks, CMP(low_accepted ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  if (low_accepted)
    {
    /* base records how much has already been subtracted from TMP1. */
    base = 0;
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      base = edges[0];
      }

    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2] - base));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - base);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
7458
7459
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to test the character in TMP1 against a 256-bit class bitmap with a
short chain of compare + conditional-select instructions. May destroy TMP1;
TMP2 is used as the accumulator.

The listed characters are the set bits of the bitmap (of its complement when
nclass is set). Two characters that differ only in bit 0x20 share one list
slot, marked with flag 0x100, and are tested together after OR-ing 0x20 into
TMP1. Returns FALSE without emitting code when conditional moves are not
available, when the list would exceed MAX_CLASS_CHARS_SIZE entries, or when
it is empty; otherwise emits the test plus one jump to *backtracks and
returns TRUE. invert flips the sense of that final jump. */
DEFINE_COMPILER;
uint16_t picks[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
sljit_s32 cond;
int idx, bitpos, slot, total, ch, paired;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Pass 1: collect the characters. */
total = 0;
for (idx = 0; idx < 32; idx++)
  {
  mask = bits[idx];
  if (nclass)
    mask = (sljit_u8)~mask;

  for (bitpos = 0; mask != 0; bitpos++, mask >>= 1)
    {
    if ((mask & 0x1) == 0)
      continue;

    ch = idx * 8 + bitpos;
    slot = total;

    if ((ch & 0x20) != 0)
      {
      /* Look for the partner without bit 0x20 and merge into its slot. */
      slot = 0;
      while (slot < total && picks[slot] != ch - 0x20)
        slot++;
      if (slot < total)
        picks[slot] |= 0x120;
      }

    if (slot == total)
      {
      if (total >= MAX_CLASS_CHARS_SIZE)
        return FALSE;
      picks[total++] = (uint16_t) ch;
      }
    }
  }

if (total == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* Pass 2: emit the tests for the unpaired characters. TMP2 is initialised
here; each SELECT replaces it with TMP1 when the compare sets the zero flag. */
idx = 0;
paired = 0;

if (picks[0] == 0)
  {
  /* Character zero cannot use the select trick, so it seeds TMP2 directly. */
  idx++;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

for (; idx < total; idx++)
  {
  if ((picks[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, picks[idx]);
  SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
  }

/* Pass 3: emit the tests for the paired characters. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < total; idx++)
    {
    if ((picks[idx] & 0x100) == 0)
      continue;
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, picks[idx] & 0xff);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  }

if (invert)
  nclass = !nclass;

cond = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7559
7560
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Emits an optimized test of TMP1 against the class bitmap if one of the two
strategies applies: the range-based one is tried first, the character-list
one second. Returns the result of the strategy that ran last, i.e. FALSE
only when neither could handle the bitmap. May destroy TMP1. */
BOOL handled = optimize_class_ranges(common, bits, nclass, invert, backtracks);

if (!handled)
  handled = optimize_class_chars(common, bits, nclass, invert, backtracks);
return handled;
}
7567
7568
static void check_anynewline(compiler_common *common)
{
/* Emits the fast-call routine that checks whether TMP1 holds a newline
character: LF, VT, FF, CR or NEL, plus 0x2028 and 0x2029 in the wide / UTF
configurations. The partial results are OR-ed together in TMP2 (destroyed),
and the last OP_FLAGS also sets the zero flag from TMP2. Outside EBCDIC
builds TMP1 is modified as well. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#ifdef EBCDIC
/* One equality test per character. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_LF);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_VT);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_FF);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL);
#else
/* Rebase TMP1 to CHAR_LF so that LF..CR becomes one unsigned range test;
the later constants are rebased accordingly. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, CHAR_LF);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR - CHAR_LF);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL - CHAR_LF);
#endif
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 lets a single compare cover both 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - CHAR_LF);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Fold in the result of whichever compare was emitted last. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7606
7607
static void check_hspace(compiler_common *common)
{
/* Emits the fast-call routine that checks whether TMP1 holds a horizontal
space character: HT, SPACE or NBSP, plus 0x1680, 0x180e, 0x2000-0x200a,
0x202f, 0x205f and 0x3000 in the wide / UTF configurations. The partial
results are OR-ed together in TMP2 (destroyed), and the last OP_FLAGS also
sets the zero flag from TMP2. TMP1 is modified in the wide / UTF case. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_HT);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_SPACE);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NBSP);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase TMP1 to 0x2000 so that 0x2000..0x200a becomes one unsigned
  range test; the remaining constants are rebased accordingly. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200a - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Fold in the result of whichever compare was emitted last. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7645
7646
static void check_vspace(compiler_common *common)
{
/* Emits the fast-call routine that checks whether TMP1 holds a vertical
space character: LF, VT, FF, CR or NEL, plus 0x2028 and 0x2029 in the wide /
UTF configurations. The emitted instruction sequence is the same as the one
produced by check_anynewline(). The partial results are OR-ed together in
TMP2 (destroyed), and the last OP_FLAGS also sets the zero flag from TMP2.
Outside EBCDIC builds TMP1 is modified as well. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#ifdef EBCDIC
/* One equality test per character. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_LF);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_VT);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_FF);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL);
#else
/* Rebase TMP1 to CHAR_LF so that LF..CR becomes one unsigned range test;
the later constants are rebased accordingly. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, CHAR_LF);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR - CHAR_LF);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL - CHAR_LF);
#endif
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 lets a single compare cover both 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - CHAR_LF);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Fold in the result of whichever compare was emitted last. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7685
7686
static void do_casefulcmp(compiler_common *common)
{
/* Emits the fast-call routine that compares two code unit sequences exactly.

As used by the emitted code: TMP1 points to the first sequence, TMP2 holds
the length in bytes, and STR_PTR is first moved back by TMP2 to reach the
start of the second sequence. The loop stops at the first differing unit or
when TMP2 reaches zero, so TMP2 is zero on return only if every unit
matched. TMP1 is reloaded with the return address from LOCAL0 before
returning.

With virtual registers STR_END and STACK_TOP serve as the two character
registers; their values are parked in TMP3 / RETURN_ADDR for the duration. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
const int char1_reg = HAS_VIRTUAL_REGISTERS ? STR_END : TMP3;
const int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
/* 0: plain loads + explicit increments, 1: post-update, 2: pre-update. */
int update_kind = 0;

/* Update ref_update_local_size() when this changes. */
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Ask which addressing variant the target supports (SLJIT_MEM_SUPP). */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_kind = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_kind = 2;

/* Loop head: load one unit from each sequence and advance both pointers. */
switch (update_kind)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update addressing needs both pointers one unit early. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Loop tail, shared by all variants. */
mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);

/* Undo the initial one-unit bias of the pre-update variant. */
if (update_kind == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7767
7768
static void do_caselesscmp(compiler_common *common)
{
/* Emits the fast-call routine that compares two code unit sequences after
mapping every unit through the common->lcc table (units above 255 are
compared unmapped when the code unit width is not 8).

As used by the emitted code: TMP1 points to the first sequence, TMP2 holds
the length in bytes, and STR_PTR is first moved back by TMP2 to reach the
start of the second sequence. The loop stops at the first differing pair or
when TMP2 reaches zero, so TMP2 is zero on return only if every pair
matched. TMP1 is reloaded with the return address from LOCAL0 before
returning.

STR_END is used as the first character register; its value is parked in
LOCAL1. With virtual registers STACK_TOP and STACK_LIMIT are borrowed as
well and parked in TMP3 / RETURN_ADDR. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
const int char1_reg = STR_END;
const int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
const int lcc_table = HAS_VIRTUAL_REGISTERS ? STACK_LIMIT : TMP3;
/* 0: plain loads + explicit increments, 1: post-update, 2: pre-update. */
int opt_type = 0;

/* Ask which addressing variant the target supports (SLJIT_MEM_SUPP). */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

/* Update ref_update_local_size() when this changes. */
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

/* Loop head: load one unit from each sequence. */
switch (opt_type)
  {
  case 1:
  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update addressing needs both pointers one unit early. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both units through the lcc table (skipped for units above 255). */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

/* The plain variant advances the second pointer only now. */
if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Loop tail. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);

/* Undo the initial one-unit bias of the pre-update variant. */
if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7867
7868
#include "pcre2_jit_char_inc.h"
7869
7870
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
/* Emits the matching-path code for one simple assertion opcode: OP_SOD,
OP_SOM, the four word boundary opcodes, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM,
OP_CIRC and OP_CIRCM. Every jump taken when the assertion fails is added to
*backtracks. The function returns cc unchanged in every case; any other
opcode hits SLJIT_UNREACHABLE(). */
DEFINE_COMPILER;
struct sljit_jump *branch[4];
/* Values that depend only on the compile-time state, computed once. */
const BOOL partial_mode = (common->mode != PCRE2_JIT_COMPLETE);
const BOOL wide_newline = (common->nltype == NLTYPE_FIXED && common->newline > 255);
const sljit_sw nl_first = (common->newline >> 8) & 0xff;
const sljit_sw nl_second = common->newline & 0xff;
const BOOL negated = (type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY);
const BOOL plain_boundary = (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY);
/* Register through which the jit_arguments structure is addressed. */
int args_reg;

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* STR_PTR must equal the "begin" (OP_SOD) or "str" (OP_SOM) field. */
  args_reg = HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  if (type == OP_SOD)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  /* Call the shared boundary routine, then test its result. */
  add_jump(compiler, plain_boundary ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* Here the result is taken from TMP2 rather than from the flags; the
    signed test also rejects a negative TMP2. */
    add_jump(compiler, backtracks, CMP(negated ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. branch[0] is taken when STR_PTR is
  already at (or past) STR_END. */
  branch[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (wide_newline)
    {
    /* Fixed two-unit newline: it must be exactly the last two units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!partial_mode)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      branch[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, nl_first);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed one-unit newline: it must be exactly the last unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* First handle a leading CR: branch[1] skips this when there is none. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    branch[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    branch[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    branch[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(branch[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in TMP3 around the read and restored afterwards. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(branch[2]);
    JUMPHERE(branch[3]);
    }
  JUMPHERE(branch[0]);
  if (partial_mode)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (partial_mode)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fails outright when PCRE2_NOTEOL is set in the match options. */
  args_reg = HAS_VIRTUAL_REGISTERS ? TMP2 : ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  else
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  return cc;

  case OP_DOLLM:
  /* branch[1]: not at the end of the subject, so check for a newline.
  Otherwise only the PCRE2_NOTEOL option can make the assertion fail. */
  branch[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  args_reg = HAS_VIRTUAL_REGISTERS ? TMP2 : ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  branch[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(branch[1]);

  if (wide_newline)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!partial_mode)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      branch[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(branch[0]);
  return cc;

  case OP_CIRC:
  /* STR_PTR must not be above "begin", and PCRE2_NOTBOL must be clear. */
  args_reg = HAS_VIRTUAL_REGISTERS ? TMP2 : ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back, so the argument pointer goes
  through TMP1 here and TMP2 receives "begin". branch[1]: STR_PTR is above
  "begin", so check for a preceding newline. */
  args_reg = HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  branch[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  branch[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(branch[1]);

  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (wide_newline)
    {
    /* The two units before STR_PTR must exist and be the newline. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(branch[0]);
  return cc;
  }
SLJIT_UNREACHABLE();
return cc;
}
8112
8113
/* Forward declarations. */
8114
/* Used by compile_recurse_matchingpath() below when a simple recursion is inlined. */
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
/* Counterpart for the backtracking side; presumably defined later in this file. */
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8116
8117
/* Creates a new backtrack record and pushes it onto the parent's list.

The record is obtained with sljit_alloc_memory(), cleared to zero, given
`ccstart` as its cc field and linked in front of parent->top. The expansion
site must have `compiler`, `backtrack` and `parent` in scope. If the compiler
reports an error after the allocation, the enclosing function returns `error`.

Every use of a macro argument is parenthesized so that an expression passed
as `size` is evaluated the same way in the allocation and in the memset. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8129
8130
/* Same as PUSH_BACKTRACK, but for functions returning void: if the compiler
reports an error after the allocation, the enclosing function simply returns.

The record is obtained with sljit_alloc_memory(), cleared to zero, given
`ccstart` as its cc field and linked in front of parent->top. The expansion
site must have `compiler`, `backtrack` and `parent` in scope. Every use of a
macro argument is parenthesized. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8142
8143
/* Views the local `backtrack` pointer as a pointer to the given concrete
backtrack structure type. Requires a variable named `backtrack` in scope. */
#define BACKTRACK_AS(type) ((type *)backtrack)
8144
8145
/* Emits code that selects one capture group among the groups sharing a
duplicated name (OP_DNREF / OP_DNREFI).

On exit of the generated code TMP2 holds the address of the OVECTOR slot of
the selected group. Candidates are examined in name-table order; the first
one whose start value differs from OVECTOR(1) is taken. If none of the
earlier candidates qualifies, the last candidate is used, and when
`backtracks` is not NULL and common->unset_backref is false, a backtrack jump
is emitted for the case where that last candidate also equals OVECTOR(1).

TMP1 is overwritten with the value of OVECTOR(1). */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
  DEFINE_COMPILER;
  int count = GET2(cc, 1 + IMM2_SIZE);
  PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
  unsigned int offset;
  jump_list *found = NULL;
  int i;

  SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

  /* Reference value every candidate's start is compared against. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

  /* All candidates but the last: leave as soon as one differs from TMP1. */
  for (i = 1; i < count; i++)
  {
    offset = GET2(slot, 0) << 1;
    GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
    add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
    slot += common->name_entry_size;
  }

  /* Last candidate: TMP2 points at it whether or not it is usable. */
  offset = GET2(slot, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  if (backtracks != NULL && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

  set_jumps(found, LABEL());
}
8174
8175
/* Emits the code that matches one occurrence of a back reference.

cc          points at OP_REF / OP_REFI (group number stored in the opcode) or
            at OP_DNREF / OP_DNREFI (the caller must already have placed the
            address of the selected OVECTOR slot in TMP2).
backtracks  list receiving every jump taken when the match fails.
withchecks  when TRUE, emit the checks for an unset group (numbered
            references only, unless common->unset_backref is set) and for
            a zero-length reference.
emptyfail   when TRUE, a zero-length reference is a failure; otherwise it
            matches without consuming anything. Only consulted when
            withchecks is TRUE.

With Unicode support, caseless references in UTF or UCP mode are compared
character by character; all other references go through the shared
casefulcmp / caselesscmp helpers. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
  DEFINE_COMPILER;
  BOOL numbered = (*cc == OP_REF || *cc == OP_REFI);
  int offset = 0;
  struct sljit_jump *skip = NULL;
  struct sljit_jump *partial;
  struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
  struct sljit_label *loop;
  struct sljit_label *caseless_loop;
  struct sljit_jump *turkish_ascii_i = NULL;
  struct sljit_jump *turkish_non_ascii_i = NULL;
  jump_list *no_match = NULL;
  int source_reg = COUNT_MATCH;
  int source_end_reg = ARGUMENTS;
  int char1_reg = STACK_LIMIT;
  PCRE2_UCHAR refi_flag = 0;
  BOOL turkish_casing;

  /* The flag byte is the last code unit of a caseless reference opcode. */
  if (*cc == OP_REFI || *cc == OP_DNREFI)
    refi_flag = cc[PRIV(OP_lengths)[*cc] - 1];

  /* Turkish rules apply only when caseless restriction is not requested. */
  turkish_casing = (refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==
    REFI_FLAG_TURKISH_CASING;
#endif /* SUPPORT_UNICODE */

  /* TMP1 = start of the referenced substring. */
  if (numbered)
  {
    offset = GET2(cc, 1) << 1;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    /* OVECTOR(1) contains the "string begin - 1" constant. */
    if (withchecks && !common->unset_backref)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
  if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI))
  {
    /* Update ref_update_local_size() when this changes. */
    SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));

    /* TMP2 = end of the referenced substring. */
    if (numbered)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

    if (withchecks && emptyfail)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

    /* Three registers are borrowed for the loop; keep their values in locals. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0);

    OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
    OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

    loop = LABEL();
    /* Whole reference consumed: success. Subject exhausted first: partial. */
    skip = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
    partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    /* Read original character. It must be a valid UTF character. STR_PTR is
    temporarily redirected to the reference and parked in TMP3 meanwhile. */
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

    OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

    /* Read second character (from the subject). */
    read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

    /* Identical characters: continue with the next pair. */
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

    if (turkish_casing)
    {
      /* (c | 0x20) == 0x69 selects 'I' (0x49) and 'i' (0x69). */
      OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20);
      turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69);

      /* (c | 1) == 0x131 selects U+0130 and U+0131. */
      OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1);
      turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131);
    }

    /* Keep the subject character; TMP1 and TMP2 are overwritten below. */
    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

    /* TMP2 = ucd_records + TMP2 * 12 (computed as (TMP2 << 3) + (TMP2 << 2)). */
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

    /* Subject character plus its other_case delta against the reference character. */
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

    /* A zero caseset value means there is no set to search. */
    add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

    /* With caseless restriction a set whose first member is below 128 is rejected. */
    if (refi_flag & REFI_FLAG_CASELESS_RESTRICT)
      add_jump(compiler, &no_match, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128));

    /* Walk the set while its members are smaller than the reference character. */
    caseless_loop = LABEL();
    OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
    JUMPTO(SLJIT_EQUAL, loop);
    JUMPTO(SLJIT_LESS, caseless_loop);

    if (turkish_casing)
    {
      add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));
      JUMPHERE(turkish_ascii_i);

      /* 'I' (bit 5 clear) -> 0x131, 'i' (bit 5 set) -> 0x130. */
      OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
      OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
      OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
      OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130);
      CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

      add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));
      JUMPHERE(turkish_non_ascii_i);

      /* 0x130 -> 0x69 ('i'), 0x131 -> 0x49 ('I'). */
      OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
      OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
      OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
      OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49);
      CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
    }

    /* Failure exit: restore the borrowed registers and backtrack. In complete
    mode running out of subject shares this exit. */
    set_jumps(no_match, LABEL());
    if (common->mode == PCRE2_JIT_COMPLETE)
      JUMPHERE(partial);

    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* In partial modes running out of subject gets its own exit. */
    if (common->mode != PCRE2_JIT_COMPLETE)
    {
      JUMPHERE(partial);
      OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
      OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
      OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);

      check_partial(common, FALSE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

    /* Success exit. */
    JUMPHERE(skip);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
    return;
  }
  else
#endif /* SUPPORT_UNICODE */
  {
    /* Shared comparison helper selected by the opcode's case sensitivity. */
    jump_list **cmp_calls = (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp;

    /* TMP2 = length of the referenced substring; the zero flag marks an empty one. */
    if (numbered)
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    else
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

    if (withchecks)
      skip = JUMP(SLJIT_ZERO);

    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, partial);

    add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

    if (common->mode != PCRE2_JIT_COMPLETE)
    {
      nopartial = JUMP(SLJIT_JUMP);
      JUMPHERE(partial);
      /* TMP2 -= STR_END - STR_PTR */
      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
      partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
      add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
      JUMPHERE(partial);
      check_partial(common, FALSE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(nopartial);
    }
  }

  /* Resolve the zero-length jump: a failure or a trivial match. */
  if (skip != NULL)
  {
    if (emptyfail)
      add_jump(compiler, backtracks, skip);
    else
      JUMPHERE(skip);
  }
}
8381
8382
/* Emits the matching path of a repeated back reference, i.e. a reference
opcode followed by OP_CRSTAR, OP_CRPLUS, OP_CRQUERY, OP_CRRANGE or one of
their minimizing variants.

A ref_iterator_backtrack record is pushed onto `parent`. Returns the address
of the code following the repeat, or NULL if the record cannot be created.
A max value of 0 stands for "no upper limit". */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  BOOL numbered = (*cc == OP_REF || *cc == OP_REFI);
  backtrack_common *backtrack;
  PCRE2_UCHAR repeat_op;
  int local_start = LOCAL2;
  int offset = 0;
  struct sljit_label *repeat_label;
  struct sljit_jump *zerolength;
  struct sljit_jump *skip = NULL;
  PCRE2_SPTR ccbegin = cc;
  int min = 0, max = 0;
  BOOL minimize;

  PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

  /* Step over the extra operands so that cc[1 + IMM2_SIZE] is the repeat opcode. */
  if (numbered)
    offset = GET2(cc, 1) << 1;
  else
    cc += IMM2_SIZE;

  if (*ccbegin == OP_REFI || *ccbegin == OP_DNREFI)
  {
    cc += 1;
#ifdef SUPPORT_UNICODE
    /* The caseless Unicode comparison uses LOCAL0..LOCAL2 itself. */
    if (common->utf || common->ucp)
      local_start = LOCAL3;
#endif
  }

  repeat_op = cc[1 + IMM2_SIZE];

  SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
  /* Update ref_update_local_size() when this changes. */
  SLJIT_ASSERT(local_start + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size);

  /* Odd repeat opcodes are the minimizing variants. */
  minimize = (repeat_op & 0x1) != 0;

  switch (repeat_op)
  {
    case OP_CRSTAR:
    case OP_CRMINSTAR:
      min = 0;
      max = 0;
      cc += 1 + IMM2_SIZE + 1;
      break;

    case OP_CRPLUS:
    case OP_CRMINPLUS:
      min = 1;
      max = 0;
      cc += 1 + IMM2_SIZE + 1;
      break;

    case OP_CRQUERY:
    case OP_CRMINQUERY:
      min = 0;
      max = 1;
      cc += 1 + IMM2_SIZE + 1;
      break;

    case OP_CRRANGE:
    case OP_CRMINRANGE:
      min = GET2(cc, 1 + IMM2_SIZE + 1);
      max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
      cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
      break;

    default:
      SLJIT_UNREACHABLE();
      break;
  }

  if (!minimize)
  {
    /* Greedy repeat. */
    if (min == 0)
    {
      allocate_stack(common, 2);
      if (numbered)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
      /* Temporary release of STR_PTR. */
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      /* Handles both invalid and empty cases. Since the minimum repeat is
      zero, the invalid case is basically the same as an empty case. */
      if (numbered)
        zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      else
      {
        compile_dnref_search(common, ccbegin, NULL);
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
        /* Remember the selected OVECTOR slot for the repeat loop. */
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);
        zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
      /* Restore if not zero length. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
    else
    {
      allocate_stack(common, 1);
      if (numbered)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);

      if (numbered)
      {
        /* With a non-zero minimum an unset group is a failure. */
        if (!common->unset_backref)
          add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
        zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
      else
      {
        compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);
        zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

    /* A repeat counter is only needed for bounds above one. */
    if (min > 1 || max > 1)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, SLJIT_IMM, 0);

    repeat_label = LABEL();
    if (!numbered)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw));
    compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);

    if (min > 1 || max > 1)
    {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local_start);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, TMP1, 0);
      if (min > 1)
        CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, repeat_label);
      if (max > 1)
      {
        /* Below the maximum: record this position and try another repeat. */
        skip = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        JUMPTO(SLJIT_JUMP, repeat_label);
        JUMPHERE(skip);
      }
    }

    if (max == 0)
    {
      /* Includes min > 1 case as well. */
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, repeat_label);
    }

    JUMPHERE(zerolength);
    BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

    count_match(common);
    return cc;
  }

  /* Minimizing repeat. Named references keep the OVECTOR slot in STACK(2). */
  allocate_stack(common, numbered ? 2 : 3);
  if (numbered)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  if (repeat_op != OP_CRMINSTAR)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

  if (min == 0)
  {
    /* Handles both invalid and empty cases. Since the minimum repeat is
    zero, the invalid case is basically the same as an empty case. */
    if (numbered)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
    {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
    /* Length is non-zero, we can match real repeats. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    skip = JUMP(SLJIT_JUMP);
  }
  else
  {
    if (numbered)
    {
      if (!common->unset_backref)
        add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
    else
    {
      compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
  /* STACK(1) counts the repeats matched so far. */
  if (max > 0)
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

  if (!numbered)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (min > 1)
  {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
  else if (max > 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

  if (skip != NULL)
    JUMPHERE(skip);
  JUMPHERE(zerolength);

  count_match(common);
  return cc;
}
8604
8605
/* Emits the matching path of a recursion / subroutine call.

The opcode at cc carries the offset of the called group and may be followed
by a run of OP_CREF items, which are treated as the call's arguments. A
recurse_backtrack record is pushed onto `parent`.

When get_framesize() reports no_stack for the called group, its body is
compiled in place. Otherwise a recurse_entry is looked up (or created and
appended to common->entries) for this (start, arguments) pair and a fast call
to its entry point is emitted.

Returns the address of the code following the call and its arguments. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  recurse_entry *entry = common->entries;
  recurse_entry *prev = NULL;
  PCRE2_SPTR end;
  sljit_sw start = GET(cc, 1);
  sljit_uw arg_size;
  PCRE2_SPTR start_cc;
  BOOL needs_control_head;

  /* Skip the opcode and any OP_CREF arguments that follow it. */
  for (end = cc + 1 + LINK_SIZE; *end == OP_CREF; end += 1 + IMM2_SIZE)
    ;

  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, end);

  start_cc = common->start + start;

  /* Inlining simple patterns. */
  if (get_framesize(common, start_cc, NULL, TRUE, &needs_control_head) == no_stack)
  {
    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
    BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
    return end;
  }

  cc += 1 + LINK_SIZE;
  arg_size = (sljit_uw)IN_UCHARS(end - cc);

  /* Look for an existing entry with the same target and the same arguments;
  prev ends up as the list tail when nothing is found. */
  for (; entry != NULL; prev = entry, entry = entry->next)
  {
    if (entry->start != start || entry->arg_size != arg_size)
      continue;
    if (arg_size == 0 || memcmp(cc, entry->arg_start, arg_size) == 0)
      break;
  }

  if (entry == NULL)
  {
    entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return end;

    entry->next = NULL;
    entry->entry_label = NULL;
    entry->backtrack_label = NULL;
    entry->entry_calls = NULL;
    entry->backtrack_calls = NULL;
    entry->start = start;
    entry->arg_start = cc;
    entry->arg_size = arg_size;

    if (prev == NULL)
      common->entries = entry;
    else
      prev->next = entry;
  }

  BACKTRACK_AS(recurse_backtrack)->entry = entry;

  /* Call the entry directly if its label exists, otherwise queue the call. */
  if (entry->entry_label != NULL)
    JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
  else
    add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));

  /* Leave if the match is failed. */
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
  return end;
}
8675
8676
/* Helper invoked from the generated code to run a user callout.

arguments      the JIT argument block; supplies the callout function, its
               user data and the subject boundaries.
callout_block  partially filled callout block. On entry its capture_top field
               holds the number of offset pairs and its offset_vector field
               holds the current subject pointer. The memory immediately
               after the block is used for the converted offset vector.
jit_ovector    the JIT offset vector, which stores pointers into the subject.

Returns 0 when no callout function is registered, otherwise the value
returned by the callout function. */
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
  PCRE2_SPTR subject;
  PCRE2_SIZE *out;
  sljit_u32 remaining, pair_index;

  if (arguments->callout == NULL)
    return 0;

  SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

  subject = arguments->begin;
  out = (PCRE2_SIZE*)(callout_block + 1);
  remaining = callout_block->capture_top;

  SLJIT_ASSERT(remaining >= 1);

  callout_block->version = 2;
  callout_block->callout_flags = 0;

  /* Offsets in subject. The current position must be read before
  offset_vector is overwritten below. */
  callout_block->subject_length = arguments->end - arguments->begin;
  callout_block->start_match = jit_ovector[0] - subject;
  callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - subject;
  callout_block->subject = subject;

  /* Convert and copy the JIT offset vector to the ovector array. */
  callout_block->capture_top = 1;
  callout_block->offset_vector = out;

  /* Pair 0 is always reported as unset. */
  out[0] = PCRE2_UNSET;
  out[1] = PCRE2_UNSET;
  out += 2;
  jit_ovector += 2;

  /* Convert pointers to sizes; capture_top becomes one more than the index
  of the last pair whose start is set. */
  for (pair_index = 2; --remaining != 0; pair_index++)
  {
    out[0] = (PCRE2_SIZE)(jit_ovector[0] - subject);
    out[1] = (PCRE2_SIZE)(jit_ovector[1] - subject);

    if (out[0] != PCRE2_UNSET)
      callout_block->capture_top = pair_index;

    out += 2;
    jit_ovector += 2;
  }

  return (arguments->callout)(callout_block, arguments->callout_data);
}
8729
8730
/* Byte offset of a pcre2_callout_block member. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
8732
8733
/* Emits the code for OP_CALLOUT / OP_CALLOUT_STR.

The generated code reserves room on the machine stack for a
pcre2_callout_block followed by an offset vector, fills in the fields known
at compile time, calls do_callout_jit() and then dispatches on its result:
positive -> backtrack, negative -> abort the match, zero -> continue.

Returns the address of the code following the callout item, or NULL if the
backtrack record cannot be created. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  sljit_s32 mov_opcode;
  unsigned int callout_length = (*cc == OP_CALLOUT)
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
  sljit_sw callout_number;
  sljit_sw string_ptr;
  sljit_sw string_length;
  sljit_sw string_offset;
  /* top_bracket is uint16 so maximum is 1MiB */
  sljit_s32 ovector_bytes = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
  sljit_s32 callout_arg_size;

  PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

  /* Size of the block plus the offset vector, rounded up to whole words. */
  callout_arg_size = (sizeof(pcre2_callout_block) + ovector_bytes + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

  allocate_stack(common, callout_arg_size);

  SLJIT_ASSERT(common->capture_last_ptr != 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  /* String callouts always report callout number 0. */
  callout_number = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
  OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, callout_number);
  OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
  OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

  /* These pointer sized fields temporarily store internal variables. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));

  /* PCRE2_SIZE fields are stored with a move matching their width. */
  mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
  OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
  OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

  if (*cc != OP_CALLOUT)
  {
    /* String callout: the text is embedded in the compiled pattern. */
    string_ptr = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
    string_length = (callout_length - (1 + 4*LINK_SIZE + 2));
    string_offset = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }
  else
  {
    string_ptr = 0;
    string_length = 0;
    string_offset = 0;
  }

  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, string_ptr);
  OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, string_length);
  OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, string_offset);
  /* Mark is the value loaded above, or 0 when marks are not tracked. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

  /* Needed to save important temporary registers. */
  SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
  /* SLJIT_R0 = arguments */
  OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
  GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
  free_stack(common, callout_arg_size);

  /* Check return value. */
  OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
  /* Positive values were handled above, so non-zero here means negative. */
  if (common->abort_label != NULL)
    JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
  else
    add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
  return cc + callout_length;
}
8807
8808
/* The callout helper macros are not needed past this point. */
#undef CALLOUT_ARG_OFFSET
#undef CALLOUT_ARG_SIZE
8810
8811
/* Emits the code for OP_REVERSE / OP_VREVERSE, which moves STR_PTR backwards.

OP_REVERSE steps back a fixed number of characters (lmin == lmax); its
failure jumps go to the parent's own_backtracks list. OP_VREVERSE has a range
(lmin < lmax): a vreverse_backtrack record is pushed, STR_PTR is moved back by
lmin characters (failing if the subject start is crossed) and then by up to
lmax - lmin more, stopping at the subject start. The position after the
mandatory part is stored in STACK(3) and the final position in STACK(2).

Returns the address of the code following the opcode. */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack = NULL;
  jump_list **reverse_failed;
  unsigned int lmin, lmax;
#ifdef SUPPORT_UNICODE
  struct sljit_jump *at_begin;
  struct sljit_label *step;
#endif

  SLJIT_ASSERT(parent->top == NULL);

  if (*cc != OP_REVERSE)
  {
    SLJIT_ASSERT(*cc == OP_VREVERSE);
    PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, cc + 1 + 2 * IMM2_SIZE);

    reverse_failed = &backtrack->own_backtracks;
    lmin = GET2(cc, 1);
    lmax = GET2(cc, 1 + IMM2_SIZE);
    cc += 1 + 2 * IMM2_SIZE;

    SLJIT_ASSERT(lmin < lmax);
  }
  else
  {
    reverse_failed = &parent->own_backtracks;
    lmin = GET2(cc, 1);
    lmax = lmin;
    cc += 1 + IMM2_SIZE;

    SLJIT_ASSERT(lmin > 0);
  }

  /* TMP2 = start of the subject. */
  if (HAS_VIRTUAL_REGISTERS)
  {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
  if (common->utf)
  {
    /* Characters have variable width: step back one character at a time. */
    if (lmin > 0)
    {
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);
      step = LABEL();
      add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
      move_back(common, reverse_failed, FALSE);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, step);
    }

    if (lmin < lmax)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

      /* Optional part: reaching the subject start just ends the loop. */
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
      step = LABEL();
      at_begin = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      move_back(common, reverse_failed, FALSE);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, step);

      JUMPHERE(at_begin);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
  else
#endif
  {
    /* Fixed-width code units: plain pointer arithmetic. */
    if (lmin > 0)
    {
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
      add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

    if (lmin < lmax)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

      /* Step back the optional amount, clamped to the subject start. */
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
      SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

  check_start_used_ptr(common);

  /* Only the OP_VREVERSE form has a backtrack record to resume at. */
  if (lmin < lmax)
    BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

  return cc;
}
8910
8911
/* Scans compiled code starting at cc and reports whether anything other than
the opcodes listed below occurs before the first OP_KET.

Returns FALSE when OP_KET is reached after stepping only over anchors, word
boundary tests, callouts and OP_ALT; returns TRUE as soon as any other opcode
is met. The caller in this file uses a FALSE result to skip reserving a stack
slot for STR_PTR. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
  for (;;)
  {
    switch (*cc)
    {
      case OP_KET:
        return FALSE;

      case OP_CALLOUT_STR:
        /* Variable length item: its total length is stored in the opcode. */
        cc += GET(cc, 1 + 2*LINK_SIZE);
        break;

      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_CALLOUT:
      case OP_ALT:
      case OP_NOT_UCP_WORD_BOUNDARY:
      case OP_UCP_WORD_BOUNDARY:
        /* Fixed length items. */
        cc += PRIV(OP_lengths)[*cc];
        break;

      default:
        return TRUE;
    }
  }
}
8942
8943
/* Generates the matching path of an assertion whose opcode lies in the
OP_ASSERT .. OP_ASSERTBACK_NOT range, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

Arguments:
  common      - compiler state; the accept / quit / then_trap related fields
                are saved on entry and restored before every return
  cc          - points to the assert opcode (or to the preceding
                OP_BRAZERO / OP_BRAMINZERO)
  backtrack   - assert backtrack descriptor; framesize, private_data_ptr and
                (for OP_BRAZERO) matchingpath are filled in here
  conditional - when TRUE, the failure jumps are collected on
                backtrack->condfailed instead of
                backtrack->common.own_backtracks

Returns the byte code position after the closing ket (cc + 1 + LINK_SIZE),
or NULL when the sljit compiler reports an error. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of extra stack slots (0 or 3) reserved for lookbehind assertions
for which find_vreverse() returns non-zero. This is a count used in stack
offset arithmetic, not a flag, so it is an int rather than a BOOL. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
sljit_s32 save_restore_end_ptr = common->restore_end_ptr;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Positive asserts collect their "alternative matched" jumps on a private
list (tmp); for negative asserts a matching alternative is the failure. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* The previous STR_END is kept in stack slot 1, and the current position
  becomes the new STR_END while the assertion body runs. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->restore_end_ptr = 0;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->restore_end_ptr = save_restore_end_ptr;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->restore_end_ptr = save_restore_end_ptr;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    SLJIT_ASSERT(framesize != 0);
    if (framesize > 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }

common->in_positive_assertion = save_in_positive_assertion;
common->restore_end_ptr = save_restore_end_ptr;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
9420
9421
/* Emits the code which resets the stack after the body of a bracket whose
frame size and private data slot are passed in. When needs_control_head is
set, the value found on the stack is loaded into TMP1 and written back to
common->control_head_ptr at the end. For OP_KETRMAX a stack value is also
loaded into TMP2 for the caller. */
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* The same condition selects the slot counts and offsets in both branches. */
const BOOL ket_or_alt = (ket != OP_KET || has_alternatives);
int slots;

if (framesize >= 0)
  {
  slots = ket_or_alt ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + slots) * sizeof(sljit_sw));

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  /* TMP2 which is set here is used by the OP_KETRMAX handling of the caller. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
else
  {
  if (framesize != no_frame)
    {
    slots = (needs_control_head ? 1 : 0) + (ket_or_alt ? 1 : 0);
    if (slots > 0)
      free_stack(common, slots);
    }
  else
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), ket_or_alt ? STACK(-2) : STACK(-1));

  switch (ket)
    {
    case OP_KETRMAX:
    /* TMP2 which is set here is used by the OP_KETRMAX handling of the caller. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    case OP_KETRMIN:
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    default:
    break;
    }
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
9468
9469
/* Emits the code which saves the previous capture state on the stack,
starting at slot 'stacksize', and records the new capture. Returns the index
of the first stack slot which is still unused. */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;
const int bracket = offset >> 1;
int next_slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  /* Push the previous capture_last value and store the new bracket index. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, bracket);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
  next_slot += 1;
  }

if (is_optimized_cbracket(common, bracket))
  return next_slot;

/* Push the previous OVECTOR pair, then store the new one: the start comes
from private_data_ptr, the end is the current STR_PTR. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);

return next_slot + 2;
}
9493
9494
/* Helper called from the generated code: runs the script run check on the
[ptr, endptr) range with the utf argument set to FALSE. Returns endptr when
the check passes, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
return PRIV(script_run)(ptr, endptr, FALSE) ? endptr : NULL;
}
9500
9501
#ifdef SUPPORT_UNICODE
9502
9503
/* Helper called from the generated code: runs the script run check on the
[ptr, endptr) range with the utf argument set to TRUE. Returns endptr when
the check passes, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}
9509
9510
#endif /* SUPPORT_UNICODE */
9511
9512
/* Emits a call to the script run helper. The value stored at
private_data_ptr is loaded into TMP1 and passed together with STR_PTR; the
returned pointer becomes the new STR_PTR, and a zero return value jumps to
the backtrack list of the parent. */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
jump_list **fail_list;
sljit_sw helper = SLJIT_FUNC_ADDR(do_script_run);

/* The helper arguments are passed in TMP1 and STR_PTR directly. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

#ifdef SUPPORT_UNICODE
if (common->utf)
  helper = SLJIT_FUNC_ADDR(do_script_run_utf);
#endif

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, helper);

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

if (parent->top != NULL)
  fail_list = &parent->top->simple_backtracks;
else
  fail_list = &parent->own_backtracks;

add_jump(compiler, fail_list, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
9529
9530
/*
9531
Handling bracketed expressions is probably the most complex part.
9532
9533
Stack layout naming characters:
9534
S - Push the current STR_PTR
9535
0 - Push a 0 (NULL)
9536
A - Push the current STR_PTR. Needed for restoring the STR_PTR
9537
before the next alternative. Not pushed if there are no alternatives.
9538
M - Any values pushed by the current alternative. Can be empty, or anything.
9539
C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
9540
L - Push the previous local (pointed by localptr) to the stack
9541
() - optional values stored on the stack
9542
()* - optional, can be stored multiple times
9543
9544
The following list shows the regular expression templates, their PCRE byte codes
9545
and stack layout supported by pcre-sljit.
9546
9547
(?:) OP_BRA | OP_KET A M
9548
() OP_CBRA | OP_KET C M
9549
(?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
9550
OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
9551
(?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
9552
OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
9553
()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
9554
OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
9555
()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
9556
OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
9557
(?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
9558
(?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
9559
()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
9560
()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
9561
(?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
9562
OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
9563
(?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
9564
OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
9565
()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
9566
OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
9567
()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
9568
OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
9569
9570
9571
Stack layout naming characters:
9572
A - Push the alternative index (starting from 0) on the stack.
9573
Not pushed if there are no alternatives.
9574
M - Any values pushed by the current alternative. Can be empty, or anything.
9575
9576
The next list shows the possible content of a bracket:
9577
(|) OP_*BRA | OP_ALT ... M A
9578
(?()|) OP_*COND | OP_ALT M A
9579
(?>|) OP_ONCE | OP_ALT ... [stack trace] M A
9580
Or nothing, if trace is unnecessary
9581
*/
9582
9583
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9584
{
9585
DEFINE_COMPILER;
9586
backtrack_common *backtrack;
9587
PCRE2_UCHAR opcode;
9588
int private_data_ptr = 0;
9589
int offset = 0;
9590
int i, stacksize;
9591
int repeat_ptr = 0, repeat_length = 0;
9592
int repeat_type = 0, repeat_count = 0;
9593
PCRE2_SPTR ccbegin;
9594
PCRE2_SPTR matchingpath;
9595
PCRE2_SPTR slot;
9596
PCRE2_UCHAR bra = OP_BRA;
9597
PCRE2_UCHAR ket;
9598
assert_backtrack *assert;
9599
BOOL has_alternatives;
9600
BOOL needs_control_head = FALSE;
9601
BOOL has_vreverse = FALSE;
9602
struct sljit_jump *jump;
9603
struct sljit_jump *skip;
9604
jump_list *jumplist;
9605
struct sljit_label *rmax_label = NULL;
9606
struct sljit_jump *braminzero = NULL;
9607
9608
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
9609
9610
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9611
{
9612
bra = *cc;
9613
cc++;
9614
opcode = *cc;
9615
}
9616
9617
opcode = *cc;
9618
ccbegin = cc;
9619
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
9620
ket = *matchingpath;
9621
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
9622
{
9623
repeat_ptr = PRIVATE_DATA(matchingpath);
9624
repeat_length = PRIVATE_DATA(matchingpath + 1);
9625
repeat_type = PRIVATE_DATA(matchingpath + 2);
9626
repeat_count = PRIVATE_DATA(matchingpath + 3);
9627
SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
9628
if (repeat_type == OP_UPTO)
9629
ket = OP_KETRMAX;
9630
if (repeat_type == OP_MINUPTO)
9631
ket = OP_KETRMIN;
9632
}
9633
9634
matchingpath = ccbegin + 1 + LINK_SIZE;
9635
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
9636
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
9637
cc += GET(cc, 1);
9638
9639
has_alternatives = *cc == OP_ALT;
9640
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
9641
{
9642
SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
9643
compile_time_checks_must_be_grouped_together);
9644
has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
9645
}
9646
9647
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
9648
opcode = OP_SCOND;
9649
9650
if (opcode == OP_CBRA || opcode == OP_SCBRA)
9651
{
9652
/* Capturing brackets has a pre-allocated space. */
9653
offset = GET2(ccbegin, 1 + LINK_SIZE);
9654
if (!is_optimized_cbracket(common, offset))
9655
{
9656
private_data_ptr = OVECTOR_PRIV(offset);
9657
offset <<= 1;
9658
}
9659
else
9660
{
9661
offset <<= 1;
9662
private_data_ptr = OVECTOR(offset);
9663
}
9664
BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9665
matchingpath += IMM2_SIZE;
9666
}
9667
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE
9668
|| opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9669
{
9670
/* Other brackets simply allocate the next entry. */
9671
private_data_ptr = PRIVATE_DATA(ccbegin);
9672
SLJIT_ASSERT(private_data_ptr != 0);
9673
BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9674
if (opcode == OP_ONCE)
9675
BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
9676
}
9677
9678
/* Instructions before the first alternative. */
9679
stacksize = 0;
9680
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9681
stacksize++;
9682
if (bra == OP_BRAZERO)
9683
stacksize++;
9684
9685
if (stacksize > 0)
9686
allocate_stack(common, stacksize);
9687
9688
stacksize = 0;
9689
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9690
{
9691
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9692
stacksize++;
9693
}
9694
9695
if (bra == OP_BRAZERO)
9696
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9697
9698
if (bra == OP_BRAMINZERO)
9699
{
9700
/* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
9701
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9702
if (ket != OP_KETRMIN)
9703
{
9704
free_stack(common, 1);
9705
braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9706
}
9707
else if (opcode == OP_ONCE || opcode >= OP_SBRA)
9708
{
9709
jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9710
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9711
/* Nothing stored during the first run. */
9712
skip = JUMP(SLJIT_JUMP);
9713
JUMPHERE(jump);
9714
/* Checking zero-length iteration. */
9715
if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9716
{
9717
/* When we come from outside, private_data_ptr contains the previous STR_PTR. */
9718
braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9719
}
9720
else
9721
{
9722
/* Except when the whole stack frame must be saved. */
9723
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9724
braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
9725
}
9726
JUMPHERE(skip);
9727
}
9728
else
9729
{
9730
jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9731
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9732
JUMPHERE(jump);
9733
}
9734
}
9735
9736
if (repeat_type != 0)
9737
{
9738
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
9739
if (repeat_type == OP_EXACT)
9740
rmax_label = LABEL();
9741
}
9742
9743
if (ket == OP_KETRMIN)
9744
BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
9745
9746
if (ket == OP_KETRMAX)
9747
{
9748
rmax_label = LABEL();
9749
if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
9750
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
9751
}
9752
9753
/* Handling capturing brackets and alternatives. */
9754
if (opcode == OP_ONCE)
9755
{
9756
stacksize = 0;
9757
if (needs_control_head)
9758
{
9759
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9760
stacksize++;
9761
}
9762
9763
if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9764
{
9765
/* Neither capturing brackets nor recursions are found in the block. */
9766
if (ket == OP_KETRMIN)
9767
{
9768
stacksize += 2;
9769
if (!needs_control_head)
9770
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9771
}
9772
else
9773
{
9774
if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9775
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9776
if (ket == OP_KETRMAX || has_alternatives)
9777
stacksize++;
9778
}
9779
9780
if (stacksize > 0)
9781
allocate_stack(common, stacksize);
9782
9783
stacksize = 0;
9784
if (needs_control_head)
9785
{
9786
stacksize++;
9787
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9788
}
9789
9790
if (ket == OP_KETRMIN)
9791
{
9792
if (needs_control_head)
9793
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9794
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9795
if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9796
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
9797
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
9798
}
9799
else if (ket == OP_KETRMAX || has_alternatives)
9800
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9801
}
9802
else
9803
{
9804
if (ket != OP_KET || has_alternatives)
9805
stacksize++;
9806
9807
stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
9808
allocate_stack(common, stacksize);
9809
9810
if (needs_control_head)
9811
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9812
9813
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9814
OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
9815
9816
stacksize = needs_control_head ? 1 : 0;
9817
if (ket != OP_KET || has_alternatives)
9818
{
9819
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9820
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9821
stacksize++;
9822
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9823
}
9824
else
9825
{
9826
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9827
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9828
}
9829
init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
9830
}
9831
}
9832
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
9833
{
9834
/* Saving the previous values. */
9835
if (is_optimized_cbracket(common, offset >> 1))
9836
{
9837
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
9838
allocate_stack(common, 2);
9839
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9840
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9841
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9842
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9843
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9844
}
9845
else
9846
{
9847
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9848
allocate_stack(common, 1);
9849
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9850
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9851
}
9852
}
9853
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
9854
{
9855
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9856
allocate_stack(common, 4);
9857
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9858
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9859
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9860
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9861
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9862
OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
9863
9864
has_vreverse = (*matchingpath == OP_VREVERSE);
9865
if (*matchingpath == OP_REVERSE || has_vreverse)
9866
matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9867
}
9868
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9869
{
9870
/* Saving the previous value. */
9871
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9872
allocate_stack(common, 1);
9873
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9874
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9875
9876
if (*matchingpath == OP_REVERSE)
9877
matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9878
}
9879
else if (opcode == OP_ASSERT_SCS)
9880
{
9881
/* Nested scs blocks will not update this variable. */
9882
if (common->restore_end_ptr == 0)
9883
common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
9884
9885
if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF))
9886
{
9887
/* Optimized case for a single capture reference. */
9888
i = OVECTOR(GET2(matchingpath, 1) << 1);
9889
9890
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i);
9891
9892
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9893
matchingpath += 1 + IMM2_SIZE;
9894
9895
allocate_stack(common, has_alternatives ? 3 : 2);
9896
9897
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9898
OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9899
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9900
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw));
9901
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9902
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
9903
}
9904
else
9905
{
9906
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9907
jumplist = NULL;
9908
9909
while (TRUE)
9910
{
9911
if (*matchingpath == OP_CREF)
9912
{
9913
sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1));
9914
matchingpath += 1 + IMM2_SIZE;
9915
}
9916
else
9917
{
9918
SLJIT_ASSERT(*matchingpath == OP_DNCREF);
9919
9920
i = GET2(matchingpath, 1 + IMM2_SIZE);
9921
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9922
9923
while (i-- > 1)
9924
{
9925
sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9926
add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9927
slot += common->name_entry_size;
9928
}
9929
9930
sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9931
matchingpath += 1 + 2 * IMM2_SIZE;
9932
}
9933
9934
if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF)
9935
break;
9936
9937
add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9938
}
9939
9940
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9941
CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9942
9943
set_jumps(jumplist, LABEL());
9944
9945
allocate_stack(common, has_alternatives ? 3 : 2);
9946
9947
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9948
OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9949
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9950
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0);
9951
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9952
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9953
}
9954
9955
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9956
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0);
9957
9958
if (has_alternatives)
9959
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
9960
}
9961
else if (has_alternatives)
9962
{
9963
/* Pushing the starting string pointer. */
9964
allocate_stack(common, 1);
9965
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9966
}
9967
9968
/* Generating code for the first alternative. */
9969
if (opcode == OP_COND || opcode == OP_SCOND)
9970
{
9971
if (*matchingpath == OP_CREF)
9972
{
9973
SLJIT_ASSERT(has_alternatives);
9974
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9975
CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9976
matchingpath += 1 + IMM2_SIZE;
9977
}
9978
else if (*matchingpath == OP_DNCREF)
9979
{
9980
SLJIT_ASSERT(has_alternatives);
9981
9982
i = GET2(matchingpath, 1 + IMM2_SIZE);
9983
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9984
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9985
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9986
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9987
slot += common->name_entry_size;
9988
i--;
9989
while (i-- > 0)
9990
{
9991
OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9992
OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
9993
slot += common->name_entry_size;
9994
}
9995
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9996
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO));
9997
matchingpath += 1 + 2 * IMM2_SIZE;
9998
}
9999
else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10000
{
10001
/* Never has other case. */
10002
BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL;
10003
SLJIT_ASSERT(!has_alternatives);
10004
10005
if (*matchingpath == OP_TRUE)
10006
{
10007
stacksize = 1;
10008
matchingpath++;
10009
}
10010
else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10011
stacksize = 0;
10012
else if (*matchingpath == OP_RREF)
10013
{
10014
stacksize = GET2(matchingpath, 1);
10015
if (common->currententry == NULL)
10016
stacksize = 0;
10017
else if (stacksize == RREF_ANY)
10018
stacksize = 1;
10019
else if (common->currententry->start == 0)
10020
stacksize = stacksize == 0;
10021
else
10022
stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10023
10024
if (stacksize != 0)
10025
matchingpath += 1 + IMM2_SIZE;
10026
}
10027
else
10028
{
10029
if (common->currententry == NULL || common->currententry->start == 0)
10030
stacksize = 0;
10031
else
10032
{
10033
stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10034
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10035
i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10036
while (stacksize > 0)
10037
{
10038
if ((int)GET2(slot, 0) == i)
10039
break;
10040
slot += common->name_entry_size;
10041
stacksize--;
10042
}
10043
}
10044
10045
if (stacksize != 0)
10046
matchingpath += 1 + 2 * IMM2_SIZE;
10047
}
10048
10049
/* The stacksize == 0 is a common "else" case. */
10050
if (stacksize == 0)
10051
{
10052
if (*cc == OP_ALT)
10053
{
10054
matchingpath = cc + 1 + LINK_SIZE;
10055
cc += GET(cc, 1);
10056
}
10057
else
10058
matchingpath = cc;
10059
}
10060
}
10061
else
10062
{
10063
SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10064
/* Similar code as PUSH_BACKTRACK macro. */
10065
assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10066
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10067
return NULL;
10068
memset(assert, 0, sizeof(assert_backtrack));
10069
assert->common.cc = matchingpath;
10070
BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10071
matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10072
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10073
return NULL;
10074
}
10075
}
10076
10077
compile_matchingpath(common, matchingpath, cc, backtrack);
10078
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10079
return NULL;
10080
10081
switch (opcode)
10082
{
10083
case OP_ASSERTBACK_NA:
10084
if (has_vreverse)
10085
{
10086
SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
10087
add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
10088
}
10089
10090
if (PRIVATE_DATA(ccbegin + 1))
10091
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10092
break;
10093
case OP_ONCE:
10094
match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10095
break;
10096
case OP_SCRIPT_RUN:
10097
match_script_run_common(common, private_data_ptr, backtrack);
10098
break;
10099
}
10100
10101
stacksize = 0;
10102
if (repeat_type == OP_MINUPTO)
10103
{
10104
/* We need to preserve the counter. TMP2 will be used below. */
10105
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10106
stacksize++;
10107
}
10108
if (ket != OP_KET || bra != OP_BRA)
10109
stacksize++;
10110
if (offset != 0)
10111
{
10112
if (common->capture_last_ptr != 0)
10113
stacksize++;
10114
if (!is_optimized_cbracket(common, offset >> 1))
10115
stacksize += 2;
10116
}
10117
if (has_alternatives && opcode != OP_ONCE)
10118
stacksize++;
10119
10120
if (stacksize > 0)
10121
allocate_stack(common, stacksize);
10122
10123
stacksize = 0;
10124
if (repeat_type == OP_MINUPTO)
10125
{
10126
/* TMP2 was set above. */
10127
OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10128
stacksize++;
10129
}
10130
10131
if (ket != OP_KET || bra != OP_BRA)
10132
{
10133
if (ket != OP_KET)
10134
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10135
else
10136
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10137
stacksize++;
10138
}
10139
10140
if (offset != 0)
10141
stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10142
10143
/* Skip and count the other alternatives. */
10144
i = 1;
10145
while (*cc == OP_ALT)
10146
{
10147
cc += GET(cc, 1);
10148
i++;
10149
}
10150
10151
if (has_alternatives)
10152
{
10153
if (opcode != OP_ONCE)
10154
{
10155
if (i <= 3)
10156
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10157
else
10158
BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10159
}
10160
if (ket != OP_KETRMAX)
10161
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10162
}
10163
10164
/* Must be after the matchingpath label. */
10165
if (offset != 0 && is_optimized_cbracket(common, offset >> 1))
10166
{
10167
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10168
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10169
}
10170
else switch (opcode)
10171
{
10172
case OP_ASSERT_NA:
10173
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10174
break;
10175
case OP_ASSERT_SCS:
10176
OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
10177
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10178
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10179
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
10180
10181
/* Nested scs blocks will not update this variable. */
10182
if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
10183
common->restore_end_ptr = 0;
10184
break;
10185
}
10186
10187
if (ket == OP_KETRMAX)
10188
{
10189
if (repeat_type != 0)
10190
{
10191
if (has_alternatives)
10192
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10193
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10194
JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10195
/* Drop STR_PTR for greedy plus quantifier. */
10196
if (opcode != OP_ONCE)
10197
free_stack(common, 1);
10198
}
10199
else if (opcode < OP_BRA || opcode >= OP_SBRA)
10200
{
10201
if (has_alternatives)
10202
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10203
10204
/* Checking zero-length iteration. */
10205
if (opcode != OP_ONCE)
10206
{
10207
/* This case includes opcodes such as OP_SCRIPT_RUN. */
10208
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10209
/* Drop STR_PTR for greedy plus quantifier. */
10210
if (bra != OP_BRAZERO)
10211
free_stack(common, 1);
10212
}
10213
else
10214
/* TMP2 must contain the starting STR_PTR. */
10215
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10216
}
10217
else
10218
JUMPTO(SLJIT_JUMP, rmax_label);
10219
BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10220
}
10221
10222
if (repeat_type == OP_EXACT)
10223
{
10224
count_match(common);
10225
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10226
JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10227
}
10228
else if (repeat_type == OP_UPTO)
10229
{
10230
/* We need to preserve the counter. */
10231
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10232
allocate_stack(common, 1);
10233
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10234
}
10235
10236
if (bra == OP_BRAZERO)
10237
BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10238
10239
if (bra == OP_BRAMINZERO)
10240
{
10241
/* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10242
JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10243
if (braminzero != NULL)
10244
{
10245
JUMPHERE(braminzero);
10246
/* We need to release the end pointer to perform the
10247
backtrack for the zero-length iteration. When
10248
framesize is < 0, OP_ONCE will do the release itself. */
10249
if (opcode == OP_ONCE)
10250
{
10251
int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10252
10253
SLJIT_ASSERT(framesize != 0);
10254
if (framesize > 0)
10255
{
10256
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10257
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10258
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10259
}
10260
}
10261
else if (ket == OP_KETRMIN)
10262
free_stack(common, 1);
10263
}
10264
/* Continue to the normal backtrack. */
10265
}
10266
10267
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
10268
count_match(common);
10269
10270
cc += 1 + LINK_SIZE;
10271
10272
if (opcode == OP_ONCE)
10273
{
10274
int data;
10275
int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10276
10277
SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
10278
/* We temporarily encode the needs_control_head in the lowest bit.
10279
The real value should be short enough for this operation to work
10280
without triggering Undefined Behaviour. */
10281
data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
10282
BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
10283
}
10284
return cc + repeat_length;
10285
}
10286
10287
/* Generates the matching path of a repeated bracket whose opcode is
OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally prefixed by
OP_BRAPOSZERO and closed by OP_KETRPOS.

Each alternative is compiled in turn. After a successful iteration the new
position is recorded and control jumps back to the loop start; the
backtracking path of an alternative reloads STR_PTR and continues with the
next alternative (or leaves the loop after the last one).

Arguments:
  common   compiler state
  cc       points to OP_BRAPOSZERO or to the bracket opcode
  parent   enclosing backtrack record; not referenced directly here
           (presumably consumed by PUSH_BACKTRACK - confirm at the macro)

Returns:   the address following the closing OP_KETRPOS, or NULL when the
           sljit compiler reports an error
*/
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
PCRE2_SPTR alt_begin = NULL;
struct sljit_label *loop_start = NULL;
struct jump_list *empty_iteration = NULL;
BOOL needs_control_head;
BOOL may_skip = FALSE;
BOOL stops_on_empty;
int private_data_ptr;
int capture_start_ptr = 0;
int framesize;
int stacksize;
int offset = 0;
int slot; /* Stack slot index; finally the slot of the saved control head. */

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);

if (*cc == OP_BRAPOSZERO)
  {
  may_skip = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;

if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
  {
  alt_begin = cc + 1 + LINK_SIZE;
  }
else if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
  {
  offset = GET2(cc, 1 + LINK_SIZE);
  /* These capturing brackets are never handled as optimized
  capturing brackets. */
  SLJIT_ASSERT(!is_optimized_cbracket(common, offset));
  capture_start_ptr = OVECTOR_PRIV(offset);
  /* From here on offset indexes the ovector pair of the capture. */
  offset <<= 1;
  alt_begin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  }
else
  {
  SLJIT_UNREACHABLE();
  }

/* The S* opcodes leave the loop when an iteration consumed nothing. */
stops_on_empty = (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS);

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;

if (framesize < 0)
  {
  /* No frame: save the capture pair (plus the last capture index when
  tracked) or the string pointer, then the optional control head and the
  optional "no iteration matched yet" flag. */
  stacksize = 1;
  if (offset != 0)
    stacksize = (common->capture_last_ptr != 0) ? 3 : 2;
  if (needs_control_head)
    stacksize++;
  if (!may_skip)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (offset != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

    slot = 2;
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      slot = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    slot = 1;
    }

  /* The flag is stored above the control head slot, and it is emitted
  before the control head store. */
  if (!may_skip)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK((needs_control_head ? slot + 1 : slot)), SLJIT_IMM, 1);
  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), TMP2, 0);
  }
else
  {
  /* A frame is needed: flag, control head and string pointer (when
  present) come first, followed by the previous private data value and
  the frame itself. */
  stacksize = framesize + 1;
  if (!may_skip)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  slot = 0;
  if (!may_skip)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    slot = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), TMP2, 0);
    slot++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), STR_PTR, 0);
    slot++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);

  /* Step back over the string pointer slot (if any) to the slot written
  just before it. */
  slot -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), capture_start_ptr, STR_PTR, 0);

loop_start = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->own_backtracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, alt_begin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched. Reload STACK_TOP where it is derived from
  the private data slot alone. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    }
  else if (offset != 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  if (offset != 0)
    {
    /* Commit the capture: TMP1 keeps the start of this iteration. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), capture_start_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), capture_start_ptr, STR_PTR, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    }
  else if (framesize < 0)
    {
    /* TMP1 receives the previously saved position for the empty check. */
    if (opcode == OP_SBRAPOS)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
    if (opcode == OP_SBRAPOS)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
    }

  /* Even if the match is empty, we need to reset the control head. */
  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(slot));

  if (stops_on_empty)
    add_jump(compiler, &empty_iteration, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

  /* Clear the "no iteration matched yet" flag; its slot depends on the
  stack layout chosen above. */
  if (!may_skip)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK((framesize < 0 ? stacksize - 1 : 0)), SLJIT_IMM, 0);

  JUMPTO(SLJIT_JUMP, loop_start);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->own_backtracks, LABEL());

  /* The alternative failed: reload the position of the last success. */
  if (offset != 0)
    {
    /* After the last alternative TMP2 is needed by the flag check below. */
    if (framesize >= 0 && *cc == OP_KETRPOS)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), capture_start_ptr);
    }
  else if (framesize < 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
    }

  if (*cc == OP_KETRPOS)
    break;
  alt_begin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->own_backtracks = NULL;
if (!may_skip)
  {
  /* A flag that is still non-zero means no iteration has succeeded. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(empty_iteration, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
10564
10565
/* Decodes the repeat operator of a single-item iterator and normalizes it.

The item at cc is a repeated character (OP_STAR..OP_POSUPTO and the
OP_*I / OP_NOT* variants), a repeated character type
(OP_TYPESTAR..OP_TYPEPOSUPTO), or a class (OP_CLASS, OP_NCLASS, OP_XCLASS,
OP_ECLASS) followed by an OP_CR* repeat.

Arguments:
  common   compiler state
  cc       points to the first opcode of the iterator
  opcode   receives the repeat kind rebased to the OP_STAR..OP_POSUPTO
           family (or OP_EXACT); "plus" repeats are reported as the
           corresponding "star" repeat with *exact set to 1
  type     receives the kind of the repeated item: OP_CHAR, OP_CHARI,
           OP_NOT, OP_NOTI, the character type opcode, or the class opcode
  max      written only for UPTO style repeats and class ranges; for a
           class range it is the number of optional repetitions above
           *exact, and it is zero when the range is unbounded. It is left
           untouched in every other case.
  exact    receives the number of mandatory repetitions (0 when none)
  end      receives the address of the first code unit after the iterator

Returns:   for characters, a pointer to the repeated character; for
           character types and classes, a pointer to the code unit that
           follows the type / class opcode
*/
static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
{
int class_len;

*opcode = *cc;
*exact = 0;

if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
  {
  cc++;
  *type = OP_CHAR;
  }
else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
  {
  cc++;
  *type = OP_CHARI;
  *opcode -= OP_STARI - OP_STAR;
  }
else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
  {
  cc++;
  *type = OP_NOT;
  *opcode -= OP_NOTSTAR - OP_STAR;
  }
else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
  {
  cc++;
  *type = OP_NOTI;
  *opcode -= OP_NOTSTARI - OP_STAR;
  }
else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
  {
  cc++;
  *opcode -= OP_TYPESTAR - OP_STAR;
  /* Placeholder: the real type is read after the repeat arguments. */
  *type = OP_END;
  }
else
  {
  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS || *opcode == OP_ECLASS);
  *type = *opcode;
  /* Length of the class item, including its opcode; the repeat opcode
  is stored right after it. */
  class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 1);
  *opcode = cc[class_len];
  cc++;

  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
    {
    *opcode -= OP_CRSTAR - OP_STAR;
    *end = cc + class_len;

    if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
      {
      *exact = 1;
      *opcode -= OP_PLUS - OP_STAR;
      }
    return cc;
    }

  if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
    {
    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
    *end = cc + class_len;

    if (*opcode == OP_POSPLUS)
      {
      *exact = 1;
      *opcode = OP_POSSTAR;
      }
    return cc;
    }

  SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
  *max = GET2(cc, (class_len + IMM2_SIZE));
  *exact = GET2(cc, class_len);
  *end = cc + class_len + 2 * IMM2_SIZE;

  if (*max == 0)
    {
    /* A stored maximum of zero denotes an unbounded repeat, so all three
    range kinds map to the star family. Reporting OP_UPTO / OP_POSUPTO
    here would describe a bounded repeat with no optional iterations. */
    SLJIT_ASSERT(*exact > 1);
    if (*opcode == OP_CRRANGE)
      *opcode = OP_STAR;
    else if (*opcode == OP_CRPOSRANGE)
      *opcode = OP_POSSTAR;
    else
      *opcode = OP_MINSTAR;
    return cc;
    }

  /* From here on *max counts only the optional repetitions. */
  *max -= *exact;
  if (*max == 0)
    *opcode = OP_EXACT;
  else
    {
    SLJIT_ASSERT(*exact > 0 || *max > 1);
    if (*opcode == OP_CRRANGE)
      *opcode = OP_UPTO;
    else if (*opcode == OP_CRPOSRANGE)
      *opcode = OP_POSUPTO;
    else if (*max == 1)
      *opcode = OP_MINQUERY;
    else
      *opcode = OP_MINUPTO;
    }
  return cc;
  }

/* Character and character type repeats: consume the count argument and
fold the "plus" forms into "star" with one mandatory repetition. */
switch(*opcode)
  {
  case OP_EXACT:
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;

  case OP_PLUS:
  case OP_MINPLUS:
  *exact = 1;
  *opcode -= OP_PLUS - OP_STAR;
  break;

  case OP_POSPLUS:
  *exact = 1;
  *opcode = OP_POSSTAR;
  break;

  case OP_UPTO:
  case OP_MINUPTO:
  case OP_POSUPTO:
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;
  }

if (*type == OP_END)
  {
  /* Character type repeat: the type opcode follows the repeat. */
  *type = *cc;
  *end = next_opcode(common, cc);
  cc++;
  return cc;
  }

*end = cc + 1;
#ifdef SUPPORT_UNICODE
/* In UTF mode the character may occupy additional code units. */
if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
#endif
return cc;
}
10710
10711
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks)
10712
{
10713
DEFINE_COMPILER;
10714
backtrack_common *backtrack = NULL;
10715
PCRE2_SPTR begin = cc;
10716
PCRE2_UCHAR opcode;
10717
PCRE2_UCHAR type;
10718
sljit_u32 max = 0, exact;
10719
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
10720
sljit_s32 early_fail_type;
10721
BOOL charpos_enabled, use_tmp;
10722
PCRE2_UCHAR charpos_char;
10723
unsigned int charpos_othercasebit;
10724
PCRE2_SPTR end;
10725
jump_list *no_match = NULL;
10726
jump_list *no_char1_match = NULL;
10727
struct sljit_jump *jump = NULL;
10728
struct sljit_label *label;
10729
int private_data_ptr = PRIVATE_DATA(cc);
10730
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
10731
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
10732
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
10733
int tmp_base, tmp_offset;
10734
10735
early_fail_type = (early_fail_ptr & 0x7);
10736
early_fail_ptr >>= 3;
10737
10738
/* During recursion, these optimizations are disabled. */
10739
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
10740
{
10741
early_fail_ptr = 0;
10742
early_fail_type = type_skip;
10743
}
10744
10745
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
10746
|| (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
10747
10748
if (early_fail_type == type_fail)
10749
add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
10750
10751
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
10752
10753
if (type != OP_EXTUNI)
10754
{
10755
tmp_base = TMP3;
10756
tmp_offset = 0;
10757
}
10758
else
10759
{
10760
tmp_base = SLJIT_MEM1(SLJIT_SP);
10761
tmp_offset = LOCAL2;
10762
}
10763
10764
if (opcode == OP_EXACT)
10765
{
10766
SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2);
10767
10768
if (common->mode == PCRE2_JIT_COMPLETE
10769
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10770
&& !common->utf
10771
#endif
10772
&& type != OP_ANYNL && type != OP_EXTUNI)
10773
{
10774
OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);
10775
add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));
10776
10777
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
10778
if (type == OP_ALLANY && !common->invalid_utf)
10779
#else
10780
if (type == OP_ALLANY)
10781
#endif
10782
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
10783
else
10784
{
10785
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10786
label = LABEL();
10787
compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE);
10788
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10789
JUMPTO(SLJIT_NOT_ZERO, label);
10790
}
10791
}
10792
else
10793
{
10794
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
10795
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10796
label = LABEL();
10797
compile_char1_matchingpath(common, type, cc, prev_backtracks, TRUE);
10798
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10799
JUMPTO(SLJIT_NOT_ZERO, label);
10800
}
10801
}
10802
10803
if (early_fail_type == type_fail_range)
10804
{
10805
/* Range end first, followed by range start. */
10806
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
10807
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
10808
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
10809
OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
10810
add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
10811
10812
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10813
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
10814
}
10815
10816
if (opcode < OP_EXACT)
10817
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL);
10818
10819
switch(opcode)
10820
{
10821
case OP_STAR:
10822
case OP_UPTO:
10823
SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR));
10824
max += exact;
10825
10826
if (type == OP_EXTUNI)
10827
{
10828
SLJIT_ASSERT(private_data_ptr == 0);
10829
SLJIT_ASSERT(early_fail_ptr == 0);
10830
10831
if (exact == 1)
10832
{
10833
SLJIT_ASSERT(opcode == OP_STAR);
10834
allocate_stack(common, 1);
10835
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10836
}
10837
else
10838
{
10839
/* If OP_EXTUNI is present, it has a separate EXACT opcode. */
10840
SLJIT_ASSERT(exact == 0);
10841
10842
allocate_stack(common, 2);
10843
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10844
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
10845
}
10846
10847
if (opcode == OP_UPTO)
10848
{
10849
SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
10850
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max);
10851
}
10852
10853
label = LABEL();
10854
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
10855
if (opcode == OP_UPTO)
10856
{
10857
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
10858
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10859
jump = JUMP(SLJIT_ZERO);
10860
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0);
10861
}
10862
10863
/* We cannot use TMP3 because of allocate_stack. */
10864
allocate_stack(common, 1);
10865
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10866
JUMPTO(SLJIT_JUMP, label);
10867
if (jump != NULL)
10868
JUMPHERE(jump);
10869
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10870
break;
10871
}
10872
#ifdef SUPPORT_UNICODE
10873
else if (type == OP_ALLANY && !common->invalid_utf)
10874
#else
10875
else if (type == OP_ALLANY)
10876
#endif
10877
{
10878
if (opcode == OP_STAR)
10879
{
10880
if (exact == 1)
10881
detect_partial_match(common, prev_backtracks);
10882
10883
if (private_data_ptr == 0)
10884
allocate_stack(common, 2);
10885
10886
OP1(SLJIT_MOV, base, offset0, STR_END, 0);
10887
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10888
10889
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
10890
process_partial_match(common);
10891
10892
if (early_fail_ptr != 0)
10893
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
10894
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10895
break;
10896
}
10897
#ifdef SUPPORT_UNICODE
10898
else if (!common->utf)
10899
#else
10900
else
10901
#endif
10902
{
10903
/* If OP_ALLANY is present, it has a separate EXACT opcode. */
10904
SLJIT_ASSERT(exact == 0);
10905
10906
if (private_data_ptr == 0)
10907
allocate_stack(common, 2);
10908
10909
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10910
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
10911
10912
if (common->mode == PCRE2_JIT_COMPLETE)
10913
{
10914
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
10915
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
10916
}
10917
else
10918
{
10919
jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
10920
process_partial_match(common);
10921
JUMPHERE(jump);
10922
}
10923
10924
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10925
10926
if (early_fail_ptr != 0)
10927
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10928
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10929
break;
10930
}
10931
}
10932
10933
charpos_enabled = FALSE;
10934
charpos_char = 0;
10935
charpos_othercasebit = 0;
10936
10937
SLJIT_ASSERT(tmp_base == TMP3);
10938
if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
10939
{
10940
#ifdef SUPPORT_UNICODE
10941
charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
10942
#else
10943
charpos_enabled = TRUE;
10944
#endif
10945
if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
10946
{
10947
charpos_othercasebit = char_get_othercase_bit(common, end + 1);
10948
if (charpos_othercasebit == 0)
10949
charpos_enabled = FALSE;
10950
}
10951
10952
if (charpos_enabled)
10953
{
10954
charpos_char = end[1];
10955
/* Consume the OP_CHAR opcode. */
10956
end += 2;
10957
#if PCRE2_CODE_UNIT_WIDTH == 8
10958
SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
10959
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
10960
SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
10961
if ((charpos_othercasebit & 0x100) != 0)
10962
charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
10963
#endif
10964
if (charpos_othercasebit != 0)
10965
charpos_char |= charpos_othercasebit;
10966
10967
BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE;
10968
BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char;
10969
BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit;
10970
10971
if (private_data_ptr == 0)
10972
allocate_stack(common, 2);
10973
10974
use_tmp = (opcode == OP_STAR);
10975
10976
if (use_tmp)
10977
{
10978
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10979
OP1(SLJIT_MOV, base, offset0, TMP3, 0);
10980
}
10981
else
10982
{
10983
OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
10984
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0);
10985
OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
10986
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1));
10987
}
10988
10989
/* Search the first instance of charpos_char. */
10990
if (exact > 0)
10991
detect_partial_match(common, &no_match);
10992
else
10993
jump = JUMP(SLJIT_JUMP);
10994
10995
label = LABEL();
10996
10997
if (opcode == OP_UPTO)
10998
{
10999
if (exact == max)
11000
OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11001
else
11002
{
11003
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11004
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11005
}
11006
}
11007
11008
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11009
11010
if (early_fail_ptr != 0)
11011
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11012
11013
if (exact == 0)
11014
JUMPHERE(jump);
11015
11016
detect_partial_match(common, &no_match);
11017
11018
if (opcode == OP_UPTO && exact > 0)
11019
{
11020
if (exact == max)
11021
CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label);
11022
else
11023
CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label);
11024
}
11025
11026
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11027
if (charpos_othercasebit != 0)
11028
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11029
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11030
11031
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11032
if (use_tmp)
11033
{
11034
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0);
11035
SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3);
11036
}
11037
else
11038
{
11039
OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0);
11040
SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH);
11041
}
11042
JUMPTO(SLJIT_JUMP, label);
11043
11044
set_jumps(no_match, LABEL());
11045
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11046
if (use_tmp)
11047
OP1(SLJIT_MOV, base, offset1, TMP3, 0);
11048
else
11049
{
11050
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
11051
OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
11052
OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0);
11053
}
11054
11055
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
11056
11057
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11058
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11059
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11060
break;
11061
}
11062
}
11063
11064
if (private_data_ptr == 0)
11065
allocate_stack(common, 2);
11066
11067
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11068
use_tmp = (opcode == OP_STAR);
11069
11070
if (common->utf)
11071
{
11072
if (!use_tmp)
11073
OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
11074
11075
OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
11076
}
11077
#endif
11078
11079
if (opcode == OP_UPTO)
11080
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max);
11081
11082
if (opcode == OP_UPTO && exact > 0)
11083
{
11084
label = LABEL();
11085
detect_partial_match(common, &no_match);
11086
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11087
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11088
if (common->utf)
11089
OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
11090
#endif
11091
11092
if (exact == max)
11093
{
11094
OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11095
JUMPTO(SLJIT_NOT_ZERO, label);
11096
}
11097
else
11098
{
11099
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11100
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11101
CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label);
11102
}
11103
11104
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11105
JUMPTO(SLJIT_JUMP, label);
11106
}
11107
else
11108
{
11109
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11110
11111
detect_partial_match(common, &no_match);
11112
label = LABEL();
11113
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11114
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11115
if (common->utf)
11116
OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
11117
#endif
11118
11119
if (opcode == OP_UPTO)
11120
{
11121
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11122
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11123
}
11124
11125
detect_partial_match_to(common, label);
11126
}
11127
11128
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11129
if (common->utf)
11130
{
11131
set_jumps(no_char1_match, LABEL());
11132
set_jumps(no_match, LABEL());
11133
if (use_tmp)
11134
{
11135
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11136
OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11137
}
11138
else
11139
{
11140
OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0);
11141
OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0);
11142
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11143
}
11144
}
11145
else
11146
#endif
11147
{
11148
if (opcode != OP_UPTO || exact == 0)
11149
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11150
set_jumps(no_char1_match, LABEL());
11151
11152
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11153
set_jumps(no_match, LABEL());
11154
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11155
}
11156
11157
if (opcode == OP_UPTO)
11158
{
11159
if (exact > 0)
11160
{
11161
if (max == exact)
11162
jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact);
11163
else
11164
jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
11165
11166
add_jump(compiler, &backtrack->own_backtracks, jump);
11167
}
11168
}
11169
else if (exact == 1)
11170
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0));
11171
11172
if (early_fail_ptr != 0)
11173
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11174
11175
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11176
break;
11177
11178
case OP_QUERY:
11179
SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11180
if (private_data_ptr == 0)
11181
allocate_stack(common, 1);
11182
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11183
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11184
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11185
break;
11186
11187
case OP_MINSTAR:
11188
case OP_MINQUERY:
11189
SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0));
11190
if (private_data_ptr == 0)
11191
allocate_stack(common, 1);
11192
11193
if (exact >= 1)
11194
{
11195
if (exact >= 2)
11196
{
11197
/* Extuni has a separate exact opcode. */
11198
SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0);
11199
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11200
}
11201
11202
if (opcode == OP_MINQUERY)
11203
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1);
11204
11205
label = LABEL();
11206
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11207
11208
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11209
11210
if (exact >= 2)
11211
{
11212
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11213
JUMPTO(SLJIT_NOT_ZERO, label);
11214
}
11215
11216
if (opcode == OP_MINQUERY)
11217
OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0);
11218
else
11219
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11220
}
11221
else
11222
{
11223
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11224
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11225
}
11226
11227
if (early_fail_ptr != 0)
11228
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11229
break;
11230
11231
case OP_MINUPTO:
11232
SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11233
if (private_data_ptr == 0)
11234
allocate_stack(common, 2);
11235
11236
OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11237
11238
if (exact == 0)
11239
{
11240
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11241
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11242
break;
11243
}
11244
11245
if (exact >= 2)
11246
{
11247
/* Extuni has a separate exact opcode. */
11248
SLJIT_ASSERT(tmp_base == TMP3);
11249
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11250
}
11251
11252
label = LABEL();
11253
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11254
11255
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11256
11257
if (exact >= 2)
11258
{
11259
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11260
JUMPTO(SLJIT_NOT_ZERO, label);
11261
}
11262
11263
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11264
break;
11265
11266
case OP_EXACT:
11267
SLJIT_ASSERT(backtrack == NULL);
11268
break;
11269
11270
case OP_POSSTAR:
11271
SLJIT_ASSERT(backtrack == NULL);
11272
#if defined SUPPORT_UNICODE
11273
if (type == OP_ALLANY && !common->invalid_utf)
11274
#else
11275
if (type == OP_ALLANY)
11276
#endif
11277
{
11278
if (exact == 1)
11279
detect_partial_match(common, prev_backtracks);
11280
11281
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11282
process_partial_match(common);
11283
if (early_fail_ptr != 0)
11284
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11285
break;
11286
}
11287
11288
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11289
if (common->utf)
11290
{
11291
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11292
11293
if (tmp_base != TMP3)
11294
{
11295
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11296
tmp_base = COUNT_MATCH;
11297
}
11298
11299
OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? SLJIT_IMM : STR_PTR, 0);
11300
detect_partial_match(common, &no_match);
11301
label = LABEL();
11302
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11303
OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0);
11304
detect_partial_match_to(common, label);
11305
11306
set_jumps(no_match, LABEL());
11307
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0);
11308
11309
if (tmp_base != TMP3)
11310
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11311
11312
if (exact == 1)
11313
add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
11314
11315
if (early_fail_ptr != 0)
11316
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11317
break;
11318
}
11319
#endif
11320
11321
if (exact == 1)
11322
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11323
11324
detect_partial_match(common, &no_match);
11325
label = LABEL();
11326
/* Extuni never fails, so no_char1_match is not used in that case.
11327
Anynl optionally reads an extra character on success. */
11328
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11329
detect_partial_match_to(common, label);
11330
if (type != OP_EXTUNI)
11331
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11332
11333
set_jumps(no_char1_match, LABEL());
11334
if (type != OP_EXTUNI)
11335
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11336
11337
set_jumps(no_match, LABEL());
11338
11339
if (exact == 1)
11340
add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0));
11341
11342
if (early_fail_ptr != 0)
11343
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11344
break;
11345
11346
case OP_POSUPTO:
11347
SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11348
max += exact;
11349
11350
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11351
if (type == OP_EXTUNI || common->utf)
11352
#else
11353
if (type == OP_EXTUNI)
11354
#endif
11355
{
11356
SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
11357
11358
/* Count match is not modified by compile_char1_matchingpath. */
11359
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11360
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 0 : max);
11361
11362
label = LABEL();
11363
/* Extuni only modifies TMP3 on successful match. */
11364
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11365
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11366
11367
if (exact == max)
11368
{
11369
OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11370
JUMPTO(SLJIT_JUMP, label);
11371
}
11372
else
11373
{
11374
OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11375
JUMPTO(SLJIT_NOT_ZERO, label);
11376
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11377
}
11378
11379
set_jumps(no_match, LABEL());
11380
11381
if (exact > 0)
11382
{
11383
if (exact == max)
11384
OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact);
11385
else
11386
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact);
11387
}
11388
11389
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11390
11391
if (exact > 0)
11392
add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER));
11393
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11394
break;
11395
}
11396
11397
SLJIT_ASSERT(tmp_base == TMP3);
11398
11399
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : max);
11400
11401
detect_partial_match(common, &no_match);
11402
label = LABEL();
11403
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11404
11405
if (exact == max)
11406
OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11407
else
11408
{
11409
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11410
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11411
}
11412
detect_partial_match_to(common, label);
11413
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11414
11415
set_jumps(no_char1_match, LABEL());
11416
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11417
set_jumps(no_match, LABEL());
11418
11419
if (exact > 0)
11420
{
11421
if (exact == max)
11422
jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact);
11423
else
11424
jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
11425
11426
add_jump(compiler, prev_backtracks, jump);
11427
}
11428
break;
11429
11430
case OP_POSQUERY:
11431
SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11432
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11433
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11434
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11435
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11436
set_jumps(no_match, LABEL());
11437
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11438
break;
11439
11440
default:
11441
SLJIT_UNREACHABLE();
11442
break;
11443
}
11444
11445
count_match(common);
11446
return end;
11447
}
11448
11449
/* Emits the matching path for the opcode at cc, handling OP_FAIL, OP_ACCEPT
and OP_ASSERT_ACCEPT.

  common   compiler state (accept list/label, restart list, options)
  cc       points at the opcode being compiled
  parent   not referenced by name in this body; presumably consumed by the
           PUSH_BACKTRACK macro -- confirm against its definition

Returns cc + 1 on every path written out below. NULL is passed as the third
argument of PUSH_BACKTRACK.

Generated code, in order:
  - OP_FAIL: unconditional jump into this backtrack's own_backtracks list.
  - OP_ACCEPT outside a recursion entry with PCRE2_ENDANCHORED set: jump to
    the restart_match list when STR_PTR differs from STR_END.
  - OP_ASSERT_ACCEPT, or inside a recursion entry, or when the pattern is not
    flagged might_be_empty: unconditional jump to accept.
  - Otherwise the empty-match rules are tested at run time (see the comments
    in the body). */
static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (*cc == OP_FAIL)
  {
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
  return cc + 1;
  }

/* End-anchored patterns may only accept when the subject pointer has reached
STR_END; otherwise control goes to the restart_match list. */
if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
  add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));

if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
  {
  /* No need to check notempty conditions. */
  if (common->accept_label == NULL)
    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->accept_label);
  return cc + 1;
  }

/* Accept immediately when STR_PTR differs from the value stored in the
OVECTOR(0) slot (presumably the match start, i.e. the match is non-empty). */
if (common->accept_label == NULL)
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
else
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);

/* Load the run-time options word into TMP2. With virtual registers the
arguments pointer is first copied into TMP1, which is reused further down. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  }
else
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));

/* PCRE2_NOTEMPTY set: backtrack. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));

/* PCRE2_NOTEMPTY_ATSTART clear: accept. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
if (common->accept_label == NULL)
  add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
else
  JUMPTO(SLJIT_ZERO, common->accept_label);

/* PCRE2_NOTEMPTY_ATSTART set: accept only when STR_PTR differs from the str
field of the arguments; otherwise fall through to the backtrack jump. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
if (common->accept_label == NULL)
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
else
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
return cc + 1;
}
11504
11505
/* Emits the matching path for the opcode at cc, whose 2-byte operand at
cc + 1 is read as a capture-bracket number.

  common   compiler state
  cc       points at the opcode being compiled

Returns cc + 1 + IMM2_SIZE, i.e. the position after the opcode and its operand.

When compiling inside a recursion entry (common->currententry != NULL) nothing
is emitted. Otherwise STR_PTR is stored into OVECTOR(2 * n + 1), and, unless
the bracket is reported as optimized by is_optimized_cbracket(), the value in
OVECTOR_PRIV(n) is copied into OVECTOR(2 * n). */
static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
{
DEFINE_COMPILER;
int offset = GET2(cc, 1);
BOOL optimized_cbracket = is_optimized_cbracket(common, offset);

/* Data will be discarded anyway... */
if (common->currententry != NULL)
  return cc + 1 + IMM2_SIZE;

/* OVECTOR_PRIV() is indexed by the bracket number, so it must be read before
offset is doubled into an ovector pair index. */
if (!optimized_cbracket)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
offset <<= 1;
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
if (!optimized_cbracket)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
return cc + 1 + IMM2_SIZE;
}
11523
11524
/* Emits the matching path for a backtracking control verb opcode at cc.

  common   compiler state (mark_ptr slot)
  cc       points at the verb opcode
  parent   not referenced by name in this body; presumably consumed by the
           PUSH_BACKTRACK macro -- confirm against its definition

Returns the position after the verb: cc + 1, or cc + 1 + 2 + cc[1] for the
*_ARG forms. NULL is passed as the third argument of PUSH_BACKTRACK.

Generated code:
  - OP_SKIP: one stack slot is allocated and STR_PTR is stored in it.
  - OP_COMMIT_ARG / OP_PRUNE_ARG / OP_THEN_ARG: the address cc + 2 is written
    both to the local mark_ptr slot and to the mark_ptr field of the
    arguments.
  - Any other opcode reaching this function only gets the backtrack record. */
static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode = *cc;
PCRE2_SPTR ccend = cc + 1;

/* The *_ARG forms carry an argument of cc[1] code units, plus two more units
(presumably the length unit and a terminator -- confirm). */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
    opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
  ccend += 2 + cc[1];

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (opcode == OP_SKIP)
  {
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  return ccend;
  }

/* OP_SKIP_ARG is deliberately absent from this list in the code: it advances
ccend above but does not update the mark here. */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
  {
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  }

return ccend;
}
11555
11556
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
record created by compile_then_trap_matchingpath() points its common.cc field
at this buffer. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11557
11558
/* Pushes a then-trap backtrack record and emits the code that links a
then-trap entry into the control-head chain.

  common   compiler state (then_trap, control_head_ptr, start)
  cc       start of the code range the trap covers
  ccend    end of that range (passed to get_framesize and init_frame)
  parent   not referenced by name in this body; presumably consumed by the
           PUSH_BACKTRACK_NOVALUE macro -- confirm against its definition

The record becomes common->then_trap; the caller is responsible for saving
and restoring the previous value. Its common.cc is pointed at
then_trap_opcode, and its start field holds the offset of cc from
common->start.

Stack layout written by the generated code (size = 3 + frame size, where a
negative frame size counts as 0):
  STACK(size - 1)   start offset of the trap
  STACK(size - 2)   type_then_trap marker
  STACK(size - 3)   previous value of the control head
The control head is then set to STACK_TOP, advanced by (size - 3) words when
a frame is present. */
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL needs_control_head;
int size;

PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
common->then_trap = BACKTRACK_AS(then_trap_backtrack);
BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
/* needs_control_head is filled in by get_framesize() but not read here. */
BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);

size = BACKTRACK_AS(then_trap_backtrack)->framesize;
size = 3 + (size < 0 ? 0 : size);

/* Read the old control head before allocate_stack() changes STACK_TOP. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, size);
if (size > 3)
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
else
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);

/* From here on size is the raw frame size again (may be negative). */
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
if (size >= 0)
  init_frame(common, cc, ccend, size - 1, 0);
}
11588
11589
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11590
{
11591
DEFINE_COMPILER;
11592
backtrack_common *backtrack;
11593
BOOL has_then_trap = FALSE;
11594
then_trap_backtrack *save_then_trap = NULL;
11595
size_t op_len;
11596
11597
SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11598
11599
if (common->has_then && common->then_offsets[cc - common->start] != 0)
11600
{
11601
SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11602
has_then_trap = TRUE;
11603
save_then_trap = common->then_trap;
11604
/* Tail item on backtrack. */
11605
compile_then_trap_matchingpath(common, cc, ccend, parent);
11606
}
11607
11608
while (cc < ccend)
11609
{
11610
switch(*cc)
11611
{
11612
case OP_SOD:
11613
case OP_SOM:
11614
case OP_NOT_WORD_BOUNDARY:
11615
case OP_WORD_BOUNDARY:
11616
case OP_EODN:
11617
case OP_EOD:
11618
case OP_DOLL:
11619
case OP_DOLLM:
11620
case OP_CIRC:
11621
case OP_CIRCM:
11622
case OP_NOT_UCP_WORD_BOUNDARY:
11623
case OP_UCP_WORD_BOUNDARY:
11624
cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11625
break;
11626
11627
case OP_NOT_DIGIT:
11628
case OP_DIGIT:
11629
case OP_NOT_WHITESPACE:
11630
case OP_WHITESPACE:
11631
case OP_NOT_WORDCHAR:
11632
case OP_WORDCHAR:
11633
case OP_ANY:
11634
case OP_ALLANY:
11635
case OP_ANYBYTE:
11636
case OP_NOTPROP:
11637
case OP_PROP:
11638
case OP_ANYNL:
11639
case OP_NOT_HSPACE:
11640
case OP_HSPACE:
11641
case OP_NOT_VSPACE:
11642
case OP_VSPACE:
11643
case OP_EXTUNI:
11644
case OP_NOT:
11645
case OP_NOTI:
11646
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11647
break;
11648
11649
case OP_SET_SOM:
11650
PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11651
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11652
allocate_stack(common, 1);
11653
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11654
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11655
cc++;
11656
break;
11657
11658
case OP_CHAR:
11659
case OP_CHARI:
11660
if (common->mode == PCRE2_JIT_COMPLETE)
11661
cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11662
else
11663
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11664
break;
11665
11666
case OP_STAR:
11667
case OP_MINSTAR:
11668
case OP_PLUS:
11669
case OP_MINPLUS:
11670
case OP_QUERY:
11671
case OP_MINQUERY:
11672
case OP_UPTO:
11673
case OP_MINUPTO:
11674
case OP_EXACT:
11675
case OP_POSSTAR:
11676
case OP_POSPLUS:
11677
case OP_POSQUERY:
11678
case OP_POSUPTO:
11679
case OP_STARI:
11680
case OP_MINSTARI:
11681
case OP_PLUSI:
11682
case OP_MINPLUSI:
11683
case OP_QUERYI:
11684
case OP_MINQUERYI:
11685
case OP_UPTOI:
11686
case OP_MINUPTOI:
11687
case OP_EXACTI:
11688
case OP_POSSTARI:
11689
case OP_POSPLUSI:
11690
case OP_POSQUERYI:
11691
case OP_POSUPTOI:
11692
case OP_NOTSTAR:
11693
case OP_NOTMINSTAR:
11694
case OP_NOTPLUS:
11695
case OP_NOTMINPLUS:
11696
case OP_NOTQUERY:
11697
case OP_NOTMINQUERY:
11698
case OP_NOTUPTO:
11699
case OP_NOTMINUPTO:
11700
case OP_NOTEXACT:
11701
case OP_NOTPOSSTAR:
11702
case OP_NOTPOSPLUS:
11703
case OP_NOTPOSQUERY:
11704
case OP_NOTPOSUPTO:
11705
case OP_NOTSTARI:
11706
case OP_NOTMINSTARI:
11707
case OP_NOTPLUSI:
11708
case OP_NOTMINPLUSI:
11709
case OP_NOTQUERYI:
11710
case OP_NOTMINQUERYI:
11711
case OP_NOTUPTOI:
11712
case OP_NOTMINUPTOI:
11713
case OP_NOTEXACTI:
11714
case OP_NOTPOSSTARI:
11715
case OP_NOTPOSPLUSI:
11716
case OP_NOTPOSQUERYI:
11717
case OP_NOTPOSUPTOI:
11718
case OP_TYPESTAR:
11719
case OP_TYPEMINSTAR:
11720
case OP_TYPEPLUS:
11721
case OP_TYPEMINPLUS:
11722
case OP_TYPEQUERY:
11723
case OP_TYPEMINQUERY:
11724
case OP_TYPEUPTO:
11725
case OP_TYPEMINUPTO:
11726
case OP_TYPEEXACT:
11727
case OP_TYPEPOSSTAR:
11728
case OP_TYPEPOSPLUS:
11729
case OP_TYPEPOSQUERY:
11730
case OP_TYPEPOSUPTO:
11731
cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11732
break;
11733
11734
case OP_CLASS:
11735
case OP_NCLASS:
11736
if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
11737
cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11738
else
11739
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11740
break;
11741
11742
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11743
case OP_XCLASS:
11744
case OP_ECLASS:
11745
op_len = GET(cc, 1);
11746
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11747
cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11748
else
11749
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11750
break;
11751
#endif
11752
11753
case OP_REF:
11754
case OP_REFI:
11755
op_len = PRIV(OP_lengths)[*cc];
11756
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11757
cc = compile_ref_iterator_matchingpath(common, cc, parent);
11758
else
11759
{
11760
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
11761
cc += op_len;
11762
}
11763
break;
11764
11765
case OP_DNREF:
11766
case OP_DNREFI:
11767
op_len = PRIV(OP_lengths)[*cc];
11768
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11769
cc = compile_ref_iterator_matchingpath(common, cc, parent);
11770
else
11771
{
11772
compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11773
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
11774
cc += op_len;
11775
}
11776
break;
11777
11778
case OP_RECURSE:
11779
cc = compile_recurse_matchingpath(common, cc, parent);
11780
break;
11781
11782
case OP_CALLOUT:
11783
case OP_CALLOUT_STR:
11784
cc = compile_callout_matchingpath(common, cc, parent);
11785
break;
11786
11787
case OP_ASSERT:
11788
case OP_ASSERT_NOT:
11789
case OP_ASSERTBACK:
11790
case OP_ASSERTBACK_NOT:
11791
PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
11792
cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
11793
break;
11794
11795
case OP_BRAMINZERO:
11796
PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
11797
cc = bracketend(cc + 1);
11798
if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
11799
{
11800
allocate_stack(common, 1);
11801
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11802
}
11803
else
11804
{
11805
allocate_stack(common, 2);
11806
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11807
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
11808
}
11809
BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
11810
count_match(common);
11811
break;
11812
11813
case OP_ASSERT_NA:
11814
case OP_ASSERTBACK_NA:
11815
case OP_ASSERT_SCS:
11816
case OP_ONCE:
11817
case OP_SCRIPT_RUN:
11818
case OP_BRA:
11819
case OP_CBRA:
11820
case OP_COND:
11821
case OP_SBRA:
11822
case OP_SCBRA:
11823
case OP_SCOND:
11824
cc = compile_bracket_matchingpath(common, cc, parent);
11825
break;
11826
11827
case OP_BRAZERO:
11828
if (cc[1] > OP_ASSERTBACK_NOT)
11829
cc = compile_bracket_matchingpath(common, cc, parent);
11830
else
11831
{
11832
PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
11833
cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
11834
}
11835
break;
11836
11837
case OP_BRAPOS:
11838
case OP_CBRAPOS:
11839
case OP_SBRAPOS:
11840
case OP_SCBRAPOS:
11841
case OP_BRAPOSZERO:
11842
cc = compile_bracketpos_matchingpath(common, cc, parent);
11843
break;
11844
11845
case OP_MARK:
11846
PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11847
SLJIT_ASSERT(common->mark_ptr != 0);
11848
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
11849
allocate_stack(common, common->has_skip_arg ? 5 : 1);
11850
if (HAS_VIRTUAL_REGISTERS)
11851
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11852
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
11853
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11854
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11855
OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11856
if (common->has_skip_arg)
11857
{
11858
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11859
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11860
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
11861
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
11862
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
11863
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11864
}
11865
cc += 1 + 2 + cc[1];
11866
break;
11867
11868
case OP_PRUNE:
11869
case OP_PRUNE_ARG:
11870
case OP_SKIP:
11871
case OP_SKIP_ARG:
11872
case OP_THEN:
11873
case OP_THEN_ARG:
11874
case OP_COMMIT:
11875
case OP_COMMIT_ARG:
11876
cc = compile_control_verb_matchingpath(common, cc, parent);
11877
break;
11878
11879
case OP_FAIL:
11880
case OP_ACCEPT:
11881
case OP_ASSERT_ACCEPT:
11882
cc = compile_fail_accept_matchingpath(common, cc, parent);
11883
break;
11884
11885
case OP_CLOSE:
11886
cc = compile_close_matchingpath(common, cc);
11887
break;
11888
11889
case OP_SKIPZERO:
11890
cc = bracketend(cc + 1);
11891
break;
11892
11893
default:
11894
SLJIT_UNREACHABLE();
11895
return;
11896
}
11897
if (cc == NULL)
11898
return;
11899
}
11900
11901
if (has_then_trap)
11902
{
11903
/* Head item on backtrack. */
11904
PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11905
BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11906
BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
11907
common->then_trap = save_then_trap;
11908
}
11909
SLJIT_ASSERT(cc == ccend);
11910
}
11911
11912
/* Retire the helper macros used by the matching path code above. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of (current), then leaves the enclosing
(void) function when the sljit compiler reports an error. The expansion
relies on "common" and "compiler" being in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
  } while (0)

/* Views the "current" variable of the expansion scope as a (type *). */
#define CURRENT_AS(type) ((type *)current)
11926
11927
/* Emits code which moves STR_PTR back by one extra code unit when the two
code units in front of STR_PTR are CR followed by NL. The caller has to load
the lower limit into TMP2: the check is skipped entirely unless STR_PTR - 1
is above TMP2. TMP1 and SLJIT_TMP_DEST_REG are overwritten. */
static void compile_newline_move_back(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *at_limit;
const sljit_sw cr_nl_pair = CHAR_CR << 8 | CHAR_NL;

OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
at_limit = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0);

/* All newlines are single byte, or their last byte
is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. */

/* Combine the previous two code units as (STR_PTR[-2] << 8) | STR_PTR[-1]. */
OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0);

/* TMP1 becomes the value of the "equal" flag of the CR NL comparison. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, cr_nl_pair);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Scale the flag for the wider code unit widths. */
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

JUMPHERE(at_limit);
}
11948
11949
/* Emits the backtracking path of a character iterator. The iterator state
lives either in the private data area addressed through SLJIT_SP (non-zero
private_data_ptr) or in the topmost slots of the backtrack stack. In the
latter case the slots are released once no further retry is possible. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *loop = NULL;
struct sljit_jump *done = NULL;
jump_list *no_match = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base = on_stack ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = on_stack ? STACK(0) : private_data_ptr;
int offset1 = on_stack ? STACK(1) : private_data_ptr + SSIZE_OF(sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_EXTUNI)
    {
    /* A saved position is popped from the stack; a zero value means
    that there is nothing left to retry. */
    SLJIT_ASSERT(on_stack);
    set_jumps(current->own_backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  if (iter->charpos.charpos_enabled)
    {
    /* Step back until a code unit equal to charpos.chr is found. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);

    done = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    loop = LABEL();
    if (type == OP_ANYNL)
      compile_newline_move_back(common);
    move_back(common, NULL, TRUE);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (iter->charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->charpos.chr, iter->matchingpath);
    /* The range beginning must match, no need to compare. */
    JUMPTO(SLJIT_JUMP, loop);

    set_jumps(current->own_backtracks, LABEL());
    current->own_backtracks = NULL;
    }
  else
    {
    /* Give back one character unless the limit stored at offset1
    has been reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);

    if (opcode == OP_STAR && exact == 1)
      {
      /* Here the position is moved first and compared afterwards. */
      if (type == OP_ANYNL)
        {
        OP1(SLJIT_MOV, TMP2, 0, base, offset1);
        compile_newline_move_back(common);
        }

      move_back(common, NULL, TRUE);
      done = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      }
    else
      {
      if (type == OP_ANYNL)
        {
        OP1(SLJIT_MOV, TMP2, 0, base, offset1);
        done = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
        compile_newline_move_back(common);
        }
      else
        done = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);

      move_back(common, NULL, TRUE);
      }

    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);

    set_jumps(current->own_backtracks, LABEL());
    }

  JUMPHERE(done);
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved value is consumed (replaced by zero) on both entry points;
  only the first one tests it before resuming the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  done = JUMP(SLJIT_JUMP);
  set_jumps(current->own_backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(done);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINSTAR:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  if (exact == 0)
    {
    /* Match one more character here and remember the new position. */
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    }
  else if (exact > 1)
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);

  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  if (exact > 0)
    set_jumps(current->own_backtracks, LABEL());
  else
    set_jumps(no_match, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* The value at offset1 is a counter which is decreased by one here. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);

  if (exact == 0)
    {
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));

    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);

    set_jumps(no_match, LABEL());
    }
  else
    {
    if (exact > 1)
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
    JUMPTO(SLJIT_NOT_ZERO, iter->matchingpath);

    set_jumps(current->own_backtracks, LABEL());
    }

  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_MINQUERY:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);

  if (exact == 0)
    {
    /* A zero saved value means the extra character was already tried. */
    done = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    set_jumps(no_match, LABEL());
    JUMPHERE(done);
    }
  else
    {
    if (exact > 1)
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    set_jumps(current->own_backtracks, LABEL());
    }

  if (on_stack)
    free_stack(common, 1);
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }
}
12132
12133
/* Emits the backtracking path of a repeated back reference. The repeat
opcode which follows the reference selects the variant: an even opcode value
is the maximize case, the other variant keeps two (OP_REF / OP_REFI) or
three (any other reference opcode) stack slots until it gives up. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
ref_iterator_backtrack *iter = CURRENT_AS(ref_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
PCRE2_UCHAR type = cc[PRIV(OP_lengths)[*cc]];
BOOL minimize = (type & 0x1) != 0;
int slots;

if (!minimize)
  {
  /* Maximize case. */
  set_jumps(current->own_backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  return;
  }

/* The slots are kept while the saved position is non-zero and the
matching path is resumed. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
set_jumps(current->own_backtracks, LABEL());

slots = ref ? 2 : 3;
free_stack(common, slots);
}
12157
12158
/* Emits the backtracking path of a recursion. An inlined pattern is handled
by compiling the backtracking path of its own backtrack chain. Otherwise the
backtrack entry point of the recursion is invoked by a fast call, and the
matching path is resumed when TMP1 is non-zero afterwards. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
recurse_backtrack *recurse = CURRENT_AS(recurse_backtrack);
recurse_entry *entry;

if (recurse->inlined_pattern)
  compile_backtrackingpath(common, current->top);
else
  {
  entry = recurse->entry;
  if (entry->backtrack_label != NULL)
    JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
  else
    {
    /* The label is not available yet, so the call is recorded in the list. */
    add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
    }
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, recurse->matchingpath);
  }

set_jumps(current->own_backtracks, LABEL());
}
12177
12178
/* Emits the backtracking path of an assertion, which may be preceded by
OP_BRAZERO (OP_BRAMINZERO is not expected here). With OP_BRAZERO a saved
STR_PTR value is read from the top of the stack; a non-zero value sends the
execution back to the matching path after the slot has been cleared. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assert_bt = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
struct sljit_jump *brajump = NULL;
BOOL has_brazero;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
has_brazero = (*cc == OP_BRAZERO);
if (has_brazero)
  {
  /* Skip the OP_BRAZERO, cc points to the assertion opcode from now on. */
  cc++;
  SLJIT_ASSERT(current->own_backtracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (assert_bt->framesize < 0)
  {
  /* Negative framesize: no frame revert code is emitted for this case. */
  set_jumps(current->own_backtracks, LABEL());

  if (has_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (has_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertion: reload STACK_TOP from the private data slot, call
  the revertframes routine, step over the frame, and put the value read
  from STACK(-2) back into the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert_bt->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr, TMP1, 0);
  }

set_jumps(current->own_backtracks, LABEL());

if (has_brazero)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, assert_bt->matchingpath);
  JUMPHERE(brajump);
  }
}
12246
12247
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12248
{
12249
DEFINE_COMPILER;
12250
int opcode, stacksize, alt_count, alt_max;
12251
int offset = 0;
12252
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12253
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12254
PCRE2_SPTR cc = current->cc;
12255
PCRE2_SPTR ccbegin;
12256
PCRE2_SPTR ccprev;
12257
PCRE2_UCHAR bra = OP_BRA;
12258
PCRE2_UCHAR ket;
12259
const assert_backtrack *assert;
12260
BOOL has_alternatives;
12261
BOOL needs_control_head = FALSE;
12262
BOOL has_vreverse;
12263
struct sljit_jump *brazero = NULL;
12264
struct sljit_jump *next_alt = NULL;
12265
struct sljit_jump *once = NULL;
12266
struct sljit_jump *cond = NULL;
12267
struct sljit_label *rmin_label = NULL;
12268
struct sljit_label *exact_label = NULL;
12269
struct sljit_jump *mov_addr = NULL;
12270
12271
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12272
{
12273
bra = *cc;
12274
cc++;
12275
}
12276
12277
opcode = *cc;
12278
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12279
ket = *ccbegin;
12280
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12281
{
12282
repeat_ptr = PRIVATE_DATA(ccbegin);
12283
repeat_type = PRIVATE_DATA(ccbegin + 2);
12284
repeat_count = PRIVATE_DATA(ccbegin + 3);
12285
SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12286
if (repeat_type == OP_UPTO)
12287
ket = OP_KETRMAX;
12288
if (repeat_type == OP_MINUPTO)
12289
ket = OP_KETRMIN;
12290
}
12291
ccbegin = cc;
12292
cc += GET(cc, 1);
12293
has_alternatives = *cc == OP_ALT;
12294
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12295
has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL;
12296
if (opcode == OP_CBRA || opcode == OP_SCBRA)
12297
offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12298
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12299
opcode = OP_SCOND;
12300
12301
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12302
12303
/* Decoding the needs_control_head in framesize. */
12304
if (opcode == OP_ONCE)
12305
{
12306
needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12307
CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12308
}
12309
12310
if (ket != OP_KET && repeat_type != 0)
12311
{
12312
/* TMP1 is used in OP_KETRMIN below. */
12313
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12314
free_stack(common, 1);
12315
if (repeat_type == OP_UPTO)
12316
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12317
else
12318
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12319
}
12320
12321
if (ket == OP_KETRMAX)
12322
{
12323
if (bra == OP_BRAZERO)
12324
{
12325
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12326
free_stack(common, 1);
12327
brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12328
}
12329
}
12330
else if (ket == OP_KETRMIN)
12331
{
12332
if (bra != OP_BRAMINZERO)
12333
{
12334
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12335
if (repeat_type != 0)
12336
{
12337
/* TMP1 was set a few lines above. */
12338
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12339
/* Drop STR_PTR for non-greedy plus quantifier. */
12340
if (opcode != OP_ONCE)
12341
free_stack(common, 1);
12342
}
12343
else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12344
{
12345
/* Checking zero-length iteration. */
12346
if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12347
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12348
else
12349
{
12350
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12351
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12352
}
12353
/* Drop STR_PTR for non-greedy plus quantifier. */
12354
if (opcode != OP_ONCE)
12355
free_stack(common, 1);
12356
}
12357
else
12358
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12359
}
12360
rmin_label = LABEL();
12361
if (repeat_type != 0)
12362
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12363
}
12364
else if (bra == OP_BRAZERO)
12365
{
12366
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12367
free_stack(common, 1);
12368
brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12369
}
12370
else if (repeat_type == OP_EXACT)
12371
{
12372
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12373
exact_label = LABEL();
12374
}
12375
12376
if (offset != 0)
12377
{
12378
if (common->capture_last_ptr != 0)
12379
{
12380
SLJIT_ASSERT(!is_optimized_cbracket(common, offset >> 1));
12381
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12382
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12383
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12384
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12385
free_stack(common, 3);
12386
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12387
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12388
}
12389
else if (!is_optimized_cbracket(common, offset >> 1))
12390
{
12391
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12392
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12393
free_stack(common, 2);
12394
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12395
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12396
}
12397
}
12398
else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS))
12399
{
12400
OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
12401
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12402
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
12403
12404
/* Nested scs blocks will not update this variable. */
12405
if (common->restore_end_ptr == 0)
12406
common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
12407
}
12408
12409
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12410
{
12411
int framesize = CURRENT_AS(bracket_backtrack)->u.framesize;
12412
12413
SLJIT_ASSERT(framesize != 0);
12414
if (framesize > 0)
12415
{
12416
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12417
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12418
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
12419
}
12420
once = JUMP(SLJIT_JUMP);
12421
}
12422
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12423
{
12424
if (has_alternatives)
12425
{
12426
/* Always exactly one alternative. */
12427
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12428
free_stack(common, 1);
12429
12430
alt_max = 2;
12431
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12432
}
12433
}
12434
else if (has_alternatives)
12435
{
12436
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12437
free_stack(common, 1);
12438
12439
if (alt_max > 3)
12440
{
12441
sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12442
12443
SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL);
12444
sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL());
12445
sljit_emit_op0(compiler, SLJIT_ENDBR);
12446
}
12447
else
12448
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12449
}
12450
12451
COMPILE_BACKTRACKINGPATH(current->top);
12452
if (current->own_backtracks)
12453
set_jumps(current->own_backtracks, LABEL());
12454
12455
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12456
{
12457
/* Conditional block always has at most one alternative. */
12458
if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12459
{
12460
SLJIT_ASSERT(has_alternatives);
12461
assert = CURRENT_AS(bracket_backtrack)->u.assert;
12462
SLJIT_ASSERT(assert->framesize != 0);
12463
if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12464
{
12465
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12466
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12467
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12468
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12469
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12470
}
12471
cond = JUMP(SLJIT_JUMP);
12472
set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12473
}
12474
else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL)
12475
{
12476
SLJIT_ASSERT(has_alternatives);
12477
cond = JUMP(SLJIT_JUMP);
12478
set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12479
}
12480
else
12481
SLJIT_ASSERT(!has_alternatives);
12482
}
12483
12484
if (has_alternatives)
12485
{
12486
alt_count = 1;
12487
do
12488
{
12489
current->top = NULL;
12490
current->own_backtracks = NULL;
12491
current->simple_backtracks = NULL;
12492
/* Conditional blocks always have an additional alternative, even if it is empty. */
12493
if (*cc == OP_ALT)
12494
{
12495
ccprev = cc + 1 + LINK_SIZE;
12496
cc += GET(cc, 1);
12497
12498
has_vreverse = FALSE;
12499
12500
switch (opcode)
12501
{
12502
case OP_ASSERTBACK:
12503
case OP_ASSERTBACK_NA:
12504
SLJIT_ASSERT(private_data_ptr != 0);
12505
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12506
12507
has_vreverse = (*ccprev == OP_VREVERSE);
12508
if (*ccprev == OP_REVERSE || has_vreverse)
12509
ccprev = compile_reverse_matchingpath(common, ccprev, current);
12510
break;
12511
case OP_ASSERT_SCS:
12512
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12513
break;
12514
case OP_ONCE:
12515
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12516
break;
12517
case OP_COND:
12518
case OP_SCOND:
12519
break;
12520
default:
12521
if (private_data_ptr != 0)
12522
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12523
else
12524
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12525
break;
12526
}
12527
12528
compile_matchingpath(common, ccprev, cc, current);
12529
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12530
return;
12531
12532
switch (opcode)
12533
{
12534
case OP_ASSERTBACK_NA:
12535
if (has_vreverse)
12536
{
12537
SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
12538
add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
12539
}
12540
12541
if (PRIVATE_DATA(ccbegin + 1))
12542
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12543
break;
12544
case OP_ASSERT_NA:
12545
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12546
break;
12547
case OP_SCRIPT_RUN:
12548
match_script_run_common(common, private_data_ptr, current);
12549
break;
12550
}
12551
}
12552
12553
/* Instructions after the current alternative is successfully matched. */
12554
/* There is a similar code in compile_bracket_matchingpath. */
12555
if (opcode == OP_ONCE)
12556
match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12557
12558
stacksize = 0;
12559
if (repeat_type == OP_MINUPTO)
12560
{
12561
/* We need to preserve the counter. TMP2 will be used below. */
12562
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12563
stacksize++;
12564
}
12565
if (ket != OP_KET || bra != OP_BRA)
12566
stacksize++;
12567
if (offset != 0)
12568
{
12569
if (common->capture_last_ptr != 0)
12570
stacksize++;
12571
if (!is_optimized_cbracket(common, offset >> 1))
12572
stacksize += 2;
12573
}
12574
if (opcode != OP_ONCE)
12575
stacksize++;
12576
12577
if (stacksize > 0)
12578
allocate_stack(common, stacksize);
12579
12580
stacksize = 0;
12581
if (repeat_type == OP_MINUPTO)
12582
{
12583
/* TMP2 was set above. */
12584
OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12585
stacksize++;
12586
}
12587
12588
if (ket != OP_KET || bra != OP_BRA)
12589
{
12590
if (ket != OP_KET)
12591
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12592
else
12593
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12594
stacksize++;
12595
}
12596
12597
if (offset != 0)
12598
stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12599
12600
if (opcode != OP_ONCE)
12601
{
12602
if (alt_max <= 3)
12603
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12604
else
12605
mov_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12606
}
12607
12608
if (offset != 0 && ket == OP_KETRMAX && is_optimized_cbracket(common, offset >> 1))
12609
{
12610
/* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12611
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12612
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12613
}
12614
12615
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12616
12617
if (opcode != OP_ONCE)
12618
{
12619
if (alt_max <= 3)
12620
{
12621
JUMPHERE(next_alt);
12622
alt_count++;
12623
if (alt_count < alt_max)
12624
{
12625
SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12626
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12627
}
12628
}
12629
else
12630
{
12631
sljit_set_label(mov_addr, LABEL());
12632
sljit_emit_op0(compiler, SLJIT_ENDBR);
12633
}
12634
}
12635
12636
COMPILE_BACKTRACKINGPATH(current->top);
12637
if (current->own_backtracks)
12638
set_jumps(current->own_backtracks, LABEL());
12639
SLJIT_ASSERT(!current->simple_backtracks);
12640
}
12641
while (*cc == OP_ALT);
12642
12643
if (cond != NULL)
12644
{
12645
SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12646
if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT)
12647
{
12648
assert = CURRENT_AS(bracket_backtrack)->u.assert;
12649
SLJIT_ASSERT(assert->framesize != 0);
12650
if (assert->framesize > 0)
12651
{
12652
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12653
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12654
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12655
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12656
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12657
}
12658
}
12659
JUMPHERE(cond);
12660
}
12661
12662
/* Free the STR_PTR. */
12663
if (private_data_ptr == 0)
12664
free_stack(common, 1);
12665
}
12666
12667
if (offset != 0)
12668
{
12669
/* Using both tmp register is better for instruction scheduling. */
12670
if (is_optimized_cbracket(common, offset >> 1))
12671
{
12672
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12673
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12674
free_stack(common, 2);
12675
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12676
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12677
}
12678
else
12679
{
12680
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12681
free_stack(common, 1);
12682
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12683
}
12684
}
12685
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
12686
{
12687
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12688
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12689
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12690
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12691
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12692
free_stack(common, 4);
12693
}
12694
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12695
{
12696
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12697
free_stack(common, 1);
12698
}
12699
else if (opcode == OP_ASSERT_SCS)
12700
{
12701
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12702
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12703
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12704
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12705
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12706
free_stack(common, has_alternatives ? 3 : 2);
12707
12708
set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12709
12710
/* Nested scs blocks will not update this variable. */
12711
if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
12712
common->restore_end_ptr = 0;
12713
}
12714
else if (opcode == OP_ONCE)
12715
{
12716
cc = ccbegin + GET(ccbegin, 1);
12717
stacksize = needs_control_head ? 1 : 0;
12718
12719
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12720
{
12721
/* Reset head and drop saved frame. */
12722
stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12723
}
12724
else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12725
{
12726
/* The STR_PTR must be released. */
12727
stacksize++;
12728
}
12729
12730
if (stacksize > 0)
12731
free_stack(common, stacksize);
12732
12733
JUMPHERE(once);
12734
/* Restore previous private_data_ptr */
12735
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12736
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12737
else if (ket == OP_KETRMIN)
12738
{
12739
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12740
/* See the comment below. */
12741
free_stack(common, 2);
12742
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12743
}
12744
}
12745
12746
if (repeat_type == OP_EXACT)
12747
{
12748
OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12749
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12750
CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12751
}
12752
else if (ket == OP_KETRMAX)
12753
{
12754
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12755
if (bra != OP_BRAZERO)
12756
free_stack(common, 1);
12757
12758
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12759
if (bra == OP_BRAZERO)
12760
{
12761
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12762
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12763
JUMPHERE(brazero);
12764
free_stack(common, 1);
12765
}
12766
}
12767
else if (ket == OP_KETRMIN)
12768
{
12769
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12770
12771
/* OP_ONCE removes everything in case of a backtrack, so we don't
12772
need to explicitly release the STR_PTR. The extra release would
12773
affect badly the free_stack(2) above. */
12774
if (opcode != OP_ONCE)
12775
free_stack(common, 1);
12776
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12777
if (opcode == OP_ONCE)
12778
free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12779
else if (bra == OP_BRAMINZERO)
12780
free_stack(common, 1);
12781
}
12782
else if (bra == OP_BRAZERO)
12783
{
12784
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12785
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12786
JUMPHERE(brazero);
12787
}
12788
}
12789
12790
/* Emits the backtracking path for the OP_BRAPOS family of brackets.

When a frame was saved (framesize >= 0), STACK_TOP is reloaded from the
private data slot, the frame is reverted through common->revertframes and
the previous private data value is written back. Without a frame there is
nothing to retry: for capturing variants the saved ovector pair (and the
capture_last value, when it is tracked) is restored from the stack, and the
stack entries are released. */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR bracket;
struct sljit_jump *skip_drop;
int ovector_index;
int restore_capture_last = (common->capture_last_ptr != 0);

if (CURRENT_AS(bracketpos_backtrack)->framesize >= 0)
  {
  /* Return to the saved frame and undo its side effects. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));

  if (current->own_backtracks != NULL)
    {
    /* The fall-through path must not execute the stack release below;
    only the collected own_backtracks jumps land on it. */
    skip_drop = JUMP(SLJIT_JUMP);
    set_jumps(current->own_backtracks, LABEL());
    /* Drop the stack frame. */
    free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
    JUMPHERE(skip_drop);
    }

  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
  return;
  }

/* No retry on backtrack, just drop everything. */
bracket = current->cc;
if (*bracket == OP_BRAPOSZERO)
  bracket++;

if (*bracket == OP_CBRAPOS || *bracket == OP_SCBRAPOS)
  {
  ovector_index = (GET2(bracket, 1 + LINK_SIZE)) << 1;

  /* The loads and stores are interleaved so that TMP1 can be reused
  for the capture_last value. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0);
  if (restore_capture_last)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1), TMP2, 0);
  if (restore_capture_last)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
  }

set_jumps(current->own_backtracks, LABEL());
free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
}
12836
12837
/* Emits the backtracking path of OP_BRAMINZERO.

The backtrack lists of 'current' are cleared first. The opcode following
OP_BRAMINZERO then selects the generator: opcodes up to and including
OP_ASSERTBACK_NOT are compiled as an assertion, everything else as a
bracket (matching path followed by its backtracking path). The function
returns early if the sljit compiler reports an error. */
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
assert_backtrack assert_data;

current->top = NULL;
current->own_backtracks = NULL;
current->simple_backtracks = NULL;

if (current->cc[1] <= OP_ASSERTBACK_NOT)
  {
  /* Manual call of compile_assert_matchingpath with a temporary,
  zero-initialised backtrack record. */
  memset(&assert_data, 0, sizeof(assert_data));
  assert_data.common.cc = current->cc;
  assert_data.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  compile_assert_matchingpath(common, current->cc, &assert_data, FALSE);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(common->compiler)))
    return;
  }
else
  {
  /* Manual call of compile_bracket_matchingpath and
  compile_bracket_backtrackingpath. */
  compile_bracket_matchingpath(common, current->cc, current);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(common->compiler)))
    return;
  compile_bracket_backtrackingpath(common, current->top);
  }

SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
}
12864
12865
/* Emits the backtracking path of the control verbs OP_THEN, OP_PRUNE,
OP_SKIP and their _ARG variants.

Order of decisions:
  1. THEN with an active then trap: walk the control head chain until the
     matching then-trap entry is found, then jump to the trap's quit list.
  2. THEN inside a positive assertion when no local quit is available:
     jump to positive_assertion_quit.
  3. Restore STR_END from restore_end_ptr when one is set (for SKIP_ARG this
     is postponed until after the mark search).
  4. With a local quit available: abort through quit / quit_label.
  5. SKIP_ARG: call do_search_mark and restart through reset_match only when
     it returns a non-zero value.
  6. Otherwise load STR_PTR (from the stack for SKIP, zero for the rest) and
     jump to reset_match. */
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_UCHAR opcode = *current->cc;
int is_then = (opcode == OP_THEN || opcode == OP_THEN_ARG);
struct sljit_label *scan_next;
struct sljit_jump *skip;

if (is_then && common->then_trap != NULL)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0);

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
  /* The first entry is checked without following a link. */
  skip = JUMP(SLJIT_JUMP);

  scan_next = LABEL();
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPHERE(skip);
  /* Continue with the next entry unless both the type and the start match. */
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, scan_next);
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, scan_next);
  add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
  return;
  }

if (is_then && !common->local_quit_available && common->in_positive_assertion)
  {
  add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
  return;
  }

if (opcode != OP_SKIP_ARG && common->restore_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);

if (common->local_quit_available)
  {
  /* Abort match with a fail. */
  if (common->quit_label != NULL)
    JUMPTO(SLJIT_JUMP, common->quit_label);
  else
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  return;
  }

if (opcode == OP_SKIP_ARG)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  /* Arguments of do_search_mark: control head in R0, mark name in R1. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));

  if (common->restore_end_ptr != 0)
    {
    /* STR_END is restored only when the match is really restarted. */
    skip = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);
    add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
    JUMPHERE(skip);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
    add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
    }
  return;
  }

if (opcode != OP_SKIP)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
else
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}
12939
12940
/* Emits the backtracking path of OP_VREVERSE.

STR_PTR is reloaded from STACK(2) and compared with the value in STACK(3).
While it is below that value, skip_valid_char() is called, the updated
position is stored back into STACK(2) and the matching path is re-entered.
Once the limit is reached, execution continues at the label shared with
this backtrack's own_backtracks list. */
static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;
struct sljit_label *exhausted;

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
limit_reached = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));

skip_valid_char(common);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);

/* A single label serves both the limit check and the pending backtracks. */
exhausted = LABEL();
sljit_set_label(limit_reached, exhausted);
set_jumps(current->own_backtracks, exhausted);
}
12956
12957
/* Emits the backtracking path of the virtual OP_THEN_TRAP opcode.

If the backtrack record carries a then_trap pointer, only common->then_trap
is set from it and no code is emitted. Otherwise two paths are generated
which both end by storing TMP1 into the control head slot:
  - the normal backtrack, which loads the saved value and releases the trap
    entry together with the optional frame;
  - the target of the trap's quit list (reached from a THEN), which reverts
    the frame when there is one and then releases the three trap words. */
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
struct sljit_jump *normal_exit;
int framesize;
int frame_slots;

if (CURRENT_AS(then_trap_backtrack)->then_trap != NULL)
  {
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
  return;
  }

framesize = CURRENT_AS(then_trap_backtrack)->framesize;
/* A non-positive frame size contributes no extra stack slots. */
frame_slots = (framesize > 0) ? framesize : 0;

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(frame_slots));
free_stack(common, frame_slots + 3);
normal_exit = JUMP(SLJIT_JUMP);

set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());

SLJIT_ASSERT(framesize != 0);

/* STACK_TOP is set by THEN. */
if (framesize > 0)
  {
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);

JUMPHERE(normal_exit);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
12994
12995
/* Emits the backtracking paths of a chain of backtrack records.

The chain is walked through the 'prev' links starting at 'current'. For
every record the pending simple_backtracks jumps are bound to the current
position, then the opcode at record->cc selects the generator for that
record. common->then_trap may be changed while the chain is processed; the
value seen on entry is restored before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Restore the saved start-of-match value. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless variants. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated character iterators. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Negated caseless variants. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type iterators. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Since classes have no backtracking path of their own, a class
    record here was pushed by an iterator. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS: case OP_ECLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA: case OP_ASSERT_SCS:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO tells assertions and brackets apart. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS: case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five stack words: the mark value is in slot 4 and the previous
      control head in slot 0. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    if (common->restore_end_ptr != 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);

    /* Without a local quit the return register is set to no-match
    before leaving. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);

    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the collected jumps. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13197
13198
/* Emits the code of the recursion target described by common->currententry.

Two entry points are generated, both entered with SLJIT_FAST_ENTER and left
with SLJIT_FAST_RETURN:
  - entry_label: the matching path. It allocates a stack block for the
    private data and a small header, then tries the alternatives in order.
  - backtrack_label: the backtracking path. It resumes the alternative
    recorded on the stack (an index for up to three alternatives, a code
    address otherwise).
TMP1 holds 1 on return when an alternative matched and 0 on the other
return paths (no alternative matched, or the backtrack of an accept). */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int accept_found = (recurse_flags & recurse_flag_accept_found) != 0;
int alt_index = 0;
int alt_total, header_size, frame_size, has_selector, use_jump_table;
backtrack_common alt_backtrack;
jump_list *matched = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_jump *alt_addr = NULL;
struct sljit_label *no_match;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_total = no_alternatives(cc);
/* The header holds the return address, plus the saved STR_PTR when there
is more than one alternative. */
header_size = (alt_total > 1) ? 2 : 1;
frame_size = private_data_size + header_size;
/* A second word (alternative selector or accept marker) is pushed before
returning on the matching path. */
has_selector = (alt_total > 1) || accept_found;
/* More than three alternatives are resumed through a stored address. */
use_jump_table = (alt_total > 3);

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
count_match(common);

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, frame_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, header_size, frame_size, recurse_flags);

/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

if (alt_total > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

memset(&alt_backtrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
alt_backtrack.cc = ccbegin;
cc += GET(cc, 1);

for (;;)
  {
  alt_backtrack.top = NULL;
  alt_backtrack.own_backtracks = NULL;

  /* Every alternative after the first restarts from the saved position. */
  if (alt_backtrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, alt_backtrack.cc, cc, &alt_backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  allocate_stack(common, has_selector ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (has_selector)
    {
    if (use_jump_table)
      alt_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_index);
    }

  add_jump(compiler, &matched, JUMP(SLJIT_JUMP));

  if (alt_index != 0)
    {
    if (use_jump_table)
      {
      /* Resume point of this alternative. */
      sljit_set_label(alt_addr, LABEL());
      sljit_emit_op0(compiler, SLJIT_ENDBR);
      }
    else
      {
      JUMPHERE(next_alt);
      if (alt_index + 1 < alt_total)
        {
        SLJIT_ASSERT(alt_index == 1 && alt_total == 3);
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
        }
      }
    }
  else
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);

    /* A selector of -1 marks a frame that was left through accept. */
    if (accept_found)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(header_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, header_size, frame_size, recurse_flags);

    if (alt_total <= 1)
      free_stack(common, accept_found ? 2 : 1);
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (use_jump_table)
        {
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_label(alt_addr, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    }

  alt_index++;

  compile_backtrackingpath(common, alt_backtrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(alt_backtrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  alt_backtrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

no_match = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, header_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1));
free_stack(common, frame_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Quit requests unwind to the recursion head and leave through the
  no-match exit. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, header_size, frame_size, recurse_flags);
  JUMPTO(SLJIT_JUMP, no_match);
  }

if (accept_found)
  {
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, header_size, frame_size, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1));
  free_stack(common, frame_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  /* Push the accept marker (-1) in place of an alternative selector. */
  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Common exit of every successful alternative; TMP2 holds the recursion head. */
set_jumps(matched, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, header_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(header_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13406
13407
/* The helper macros of the backtracking path generators are not needed
beyond this point. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the jit_compile() mode argument that configure the compilation.
jit_compile() clears them from the mode value after reading them. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13412
13413
static int jit_compile(pcre2_code *code, sljit_u32 mode)
13414
{
13415
pcre2_real_code *re = (pcre2_real_code *)code;
13416
struct sljit_compiler *compiler;
13417
backtrack_common rootbacktrack;
13418
compiler_common common_data;
13419
compiler_common *common = &common_data;
13420
const sljit_u8 *tables = re->tables;
13421
void *allocator_data = &re->memctl;
13422
int private_data_size;
13423
PCRE2_SPTR ccend;
13424
executable_functions *functions;
13425
void *executable_func;
13426
sljit_uw executable_size, private_data_length, total_length;
13427
struct sljit_label *mainloop_label = NULL;
13428
struct sljit_label *continue_match_label;
13429
struct sljit_label *empty_match_found_label = NULL;
13430
struct sljit_label *empty_match_backtrack_label = NULL;
13431
struct sljit_label *reset_match_label;
13432
struct sljit_label *quit_label;
13433
struct sljit_jump *jump;
13434
struct sljit_jump *minlength_check_failed = NULL;
13435
struct sljit_jump *empty_match = NULL;
13436
struct sljit_jump *end_anchor_failed = NULL;
13437
jump_list *reqcu_not_found = NULL;
13438
13439
SLJIT_ASSERT(tables);
13440
13441
#if HAS_VIRTUAL_REGISTERS == 1
13442
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
13443
#elif HAS_VIRTUAL_REGISTERS == 0
13444
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
13445
#else
13446
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13447
#endif
13448
13449
memset(&rootbacktrack, 0, sizeof(backtrack_common));
13450
memset(common, 0, sizeof(compiler_common));
13451
common->re = re;
13452
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13453
rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
13454
13455
#ifdef SUPPORT_UNICODE
13456
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13457
#endif /* SUPPORT_UNICODE */
13458
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13459
13460
common->start = rootbacktrack.cc;
13461
common->read_only_data_head = NULL;
13462
common->fcc = tables + fcc_offset;
13463
common->lcc = (sljit_sw)(tables + lcc_offset);
13464
common->mode = mode;
13465
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13466
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13467
common->nltype = NLTYPE_FIXED;
13468
switch(re->newline_convention)
13469
{
13470
case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13471
case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13472
case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13473
case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13474
case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13475
case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13476
default: return PCRE2_ERROR_INTERNAL;
13477
}
13478
common->nlmax = READ_CHAR_MAX;
13479
common->nlmin = 0;
13480
if (re->bsr_convention == PCRE2_BSR_UNICODE)
13481
common->bsr_nltype = NLTYPE_ANY;
13482
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13483
common->bsr_nltype = NLTYPE_ANYCRLF;
13484
else
13485
{
13486
#ifdef BSR_ANYCRLF
13487
common->bsr_nltype = NLTYPE_ANYCRLF;
13488
#else
13489
common->bsr_nltype = NLTYPE_ANY;
13490
#endif
13491
}
13492
common->bsr_nlmax = READ_CHAR_MAX;
13493
common->bsr_nlmin = 0;
13494
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13495
common->ctypes = (sljit_sw)(tables + ctypes_offset);
13496
common->name_count = re->name_count;
13497
common->name_entry_size = re->name_entry_size;
13498
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13499
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13500
#ifdef SUPPORT_UNICODE
13501
/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */
13502
common->utf = (re->overall_options & PCRE2_UTF) != 0;
13503
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13504
if (common->utf)
13505
{
13506
if (common->nltype == NLTYPE_ANY)
13507
common->nlmax = 0x2029;
13508
else if (common->nltype == NLTYPE_ANYCRLF)
13509
common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13510
else
13511
{
13512
/* We only care about the first newline character. */
13513
common->nlmax = common->newline & 0xff;
13514
}
13515
13516
if (common->nltype == NLTYPE_FIXED)
13517
common->nlmin = common->newline & 0xff;
13518
else
13519
common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13520
13521
if (common->bsr_nltype == NLTYPE_ANY)
13522
common->bsr_nlmax = 0x2029;
13523
else
13524
common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13525
common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13526
}
13527
else
13528
common->invalid_utf = FALSE;
13529
#endif /* SUPPORT_UNICODE */
13530
ccend = bracketend(common->start);
13531
13532
/* Calculate the local space size on the stack. */
13533
common->ovector_start = LOCAL0;
13534
/* Allocate space for temporary data structures. */
13535
private_data_length = ccend - common->start;
13536
/* The chance of overflow is very low, but might happen on 32 bit. */
13537
if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32))
13538
return PCRE2_ERROR_NOMEMORY;
13539
13540
private_data_length *= sizeof(sljit_s32);
13541
/* Align to 32 bit. */
13542
common->cbracket_bitset_length = ((re->top_bracket + 1) + (sljit_u32)7) & ~(sljit_u32)7;
13543
total_length = common->cbracket_bitset_length << 1;
13544
if (~(sljit_uw)0 - private_data_length < total_length)
13545
return PCRE2_ERROR_NOMEMORY;
13546
13547
total_length += private_data_length;
13548
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data);
13549
if (!common->private_data_ptrs)
13550
return PCRE2_ERROR_NOMEMORY;
13551
13552
memset(common->private_data_ptrs, 0, private_data_length);
13553
common->optimized_cbrackets = ((sljit_u8 *)common->private_data_ptrs) + private_data_length;
13554
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13555
memset(common->optimized_cbrackets, 0, common->cbracket_bitset_length);
13556
#else
13557
memset(common->optimized_cbrackets, 0xff, common->cbracket_bitset_length);
13558
#endif
13559
common->cbracket_bitset = common->optimized_cbrackets + common->cbracket_bitset_length;
13560
13561
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13562
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13563
common->capture_last_ptr = common->ovector_start;
13564
common->ovector_start += sizeof(sljit_sw);
13565
#endif
13566
if (!check_opcode_types(common, common->start, ccend))
13567
{
13568
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13569
return PCRE2_ERROR_JIT_UNSUPPORTED;
13570
}
13571
13572
/* Checking flags and updating ovector_start. */
13573
if (mode == PCRE2_JIT_COMPLETE &&
13574
(re->flags & PCRE2_LASTSET) != 0 &&
13575
(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13576
{
13577
common->req_char_ptr = common->ovector_start;
13578
common->ovector_start += sizeof(sljit_sw);
13579
}
13580
13581
if (mode != PCRE2_JIT_COMPLETE)
13582
{
13583
common->start_used_ptr = common->ovector_start;
13584
common->ovector_start += sizeof(sljit_sw);
13585
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13586
{
13587
common->hit_start = common->ovector_start;
13588
common->ovector_start += sizeof(sljit_sw);
13589
}
13590
}
13591
13592
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13593
{
13594
common->match_end_ptr = common->ovector_start;
13595
common->ovector_start += sizeof(sljit_sw);
13596
}
13597
13598
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13599
common->control_head_ptr = 1;
13600
#endif
13601
13602
if (common->control_head_ptr != 0)
13603
{
13604
common->control_head_ptr = common->ovector_start;
13605
common->ovector_start += sizeof(sljit_sw);
13606
}
13607
13608
if (common->has_set_som)
13609
{
13610
/* Saving the real start pointer is necessary. */
13611
common->start_ptr = common->ovector_start;
13612
common->ovector_start += sizeof(sljit_sw);
13613
}
13614
13615
/* Aligning ovector to even number of sljit words. */
13616
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13617
common->ovector_start += sizeof(sljit_sw);
13618
13619
if (common->start_ptr == 0)
13620
common->start_ptr = OVECTOR(0);
13621
13622
/* Capturing brackets cannot be optimized if callouts are allowed. */
13623
if (common->capture_last_ptr != 0)
13624
memset(common->optimized_cbrackets, 0, common->cbracket_bitset_length);
13625
13626
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13627
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13628
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13629
13630
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
13631
(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
13632
!common->has_skip_in_assert_back)
13633
detect_early_fail(common, common->start, &private_data_size, 0, 0);
13634
13635
set_private_data_ptrs(common, &private_data_size, ccend);
13636
13637
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13638
13639
if (private_data_size > 65536)
13640
{
13641
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13642
return PCRE2_ERROR_JIT_UNSUPPORTED;
13643
}
13644
13645
if (common->has_then)
13646
{
13647
total_length = ccend - common->start;
13648
common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data);
13649
if (!common->then_offsets)
13650
{
13651
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13652
return PCRE2_ERROR_NOMEMORY;
13653
}
13654
memset(common->then_offsets, 0, total_length);
13655
set_then_offsets(common, common->start, NULL);
13656
}
13657
13658
compiler = sljit_create_compiler(allocator_data);
13659
if (!compiler)
13660
{
13661
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13662
if (common->has_then)
13663
SLJIT_FREE(common->then_offsets, allocator_data);
13664
return PCRE2_ERROR_NOMEMORY;
13665
}
13666
common->compiler = compiler;
13667
13668
/* Main pcre2_jit_exec entry. */
13669
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13670
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size);
13671
13672
/* Register init. */
13673
reset_ovector(common, (re->top_bracket + 1) * 2);
13674
if (common->req_char_ptr != 0)
13675
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13676
13677
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13678
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13679
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13680
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13681
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13682
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13683
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13684
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13685
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13686
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13687
13688
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13689
reset_early_fail(common);
13690
13691
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13692
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13693
if (common->mark_ptr != 0)
13694
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13695
if (common->control_head_ptr != 0)
13696
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13697
13698
/* Main part of the matching */
13699
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13700
{
13701
mainloop_label = mainloop_entry(common);
13702
continue_match_label = LABEL();
13703
/* Forward search if possible. */
13704
if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13705
{
13706
if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13707
;
13708
else if ((re->flags & PCRE2_FIRSTSET) != 0)
13709
fast_forward_first_char(common);
13710
else if ((re->flags & PCRE2_STARTLINE) != 0)
13711
fast_forward_newline(common);
13712
else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13713
fast_forward_start_bits(common);
13714
}
13715
}
13716
else
13717
continue_match_label = LABEL();
13718
13719
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 &&
13720
(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13721
{
13722
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13723
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13724
minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13725
}
13726
if (common->req_char_ptr != 0)
13727
reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13728
13729
/* Store the current STR_PTR in OVECTOR(0). */
13730
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13731
/* Copy the limit of allowed recursions. */
13732
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13733
if (common->capture_last_ptr != 0)
13734
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13735
if (common->fast_forward_bc_ptr != NULL)
13736
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13737
13738
if (common->start_ptr != OVECTOR(0))
13739
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13740
13741
/* Copy the beginning of the string. */
13742
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13743
{
13744
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13745
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13746
JUMPHERE(jump);
13747
}
13748
else if (mode == PCRE2_JIT_PARTIAL_HARD)
13749
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13750
13751
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13752
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13753
{
13754
sljit_free_compiler(compiler);
13755
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13756
if (common->has_then)
13757
SLJIT_FREE(common->then_offsets, allocator_data);
13758
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13759
return PCRE2_ERROR_NOMEMORY;
13760
}
13761
13762
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13763
end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13764
13765
if (common->might_be_empty)
13766
{
13767
empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13768
empty_match_found_label = LABEL();
13769
}
13770
13771
common->accept_label = LABEL();
13772
if (common->accept != NULL)
13773
set_jumps(common->accept, common->accept_label);
13774
13775
/* Fail if we detect that the start position was moved to be either after
13776
the end position (\K in lookahead) or before the start offset (\K in
13777
lookbehind). */
13778
13779
if (common->has_set_som &&
13780
(common->re->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0)
13781
{
13782
if (HAS_VIRTUAL_REGISTERS)
13783
{
13784
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13785
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13786
}
13787
else
13788
{
13789
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
13790
}
13791
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13792
13793
/* (ovector[0] < jit_arguments->str)? */
13794
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, TMP1, 0);
13795
/* Unconditionally set R0 (aka TMP1), in between the comparison that needs to
13796
use TMP1, but before the jump. */
13797
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_BAD_BACKSLASH_K);
13798
add_jump(compiler, &common->abort, JUMP(SLJIT_LESS));
13799
/* (ovector[0] > STR_PTR)? NB. ovector[1] hasn't yet been set to STR_PTR. */
13800
add_jump(compiler, &common->abort, CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0));
13801
}
13802
13803
/* This means we have a match. Update the ovector. */
13804
copy_ovector(common, re->top_bracket + 1);
13805
common->quit_label = common->abort_label = LABEL();
13806
if (common->quit != NULL)
13807
set_jumps(common->quit, common->quit_label);
13808
if (common->abort != NULL)
13809
set_jumps(common->abort, common->abort_label);
13810
if (minlength_check_failed != NULL)
13811
SET_LABEL(minlength_check_failed, common->abort_label);
13812
13813
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13814
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13815
13816
if (common->failed_match != NULL)
13817
{
13818
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13819
set_jumps(common->failed_match, LABEL());
13820
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13821
JUMPTO(SLJIT_JUMP, common->abort_label);
13822
}
13823
13824
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13825
JUMPHERE(end_anchor_failed);
13826
13827
if (mode != PCRE2_JIT_COMPLETE)
13828
{
13829
common->partialmatchlabel = LABEL();
13830
set_jumps(common->partialmatch, common->partialmatchlabel);
13831
return_with_partial_match(common, common->quit_label);
13832
}
13833
13834
if (common->might_be_empty)
13835
empty_match_backtrack_label = LABEL();
13836
compile_backtrackingpath(common, rootbacktrack.top);
13837
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13838
{
13839
sljit_free_compiler(compiler);
13840
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13841
if (common->has_then)
13842
SLJIT_FREE(common->then_offsets, allocator_data);
13843
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13844
return PCRE2_ERROR_NOMEMORY;
13845
}
13846
13847
SLJIT_ASSERT(rootbacktrack.prev == NULL);
13848
reset_match_label = LABEL();
13849
13850
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13851
{
13852
/* Update hit_start only in the first time. */
13853
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13854
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13855
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13856
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13857
JUMPHERE(jump);
13858
}
13859
13860
/* Check we have remaining characters. */
13861
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13862
{
13863
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13864
}
13865
13866
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13867
(common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13868
13869
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13870
{
13871
if (common->ff_newline_shortcut != NULL)
13872
{
13873
/* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13874
if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13875
{
13876
if (common->match_end_ptr != 0)
13877
{
13878
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13879
OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13880
CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13881
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13882
}
13883
else
13884
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13885
}
13886
}
13887
else
13888
CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13889
}
13890
13891
/* No more remaining characters. */
13892
if (reqcu_not_found != NULL)
13893
set_jumps(reqcu_not_found, LABEL());
13894
13895
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13896
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13897
13898
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13899
JUMPTO(SLJIT_JUMP, common->quit_label);
13900
13901
flush_stubs(common);
13902
13903
if (common->might_be_empty)
13904
{
13905
JUMPHERE(empty_match);
13906
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13907
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13908
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13909
JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13910
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13911
JUMPTO(SLJIT_ZERO, empty_match_found_label);
13912
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13913
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13914
JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13915
}
13916
13917
common->fast_forward_bc_ptr = NULL;
13918
common->early_fail_start_ptr = 0;
13919
common->early_fail_end_ptr = 0;
13920
common->currententry = common->entries;
13921
common->local_quit_available = TRUE;
13922
quit_label = common->quit_label;
13923
SLJIT_ASSERT(common->restore_end_ptr == 0);
13924
13925
if (common->currententry != NULL)
13926
{
13927
/* A free bit for each private data. */
13928
common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
13929
SLJIT_ASSERT(common->recurse_bitset_size > 0);
13930
common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
13931
13932
if (common->recurse_bitset != NULL)
13933
{
13934
do
13935
{
13936
/* Might add new entries. */
13937
compile_recurse(common);
13938
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13939
break;
13940
flush_stubs(common);
13941
common->currententry = common->currententry->next;
13942
}
13943
while (common->currententry != NULL);
13944
13945
SLJIT_FREE(common->recurse_bitset, allocator_data);
13946
}
13947
13948
if (common->currententry != NULL)
13949
{
13950
/* The common->recurse_bitset has been freed. */
13951
SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
13952
13953
sljit_free_compiler(compiler);
13954
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13955
if (common->has_then)
13956
SLJIT_FREE(common->then_offsets, allocator_data);
13957
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13958
return PCRE2_ERROR_NOMEMORY;
13959
}
13960
}
13961
13962
common->local_quit_available = FALSE;
13963
common->quit_label = quit_label;
13964
SLJIT_ASSERT(common->restore_end_ptr == 0);
13965
13966
/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
13967
/* This is a (really) rare case. */
13968
set_jumps(common->stackalloc, LABEL());
13969
/* RETURN_ADDR is not a saved register. */
13970
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
13971
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13972
13973
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13974
13975
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0);
13976
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13977
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13978
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13979
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13980
13981
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
13982
13983
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13984
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13985
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13986
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13987
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
13988
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13989
13990
/* Allocation failed. */
13991
JUMPHERE(jump);
13992
/* We break the return address cache here, but this is a really rare case. */
13993
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13994
JUMPTO(SLJIT_JUMP, common->quit_label);
13995
13996
/* Call limit reached. */
13997
set_jumps(common->calllimit, LABEL());
13998
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13999
JUMPTO(SLJIT_JUMP, common->quit_label);
14000
14001
if (common->revertframes != NULL)
14002
{
14003
set_jumps(common->revertframes, LABEL());
14004
do_revertframes(common);
14005
}
14006
if (common->wordboundary != NULL)
14007
{
14008
set_jumps(common->wordboundary, LABEL());
14009
check_wordboundary(common, FALSE);
14010
}
14011
if (common->ucp_wordboundary != NULL)
14012
{
14013
set_jumps(common->ucp_wordboundary, LABEL());
14014
check_wordboundary(common, TRUE);
14015
}
14016
if (common->anynewline != NULL)
14017
{
14018
set_jumps(common->anynewline, LABEL());
14019
check_anynewline(common);
14020
}
14021
if (common->hspace != NULL)
14022
{
14023
set_jumps(common->hspace, LABEL());
14024
check_hspace(common);
14025
}
14026
if (common->vspace != NULL)
14027
{
14028
set_jumps(common->vspace, LABEL());
14029
check_vspace(common);
14030
}
14031
if (common->casefulcmp != NULL)
14032
{
14033
set_jumps(common->casefulcmp, LABEL());
14034
do_casefulcmp(common);
14035
}
14036
if (common->caselesscmp != NULL)
14037
{
14038
set_jumps(common->caselesscmp, LABEL());
14039
do_caselesscmp(common);
14040
}
14041
if (common->reset_match != NULL || common->restart_match != NULL)
14042
{
14043
if (common->restart_match != NULL)
14044
{
14045
set_jumps(common->restart_match, LABEL());
14046
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14047
}
14048
14049
set_jumps(common->reset_match, LABEL());
14050
do_reset_match(common, (re->top_bracket + 1) * 2);
14051
/* The value of restart_match is in TMP1. */
14052
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14053
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14054
JUMPTO(SLJIT_JUMP, reset_match_label);
14055
}
14056
#ifdef SUPPORT_UNICODE
14057
#if PCRE2_CODE_UNIT_WIDTH == 8
14058
if (common->utfreadchar != NULL)
14059
{
14060
set_jumps(common->utfreadchar, LABEL());
14061
do_utfreadchar(common);
14062
}
14063
if (common->utfreadtype8 != NULL)
14064
{
14065
set_jumps(common->utfreadtype8, LABEL());
14066
do_utfreadtype8(common);
14067
}
14068
if (common->utfpeakcharback != NULL)
14069
{
14070
set_jumps(common->utfpeakcharback, LABEL());
14071
do_utfpeakcharback(common);
14072
}
14073
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14074
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14075
if (common->utfreadchar_invalid != NULL)
14076
{
14077
set_jumps(common->utfreadchar_invalid, LABEL());
14078
do_utfreadchar_invalid(common);
14079
}
14080
if (common->utfreadnewline_invalid != NULL)
14081
{
14082
set_jumps(common->utfreadnewline_invalid, LABEL());
14083
do_utfreadnewline_invalid(common);
14084
}
14085
if (common->utfmoveback_invalid)
14086
{
14087
set_jumps(common->utfmoveback_invalid, LABEL());
14088
do_utfmoveback_invalid(common);
14089
}
14090
if (common->utfpeakcharback_invalid)
14091
{
14092
set_jumps(common->utfpeakcharback_invalid, LABEL());
14093
do_utfpeakcharback_invalid(common);
14094
}
14095
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14096
if (common->getucd != NULL)
14097
{
14098
set_jumps(common->getucd, LABEL());
14099
do_getucd(common);
14100
}
14101
if (common->getucdtype != NULL)
14102
{
14103
set_jumps(common->getucdtype, LABEL());
14104
do_getucdtype(common);
14105
}
14106
#endif /* SUPPORT_UNICODE */
14107
14108
SLJIT_FREE(common->private_data_ptrs, allocator_data);
14109
if (common->has_then)
14110
SLJIT_FREE(common->then_offsets, allocator_data);
14111
14112
executable_func = sljit_generate_code(compiler, 0, NULL);
14113
executable_size = sljit_get_generated_code_size(compiler);
14114
sljit_free_compiler(compiler);
14115
14116
if (executable_func == NULL)
14117
{
14118
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14119
return PCRE2_ERROR_NOMEMORY;
14120
}
14121
14122
/* Reuse the function descriptor if possible. */
14123
if (re->executable_jit != NULL)
14124
functions = (executable_functions *)re->executable_jit;
14125
else
14126
{
14127
functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14128
if (functions == NULL)
14129
{
14130
/* This case is highly unlikely since we just recently
14131
freed a lot of memory. Not impossible though. */
14132
sljit_free_code(executable_func, NULL);
14133
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14134
return PCRE2_ERROR_NOMEMORY;
14135
}
14136
memset(functions, 0, sizeof(executable_functions));
14137
functions->top_bracket = re->top_bracket + 1;
14138
functions->limit_match = re->limit_match;
14139
re->executable_jit = functions;
14140
}
14141
14142
/* Turn mode into an index. */
14143
if (mode == PCRE2_JIT_COMPLETE)
14144
mode = 0;
14145
else
14146
mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14147
14148
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14149
functions->executable_funcs[mode] = executable_func;
14150
functions->read_only_data_heads[mode] = common->read_only_data_head;
14151
functions->executable_sizes[mode] = executable_size;
14152
return 0;
14153
}
14154
14155
#endif
14156
14157
/*************************************************
14158
* JIT compile a Regular Expression *
14159
*************************************************/
14160
14161
/* This function uses JIT to convert a previously-compiled pattern into machine
14162
code.
14163
14164
Arguments:
14165
code a compiled pattern
14166
options JIT option bits
14167
14168
Returns: 0: success or (*NOJIT) was used
14169
<0: an error code
14170
*/
14171
14172
/* Option bits that pcre2_jit_compile() accepts for a compile request. Any bit
outside this mask makes it return PCRE2_ERROR_JIT_BADOPTION.
PCRE2_JIT_TEST_ALLOC is deliberately not listed: it is handled on its own
before this mask is applied. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE | \
   PCRE2_JIT_PARTIAL_SOFT | \
   PCRE2_JIT_PARTIAL_HARD | \
   PCRE2_JIT_INVALID_UTF)
14174
14175
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14176
pcre2_jit_compile(pcre2_code *code, uint32_t options)
14177
{
14178
pcre2_real_code *re = (pcre2_real_code *)code;
14179
#ifdef SUPPORT_JIT
14180
void *exec_memory;
14181
executable_functions *functions;
14182
static int executable_allocator_is_working = -1;
14183
14184
if (executable_allocator_is_working == -1)
14185
{
14186
/* Checks whether the executable allocator is working. This check
14187
might run multiple times in multi-threaded environments, but the
14188
result should not be affected by it. */
14189
exec_memory = SLJIT_MALLOC_EXEC(32, NULL);
14190
if (exec_memory != NULL)
14191
{
14192
SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL);
14193
executable_allocator_is_working = 1;
14194
}
14195
else executable_allocator_is_working = 0;
14196
}
14197
#endif
14198
14199
if (options & PCRE2_JIT_TEST_ALLOC)
14200
{
14201
if (options != PCRE2_JIT_TEST_ALLOC)
14202
return PCRE2_ERROR_JIT_BADOPTION;
14203
14204
#ifdef SUPPORT_JIT
14205
return executable_allocator_is_working ? 0 : PCRE2_ERROR_NOMEMORY;
14206
#else
14207
return PCRE2_ERROR_JIT_UNSUPPORTED;
14208
#endif
14209
}
14210
14211
if (code == NULL)
14212
return PCRE2_ERROR_NULL;
14213
14214
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14215
return PCRE2_ERROR_JIT_BADOPTION;
14216
14217
/* Support for invalid UTF was first introduced in JIT, with the option
14218
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14219
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14220
preferred feature, with the earlier option deprecated. However, for backward
14221
compatibility, if the earlier option is set, it forces the new option so that
14222
if JIT matching falls back to the interpreter, there is still support for
14223
invalid UTF. However, if this function has already been successfully called
14224
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14225
non-invalid-supporting JIT code was compiled), give an error.
14226
14227
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14228
actions are needed:
14229
14230
1. Remove the definition from pcre2.h.in and from the list in
14231
PUBLIC_JIT_COMPILE_OPTIONS above.
14232
14233
2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14234
14235
3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14236
14237
4. Delete the following short block of code. The setting of "re" and
14238
"functions" can be moved into the JIT-only block below, but if that is
14239
done, (void)re and (void)functions will be needed in the non-JIT case, to
14240
avoid compiler warnings.
14241
*/
14242
14243
#ifdef SUPPORT_JIT
14244
functions = (executable_functions *)re->executable_jit;
14245
#endif
14246
14247
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14248
{
14249
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14250
{
14251
#ifdef SUPPORT_JIT
14252
if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14253
#endif
14254
re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14255
}
14256
}
14257
14258
/* The above tests are run with and without JIT support. This means that
14259
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14260
interpreter support) even in the absence of JIT. But now, if there is no JIT
14261
support, give an error return. */
14262
14263
#ifndef SUPPORT_JIT
14264
return PCRE2_ERROR_JIT_BADOPTION;
14265
#else /* SUPPORT_JIT */
14266
14267
/* There is JIT support. Do the necessary. */
14268
14269
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14270
14271
if (!executable_allocator_is_working)
14272
return PCRE2_ERROR_NOMEMORY;
14273
14274
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14275
options |= PCRE2_JIT_INVALID_UTF;
14276
14277
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14278
|| functions->executable_funcs[0] == NULL)) {
14279
uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14280
int result = jit_compile(code, options & ~excluded_options);
14281
if (result != 0)
14282
return result;
14283
}
14284
14285
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14286
|| functions->executable_funcs[1] == NULL)) {
14287
uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14288
int result = jit_compile(code, options & ~excluded_options);
14289
if (result != 0)
14290
return result;
14291
}
14292
14293
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14294
|| functions->executable_funcs[2] == NULL)) {
14295
uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14296
int result = jit_compile(code, options & ~excluded_options);
14297
if (result != 0)
14298
return result;
14299
}
14300
14301
return 0;
14302
14303
#endif /* SUPPORT_JIT */
14304
}
14305
14306
/* JIT compiler uses an all-in-one approach. This improves security,
14307
since the code generator functions are not exported. */
14308
14309
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14310
14311
#include "pcre2_jit_match_inc.h"
14312
#include "pcre2_jit_misc_inc.h"
14313
14314
/* End of pcre2_jit_compile.c */
14315
14316