/* Source: GitHub repository godotengine/godot, path thirdparty/pcre2/src/pcre2_jit_compile.c (branch master). */
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
This module by Zoltan Herczeg
10
Original API code Copyright (c) 1997-2012 University of Cambridge
11
New API code Copyright (c) 2016-2024 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
* Redistributions of source code must retain the above copyright notice,
18
this list of conditions and the following disclaimer.
19
20
* Redistributions in binary form must reproduce the above copyright
21
notice, this list of conditions and the following disclaimer in the
22
documentation and/or other materials provided with the distribution.
23
24
* Neither the name of the University of Cambridge nor the names of its
25
contributors may be used to endorse or promote products derived from
26
this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#if defined(__has_feature)
47
#if __has_feature(memory_sanitizer)
48
#include <sanitizer/msan_interface.h>
49
#endif /* __has_feature(memory_sanitizer) */
50
#endif /* defined(__has_feature) */
51
52
#include "pcre2_internal.h"
53
54
#ifdef SUPPORT_JIT
55
56
/* All-in-one: Since we use the JIT compiler only from here,
57
we just include it. This way we don't need to touch the build
58
system files. */
59
60
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG mirrors PCRE2_DEBUG. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Allocation hooks: expand to the pcre2_jit_malloc / pcre2_jit_free wrappers. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72
73
static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74
{
75
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76
return allocator->malloc(size, allocator->memory_data);
77
}
78
79
static void pcre2_jit_free(void *ptr, void *allocator_data)
80
{
81
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82
allocator->free(ptr, allocator->memory_data);
83
}
84
85
#include "../deps/sljit/sljit_src/sljitLir.c"
86
87
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88
#error Unsupported architecture
89
#endif
90
91
/* Defines for debugging purposes. */

/* 1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* 1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Allocate memory for the regex stack on the real machine stack.
   Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
113
/*
Short summary about the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
168
169
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chain list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
182
183
typedef struct jit_arguments {
184
/* Pointers first. */
185
struct sljit_stack *stack;
186
PCRE2_SPTR str;
187
PCRE2_SPTR begin;
188
PCRE2_SPTR end;
189
pcre2_match_data *match_data;
190
PCRE2_SPTR startchar_ptr;
191
PCRE2_UCHAR *mark_ptr;
192
int (*callout)(pcre2_callout_block *, void *);
193
void *callout_data;
194
/* Everything else after. */
195
sljit_uw offset_limit;
196
sljit_u32 limit_match;
197
sljit_u32 oveccount;
198
sljit_u32 options;
199
} jit_arguments;
200
201
#define JIT_NUMBER_OF_COMPILE_MODES 3
202
203
typedef struct executable_functions {
204
void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
205
void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
206
sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
207
sljit_u32 top_bracket;
208
sljit_u32 limit_match;
209
} executable_functions;
210
211
/* Singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
215
216
/* Singly linked list of stubs; each entry pairs a start jump with a quit
label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
221
222
/* Negative frame-size markers. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
226
227
/* Item types of the control verb management chain (see the
control_head_ptr member of compiler_common). */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
231
232
/* Kinds of early fail handling. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
237
238
/* Pointer to a function using the SLJIT_FUNC calling convention that takes a
jit_arguments block and returns int. Presumably the type of the generated
matcher entry point - confirm at the call sites. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239
240
/* The following structure is the key data type for the recursive
241
code generator. It is allocated by compile_matchingpath, and contains
242
the arguments for compile_backtrackingpath. Must be the first member
243
of its descendants. */
244
typedef struct backtrack_common {
245
/* Backtracking path of an opcode, which falls back
246
to our opcode, if it cannot resume matching. */
247
struct backtrack_common *prev;
248
/* Backtracks for opcodes without backtracking path.
249
These opcodes are between 'prev' and the current
250
opcode, and they never resume the match. */
251
jump_list *simple_backtracks;
252
/* Internal backtracking list for block constructs
253
which contains other opcodes, such as brackets,
254
asserts, conditionals, etc. */
255
struct backtrack_common *top;
256
/* Backtracks used internally by the opcode. For component
257
opcodes, this list is also used by those opcodes without
258
backtracking path which follows the 'top' backtrack. */
259
jump_list *own_backtracks;
260
/* Opcode pointer. */
261
PCRE2_SPTR cc;
262
} backtrack_common;
263
264
typedef struct assert_backtrack {
265
backtrack_common common;
266
jump_list *condfailed;
267
/* Less than 0 if a frame is not needed. */
268
int framesize;
269
/* Points to our private memory word on the stack. */
270
int private_data_ptr;
271
/* For iterators. */
272
struct sljit_label *matchingpath;
273
} assert_backtrack;
274
275
typedef struct bracket_backtrack {
276
backtrack_common common;
277
/* Where to coninue if an alternative is successfully matched. */
278
struct sljit_label *alternative_matchingpath;
279
/* For rmin and rmax iterators. */
280
struct sljit_label *recursive_matchingpath;
281
/* For greedy ? operator. */
282
struct sljit_label *zero_matchingpath;
283
/* Contains the branches of a failed condition. */
284
union {
285
/* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */
286
jump_list *no_capture;
287
assert_backtrack *assert;
288
/* For OP_ONCE. Less than 0 if not needed. */
289
int framesize;
290
} u;
291
/* For brackets with >3 alternatives. */
292
struct sljit_jump *matching_mov_addr;
293
/* Points to our private memory word on the stack. */
294
int private_data_ptr;
295
} bracket_backtrack;
296
297
typedef struct bracketpos_backtrack {
298
backtrack_common common;
299
/* Points to our private memory word on the stack. */
300
int private_data_ptr;
301
/* Reverting stack is needed. */
302
int framesize;
303
/* Allocated stack size. */
304
int stacksize;
305
} bracketpos_backtrack;
306
307
typedef struct braminzero_backtrack {
308
backtrack_common common;
309
struct sljit_label *matchingpath;
310
} braminzero_backtrack;
311
312
typedef struct char_iterator_backtrack {
313
backtrack_common common;
314
/* Next iteration. */
315
struct sljit_label *matchingpath;
316
/* Creating a range based on the next character. */
317
struct {
318
unsigned int othercasebit;
319
PCRE2_UCHAR chr;
320
BOOL charpos_enabled;
321
} charpos;
322
} char_iterator_backtrack;
323
324
typedef struct ref_iterator_backtrack {
325
backtrack_common common;
326
/* Next iteration. */
327
struct sljit_label *matchingpath;
328
} ref_iterator_backtrack;
329
330
typedef struct recurse_entry {
331
struct recurse_entry *next;
332
/* Contains the function entry label. */
333
struct sljit_label *entry_label;
334
/* Contains the function entry label. */
335
struct sljit_label *backtrack_label;
336
/* Collects the entry calls until the function is not created. */
337
jump_list *entry_calls;
338
/* Collects the backtrack calls until the function is not created. */
339
jump_list *backtrack_calls;
340
/* Points to the starting opcode. */
341
sljit_sw start;
342
} recurse_entry;
343
344
typedef struct recurse_backtrack {
345
backtrack_common common;
346
/* Return to the matching path. */
347
struct sljit_label *matchingpath;
348
/* Recursive pattern. */
349
recurse_entry *entry;
350
/* Pattern is inlined. */
351
BOOL inlined_pattern;
352
} recurse_backtrack;
353
354
typedef struct vreverse_backtrack {
355
backtrack_common common;
356
/* Return to the matching path. */
357
struct sljit_label *matchingpath;
358
} vreverse_backtrack;
359
360
#define OP_THEN_TRAP OP_TABLE_LENGTH
361
362
typedef struct then_trap_backtrack {
363
backtrack_common common;
364
/* If then_trap is not NULL, this structure contains the real
365
then_trap for the backtracking path. */
366
struct then_trap_backtrack *then_trap;
367
/* Points to the starting opcode. */
368
sljit_sw start;
369
/* Exit point for the then opcodes of this alternative. */
370
jump_list *quit;
371
/* Frame size of the current alternative. */
372
int framesize;
373
} then_trap_backtrack;
374
375
#define MAX_N_CHARS 12
376
#define MAX_DIFF_CHARS 5
377
378
typedef struct fast_forward_char_data {
379
/* Number of characters in the chars array, 255 for any character. */
380
sljit_u8 count;
381
/* Number of last UTF-8 characters in the chars array. */
382
sljit_u8 last_count;
383
/* Available characters in the current position. */
384
PCRE2_UCHAR chars[MAX_DIFF_CHARS];
385
} fast_forward_char_data;
386
387
/* Size limits for character class handling (presumably the maximum number of
ranges / single characters handled by an optimized path - confirm at the
users). */
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3
389
390
typedef struct compiler_common {
391
/* The sljit ceneric compiler. */
392
struct sljit_compiler *compiler;
393
/* Compiled regular expression. */
394
pcre2_real_code *re;
395
/* First byte code. */
396
PCRE2_SPTR start;
397
/* Maps private data offset to each opcode. */
398
sljit_s32 *private_data_ptrs;
399
/* Chain list of read-only data ptrs. */
400
void *read_only_data_head;
401
/* Tells whether the capturing bracket is optimized. */
402
sljit_u8 *optimized_cbracket;
403
/* Tells whether the starting offset is a target of then. */
404
sljit_u8 *then_offsets;
405
/* Current position where a THEN must jump. */
406
then_trap_backtrack *then_trap;
407
/* Starting offset of private data for capturing brackets. */
408
sljit_s32 cbra_ptr;
409
#if defined SLJIT_DEBUG && SLJIT_DEBUG
410
/* End offset of locals for assertions. */
411
sljit_s32 locals_size;
412
#endif
413
/* Output vector starting point. Must be divisible by 2. */
414
sljit_s32 ovector_start;
415
/* Points to the starting character of the current match. */
416
sljit_s32 start_ptr;
417
/* Last known position of the requested byte. */
418
sljit_s32 req_char_ptr;
419
/* Head of the last recursion. */
420
sljit_s32 recursive_head_ptr;
421
/* First inspected character for partial matching.
422
(Needed for avoiding zero length partial matches.) */
423
sljit_s32 start_used_ptr;
424
/* Starting pointer for partial soft matches. */
425
sljit_s32 hit_start;
426
/* Pointer of the match end position. */
427
sljit_s32 match_end_ptr;
428
/* Points to the marked string. */
429
sljit_s32 mark_ptr;
430
/* Head of the recursive control verb management chain.
431
Each item must have a previous offset and type
432
(see control_types) values. See do_search_mark. */
433
sljit_s32 control_head_ptr;
434
/* The offset of the saved STR_END in the outermost
435
scan substring block. Since scan substring restores
436
STR_END after a match, it is enough to restore
437
STR_END inside a scan substring block. */
438
sljit_s32 restore_end_ptr;
439
/* Points to the last matched capture block index. */
440
sljit_s32 capture_last_ptr;
441
/* Fast forward skipping byte code pointer. */
442
PCRE2_SPTR fast_forward_bc_ptr;
443
/* Locals used by fast fail optimization. */
444
sljit_s32 early_fail_start_ptr;
445
sljit_s32 early_fail_end_ptr;
446
/* Variables used by recursive call generator. */
447
sljit_s32 recurse_bitset_size;
448
uint8_t *recurse_bitset;
449
450
/* Flipped and lower case tables. */
451
const sljit_u8 *fcc;
452
sljit_sw lcc;
453
/* Mode can be PCRE2_JIT_COMPLETE and others. */
454
int mode;
455
/* TRUE, when empty match is accepted for partial matching. */
456
BOOL allow_empty_partial;
457
/* TRUE, when minlength is greater than 0. */
458
BOOL might_be_empty;
459
/* \K is found in the pattern. */
460
BOOL has_set_som;
461
/* (*SKIP:arg) is found in the pattern. */
462
BOOL has_skip_arg;
463
/* (*THEN) is found in the pattern. */
464
BOOL has_then;
465
/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
466
BOOL has_skip_in_assert_back;
467
/* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
468
BOOL local_quit_available;
469
/* Currently in a positive assertion. */
470
BOOL in_positive_assertion;
471
/* Newline control. */
472
int nltype;
473
sljit_u32 nlmax;
474
sljit_u32 nlmin;
475
int newline;
476
int bsr_nltype;
477
sljit_u32 bsr_nlmax;
478
sljit_u32 bsr_nlmin;
479
/* Dollar endonly. */
480
int endonly;
481
/* Tables. */
482
sljit_sw ctypes;
483
/* Named capturing brackets. */
484
PCRE2_SPTR name_table;
485
sljit_sw name_count;
486
sljit_sw name_entry_size;
487
488
/* Labels and jump lists. */
489
struct sljit_label *partialmatchlabel;
490
struct sljit_label *quit_label;
491
struct sljit_label *abort_label;
492
struct sljit_label *accept_label;
493
struct sljit_label *ff_newline_shortcut;
494
stub_list *stubs;
495
recurse_entry *entries;
496
recurse_entry *currententry;
497
jump_list *partialmatch;
498
jump_list *quit;
499
jump_list *positive_assertion_quit;
500
jump_list *abort;
501
jump_list *failed_match;
502
jump_list *accept;
503
jump_list *calllimit;
504
jump_list *stackalloc;
505
jump_list *revertframes;
506
jump_list *wordboundary;
507
jump_list *ucp_wordboundary;
508
jump_list *anynewline;
509
jump_list *hspace;
510
jump_list *vspace;
511
jump_list *casefulcmp;
512
jump_list *caselesscmp;
513
jump_list *reset_match;
514
/* Same as reset_match, but resets the STR_PTR as well. */
515
jump_list *restart_match;
516
BOOL unset_backref;
517
BOOL alt_circumflex;
518
#ifdef SUPPORT_UNICODE
519
BOOL utf;
520
BOOL invalid_utf;
521
BOOL ucp;
522
/* Points to saving area for iref. */
523
jump_list *getucd;
524
jump_list *getucdtype;
525
#if PCRE2_CODE_UNIT_WIDTH == 8
526
jump_list *utfreadchar;
527
jump_list *utfreadtype8;
528
jump_list *utfpeakcharback;
529
#endif
530
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
531
jump_list *utfreadchar_invalid;
532
jump_list *utfreadnewline_invalid;
533
jump_list *utfmoveback_invalid;
534
jump_list *utfpeakcharback_invalid;
535
#endif
536
#endif /* SUPPORT_UNICODE */
537
} compiler_common;
538
539
/* For byte_sequence_compare. The c and oc unions (only present when
SLJIT_UNALIGNED is set) give several views of the same bytes: as one int,
as one short, and as code units of the configured width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
572
573
/* Undefine sljit macros. */
#undef CMP

/* Used for accessing the elements of the stack. */
#define STACK(i) ((i) * SSIZE_OF(sw))

/* Only R2 and R3 are accepted as the preferred shift register. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Register aliases. TMP2 and TMP3 are swapped when the preferred shift
register is R3, so that TMP2 is mapped to R3 in that case. */
#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* Set to 1 only for the x86-32 configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
610
611
/* Local space layout. */
/* Max limit of recursions. */
#define LIMIT_MATCH      (0 * sizeof(sljit_sw))
/* Local variables. Their number is computed by check_opcode_types. */
#define LOCAL0           (1 * sizeof(sljit_sw))
#define LOCAL1           (2 * sizeof(sljit_sw))
#define LOCAL2           (3 * sizeof(sljit_sw))
#define LOCAL3           (4 * sizeof(sljit_sw))
#define LOCAL4           (5 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])

/* Code unit helpers: MOV_UCHAR is the move operation matching the code unit
width and IN_UCHARS(x) multiplies x by the code unit size in bytes.
UCHAR_SHIFT is defined only for the 16 and 32 bit widths. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR  SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR  SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR  SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
643
644
/* Shortcuts. Every macro except DEFINE_COMPILER and SET_LABEL expects a
local variable called 'compiler' (declared by DEFINE_COMPILER, which in turn
expects a variable called 'common'). */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) \
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX ((sljit_u32)0xffffffff)

/* Parenthesized so that the negative constant stays a single operand inside
any expression. */
#define INVALID_UTF_CHAR (-1)
#define UNASSIGNED_UTF_CHAR 888
680
681
#if defined SUPPORT_UNICODE
682
#if PCRE2_CODE_UNIT_WIDTH == 8
683
684
/* Reads one character forward from a possibly invalid UTF-8 string
(8 bit code units).

  c               lvalue receiving the decoded character
  ptr             lvalue pointer to the first code unit; advanced past the
                  sequence when a well formed prefix was recognized
  end             end of the readable area (ptr[0] itself is read without
                  a bounds check, so the caller must ensure ptr < end)
  invalid_action  statement executed when the sequence is not valid

Overlong 3 and 4 byte forms, surrogates and values above 0x10ffff are
rejected after ptr has been advanced; in every other invalid case ptr is
left unchanged. All arguments are evaluated more than once. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
745
746
/* Reads one character backward from a possibly invalid UTF-8 string
(8 bit code units).

  c               lvalue receiving the decoded character
  ptr             lvalue pointer just after the last code unit of the
                  character; moved back to its first code unit when a well
                  formed prefix was recognized
  start           start of the readable area (ptr[-1] is read without a
                  bounds check, so the caller must ensure ptr > start)
  invalid_action  statement executed when the sequence is not valid

The code units are visited from the last one to the first one, so every
earlier unit carries more significant bits than those already collected:
each continuation unit is OR-ed in at the next higher 6 bit position and
the leading unit supplies the topmost bits. (Shifting the accumulated value
left instead, as the forward reader does, would swap the bit groups of 3 and
4 byte sequences.)

Overlong 3 and 4 byte forms, surrogates and values above 0x10ffff are
rejected after ptr has been moved; in every other invalid case ptr is left
unchanged. All arguments are evaluated more than once. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
808
809
#elif PCRE2_CODE_UNIT_WIDTH == 16
810
811
/* Reads one character forward from a possibly invalid UTF-16 string.
A code unit outside 0xd800-0xdfff is returned as is; a high surrogate
followed (before 'end') by a low surrogate is combined; anything else
executes invalid_action and leaves ptr unchanged. ptr[0] is read without a
bounds check. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
825
826
/* Reads one character backward from a possibly invalid UTF-16 string.
c always receives ptr[-1] first (read without a bounds check). A code unit
outside 0xd800-0xdfff is accepted as is; a low surrogate preceded (after
'start') by a high surrogate is combined; anything else executes
invalid_action and leaves ptr unchanged. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
841
842
843
#elif PCRE2_CODE_UNIT_WIDTH == 32
844
845
/* Reads one character forward from a possibly invalid UTF-32 string.
Surrogates and values of 0x110000 or above execute invalid_action and leave
both c and ptr unchanged. 'end' is not used by this variant. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
854
855
/* Reads one character backward from a possibly invalid UTF-32 string.
c always receives ptr[-1]; ptr is moved back only when that value is neither
a surrogate nor 0x110000 or above, otherwise invalid_action is executed.
'start' is not used by this variant. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
865
866
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
867
#endif /* SUPPORT_UNICODE */
868
869
/* Skips a whole bracketed group.

cc must point to the opening opcode of an assertion, once, bracket or
conditional group (checked by the first assertion). The link stored at
offset 1 is followed across every OP_ALT until the closing KET opcode is
reached.

Returns a pointer just after the closing KET opcode and its link field. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do cc += GET(cc, 1); while (*cc == OP_ALT);
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
cc += 1 + LINK_SIZE;
return cc;
}
877
878
/* Counts the alternatives of a bracketed group.

cc must point to the opening opcode of an assertion, once, bracket or
conditional group (checked by the first assertion).

Returns the number of links followed until the closing KET opcode is
reached, i.e. at least 1. */
static int no_alternatives(PCRE2_SPTR cc)
{
int count = 0;
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do
  {
  cc += GET(cc, 1);
  count++;
  }
while (*cc == OP_ALT);
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return count;
}
891
892
/* Checks whether any alternative of a lookbehind assertion begins with
OP_VREVERSE.

cc must point to OP_ASSERTBACK, OP_ASSERTBACK_NOT or OP_ASSERTBACK_NA
(checked by the assertion). The opcode right after the link field of each
alternative is inspected.

Returns TRUE when at least one alternative starts with OP_VREVERSE, FALSE
otherwise. */
static BOOL find_vreverse(PCRE2_SPTR cc)
{
SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);

do
  {
  if (cc[1 + LINK_SIZE] == OP_VREVERSE)
    return TRUE;
  cc += GET(cc, 1);
  }
while (*cc == OP_ALT);

return FALSE;
}
906
907
/* Functions which might need modification for all new supported opcodes:
 next_opcode
 check_opcode_types
 set_private_data_ptrs
 get_framesize
 init_frame
 get_recurse_data_length
 copy_recurse_data
 compile_matchingpath
 compile_backtrackingpath
*/
918
919
/* Returns a pointer to the opcode which follows the one at cc.

Arguments:
  common  compiler state; only its utf member is read, and only when
          SUPPORT_UNICODE is defined
  cc      pointer to the current opcode

Returns NULL for OP_ANYBYTE in UTF mode and for any opcode which is not
listed below (the latter also triggers SLJIT_UNREACHABLE). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Fixed length opcodes: the length comes from the OP_lengths table. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_VREVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA:
  case OP_ASSERTBACK_NA:
  case OP_ASSERT_SCS:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes ending with a character: in UTF mode the extra code units of
  that character are skipped as well. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */
  /* Type repeats: one code unit less than the table length is skipped, so
  the returned pointer addresses the last code unit of the item. */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the item. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_ECLASS:
  case OP_XCLASS:
  SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order);
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] code units plus three more are skipped. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1125
1126
/* Returns the larger of current_locals_size and the local area needed
by the back reference opcode at cc.

The requirement depends on do_casefulcmp(), do_caselesscmp(), and
compile_ref_matchingpath(). */
static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size)
{
int required = 2 * SSIZE_OF(sw);
PCRE2_SPTR following;
SLJIT_UNUSED_ARG(common);

#ifdef SUPPORT_UNICODE
/* Caseless references need one more word in UTF / UCP mode. */
if (common->utf || common->ucp)
  {
  if (*cc == OP_REFI || *cc == OP_DNREFI)
    required = 3 * SSIZE_OF(sw);
  }
#endif

/* Look at the opcode right after the reference. Although
do_casefulcmp() uses only one local, the allocate_stack() calls
during a repeat destroy the LOCAL1 variables, so a repeated
reference reserves two more words. */
following = cc + PRIV(OP_lengths)[*cc];
if (*following >= OP_CRSTAR && *following <= OP_CRPOSRANGE)
  required += 2 * SSIZE_OF(sw);

if (current_locals_size >= required)
  return current_locals_size;
return required;
}
1145
1146
/* Scans the byte code in [cc, ccend) once before code generation.

While scanning it:
  - records pattern-wide flags in common (has_set_som, might_be_empty,
    has_then, has_skip_arg, has_skip_in_assert_back, control_head_ptr),
  - clears common->optimized_cbracket[] for every capture group that is
    referenced or is a possessive capturing bracket,
  - computes the size of the local area (locals_size),
  - notes whether slots for mark, recursive head and last capture
    are needed.

After the scan common->ovector_start is advanced by locals_size and by
one sljit_sw for each of the needed mark / recursive head / last capture
slots, whose offsets are stored in common.

Returns FALSE when the pattern cannot be handled: a callout directly
after OP_COND / OP_SCOND, an OP_ASSERT_ACCEPT located before the end of
a previously seen OP_ASSERT_NA / OP_ASSERTBACK_NA / OP_ASSERT_SCS
bracket, or any opcode for which next_opcode() returns NULL.
Returns TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
int count;
PCRE2_SPTR slot;
/* Furthest end of any OP_ASSERTBACK bracket seen so far. */
PCRE2_SPTR assert_back_end = cc - 1;
/* Furthest end of any OP_ASSERT_NA / OP_ASSERTBACK_NA / OP_ASSERT_SCS bracket seen so far. */
PCRE2_SPTR assert_na_end = cc - 1;
sljit_s32 locals_size = 2 * SSIZE_OF(sw);
BOOL set_recursive_head = FALSE;
BOOL set_capture_last = FALSE;
BOOL set_mark = FALSE;

/* Calculate important variables (like stack size) and check whether all opcodes are supported. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    case OP_TYPEUPTO:
    case OP_TYPEEXACT:
    if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))
      locals_size = 3 * SSIZE_OF(sw);
    /* Stop on the trailing type opcode, it is processed by the next iteration. */
    cc += (2 + IMM2_SIZE) - 1;
    break;

    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    if (cc[1] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))
      locals_size = 3 * SSIZE_OF(sw);
    /* Stop on the trailing type opcode, it is processed by the next iteration. */
    cc += 2 - 1;
    break;

    case OP_TYPEPOSUPTO:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && locals_size <= 3 * SSIZE_OF(sw))
      locals_size = 3 * SSIZE_OF(sw);
#endif
    if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))
      locals_size = 3 * SSIZE_OF(sw);
    /* Stop on the trailing type opcode, it is processed by the next iteration. */
    cc += (2 + IMM2_SIZE) - 1;
    break;

    case OP_REFI:
    case OP_REF:
    locals_size = ref_update_local_size(common, cc, locals_size);
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += PRIV(OP_lengths)[*cc];
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ASSERT_SCS:
    slot = bracketend(cc);
    if (slot > assert_na_end)
      assert_na_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert this opcode. We do
       not intend to support this case. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREFI:
    case OP_DNREF:
    locals_size = ref_update_local_size(common, cc, locals_size);
    /* Fall through */
    case OP_DNCREF:
    /* Walk the 'count' name table entries starting at the given index
       and mark each referenced group. */
    count = GET2(cc, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
    while (count-- > 0)
      {
      common->optimized_cbracket[GET2(slot, 0)] = 0;
      slot += common->name_entry_size;
      }
    cc += PRIV(OP_lengths)[*cc];
    break;

    case OP_RECURSE:
    /* Set its value only once. */
    set_recursive_head = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    set_capture_last = TRUE;
    /* OP_CALLOUT_STR stores its own total length in the byte code. */
    cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_ASSERTBACK:
    slot = bracketend(cc);
    if (slot > assert_back_end)
      assert_back_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_MARK:
    set_mark = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_ASSERT_ACCEPT:
    if (cc < assert_na_end)
      return FALSE;
    cc++;
    break;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    case OP_CRPOSRANGE:
    /* The second value can be 0 for infinite repeats. */
    if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size <= 3 * SSIZE_OF(sw))
      locals_size = 3 * SSIZE_OF(sw);
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_POSUPTO:
    case OP_POSUPTOI:
    case OP_NOTPOSUPTO:
    case OP_NOTPOSUPTOI:
    if (common->utf && locals_size <= 3 * SSIZE_OF(sw))
      locals_size = 3 * SSIZE_OF(sw);
#endif
    /* Fall through */
    default:
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }

SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0);
#if defined SLJIT_DEBUG && SLJIT_DEBUG
common->locals_size = locals_size;
#endif

/* Reserve the local area first, then one word for each needed slot. */
if (locals_size > 0)
  common->ovector_start += locals_size;

if (set_mark)
  {
  SLJIT_ASSERT(common->mark_ptr == 0);
  common->mark_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

if (set_recursive_head)
  {
  SLJIT_ASSERT(common->recursive_head_ptr == 0);
  common->recursive_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

if (set_capture_last)
  {
  SLJIT_ASSERT(common->capture_last_ptr == 0);
  common->capture_last_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

return TRUE;
}
1356
1357
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1358
1359
/*
1360
Start represent the number of allowed early fail enhancements
1361
1362
The 0-2 values has a special meaning:
1363
0 - skip is allowed for all iterators
1364
1 - fail is allowed for all iterators
1365
2 - fail is allowed for greedy iterators
1366
3 - only ranged early fail is allowed
1367
>3 - (start - 3) number of remaining ranged early fails allowed
1368
1369
return: the updated value of start
1370
*/
1371
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
1372
int *private_data_start, sljit_s32 depth, int start)
1373
{
1374
PCRE2_SPTR begin = cc;
1375
PCRE2_SPTR next_alt;
1376
PCRE2_SPTR end;
1377
PCRE2_SPTR accelerated_start;
1378
int result = 0;
1379
int count, prev_count;
1380
1381
SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1382
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1383
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1384
1385
next_alt = cc + GET(cc, 1);
1386
if (*next_alt == OP_ALT && start < 1)
1387
start = 1;
1388
1389
do
1390
{
1391
count = start;
1392
cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1393
1394
while (TRUE)
1395
{
1396
accelerated_start = NULL;
1397
1398
switch(*cc)
1399
{
1400
case OP_SOD:
1401
case OP_SOM:
1402
case OP_SET_SOM:
1403
case OP_NOT_WORD_BOUNDARY:
1404
case OP_WORD_BOUNDARY:
1405
case OP_EODN:
1406
case OP_EOD:
1407
case OP_CIRC:
1408
case OP_CIRCM:
1409
case OP_DOLL:
1410
case OP_DOLLM:
1411
case OP_NOT_UCP_WORD_BOUNDARY:
1412
case OP_UCP_WORD_BOUNDARY:
1413
/* Zero width assertions. */
1414
cc++;
1415
continue;
1416
1417
case OP_NOT_DIGIT:
1418
case OP_DIGIT:
1419
case OP_NOT_WHITESPACE:
1420
case OP_WHITESPACE:
1421
case OP_NOT_WORDCHAR:
1422
case OP_WORDCHAR:
1423
case OP_ANY:
1424
case OP_ALLANY:
1425
case OP_ANYBYTE:
1426
case OP_NOT_HSPACE:
1427
case OP_HSPACE:
1428
case OP_NOT_VSPACE:
1429
case OP_VSPACE:
1430
if (count < 1)
1431
count = 1;
1432
cc++;
1433
continue;
1434
1435
case OP_ANYNL:
1436
case OP_EXTUNI:
1437
if (count < 3)
1438
count = 3;
1439
cc++;
1440
continue;
1441
1442
case OP_NOTPROP:
1443
case OP_PROP:
1444
if (count < 1)
1445
count = 1;
1446
cc += 1 + 2;
1447
continue;
1448
1449
case OP_CHAR:
1450
case OP_CHARI:
1451
case OP_NOT:
1452
case OP_NOTI:
1453
if (count < 1)
1454
count = 1;
1455
cc += 2;
1456
#ifdef SUPPORT_UNICODE
1457
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1458
#endif
1459
continue;
1460
1461
case OP_TYPEMINSTAR:
1462
case OP_TYPEMINPLUS:
1463
if (count == 2)
1464
count = 3;
1465
/* Fall through */
1466
1467
case OP_TYPESTAR:
1468
case OP_TYPEPLUS:
1469
case OP_TYPEPOSSTAR:
1470
case OP_TYPEPOSPLUS:
1471
/* The type or prop opcode is skipped in the next iteration. */
1472
cc += 1;
1473
1474
if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1475
{
1476
accelerated_start = cc - 1;
1477
break;
1478
}
1479
1480
if (count < 3)
1481
count = 3;
1482
continue;
1483
1484
case OP_TYPEEXACT:
1485
if (count < 1)
1486
count = 1;
1487
cc += 1 + IMM2_SIZE;
1488
continue;
1489
1490
case OP_TYPEUPTO:
1491
case OP_TYPEMINUPTO:
1492
case OP_TYPEPOSUPTO:
1493
cc += IMM2_SIZE;
1494
/* Fall through */
1495
1496
case OP_TYPEQUERY:
1497
case OP_TYPEMINQUERY:
1498
case OP_TYPEPOSQUERY:
1499
/* The type or prop opcode is skipped in the next iteration. */
1500
if (count < 3)
1501
count = 3;
1502
cc += 1;
1503
continue;
1504
1505
case OP_MINSTAR:
1506
case OP_MINPLUS:
1507
case OP_MINSTARI:
1508
case OP_MINPLUSI:
1509
case OP_NOTMINSTAR:
1510
case OP_NOTMINPLUS:
1511
case OP_NOTMINSTARI:
1512
case OP_NOTMINPLUSI:
1513
if (count == 2)
1514
count = 3;
1515
/* Fall through */
1516
1517
case OP_STAR:
1518
case OP_PLUS:
1519
case OP_POSSTAR:
1520
case OP_POSPLUS:
1521
1522
case OP_STARI:
1523
case OP_PLUSI:
1524
case OP_POSSTARI:
1525
case OP_POSPLUSI:
1526
1527
case OP_NOTSTAR:
1528
case OP_NOTPLUS:
1529
case OP_NOTPOSSTAR:
1530
case OP_NOTPOSPLUS:
1531
1532
case OP_NOTSTARI:
1533
case OP_NOTPLUSI:
1534
case OP_NOTPOSSTARI:
1535
case OP_NOTPOSPLUSI:
1536
accelerated_start = cc;
1537
cc += 2;
1538
#ifdef SUPPORT_UNICODE
1539
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1540
#endif
1541
break;
1542
1543
case OP_EXACT:
1544
if (count < 1)
1545
count = 1;
1546
cc += 2 + IMM2_SIZE;
1547
#ifdef SUPPORT_UNICODE
1548
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1549
#endif
1550
continue;
1551
1552
case OP_UPTO:
1553
case OP_MINUPTO:
1554
case OP_POSUPTO:
1555
case OP_UPTOI:
1556
case OP_MINUPTOI:
1557
case OP_EXACTI:
1558
case OP_POSUPTOI:
1559
case OP_NOTUPTO:
1560
case OP_NOTMINUPTO:
1561
case OP_NOTEXACT:
1562
case OP_NOTPOSUPTO:
1563
case OP_NOTUPTOI:
1564
case OP_NOTMINUPTOI:
1565
case OP_NOTEXACTI:
1566
case OP_NOTPOSUPTOI:
1567
cc += IMM2_SIZE;
1568
/* Fall through */
1569
1570
case OP_QUERY:
1571
case OP_MINQUERY:
1572
case OP_POSQUERY:
1573
case OP_QUERYI:
1574
case OP_MINQUERYI:
1575
case OP_POSQUERYI:
1576
case OP_NOTQUERY:
1577
case OP_NOTMINQUERY:
1578
case OP_NOTPOSQUERY:
1579
case OP_NOTQUERYI:
1580
case OP_NOTMINQUERYI:
1581
case OP_NOTPOSQUERYI:
1582
if (count < 3)
1583
count = 3;
1584
cc += 2;
1585
#ifdef SUPPORT_UNICODE
1586
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1587
#endif
1588
continue;
1589
1590
case OP_CLASS:
1591
case OP_NCLASS:
1592
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1593
case OP_XCLASS:
1594
case OP_ECLASS:
1595
accelerated_start = cc;
1596
cc += (*cc >= OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)));
1597
#else
1598
accelerated_start = cc;
1599
cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1600
#endif
1601
1602
switch (*cc)
1603
{
1604
case OP_CRMINSTAR:
1605
case OP_CRMINPLUS:
1606
if (count == 2)
1607
count = 3;
1608
/* Fall through */
1609
1610
case OP_CRSTAR:
1611
case OP_CRPLUS:
1612
case OP_CRPOSSTAR:
1613
case OP_CRPOSPLUS:
1614
cc++;
1615
break;
1616
1617
case OP_CRRANGE:
1618
case OP_CRMINRANGE:
1619
case OP_CRPOSRANGE:
1620
if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
1621
{
1622
/* Exact repeat. */
1623
cc += 1 + 2 * IMM2_SIZE;
1624
if (count < 1)
1625
count = 1;
1626
continue;
1627
}
1628
1629
cc += 2 * IMM2_SIZE;
1630
/* Fall through */
1631
case OP_CRQUERY:
1632
case OP_CRMINQUERY:
1633
case OP_CRPOSQUERY:
1634
cc++;
1635
if (count < 3)
1636
count = 3;
1637
continue;
1638
1639
default:
1640
/* No repeat. */
1641
if (count < 1)
1642
count = 1;
1643
continue;
1644
}
1645
break;
1646
1647
case OP_BRA:
1648
case OP_CBRA:
1649
prev_count = count;
1650
if (count < 1)
1651
count = 1;
1652
1653
if (depth >= 4)
1654
break;
1655
1656
if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
1657
count = 3;
1658
1659
end = bracketend(cc);
1660
if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1661
break;
1662
1663
prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);
1664
1665
if (prev_count > count)
1666
count = prev_count;
1667
1668
if (PRIVATE_DATA(cc) != 0)
1669
common->private_data_ptrs[begin - common->start] = 1;
1670
1671
if (count < EARLY_FAIL_ENHANCE_MAX)
1672
{
1673
cc = end;
1674
continue;
1675
}
1676
break;
1677
1678
case OP_KET:
1679
SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1680
if (cc >= next_alt)
1681
break;
1682
cc += 1 + LINK_SIZE;
1683
continue;
1684
}
1685
1686
if (accelerated_start == NULL)
1687
break;
1688
1689
if (count == 0)
1690
{
1691
common->fast_forward_bc_ptr = accelerated_start;
1692
common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1693
*private_data_start += sizeof(sljit_sw);
1694
count = 4;
1695
}
1696
else if (count < 3)
1697
{
1698
common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1699
1700
if (common->early_fail_start_ptr == 0)
1701
common->early_fail_start_ptr = *private_data_start;
1702
1703
*private_data_start += sizeof(sljit_sw);
1704
common->early_fail_end_ptr = *private_data_start;
1705
1706
if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1707
return EARLY_FAIL_ENHANCE_MAX;
1708
1709
count = 4;
1710
}
1711
else
1712
{
1713
common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1714
1715
if (common->early_fail_start_ptr == 0)
1716
common->early_fail_start_ptr = *private_data_start;
1717
1718
*private_data_start += 2 * sizeof(sljit_sw);
1719
common->early_fail_end_ptr = *private_data_start;
1720
1721
if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1722
return EARLY_FAIL_ENHANCE_MAX;
1723
1724
count++;
1725
}
1726
1727
/* Cannot be part of a repeat. */
1728
common->private_data_ptrs[begin - common->start] = 1;
1729
1730
if (count >= EARLY_FAIL_ENHANCE_MAX)
1731
break;
1732
}
1733
1734
if (*cc != OP_ALT && *cc != OP_KET)
1735
result = EARLY_FAIL_ENHANCE_MAX;
1736
else if (result < count)
1737
result = count;
1738
1739
cc = next_alt;
1740
next_alt = cc + GET(cc, 1);
1741
}
1742
while (*cc == OP_ALT);
1743
1744
return result;
1745
}
1746
1747
/* Returns a size (0, 1 or 2) for the class repeat opcode at cc.
The caller in this file uses it as the number of private data words
of a class iterator. Opcodes not handled here (including the
possessive repeats) yield 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
const PCRE2_UCHAR op = *cc;
BOOL greedy;
sljit_u32 lower, upper, span;

if (op == OP_CRSTAR || op == OP_CRPLUS)
  return 2;

if (op == OP_CRMINSTAR || op == OP_CRMINPLUS || op == OP_CRQUERY || op == OP_CRMINQUERY)
  return 1;

if (op != OP_CRRANGE && op != OP_CRMINRANGE)
  return 0;

greedy = (op == OP_CRRANGE);
lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

/* A zero upper limit is handled like the unbounded star / plus forms. */
if (upper == 0)
  return greedy ? 2 : 1;

/* Number of optional iterations above the minimum. */
span = upper - lower;
if (span > (sljit_u32)(greedy ? 0 : 1))
  return 2;
return (int)span;
}
1778
1779
/* Checks whether the bracket at begin is followed by identical copies
of itself (an expanded fixed or ranged repeat) and, if so, records the
repeat in common->private_data_ptrs[] at the closing OP_KET of the
relevant copy: the length of the following copies, the repeat type
(OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

Returns TRUE when a repeat was recorded, or when the bracket was already
marked as the second part of a previously detected repeat. Returns FALSE
otherwise. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR next;
PCRE2_SPTR next_end;
PCRE2_SPTR max_end;
PCRE2_UCHAR type;
sljit_sw length = end - begin;
sljit_s32 min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
 * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Count the byte-identical copies which directly follow the bracket. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

if (min == 2)
  return FALSE;

max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  /* Count the nested optional copies: each is the zero opcode followed by
     an OP_BRA whose body starts with a copy of the bracket. */
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost optional copy has no wrapping OP_BRA. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Each wrapping OP_BRA must be closed by a plain OP_KET. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last mandatory copy became part of the ranged repeat. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1864
1865
/* Case label groups for the single character iterators. Each macro
expands to a list of 'case' labels, so it must be used inside a switch
and be followed by the statements of the case. In set_private_data_ptrs()
the _1 groups get one word of private data and the _2A / _2B groups get
two words. The _2B groups carry an extra IMM2_SIZE repeat count. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1916
1917
/* Walks the byte code from common->start up to ccend and assigns
private data offsets (stored in common->private_data_ptrs[]) to the
brackets, assertions and character iterators which need them.

private_data_start holds the first free offset on entry and receives
the first free offset after the assignments on return. The scan stops
early once the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* Iterators located before this position do not get a private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words needed by an iterator,
   size: length of the current opcode (a negative value means that
         the opcode is followed by a character),
   bracketlen: length of the current bracket opcode. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET is the length of the copies
       recorded by detect_repeat(), which are skipped here. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);

    /* One more word is reserved when find_vreverse() reports a match. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      private_data_ptr += sizeof(sljit_sw);
      }

    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERT_SCS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += 2 * sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    if (cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
  gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      /* Skip the opcode and its character, including the extra
         code units of the character in UTF mode. */
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Only brackets which do not end with a plain OP_KET keep 'end' set. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
2120
2121
/* Returns with a frame_types (always < 0) if no need for frame. */
2122
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2123
{
2124
int length = 0;
2125
int possessive = 0;
2126
BOOL stack_restore = FALSE;
2127
BOOL setsom_found = recursive;
2128
BOOL setmark_found = recursive;
2129
/* The last capture is a local variable even for recursions. */
2130
BOOL capture_last_found = FALSE;
2131
2132
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2133
SLJIT_ASSERT(common->control_head_ptr != 0);
2134
*needs_control_head = TRUE;
2135
#else
2136
*needs_control_head = FALSE;
2137
#endif
2138
2139
if (ccend == NULL)
2140
{
2141
ccend = bracketend(cc) - (1 + LINK_SIZE);
2142
if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2143
{
2144
possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2145
/* This is correct regardless of common->capture_last_ptr. */
2146
capture_last_found = TRUE;
2147
}
2148
cc = next_opcode(common, cc);
2149
}
2150
2151
SLJIT_ASSERT(cc != NULL);
2152
while (cc < ccend)
2153
switch(*cc)
2154
{
2155
case OP_SET_SOM:
2156
SLJIT_ASSERT(common->has_set_som);
2157
stack_restore = TRUE;
2158
if (!setsom_found)
2159
{
2160
length += 2;
2161
setsom_found = TRUE;
2162
}
2163
cc += 1;
2164
break;
2165
2166
case OP_MARK:
2167
case OP_COMMIT_ARG:
2168
case OP_PRUNE_ARG:
2169
case OP_THEN_ARG:
2170
SLJIT_ASSERT(common->mark_ptr != 0);
2171
stack_restore = TRUE;
2172
if (!setmark_found)
2173
{
2174
length += 2;
2175
setmark_found = TRUE;
2176
}
2177
if (common->control_head_ptr != 0)
2178
*needs_control_head = TRUE;
2179
cc += 1 + 2 + cc[1];
2180
break;
2181
2182
case OP_RECURSE:
2183
stack_restore = TRUE;
2184
if (common->has_set_som && !setsom_found)
2185
{
2186
length += 2;
2187
setsom_found = TRUE;
2188
}
2189
if (common->mark_ptr != 0 && !setmark_found)
2190
{
2191
length += 2;
2192
setmark_found = TRUE;
2193
}
2194
if (common->capture_last_ptr != 0 && !capture_last_found)
2195
{
2196
length += 2;
2197
capture_last_found = TRUE;
2198
}
2199
cc += 1 + LINK_SIZE;
2200
break;
2201
2202
case OP_CBRA:
2203
case OP_CBRAPOS:
2204
case OP_SCBRA:
2205
case OP_SCBRAPOS:
2206
stack_restore = TRUE;
2207
if (common->capture_last_ptr != 0 && !capture_last_found)
2208
{
2209
length += 2;
2210
capture_last_found = TRUE;
2211
}
2212
length += 3;
2213
cc += 1 + LINK_SIZE + IMM2_SIZE;
2214
break;
2215
2216
case OP_THEN:
2217
stack_restore = TRUE;
2218
if (common->control_head_ptr != 0)
2219
*needs_control_head = TRUE;
2220
cc ++;
2221
break;
2222
2223
default:
2224
stack_restore = TRUE;
2225
/* Fall through. */
2226
2227
case OP_NOT_WORD_BOUNDARY:
2228
case OP_WORD_BOUNDARY:
2229
case OP_NOT_DIGIT:
2230
case OP_DIGIT:
2231
case OP_NOT_WHITESPACE:
2232
case OP_WHITESPACE:
2233
case OP_NOT_WORDCHAR:
2234
case OP_WORDCHAR:
2235
case OP_ANY:
2236
case OP_ALLANY:
2237
case OP_ANYBYTE:
2238
case OP_NOTPROP:
2239
case OP_PROP:
2240
case OP_ANYNL:
2241
case OP_NOT_HSPACE:
2242
case OP_HSPACE:
2243
case OP_NOT_VSPACE:
2244
case OP_VSPACE:
2245
case OP_EXTUNI:
2246
case OP_EODN:
2247
case OP_EOD:
2248
case OP_CIRC:
2249
case OP_CIRCM:
2250
case OP_DOLL:
2251
case OP_DOLLM:
2252
case OP_CHAR:
2253
case OP_CHARI:
2254
case OP_NOT:
2255
case OP_NOTI:
2256
2257
case OP_EXACT:
2258
case OP_POSSTAR:
2259
case OP_POSPLUS:
2260
case OP_POSQUERY:
2261
case OP_POSUPTO:
2262
2263
case OP_EXACTI:
2264
case OP_POSSTARI:
2265
case OP_POSPLUSI:
2266
case OP_POSQUERYI:
2267
case OP_POSUPTOI:
2268
2269
case OP_NOTEXACT:
2270
case OP_NOTPOSSTAR:
2271
case OP_NOTPOSPLUS:
2272
case OP_NOTPOSQUERY:
2273
case OP_NOTPOSUPTO:
2274
2275
case OP_NOTEXACTI:
2276
case OP_NOTPOSSTARI:
2277
case OP_NOTPOSPLUSI:
2278
case OP_NOTPOSQUERYI:
2279
case OP_NOTPOSUPTOI:
2280
2281
case OP_TYPEEXACT:
2282
case OP_TYPEPOSSTAR:
2283
case OP_TYPEPOSPLUS:
2284
case OP_TYPEPOSQUERY:
2285
case OP_TYPEPOSUPTO:
2286
2287
case OP_CLASS:
2288
case OP_NCLASS:
2289
case OP_XCLASS:
2290
case OP_ECLASS:
2291
2292
case OP_CALLOUT:
2293
case OP_CALLOUT_STR:
2294
2295
case OP_NOT_UCP_WORD_BOUNDARY:
2296
case OP_UCP_WORD_BOUNDARY:
2297
2298
cc = next_opcode(common, cc);
2299
SLJIT_ASSERT(cc != NULL);
2300
break;
2301
}
2302
2303
/* Possessive quantifiers can use a special case. */
2304
if (SLJIT_UNLIKELY(possessive == length))
2305
return stack_restore ? no_frame : no_stack;
2306
2307
if (length > 0)
2308
return length + 1;
2309
return stack_restore ? no_frame : no_stack;
2310
}
2311
2312
/* Emits the code that records one frame-local word on the backtrack stack:
the negated offset of the local is stored first as a marker, followed by the
current value of the local (loaded through TMP1). Returns the stack position
of the next unused frame slot. */
static int init_frame_push_local(compiler_common *common, int local, int stackpos)
{
DEFINE_COMPILER;

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -local);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
return stackpos - SSIZE_OF(sw);
}

/* Emits the code that builds a frame for the opcodes in [cc, ccend).

When ccend is NULL the range is derived from the bracket starting at cc:
it ends before the closing ket, and the opening opcode is skipped unless
it is OP_CBRAPOS / OP_SCBRAPOS.

Each of the start-of-match word, the mark pointer and the last capture
index is saved at most once. Every capturing bracket stores its ovector
offset followed by both ovector words. The frame is closed by a zero word.
The data is restored by do_revertframes(). */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
BOOL som_saved = FALSE;
BOOL mark_saved = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_saved = FALSE;
int ovector_index;

/* The frame needs at least 1 word plus the shortest item size (2). */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!som_saved)
      {
      stackpos = init_frame_push_local(common, OVECTOR(0), stackpos);
      som_saved = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!mark_saved)
      {
      stackpos = init_frame_push_local(common, common->mark_ptr, stackpos);
      mark_saved = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion may change any of the three tracked locals. */
    if (common->has_set_som && !som_saved)
      {
      stackpos = init_frame_push_local(common, OVECTOR(0), stackpos);
      som_saved = TRUE;
      }
    if (common->mark_ptr != 0 && !mark_saved)
      {
      stackpos = init_frame_push_local(common, common->mark_ptr, stackpos);
      mark_saved = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
      stackpos = init_frame_push_local(common, common->capture_last_ptr, stackpos);
      capture_last_saved = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
      stackpos = init_frame_push_local(common, common->capture_last_ptr, stackpos);
      capture_last_saved = TRUE;
      }

    /* Capture pairs use a positive marker (the ovector offset) and two value words. */
    ovector_index = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(ovector_index));
    stackpos -= SSIZE_OF(sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= SSIZE_OF(sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= SSIZE_OF(sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }

/* A zero word terminates the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
2435
2436
/* Number of temporary registers the delayed memory copy helpers rotate through. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of memory-to-memory word copies whose stores are
postponed: each slot describes one temporary register that may currently
hold a loaded word waiting to be written to its destination. */
struct delayed_mem_copy_status {
  /* Compiler used to emit the move instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, or -1 if none is pending. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset (relative to store_bases[i]) of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register that carries the loaded word of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Register that keeps the previous content of tmp_regs[i] when it is
  not backed by a machine register. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next delayed_mem_copy_move() call. */
  int next_tmp_reg;
};

typedef struct delayed_mem_copy_status delayed_mem_copy_status;
2446
2447
/* Prepares a delayed copy sequence: binds the status to the compiler of
common, selects slot 0 for the first move and marks every slot as having no
pending store. The caller must have filled tmp_regs[] and saved_tmp_regs[]
beforehand; they are only validated here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

for (slot = 0; slot < RECURSE_TMP_REG_COUNT; slot++)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A save register that maps to a machine register must be the temporary itself. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  status->store_bases[slot] = -1;
  }
}
2461
2462
/* Queues the copy of one word from load_base + load_offset to
store_base + store_offset.

The load is emitted immediately into the temporary register of the current
slot, while the store is only recorded and emitted when the slot is reused
(or by delayed_mem_copy_finish()). Before the slot is overwritten, its
previously recorded store is flushed; on first use the temporary register
is preserved in saved_tmp_regs[] when that register has no machine
register index. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int carrier = status->tmp_regs[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[slot] != -1)
  {
  /* Flush the store that was postponed by the previous use of this slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], carrier, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* Preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, carrier, 0);
  }

OP1(SLJIT_MOV, carrier, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2486
2487
/* Completes a delayed copy sequence: visits every slot once, starting with
the one that would be used next, emits its pending store (if any) and then
restores the temporary register from saved_tmp_regs[] when that register
has no machine register index. The store_bases[] entries are left as they
are. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int carrier, keeper, remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    carrier = status->tmp_regs[slot];
    keeper = status->saved_tmp_regs[slot];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], carrier, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(SLJIT_GP_REGISTER, keeper) < 0)
      OP1(SLJIT_MOV, carrier, 0, keeper, 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2510
2511
#undef RECURSE_TMP_REG_COUNT
2512
2513
/* Test-and-set on common->recurse_bitset. bit_index is a word-aligned byte
offset; it is converted to a word index that selects one bit of the set.
Returns TRUE when the bit was clear (and sets it), FALSE when the bit had
already been set by an earlier call. */
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
uint8_t *cell;
uint8_t bit;

SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);

/* From byte offset to word index. */
bit_index >>= SLJIT_WORD_SHIFT;

SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);

cell = common->recurse_bitset + (bit_index >> 3);
bit = 1 << (bit_index & 0x7);

if ((*cell & bit) != 0)
  return FALSE;

*cell |= bit;
return TRUE;
}
2533
2534
/* Bit flags reported by get_recurse_data_length() through its result_flags
argument and consumed by copy_recurse_data(). */
enum get_recurse_flags {
  /* A verb that can quit the recursion was seen (PRUNE, SKIP, COMMIT, THEN
  or one of their argument forms). */
  recurse_flag_quit_found = 0x01,
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was seen. */
  recurse_flag_accept_found = 0x02,
  /* The start-of-match value may be changed (OP_SET_SOM, or OP_RECURSE
  when the pattern has a SET_SOM). */
  recurse_flag_setsom_found = 0x04,
  /* The mark pointer may be changed. */
  recurse_flag_setmark_found = 0x08,
  /* The control head local is used and needs its own word. */
  recurse_flag_control_head_found = 0x10
};
2541
2542
/* Counts the machine words needed to save the data touched by the opcodes
in [cc, ccend) and collects the get_recurse_flags bits describing them.

The count starts at one word (presumably the slot copy_recurse_data() uses
for recursive_head_ptr). common->recurse_bitset is cleared first and then
used through recurse_check_bit() so that every location is counted only
once, however many opcodes refer to it.

The flags are stored in *result_flags; the word count is returned. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
uint32_t flags = 0;
PCRE2_SPTR alt;
int words = 1;
int class_size, ref;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    if (common->has_set_som)
      flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    ref = PRIVATE_DATA(cc);
    if (ref != 0)
      {
      if (recurse_check_bit(common, ref))
        words += 1;
      /* The word after the ket holds the extra length to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT_SCS:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      words += 2;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    ref = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(ref << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((ref << 1) + 1)));
      words += 2;
      }
    if (common->optimized_cbracket[ref] == 0 && recurse_check_bit(common, OVECTOR_PRIV(ref)))
      words += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      words += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    ref = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(ref << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((ref << 1) + 1)));
      words += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(ref)))
      words += 1;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      words += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      words += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alt = cc + GET(cc, 1);
    if ((*alt == OP_KETRMAX || *alt == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    ref = PRIVATE_DATA(cc);
    if (ref != 0 && recurse_check_bit(common, ref))
      words += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    ref = PRIVATE_DATA(cc);
    if (ref != 0 && recurse_check_bit(common, ref))
      {
      SLJIT_ASSERT(recurse_check_bit(common, ref + sizeof(sljit_sw)));
      words += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    ref = PRIVATE_DATA(cc);
    if (ref != 0 && recurse_check_bit(common, ref))
      {
      SLJIT_ASSERT(recurse_check_bit(common, ref + sizeof(sljit_sw)));
      words += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    ref = PRIVATE_DATA(cc);
    if (ref != 0 && recurse_check_bit(common, ref))
      words += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    ref = PRIVATE_DATA(cc);
    if (ref != 0 && recurse_check_bit(common, ref))
      {
      SLJIT_ASSERT(recurse_check_bit(common, ref + sizeof(sljit_sw)));
      words += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    ref = PRIVATE_DATA(cc);
    if (ref != 0 && recurse_check_bit(common, ref))
      {
      SLJIT_ASSERT(recurse_check_bit(common, ref + sizeof(sljit_sw)));
      words += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    class_size = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The iterator that follows the class decides how many words it needs. */
    ref = PRIVATE_DATA(cc);
    if (ref != 0 && recurse_check_bit(common, ref))
      words += get_class_iterator_size(cc + class_size);
    cc += class_size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      flags |= recurse_flag_control_head_found;
    if (*cc != OP_MARK)
      flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

if (flags & recurse_flag_control_head_found)
  words += 1;

/* Start-of-match and mark only need a word when the recursion can be quit. */
if (flags & recurse_flag_quit_found)
  {
  if (flags & recurse_flag_setsom_found)
    words += 1;
  if (flags & recurse_flag_setmark_found)
    words += 1;
  }

*result_flags = flags;
return words;
}
2793
2794
/* Copy modes accepted by the type argument of copy_recurse_data(). */
enum copy_recurse_data_types {
  /* Private, shared and kept-shared words: locals -> stack. */
  recurse_copy_from_global = 0,
  /* Private words only: stack -> locals. */
  recurse_copy_private_to_global = 1,
  /* Shared and kept-shared words: stack -> locals. */
  recurse_copy_shared_to_global = 2,
  /* Kept-shared words only: stack -> locals. */
  recurse_copy_kept_shared_to_global = 3,
  /* Private and shared words are exchanged between the locals and the
  memory addressed by TMP2. */
  recurse_swap_global = 4
};
2801
2802
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2803
int type, int stackptr, int stacktop, uint32_t recurse_flags)
2804
{
2805
delayed_mem_copy_status status;
2806
PCRE2_SPTR alternative;
2807
sljit_sw private_srcw[2];
2808
sljit_sw shared_srcw[3];
2809
sljit_sw kept_shared_srcw[2];
2810
int private_count, shared_count, kept_shared_count;
2811
int from_sp, base_reg, offset, i;
2812
2813
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2814
2815
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2816
SLJIT_ASSERT(common->control_head_ptr != 0);
2817
recurse_check_bit(common, common->control_head_ptr);
2818
#endif
2819
2820
switch (type)
2821
{
2822
case recurse_copy_from_global:
2823
from_sp = TRUE;
2824
base_reg = STACK_TOP;
2825
break;
2826
2827
case recurse_copy_private_to_global:
2828
case recurse_copy_shared_to_global:
2829
case recurse_copy_kept_shared_to_global:
2830
from_sp = FALSE;
2831
base_reg = STACK_TOP;
2832
break;
2833
2834
default:
2835
SLJIT_ASSERT(type == recurse_swap_global);
2836
from_sp = FALSE;
2837
base_reg = TMP2;
2838
break;
2839
}
2840
2841
stackptr = STACK(stackptr);
2842
stacktop = STACK(stacktop);
2843
2844
status.tmp_regs[0] = TMP1;
2845
status.saved_tmp_regs[0] = TMP1;
2846
2847
if (base_reg != TMP2)
2848
{
2849
status.tmp_regs[1] = TMP2;
2850
status.saved_tmp_regs[1] = TMP2;
2851
}
2852
else
2853
{
2854
status.saved_tmp_regs[1] = RETURN_ADDR;
2855
if (HAS_VIRTUAL_REGISTERS)
2856
status.tmp_regs[1] = STR_PTR;
2857
else
2858
status.tmp_regs[1] = RETURN_ADDR;
2859
}
2860
2861
status.saved_tmp_regs[2] = TMP3;
2862
if (HAS_VIRTUAL_REGISTERS)
2863
status.tmp_regs[2] = STR_END;
2864
else
2865
status.tmp_regs[2] = TMP3;
2866
2867
delayed_mem_copy_init(&status, common);
2868
2869
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2870
{
2871
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2872
2873
if (!from_sp)
2874
delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2875
2876
if (from_sp || type == recurse_swap_global)
2877
delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2878
}
2879
2880
stackptr += sizeof(sljit_sw);
2881
2882
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2883
if (type != recurse_copy_shared_to_global)
2884
{
2885
if (!from_sp)
2886
delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2887
2888
if (from_sp || type == recurse_swap_global)
2889
delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2890
}
2891
2892
stackptr += sizeof(sljit_sw);
2893
#endif
2894
2895
while (cc < ccend)
2896
{
2897
private_count = 0;
2898
shared_count = 0;
2899
kept_shared_count = 0;
2900
2901
switch(*cc)
2902
{
2903
case OP_SET_SOM:
2904
SLJIT_ASSERT(common->has_set_som);
2905
if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2906
{
2907
kept_shared_srcw[0] = OVECTOR(0);
2908
kept_shared_count = 1;
2909
}
2910
cc += 1;
2911
break;
2912
2913
case OP_RECURSE:
2914
if (recurse_flags & recurse_flag_quit_found)
2915
{
2916
if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2917
{
2918
kept_shared_srcw[0] = OVECTOR(0);
2919
kept_shared_count = 1;
2920
}
2921
if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2922
{
2923
kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2924
kept_shared_count++;
2925
}
2926
}
2927
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2928
{
2929
shared_srcw[0] = common->capture_last_ptr;
2930
shared_count = 1;
2931
}
2932
cc += 1 + LINK_SIZE;
2933
break;
2934
2935
case OP_KET:
2936
private_srcw[0] = PRIVATE_DATA(cc);
2937
if (private_srcw[0] != 0)
2938
{
2939
if (recurse_check_bit(common, private_srcw[0]))
2940
private_count = 1;
2941
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2942
cc += PRIVATE_DATA(cc + 1);
2943
}
2944
cc += 1 + LINK_SIZE;
2945
break;
2946
2947
case OP_ASSERT:
2948
case OP_ASSERT_NOT:
2949
case OP_ASSERTBACK:
2950
case OP_ASSERTBACK_NOT:
2951
case OP_ASSERT_NA:
2952
case OP_ASSERTBACK_NA:
2953
case OP_ONCE:
2954
case OP_SCRIPT_RUN:
2955
case OP_BRAPOS:
2956
case OP_SBRA:
2957
case OP_SBRAPOS:
2958
case OP_SCOND:
2959
private_srcw[0] = PRIVATE_DATA(cc);
2960
if (recurse_check_bit(common, private_srcw[0]))
2961
private_count = 1;
2962
cc += 1 + LINK_SIZE;
2963
break;
2964
2965
case OP_ASSERT_SCS:
2966
private_srcw[0] = PRIVATE_DATA(cc);
2967
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2968
if (recurse_check_bit(common, private_srcw[0]))
2969
private_count = 2;
2970
cc += 1 + LINK_SIZE;
2971
break;
2972
2973
case OP_CBRA:
2974
case OP_SCBRA:
2975
offset = GET2(cc, 1 + LINK_SIZE);
2976
shared_srcw[0] = OVECTOR(offset << 1);
2977
if (recurse_check_bit(common, shared_srcw[0]))
2978
{
2979
shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2980
SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2981
shared_count = 2;
2982
}
2983
2984
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2985
{
2986
shared_srcw[shared_count] = common->capture_last_ptr;
2987
shared_count++;
2988
}
2989
2990
if (common->optimized_cbracket[offset] == 0)
2991
{
2992
private_srcw[0] = OVECTOR_PRIV(offset);
2993
if (recurse_check_bit(common, private_srcw[0]))
2994
private_count = 1;
2995
}
2996
2997
cc += 1 + LINK_SIZE + IMM2_SIZE;
2998
break;
2999
3000
case OP_CBRAPOS:
3001
case OP_SCBRAPOS:
3002
offset = GET2(cc, 1 + LINK_SIZE);
3003
shared_srcw[0] = OVECTOR(offset << 1);
3004
if (recurse_check_bit(common, shared_srcw[0]))
3005
{
3006
shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
3007
SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
3008
shared_count = 2;
3009
}
3010
3011
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
3012
{
3013
shared_srcw[shared_count] = common->capture_last_ptr;
3014
shared_count++;
3015
}
3016
3017
private_srcw[0] = PRIVATE_DATA(cc);
3018
if (recurse_check_bit(common, private_srcw[0]))
3019
private_count = 1;
3020
3021
offset = OVECTOR_PRIV(offset);
3022
if (recurse_check_bit(common, offset))
3023
{
3024
private_srcw[private_count] = offset;
3025
private_count++;
3026
}
3027
cc += 1 + LINK_SIZE + IMM2_SIZE;
3028
break;
3029
3030
case OP_COND:
3031
/* Might be a hidden SCOND. */
3032
alternative = cc + GET(cc, 1);
3033
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
3034
{
3035
private_srcw[0] = PRIVATE_DATA(cc);
3036
if (recurse_check_bit(common, private_srcw[0]))
3037
private_count = 1;
3038
}
3039
cc += 1 + LINK_SIZE;
3040
break;
3041
3042
CASE_ITERATOR_PRIVATE_DATA_1
3043
private_srcw[0] = PRIVATE_DATA(cc);
3044
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3045
private_count = 1;
3046
cc += 2;
3047
#ifdef SUPPORT_UNICODE
3048
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3049
#endif
3050
break;
3051
3052
CASE_ITERATOR_PRIVATE_DATA_2A
3053
private_srcw[0] = PRIVATE_DATA(cc);
3054
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3055
{
3056
private_count = 2;
3057
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3058
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3059
}
3060
cc += 2;
3061
#ifdef SUPPORT_UNICODE
3062
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3063
#endif
3064
break;
3065
3066
CASE_ITERATOR_PRIVATE_DATA_2B
3067
private_srcw[0] = PRIVATE_DATA(cc);
3068
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3069
{
3070
private_count = 2;
3071
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3072
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3073
}
3074
cc += 2 + IMM2_SIZE;
3075
#ifdef SUPPORT_UNICODE
3076
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3077
#endif
3078
break;
3079
3080
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3081
private_srcw[0] = PRIVATE_DATA(cc);
3082
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3083
private_count = 1;
3084
cc += 1;
3085
break;
3086
3087
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3088
private_srcw[0] = PRIVATE_DATA(cc);
3089
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3090
{
3091
private_count = 2;
3092
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3093
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3094
}
3095
cc += 1;
3096
break;
3097
3098
CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3099
private_srcw[0] = PRIVATE_DATA(cc);
3100
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3101
{
3102
private_count = 2;
3103
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3104
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3105
}
3106
cc += 1 + IMM2_SIZE;
3107
break;
3108
3109
case OP_CLASS:
3110
case OP_NCLASS:
3111
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3112
case OP_XCLASS:
3113
case OP_ECLASS:
3114
i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3115
#else
3116
i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3117
#endif
3118
if (PRIVATE_DATA(cc) != 0)
3119
{
3120
private_count = 1;
3121
private_srcw[0] = PRIVATE_DATA(cc);
3122
switch(get_class_iterator_size(cc + i))
3123
{
3124
case 1:
3125
break;
3126
3127
case 2:
3128
if (recurse_check_bit(common, private_srcw[0]))
3129
{
3130
private_count = 2;
3131
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3132
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3133
}
3134
break;
3135
3136
default:
3137
SLJIT_UNREACHABLE();
3138
break;
3139
}
3140
}
3141
cc += i;
3142
break;
3143
3144
case OP_MARK:
3145
case OP_COMMIT_ARG:
3146
case OP_PRUNE_ARG:
3147
case OP_THEN_ARG:
3148
SLJIT_ASSERT(common->mark_ptr != 0);
3149
if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3150
{
3151
kept_shared_srcw[0] = common->mark_ptr;
3152
kept_shared_count = 1;
3153
}
3154
if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3155
{
3156
private_srcw[0] = common->control_head_ptr;
3157
private_count = 1;
3158
}
3159
cc += 1 + 2 + cc[1];
3160
break;
3161
3162
case OP_THEN:
3163
SLJIT_ASSERT(common->control_head_ptr != 0);
3164
if (recurse_check_bit(common, common->control_head_ptr))
3165
{
3166
private_srcw[0] = common->control_head_ptr;
3167
private_count = 1;
3168
}
3169
cc++;
3170
break;
3171
3172
default:
3173
cc = next_opcode(common, cc);
3174
SLJIT_ASSERT(cc != NULL);
3175
continue;
3176
}
3177
3178
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3179
{
3180
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3181
3182
for (i = 0; i < private_count; i++)
3183
{
3184
SLJIT_ASSERT(private_srcw[i] != 0);
3185
3186
if (!from_sp)
3187
delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
3188
3189
if (from_sp || type == recurse_swap_global)
3190
delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3191
3192
stackptr += sizeof(sljit_sw);
3193
}
3194
}
3195
else
3196
stackptr += sizeof(sljit_sw) * private_count;
3197
3198
if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3199
{
3200
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3201
3202
for (i = 0; i < shared_count; i++)
3203
{
3204
SLJIT_ASSERT(shared_srcw[i] != 0);
3205
3206
if (!from_sp)
3207
delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3208
3209
if (from_sp || type == recurse_swap_global)
3210
delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3211
3212
stackptr += sizeof(sljit_sw);
3213
}
3214
}
3215
else
3216
stackptr += sizeof(sljit_sw) * shared_count;
3217
3218
if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3219
{
3220
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3221
3222
for (i = 0; i < kept_shared_count; i++)
3223
{
3224
SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3225
3226
if (!from_sp)
3227
delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3228
3229
if (from_sp || type == recurse_swap_global)
3230
delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3231
3232
stackptr += sizeof(sljit_sw);
3233
}
3234
}
3235
else
3236
stackptr += sizeof(sljit_sw) * kept_shared_count;
3237
}
3238
3239
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3240
3241
delayed_mem_copy_finish(&status);
3242
}
3243
3244
/* Scans the bracket starting at cc (recursing into nested brackets) and
stores 1 in the common->then_offsets entry selected by current_offset
whenever an opcode in the OP_THEN..OP_THEN_ARG range is found.

current_offset is inherited from the enclosing bracket. It is redirected to
the entry of the first opcode of each alternative when alternatives are
tracked for this bracket, and it is NULL when verbs must not be recorded.
Returns the address following the bracket, including the extra length of a
ket that has private data. */
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
PCRE2_SPTR group_end = bracketend(cc);
BOOL track_alternatives = (cc[GET(cc, 1)] == OP_ALT);

if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
  {
  /* Nothing is recorded for the enclosing bracket inside these asserts. */
  current_offset = NULL;
  }
else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS)
  {
  /* These are tracked even when they have a single alternative. */
  track_alternatives = TRUE;
  }
else if (*cc == OP_COND || *cc == OP_SCOND)
  {
  /* Conditional block does never capture. */
  track_alternatives = FALSE;
  }

cc = next_opcode(common, cc);

if (track_alternatives)
  {
  /* The entry belongs to the opcode after a leading reverse / reference item. */
  if (*cc == OP_REVERSE || *cc == OP_CREF)
    cc += 1 + IMM2_SIZE;
  else if (*cc == OP_VREVERSE || *cc == OP_DNCREF)
    cc += 1 + 2 * IMM2_SIZE;

  current_offset = common->then_offsets + (cc - common->start);
  }

while (cc < group_end)
  {
  if (*cc >= OP_ASSERT && *cc <= OP_SCOND)
    {
    /* Nested bracket: processed recursively, continues after it. */
    cc = set_then_offsets(common, cc, current_offset);
    }
  else if (*cc == OP_ALT && track_alternatives)
    {
    cc += 1 + LINK_SIZE;

    if (*cc == OP_REVERSE)
      cc += 1 + IMM2_SIZE;
    else if (*cc == OP_VREVERSE)
      cc += 1 + 2 * IMM2_SIZE;

    current_offset = common->then_offsets + (cc - common->start);
    }
  else
    {
    if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
      *current_offset = 1;
    cc = next_opcode(common, cc);
    }
  }

cc = group_end - 1 - LINK_SIZE;

/* Ignore repeats. */
if (*cc == OP_KET && PRIVATE_DATA(cc) != 0)
  group_end += PRIVATE_DATA(cc + 1);

return group_end;
}
3311
3312
#undef CASE_ITERATOR_PRIVATE_DATA_1
3313
#undef CASE_ITERATOR_PRIVATE_DATA_2A
3314
#undef CASE_ITERATOR_PRIVATE_DATA_2B
3315
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3316
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3317
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3318
3319
/* Returns TRUE when value has at most one bit set. Note that zero is
reported as TRUE as well. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clearing the lowest set bit leaves zero only if no other bit was set. */
unsigned int without_lowest_bit = value & (value - 1);

return without_lowest_bit == 0;
}
3323
3324
/* Sets label as the target of every jump stored in list. */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
jump_list *item;

/* sljit_set_label is clever enough to do nothing
   if either the jump or the label is NULL. */
for (item = list; item != NULL; item = item->next)
  SET_LABEL(item->jump, label);
}
3334
3335
/* Prepends jump to the jump list *list. The list node is allocated with
sljit_alloc_memory(); when the allocation fails the list is left unchanged
and the jump is not recorded. */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

if (node == NULL)
  return;

node->jump = jump;
node->next = *list;
*list = node;
}
3345
3346
/* Registers a stub on common->stubs: start is the jump that enters the
stub, and a label emitted at the current position is recorded as the place
where the stub returns to. Nothing is emitted or recorded when the list
node cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3359
3360
/* Emits the body of every registered stub at the current position: the
entry jump of the stub lands here, a fast call is added to the
common->stackalloc jump list, and control then jumps back to the quit label
of the stub. The stub list is emptied afterwards. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3374
3375
/* Emits the code that decrements COUNT_MATCH by one and adds a jump to the
common->calllimit list that is taken when the counter reaches zero. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
3382
3383
/* Emits the code that reserves size machine words by lowering STACK_TOP,
and registers a stub that is entered when the new STACK_TOP is below
STACK_LIMIT. size must be positive.

May destroy all locals and registers except TMP2. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
struct sljit_jump *limit_exceeded;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
#ifdef DESTROY_REGISTERS
/* Debug aid: overwrite everything this call is allowed to destroy. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#if defined SLJIT_DEBUG && SLJIT_DEBUG
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
/* These two are also used by the stackalloc calls. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0);
#endif
#endif
limit_exceeded = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, limit_exceeded);
}
3403
3404
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
/* Counterpart of allocate_stack: moves STACK_TOP up by size sljit_sw
sized slots. No limit check is emitted. */
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);

OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
}
3411
3412
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates size bytes plus one leading sljit_uw sized header word. The
header word stores the previous value of common->read_only_data_head, so
all chunks are chained together through their headers.

Returns the address right after the header word. Returns NULL when the
compiler is already in an error state, or when the allocation fails (in
the latter case the compiler memory error is set as well). */
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new chunk in front of the chain. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
3431
3432
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
/* Emits code which stores the "begin - 1" value into OVECTOR(1) ..
OVECTOR(length - 1). The value is computed into SLJIT_R0. */
DEFINE_COMPILER;
struct sljit_label *fill_loop;
sljit_s32 slot;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

if (length < 8)
  {
  /* Short vector: emit one store for each slot. */
  for (slot = 1; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), SLJIT_R0, 0);
  return;
  }

/* Long vector: emit a loop, SLJIT_R1 is the cursor and SLJIT_R2 is the
counter. The first call only queries whether the pre-update store form is
supported. */
if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  {
  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  fill_loop = LABEL();
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
  }
else
  {
  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  fill_loop = LABEL();
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Loop tail shared by both forms. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, fill_loop);
}
3470
3471
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
/* Emits code which stores zero into every sljit_sw sized local between
common->early_fail_start_ptr (inclusive) and common->early_fail_end_ptr
(exclusive). */
DEFINE_COMPILER;
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 tail_size;
sljit_s32 zero_src = SLJIT_IMM;
sljit_s32 offset;
struct sljit_label *clear_loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

/* A single local: one immediate store is enough. */
if (size == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

/* Keep the zero in TMP3 when TMP3 has a register index and the CPU does
not report a zero register. Otherwise the source stays SLJIT_IMM, and the
0 passed as its argument is the immediate value. */
if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  zero_src = TMP3;
  }

/* Up to six locals: emit one store for each of them. */
if (size <= 6 * sizeof(sljit_sw))
  {
  for (offset = common->early_fail_start_ptr; offset < common->early_fail_end_ptr; offset += sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), offset, zero_src, 0);
  return;
  }

/* Larger ranges: a loop clears three locals per iteration, and the
remaining zero, one or two locals are cleared after the loop. */
GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

tail_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);

OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - tail_size);

clear_loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), zero_src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), zero_src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, clear_loop);

if (tail_size >= sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);

if (tail_size >= 2 * sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), zero_src, 0);
}
3520
3521
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code which:
 - copies the value of OVECTOR(1) into OVECTOR(2) .. OVECTOR(length - 1),
 - clears the mark and control head locals when they are in use,
 - loads the start_ptr local into TMP1,
 - reloads STACK_TOP from the end field of the stack referenced by the
   jit_arguments structure. */
DEFINE_COMPILER;
struct sljit_label *fill_loop;
int slot;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

if (length < 8)
  {
  /* Short vector: emit one store for each slot. */
  for (slot = 2; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
  }
else if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  {
  /* Loop using the pre-update store form. STACK_TOP is the counter, it
  is reloaded below. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  fill_loop = LABEL();
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }
else
  {
  /* Loop using a separate store and cursor increment. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  fill_loop = LABEL();
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }

/* With virtual registers the stack field is read in two steps: the
arguments pointer is copied to STACK_TOP first, and dereferenced later. */
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
else
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3574
3575
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
/* Walks a chain of records starting at current. In each record word 0 is
the link to the next record (0 terminates the chain) and word 1 is the
record type. For a type_mark record word 2 is the mark name, and word 3 is
the value returned when the name is equal to skip_arg. Records of type
type_then_trap are skipped. Returns 0 when no mark matches. */
for (; current != NULL; current = (sljit_sw*)current[0])
  {
  if (current[1] == type_mark)
    {
    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
      return current[3];
    }
  else if (current[1] != type_then_trap)
    {
    /* No other record type is expected here. */
    SLJIT_UNREACHABLE();
    }

  /* The link is either the terminator or a higher address. */
  SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
  }

return 0;
}
3598
3599
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code which publishes a match:
 - STR_PTR is stored into OVECTOR(1), its previous value is kept in SLJIT_S2,
 - startchar_ptr (and mark_ptr when marks are used) is stored into the
   jit_arguments structure,
 - oveccount values of the local ovector are converted to offsets relative
   to the begin field and written into the ovector of match_data,
 - the return value is computed into SLJIT_RETURN_REG. */
DEFINE_COMPILER;
struct sljit_label *again;
BOOL pre_update;
BOOL has_mark = (common->mark_ptr != 0);
sljit_s32 store_op = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* In both variants SLJIT_R1 receives oveccount, and SLJIT_R2 points one
PCRE2_SIZE before the ovector of match_data. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* Query only: is the pre-update load form available? */
pre_update = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (pre_update ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: SLJIT_S0 reads the local ovector, SLJIT_R2 writes the output
ovector, SLJIT_R1 counts down. */
again = LABEL();

if (pre_update)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(store_op, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, again);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket <= 1)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  return;
  }

/* Scan the start offsets downwards from the top bracket while they are
equal to the saved value in SLJIT_S2. SLJIT_R1 is decreased in each step. */
if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
  {
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  again = LABEL();
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
  }
else
  {
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  again = LABEL();
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }

/* Loop tail shared by both forms. */
OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, again);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
3695
3696
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code of a partial match return: SLJIT_RETURN_REG is set to
PCRE2_ERROR_PARTIAL, startchar_ptr is stored into the jit_arguments
structure, the first two ovector items of match_data receive the begin and
end offsets of the partial match, and the code jumps to quit. */
DEFINE_COMPILER;
sljit_s32 store_op = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? SLJIT_R1 : ARGUMENTS;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (args_reg != ARGUMENTS)
  OP1(SLJIT_MOV, args_reg, 0, ARGUMENTS, 0);

/* The match start is read from hit_start in soft partial mode, and from
start_ptr otherwise. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* ovector[0]: start pointer converted to an offset from begin. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(store_op, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1]: STR_END converted to an offset from begin. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(store_op, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3733
3734
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
/* Emits code which lowers the start_used_ptr local to STR_PTR when STR_PTR
is below it. Nothing is emitted unless a partial matching mode is active.
May destroy TMP1. */
DEFINE_COMPILER;
struct sljit_jump *keep;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* The value of -1 must be kept for start_used_ptr! */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  keep = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(keep);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

keep = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(keep);
}
3757
3758
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character has an othercase. In UTF or UCP mode the
characters above 127 are checked with UCD_OTHERCASE, everything else is
checked with the common->fcc table. */
unsigned int c;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (!common->utf)
    c = *cc;
  else
    {
    GETCHAR(c, cc);
    }

  return (c > 127) ? (c != UCD_OTHERCASE(c)) : (common->fcc[c] != c);
  }
#endif

c = *cc;
if (!MAX_255(c))
  return FALSE;
return common->fcc[c] != c;
}
3783
3784
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
/* Returns with the othercase of c. UCD_OTHERCASE is used for characters
above 127 in UTF or UCP mode, otherwise the common->fcc table is consulted
(c itself is passed as the third argument of TABLE_GET). */
#ifdef SUPPORT_UNICODE
if (c > 127 && (common->utf || common->ucp))
  return UCD_OTHERCASE(c);
#endif

return TABLE_GET(c, common->fcc, c);
}
3793
3794
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 when they differ in more than one bit. Otherwise the result is
(position << 8) | mask, where mask is the differing bit reduced to a single
byte, and position tells where that byte is inside the encoded character.
The character must have an othercase. */
unsigned int chr, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int position;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (!common->utf)
    chr = *cc;
  else
    {
    GETCHAR(chr, cc);
    }

  other = (chr <= 127) ? common->fcc[chr] : UCD_OTHERCASE(chr);
  }
else
#endif
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }

SLJIT_ASSERT(chr != other);

diff = chr ^ other;
/* Optimized for English alphabet. */
if (chr <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 127)
  {
  /* Start from the last byte of the sequence, and step back one byte
  for every six low bits which are not affected. */
  position = GET_EXTRALEN(*cc);
  while ((diff & 0x3f) == 0)
    {
    position--;
    diff >>= 6;
    }
  return (position << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 65535)
  {
  if (diff >= (1u << 10))
    diff >>= 10;
  else
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3869
3870
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial matching is occurred. Does not modify registers.
Nothing is emitted in complete matching mode. In soft partial mode the
hit_start local is set to zero, otherwise the code jumps to the partial
match handler. The emitted check can be skipped at runtime depending on
the value of the start_used_ptr local. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!(force || common->allow_empty_partial))
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3899
3900
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end of subject check. The jumps taken when STR_PTR is at or
after STR_END are collected on end_reached, except for the hard partial
mode where the code may jump to the partial match handler instead.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes start with the same start_used_ptr check. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3929
3930
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end of subject check used before reading a character. In
complete mode a single jump is added to backtracks. In the partial modes
the start_used_ptr local decides whether the code backtracks, records a
soft partial hit (hit_start is set to zero), or jumps to the partial match
handler. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (!common->allow_empty_partial)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3962
3963
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match handling which follows a failed end of subject
check. Soft mode: hit_start is set to zero when start_used_ptr is below
STR_PTR. Hard mode: the code jumps to the partial match handler under the
same condition. Nothing is emitted in complete mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

/* Partial matching mode. */
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel != NULL)
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
}
3983
3984
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to label which is taken while STR_PTR is below STR_END.
The partial match handling is emitted for the fall through path. */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

process_partial_match(common);
}
3991
3992
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed.

Only the first code unit is loaded when max is below the first value
which needs multi-unit decoding (or, in 32 bit mode, validation). The
dst/dstw operand is used to save STR_PTR around the decoder fast call.
When backtracks is not NULL, a jump is added to it for characters which
are detected as invalid in invalid UTF mode. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
/* Save STR_PTR, call the decoder, then restore STR_PTR. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
if (backtracks && common->invalid_utf)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (common->invalid_utf)
  {
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  /* Save STR_PTR, call the decoder, then restore STR_PTR. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  /* invalid_utf is known to be set in this branch. */
  if (backtracks)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
else
  {
  /* TMP2 contains the high surrogate. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

/* TMP2 is used for the surrogate range check. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
else
  {
  /* No backtrack list: replace the invalid value with INVALID_UTF_CHAR. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4075
4076
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

The character is returned in TMP1. In invalid UTF mode a jump is added to
backtracks for the characters which are detected as invalid. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
if (!common->invalid_utf)
  add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
else
  {
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

if (common->invalid_utf)
  {
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
else
  {
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
  /* TMP2 contains the low surrogate. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Backtrack when the value is 0x110000 or above, or when it is in the
  0xd800 - 0xdfff range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4143
4144
/* Option bits for the options argument of read_char(). */

/* STR_PTR must be moved past the whole character, even when the character
is outside of the requested range and its value is not fully decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode, the utfreadnewline_invalid helper is called instead
of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the bits above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* The invalid UTF code path is not used, even if common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
4148
4149
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
4150
jump_list **backtracks, sljit_u32 options)
4151
{
4152
/* Reads the precise value of a character into TMP1, if the character is
4153
between min and max (c >= min && c <= max). Otherwise it returns with a value
4154
outside the range. Does not check STR_END. */
4155
DEFINE_COMPILER;
4156
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4157
struct sljit_jump *jump;
4158
#endif
4159
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4160
struct sljit_jump *jump2;
4161
#endif
4162
4163
SLJIT_UNUSED_ARG(min);
4164
SLJIT_UNUSED_ARG(max);
4165
SLJIT_UNUSED_ARG(backtracks);
4166
SLJIT_UNUSED_ARG(options);
4167
SLJIT_ASSERT(min <= max);
4168
4169
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4170
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4171
4172
#ifdef SUPPORT_UNICODE
4173
#if PCRE2_CODE_UNIT_WIDTH == 8
4174
if (common->utf)
4175
{
4176
if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4177
4178
if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4179
{
4180
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4181
4182
if (options & READ_CHAR_UTF8_NEWLINE)
4183
add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4184
else
4185
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4186
4187
if (backtracks != NULL)
4188
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4189
JUMPHERE(jump);
4190
return;
4191
}
4192
4193
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4194
if (min >= 0x10000)
4195
{
4196
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4197
if (options & READ_CHAR_UPDATE_STR_PTR)
4198
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4199
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4200
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
4201
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4202
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4203
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4204
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4205
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4206
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4207
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4208
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4209
if (!(options & READ_CHAR_UPDATE_STR_PTR))
4210
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4211
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4212
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4213
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4214
JUMPHERE(jump2);
4215
if (options & READ_CHAR_UPDATE_STR_PTR)
4216
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4217
}
4218
else if (min >= 0x800 && max <= 0xffff)
4219
{
4220
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4221
if (options & READ_CHAR_UPDATE_STR_PTR)
4222
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4223
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4224
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
4225
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4226
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4227
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4228
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4229
if (!(options & READ_CHAR_UPDATE_STR_PTR))
4230
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4231
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4232
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4233
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4234
JUMPHERE(jump2);
4235
if (options & READ_CHAR_UPDATE_STR_PTR)
4236
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4237
}
4238
else if (max >= 0x800)
4239
{
4240
add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
4241
}
4242
else if (max < 128)
4243
{
4244
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4245
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4246
}
4247
else
4248
{
4249
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4250
if (!(options & READ_CHAR_UPDATE_STR_PTR))
4251
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4252
else
4253
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4254
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4255
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4256
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4257
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4258
if (options & READ_CHAR_UPDATE_STR_PTR)
4259
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4260
}
4261
JUMPHERE(jump);
4262
}
4263
#elif PCRE2_CODE_UNIT_WIDTH == 16
4264
if (common->utf)
4265
{
4266
if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4267
4268
if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4269
{
4270
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4271
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4272
4273
if (options & READ_CHAR_UTF8_NEWLINE)
4274
add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4275
else
4276
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4277
4278
if (backtracks != NULL)
4279
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4280
JUMPHERE(jump);
4281
return;
4282
}
4283
4284
if (max >= 0x10000)
4285
{
4286
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4287
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4288
/* TMP2 contains the high surrogate. */
4289
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4290
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4291
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4292
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4293
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4294
JUMPHERE(jump);
4295
return;
4296
}
4297
4298
/* Skip low surrogate if necessary. */
4299
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4300
4301
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4302
{
4303
if (options & READ_CHAR_UPDATE_STR_PTR)
4304
OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4305
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4306
if (options & READ_CHAR_UPDATE_STR_PTR)
4307
SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
4308
if (max >= 0xd800)
4309
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
4310
}
4311
else
4312
{
4313
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4314
if (options & READ_CHAR_UPDATE_STR_PTR)
4315
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4316
if (max >= 0xd800)
4317
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4318
JUMPHERE(jump);
4319
}
4320
}
4321
#elif PCRE2_CODE_UNIT_WIDTH == 32
4322
if (common->invalid_utf)
4323
{
4324
if (backtracks != NULL)
4325
{
4326
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4327
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4328
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4329
}
4330
else
4331
{
4332
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4333
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4334
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4335
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4336
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4337
}
4338
}
4339
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4340
#endif /* SUPPORT_UNICODE */
4341
}
4342
4343
static void skip_valid_char(compiler_common *common)
{
/* Emits code that advances STR_PTR past one character. No validity or
STR_END checks are emitted. In UTF-8 / UTF-16 mode the first code unit is
loaded into TMP1 (which is clobbered) to work out the extra advance. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Units below 0xc0 need no further advance; for the others the extra
  advance is looked up in utf8_table4 (indexed by unit - 0xc0). */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* Units below 0xd800 need no further advance. Otherwise TMP1 becomes 1
  when (unit & 0xfc00) == 0xd800 and 0 otherwise; shifted left by one it is
  added to STR_PTR. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  JUMPHERE(single_unit);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

/* Non-UTF (or 32-bit) case: every character is exactly one code unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4373
4374
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4375
4376
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. Bytes 16..31 of the 32-byte bitset (the bits for codes 128..255)
must all be 0xff when nclass is set, or all be zero otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
4392
4393
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits code that loads the ctypes entry of the code unit at STR_PTR into
TMP1 and advances STR_PTR by one unit. TMP2 is clobbered. Does not check
STR_END. UTF mode is required (asserted).

When negated is FALSE nothing more is emitted. When negated is TRUE and the
unit is >= 0x80, the rest of the character is consumed as well: in
invalid_utf mode it is decoded through the utfreadchar_invalid helper, the
code jumps to backtracks if the result is INVALID_UTF_CHAR, and TMP1 is set
to zero; otherwise the extra length from utf8_table4 is added to STR_PTR. */
DEFINE_COMPILER;
struct sljit_jump *is_ascii;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

is_ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (!common->invalid_utf)
  {
  /* Valid input: only the length of the sequence is needed. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  /* The helper takes the first byte in TMP1. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }

JUMPHERE(is_ascii);
}
4428
4429
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4430
4431
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
for the first code unit. TMP2 is clobbered. Characters above 255 have no
ctypes entry and yield zero.

common     the compiler state
backtracks jump list taken when an invalid sequence is detected
           (only used in the invalid_utf paths)
negated    when TRUE the whole character is always consumed, even if its
           type is known to be zero; when FALSE (UTF-8 only) at most a
           two-byte sequence is decoded */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    /* TMP1 = second byte, TMP2 = first byte - 0xc2. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 = ((first - 0xc2) << 6) + second, which is the code point when
    the second byte is in 0x80-0xbf. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    /* TMP1 = second byte - 0x80; it must be below 0x40 for a continuation
    byte. The check has to test TMP1: TMP2 holds the assembled code point,
    which is at least 0x80 for every valid sequence, so testing TMP2 would
    reject all valid two-byte characters. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    /* Type is zero unless the code point indexes into ctypes. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* NOTE(review): do_utfreadchar_invalid documents its input as the first
    byte in TMP1, but here the first byte is in TMP2 and TMP1 holds the
    ctypes value (read_char7_type copies TMP2 to TMP1 before the same call).
    Confirm whether a move is missing before this call. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branch-free form: STR_PTR + 1 is selected when TMP2 < 0x400. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid-UTF mode: units outside 0xd800-0xdfff need no further work. A
  unit in 0xdc00-0xdfff here, a missing second unit, or a second unit
  outside 0xdc00-0xdfff all jump to backtracks. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4549
4550
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
4551
{
4552
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
4553
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
4554
and it is destroyed. Does not modify STR_PTR for invalid character sequences. */
/* backtracks may be NULL; when it is not NULL, the invalid-UTF paths add a
jump to it for sequences that are rejected. */
4555
DEFINE_COMPILER;
4556
4557
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4558
struct sljit_jump *jump;
4559
#endif
4560
4561
#ifdef SUPPORT_UNICODE
4562
#if PCRE2_CODE_UNIT_WIDTH == 8
4563
struct sljit_label *label;
4564
4565
if (common->utf)
4566
{
4567
if (!must_be_valid && common->invalid_utf)
4568
{
/* Step back one unit; units below 0x80 are complete characters. Anything
else goes through the utfmoveback_invalid helper, which leaves zero in
TMP1 when it rejects the sequence. */
4569
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4570
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4571
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4572
add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4573
if (backtracks != NULL)
4574
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4575
JUMPHERE(jump);
4576
return;
4577
}
4578
/* Valid UTF-8: keep stepping back while the unit just passed has the form
10xxxxxx, i.e. (unit & 0xc0) == 0x80. */
4579
label = LABEL();
4580
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4581
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4582
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4583
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
4584
return;
4585
}
4586
#elif PCRE2_CODE_UNIT_WIDTH == 16
4587
if (common->utf)
4588
{
4589
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4590
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4591
4592
if (!must_be_valid && common->invalid_utf)
4593
{
/* Units outside 0xd800-0xdfff are complete characters; the rest are
handled by the utfmoveback_invalid helper. */
4594
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4595
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
4596
add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4597
if (backtracks != NULL)
4598
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4599
JUMPHERE(jump);
4600
return;
4601
}
4602
4603
/* Skip low surrogate if necessary. */
/* TMP1 becomes 1 when (unit & 0xfc00) == 0xdc00, else 0; scaled by
UCHAR_SHIFT it is subtracted from STR_PTR. */
4604
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4605
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
4606
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4607
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4608
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4609
return;
4610
}
4611
#elif PCRE2_CODE_UNIT_WIDTH == 32
4612
if (common->invalid_utf && !must_be_valid)
4613
{
4614
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4615
if (backtracks != NULL)
4616
{
/* Reject values >= 0x110000 before STR_PTR is touched. */
4617
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4618
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4619
return;
4620
}
4621
/* No backtrack list: TMP1 becomes 1 when the value is below 0x110000 and
0 otherwise, so STR_PTR only moves for in-range values. */
4622
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
4623
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
4624
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4625
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4626
return;
4627
}
4628
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4629
#endif /* SUPPORT_UNICODE */
4630
4631
SLJIT_UNUSED_ARG(backtracks);
4632
SLJIT_UNUSED_ARG(must_be_valid);
4633
/* Fallback: a character is a single code unit. */
4634
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4635
}
4636
4637
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.
A jump is added to backtracks when the character is a newline (jumpifmatch
TRUE) or when it is not a newline (jumpifmatch FALSE). */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* Single fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the character is a newline if it is CR or NL. */
if (!jumpifmatch)
  {
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4669
4670
#ifdef SUPPORT_UNICODE
4671
4672
#if PCRE2_CODE_UNIT_WIDTH == 8
4673
static void do_utfreadchar(compiler_common *common)
4674
{
4675
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
4676
of the character (>= 0xc0). Return char value in TMP1. */
/* Emitted as a fast-call helper (SLJIT_FAST_ENTER / SLJIT_FAST_RETURN through
RETURN_ADDR). The remaining bytes are read starting at STR_PTR, which is
advanced by the 1, 2 or 3 bytes consumed. TMP2 is clobbered. No validity or
end-of-buffer checks are emitted. */
4677
DEFINE_COMPILER;
4678
struct sljit_jump *jump;
4679
4680
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
/* TMP1 = (first byte << 6) | (second byte & 0x3f). */
4681
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4682
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4683
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4684
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4685
4686
/* Searching for the first zero. */
/* 0x800 is bit 0x20 of the first byte after the shift by 6. */
4687
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4688
jump = JUMP(SLJIT_NOT_ZERO);
4689
/* Two byte sequence. */
/* 0x3000 is 0xc0 << 6: removes the marker bits of the first byte. */
4690
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
4691
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4692
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4693
4694
JUMPHERE(jump);
/* Append the low six bits of the third byte. */
4695
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4696
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4697
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4698
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4699
/* 0x10000 is bit 0x10 of the first byte after the two shifts. */
4700
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4701
jump = JUMP(SLJIT_NOT_ZERO);
4702
/* Three byte sequence. */
/* 0xe0000 is 0xe0 << 12. */
4703
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
4704
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4705
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4706
4707
/* Four byte sequence. */
4708
JUMPHERE(jump);
/* The 0xf0 << 12 marker is cleared before the final shift, then the low
six bits of the fourth byte are appended. */
4709
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4710
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
4711
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4712
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4713
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4714
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4715
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4716
}
4717
4718
static void do_utfreadtype8(compiler_common *common)
4719
{
4720
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
4721
of the character (>= 0xc0). Return value in TMP1. */
/* Emitted as a fast-call helper. STR_PTR is advanced past the remaining
bytes of the character. TMP2 is clobbered. The result is the ctypes entry
for code points up to 255 and zero for everything else. No validity or
end-of-buffer checks are emitted. */
4722
DEFINE_COMPILER;
4723
struct sljit_jump *jump;
4724
struct sljit_jump *compare;
4725
4726
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4727
/* Bit 0x20 set in the first byte: not a two byte sequence. */
4728
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
4729
jump = JUMP(SLJIT_NOT_ZERO);
4730
/* Two byte sequence. */
4731
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4732
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4733
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
4734
/* The upper 5 bits are known at this point. */
/* If they exceed 3 the code point is above 255. */
4735
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
4736
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4737
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4738
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
4739
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4740
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4741
/* Two byte sequence above 255: type is zero. */
4742
JUMPHERE(compare);
4743
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4744
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4745
4746
/* We only have types for characters less than 256. */
/* Longer sequences: add the utf8_table4 entry for the first byte to STR_PTR
and return zero. */
4747
JUMPHERE(jump);
4748
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4749
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4750
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4751
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4752
}
4753
4754
static void do_utfreadchar_invalid(compiler_common *common)
4755
{
4756
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
4757
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
4758
undefined for invalid characters. */
/* Emitted as a fast-call helper. TMP2 is clobbered. INVALID_UTF_CHAR is
returned in TMP1 for rejected sequences. Every branch-based rejection is
collected in exit_invalid[] and bound to one common exit at the end; when
has_cmov is set some checks use SELECT instead and leave a NULL entry.
NOTE(review): the final loop passes those NULL entries to sljit_set_label,
which is presumably tolerated - confirm. */
4759
DEFINE_COMPILER;
4760
sljit_s32 i;
4761
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4762
struct sljit_jump *jump;
4763
struct sljit_jump *buffer_end_close;
4764
struct sljit_label *three_byte_entry;
4765
struct sljit_label *exit_invalid_label;
4766
struct sljit_jump *exit_invalid[11];
4767
4768
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4769
/* From here on TMP1 carries a bias of 0xc2 in the first byte. */
4770
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4771
4772
/* Usually more than 3 characters remained in the subject buffer. */
4773
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4774
4775
/* Not a valid start of a multi-byte sequence, no more bytes read. */
4776
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4777
/* Taken when fewer than three code units remain; handled by the slower
path near the end of this function. */
4778
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4779
/* Second byte (offset -3 relative to the advanced STR_PTR). */
4780
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4781
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4782
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4783
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4784
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4785
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4786
4787
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4788
jump = JUMP(SLJIT_NOT_ZERO);
4789
/* Two-byte sequence: only one extra byte was consumed, so give two of the
three speculative units back. */
4790
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4791
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4792
4793
JUMPHERE(jump);
4794
4795
/* Three-byte sequence. */
4796
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4797
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4798
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4799
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4800
if (has_cmov)
4801
{
/* Third byte outside 0x80-0xbf: TMP1 is replaced by 0x20000.
NOTE(review): presumably chosen so that the range checks below reject
it - confirm. */
4802
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4803
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
4804
exit_invalid[2] = NULL;
4805
}
4806
else
4807
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4808
4809
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4810
jump = JUMP(SLJIT_NOT_ZERO);
4811
/* Also entered from the near-end-of-buffer path below. */
4812
three_byte_entry = LABEL();
4813
/* Two range checks on the decoded value, each rejecting a window of 0x800
values. NOTE(review): the constants appear to fold in the biases
accumulated above (surrogates and overlong forms) - confirm. */
4814
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4815
if (has_cmov)
4816
{
4817
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4818
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
4819
exit_invalid[3] = NULL;
4820
}
4821
else
4822
exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4823
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
/* Three-byte sequence: give one speculative unit back. */
4824
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4825
4826
if (has_cmov)
4827
{
4828
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4829
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4830
exit_invalid[4] = NULL;
4831
}
4832
else
4833
exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4834
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4835
4836
JUMPHERE(jump);
4837
4838
/* Four-byte sequence. */
4839
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4840
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4841
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4842
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4843
if (has_cmov)
4844
{
4845
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4846
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
4847
exit_invalid[5] = NULL;
4848
}
4849
else
4850
exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4851
/* After removing the bias, accept only a window of 0x100000 values; the
result is then offset by 0x10000. */
4852
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4853
if (has_cmov)
4854
{
4855
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4856
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
4857
exit_invalid[6] = NULL;
4858
}
4859
else
4860
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4861
4862
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4863
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4864
/* Near the end of the buffer: STR_PTR becomes entry + 1 and every further
read is bounds-checked against STR_END. */
4865
JUMPHERE(buffer_end_close);
4866
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4867
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4868
4869
/* Two-byte sequence. */
4870
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4871
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4872
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4873
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4874
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4875
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4876
4877
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4878
jump = JUMP(SLJIT_NOT_ZERO);
4879
4880
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4881
4882
/* Three-byte sequence. */
4883
JUMPHERE(jump);
4884
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4885
4886
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4887
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4888
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4889
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4890
if (has_cmov)
4891
{
4892
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4893
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4894
exit_invalid[10] = NULL;
4895
}
4896
else
4897
exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4898
4899
/* One will be subtracted from STR_PTR later. */
4900
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4901
4902
/* Four byte sequences are not possible. */
/* Values below 0x30000 rejoin the shared three-byte checks above; anything
else falls through to the invalid exit. */
4903
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4904
/* Common exit for every rejected sequence. */
4905
exit_invalid_label = LABEL();
4906
for (i = 0; i < 11; i++)
4907
sljit_set_label(exit_invalid[i], exit_invalid_label);
4908
4909
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4910
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4911
}
4912
4913
static void do_utfreadnewline_invalid(compiler_common *common)
4914
{
4915
/* Slow decoding a UTF-8 character, specialized for newlines.
4916
TMP1 contains the first byte of the character (>= 0xc0). Return
4917
char value in TMP1. */
/* Emitted as a fast-call helper. TMP2 is clobbered. Only 0x85, 0x2028 and
0x2029 are actually decoded; for anything else the following 10xxxxxx bytes
are skipped (bounded by STR_END) and INVALID_UTF_CHAR is returned. */
4918
DEFINE_COMPILER;
4919
struct sljit_label *loop;
4920
struct sljit_label *skip_start;
4921
struct sljit_label *three_byte_exit;
4922
struct sljit_jump *jump[5];
4923
4924
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4925
4926
if (common->nltype != NLTYPE_ANY)
4927
{
4928
SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4929
4930
/* All newlines are ascii, just skip intermediate octets. */
4931
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4932
loop = LABEL();
/* Use a post-increment load when the first call (with SLJIT_MEM_SUPP)
returns SLJIT_SUCCESS, otherwise a separate load and add. */
4933
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4934
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4935
else
4936
{
4937
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4938
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4939
}
4940
/* NOTE(review): unlike the loop further below, this loop does not re-check
STR_END on each iteration - confirm the subject is guaranteed to be
terminated by a unit that is not of the form 10xxxxxx. */
4941
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4942
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
/* Step back over the unit that ended the loop. */
4943
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4944
4945
JUMPHERE(jump[0]);
4946
4947
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4948
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4949
return;
4950
}
4951
/* NLTYPE_ANY: read the second byte into TMP2, then dispatch on the first
byte (0xc2 may start 0x85, 0xe2 may start 0x2028 / 0x2029). */
4952
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4953
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4954
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4955
4956
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4957
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4958
/* Entry for "not a newline": TMP2 holds the unit read last. */
4959
skip_start = LABEL();
4960
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4961
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4962
4963
/* Skip intermediate octets. */
4964
loop = LABEL();
4965
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4966
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4967
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4968
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4969
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4970
/* The unit read last is not of the form 10xxxxxx: un-consume it. */
4971
JUMPHERE(jump[3]);
4972
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4973
/* Shared "not a newline" return; also reached when STR_END is hit. */
4974
three_byte_exit = LABEL();
4975
JUMPHERE(jump[0]);
4976
JUMPHERE(jump[4]);
4977
4978
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4979
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4980
4981
/* Two byte long newline: 0x85. */
4982
JUMPHERE(jump[1]);
4983
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4984
4985
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4986
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4987
4988
/* Three byte long newlines: 0x2028 and 0x2029. */
4989
JUMPHERE(jump[2]);
4990
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4991
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4992
4993
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4994
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4995
/* TMP1 = third byte - 0x80; it must be below 0x40. */
4996
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4997
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4998
/* Result is 0x2000 | (third byte - 0x80). */
4999
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
5000
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5001
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5002
}
5003
5004
static void do_utfmoveback_invalid(compiler_common *common)
5005
{
5006
/* Goes one character back. */
/* Emitted as a fast-call helper. On entry TMP1 holds a code unit that is
compared against 0xc0 and TMP2 holds a lower bound for STR_PTR (move_back
documents it as the start of the subject buffer). On return TMP1 is 1 and
STR_PTR points at the first byte of the character, or TMP1 is 0 and STR_PTR
is one unit above its value on entry. */
5007
DEFINE_COMPILER;
5008
sljit_s32 i;
5009
struct sljit_jump *jump;
5010
struct sljit_jump *buffer_start_close;
5011
struct sljit_label *exit_ok_label;
5012
struct sljit_label *exit_invalid_label;
5013
struct sljit_jump *exit_invalid[7];
5014
5015
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5016
/* Speculatively step back three more units; rejected if the unit in TMP1
is 0xc0 or above. */
5017
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
5018
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
5019
5020
/* Two-byte sequence. */
/* Fewer than three units before the entry position: use the
bounds-checked path further below. */
5021
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
5022
5023
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
5024
/* Accept 0xc0-0xdf as the first byte of a two-byte sequence. */
5025
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
5026
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
5027
5028
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5029
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5030
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5031
5032
/* Three-byte sequence. */
5033
JUMPHERE(jump);
/* Unsigned compare: passes only when byte - 0xc0 wrapped around into
[-0x40, -1], i.e. the byte was in 0x80-0xbf. */
5034
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
5035
5036
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5037
/* Accept 0xe0-0xef as the first byte of a three-byte sequence. */
5038
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
5039
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
5040
5041
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5042
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5043
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5044
5045
/* Four-byte sequence. */
5046
JUMPHERE(jump);
/* Re-bias to byte - 0x80; it must be below 0x40. */
5047
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
5048
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
5049
/* Accept 0xf0-0xf4 as the first byte of a four-byte sequence. */
5050
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5051
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
5052
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
5053
/* Success exit; also used by the near-start path, which adjusts STR_PTR
before jumping here. */
5054
exit_ok_label = LABEL();
5055
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5056
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5057
5058
/* Two-byte sequence. */
/* Near the lower bound: STR_PTR becomes entry - 1 and every read is checked
against TMP2 first. */
5059
JUMPHERE(buffer_start_close);
5060
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5061
5062
exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
5063
5064
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5065
5066
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
5067
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
5068
5069
/* Three-byte sequence. */
5070
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5071
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
5072
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
5073
5074
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5075
5076
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
5077
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
5078
5079
/* Four-byte sequences are not possible. */
5080
/* Failure exit for the near-start three-byte path: STR_PTR is entry - 2
here, so adding 3 yields entry + 1. */
5081
exit_invalid_label = LABEL();
5082
sljit_set_label(exit_invalid[5], exit_invalid_label);
5083
sljit_set_label(exit_invalid[6], exit_invalid_label);
5084
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5085
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
5086
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5087
5088
JUMPHERE(exit_invalid[4]);
5089
/* -2 + 4 = 2 */
5090
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5091
/* Failure exit for the main path: STR_PTR is entry - 3 here, so adding 4
yields entry + 1. */
5092
exit_invalid_label = LABEL();
5093
for (i = 0; i < 4; i++)
5094
sljit_set_label(exit_invalid[i], exit_invalid_label);
5095
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5096
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
5097
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5098
}
5099
5100
static void do_utfpeakcharback(compiler_common *common)
5101
{
5102
/* Peek a character back. Does not modify STR_PTR.
Emits a fast-call helper that decodes the multi-byte UTF-8 character ending
just before STR_PTR. The decoded value is left in TMP1 and TMP2 is used as
scratch. No validity checks are emitted here.
NOTE(review): presumably only reached when STR_PTR[-1] is a continuation
byte of a well-formed 2-4 byte sequence - confirm at the call sites. */
5103
DEFINE_COMPILER;
5104
struct sljit_jump *jump[2];
5105
5106
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5107
5108
/* Two-byte case: STR_PTR[-2] is a lead byte in 0xc0-0xdf. After the
subtraction TMP1 already holds the lead byte's payload bits. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5109
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
5110
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
5111
5112
/* Three-byte case: STR_PTR[-3] is a lead byte in 0xe0-0xef. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5113
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
5114
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
5115
5116
/* Four-byte case: STR_PTR[-4] is treated as the lead byte. TMP1 currently
holds STR_PTR[-3] - 0xe0; adding 0xe0 - 0x80 turns it into STR_PTR[-3] - 0x80,
i.e. the payload of that continuation byte, which is then combined with the
lead payload shifted left by 6. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
5117
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
5118
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
5119
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5120
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5121
5122
/* Three- and four-byte sequences continue here: append the six payload
bits of STR_PTR[-2]. */
JUMPHERE(jump[1]);
5123
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5124
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
5125
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5126
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5127
5128
/* All sequence lengths end here: append the six payload bits of the last
byte, STR_PTR[-1]. */
JUMPHERE(jump[0]);
5129
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5130
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
5131
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5132
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5133
5134
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5135
}
5136
5137
static void do_utfpeakcharback_invalid(compiler_common *common)
5138
{
5139
/* Peek a character back. Does not modify STR_PTR.
Validating variant for UTF-8 input that may be malformed. The helper leaves
either the decoded character or INVALID_UTF_CHAR in TMP1; TMP2 is clobbered.
NOTE(review): on entry TMP1 presumably holds the byte at STR_PTR[-1] (already
known to be >= 0x80) and TMP2 the lowest address that may be read - confirm
at the call sites. */
5140
DEFINE_COMPILER;
5141
sljit_s32 i;
5142
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
5143
struct sljit_jump *jump[2];
5144
struct sljit_label *two_byte_entry;
5145
struct sljit_label *three_byte_entry;
5146
struct sljit_label *exit_invalid_label;
5147
/* Forward jumps to the common "invalid" exit. Entries that are replaced by
a conditional select when has_cmov is set are stored as NULL. */
struct sljit_jump *exit_invalid[8];
5148
5149
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5150
5151
/* TMP2 becomes limit + 3 code units. A last byte >= 0xc0 is a lead byte and
cannot end a sequence. If limit + 3 >= STR_PTR fewer than four bytes are
readable, so the length-restricted paths after the second JUMPHERE(jump[0])
are used instead. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
5152
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
5153
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5154
5155
/* Two-byte sequence. Accepted lead bytes are 0xc2-0xdf. */
5156
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5157
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5158
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
5159
5160
/* Shared tail of every two-byte path; TMP2 holds lead - 0xc2 here. */
two_byte_entry = LABEL();
5161
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5162
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
5163
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5164
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5165
5166
/* STR_PTR[-2] is not a two-byte lead: it must be a continuation byte.
Rebase both bytes to their payloads (byte - 0x80) and combine them. */
JUMPHERE(jump[1]);
5167
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5168
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5169
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5170
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5171
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5172
5173
/* Three-byte sequence. Lead bytes are 0xe0-0xef. */
5174
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5175
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5176
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
5177
5178
/* Shared tail of every three-byte path; TMP2 holds lead - 0xe0 here. */
three_byte_entry = LABEL();
5179
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5180
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5181
5182
/* Reject surrogates: after subtracting 0xd800 they are the values below
0x800. With cmov the value is replaced by -0xd800, so the ADD below yields 0
and the following "< 0x800" test turns it into INVALID_UTF_CHAR.
NOTE(review): assumes SELECT stores the immediate when the condition holds
and keeps TMP1 otherwise - confirm against sljit_emit_select. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5183
if (has_cmov)
5184
{
5185
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5186
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
5187
exit_invalid[2] = NULL;
5188
}
5189
else
5190
exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5191
5192
/* Restore the value, then reject results below 0x800: those would be
overlong three-byte encodings. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5193
if (has_cmov)
5194
{
5195
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5196
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
5197
exit_invalid[3] = NULL;
5198
}
5199
else
5200
exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5201
5202
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5203
5204
/* STR_PTR[-3] is not a three-byte lead: it must be a continuation byte.
Rebase it to its payload and merge it above the two payloads in TMP1. */
JUMPHERE(jump[1]);
5205
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
5206
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5207
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5208
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5209
5210
/* Four-byte sequence. The value is built with a bias of -0x10000, so a
single "below 0x100000" test is the only range check emitted on this path.
NOTE(review): presumably this one test also rejects overlong encodings and
out-of-range lead bytes through wrap-around - confirm. */
5211
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
5212
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5213
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
5214
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
5215
/* ADD is used instead of OR because of the SUB 0x10000 above. */
5216
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5217
5218
if (has_cmov)
5219
{
5220
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5221
/* The selected constant is pre-biased so that the ADD 0x10000 below
produces INVALID_UTF_CHAR. */
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
5222
exit_invalid[5] = NULL;
5223
}
5224
else
5225
exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5226
5227
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5228
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5229
5230
/* Fewer than four bytes are readable. TMP2 becomes limit + 2 code units;
if even that is >= STR_PTR, fewer than three bytes are readable. */
JUMPHERE(jump[0]);
5231
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5232
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5233
5234
/* Two-byte sequence. (At most three bytes may be read on this path.) */
5235
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5236
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5237
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5238
5239
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5240
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5241
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5242
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5243
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5244
5245
/* Three-byte sequence. */
5246
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5247
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5248
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
5249
5250
/* A fourth byte cannot be read on this path, so anything else is invalid. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5251
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5252
5253
/* Fewer than three bytes are readable. If limit + 2 > STR_PTR, not even two
bytes are available and the result is invalid. */
JUMPHERE(jump[0]);
5254
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
5255
5256
/* Two-byte sequence. (Only two bytes may be read on this path.) */
5257
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5258
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5259
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5260
5261
/* Common invalid exit: bind every recorded jump (NULL entries are passed
through to sljit_set_label as well) and return INVALID_UTF_CHAR. */
exit_invalid_label = LABEL();
5262
for (i = 0; i < 8; i++)
5263
sljit_set_label(exit_invalid[i], exit_invalid_label);
5264
5265
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5266
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5267
}
5268
5269
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5270
5271
#if PCRE2_CODE_UNIT_WIDTH == 16
5272
5273
static void do_utfreadchar_invalid(compiler_common *common)
5274
{
5275
/* Slow decoding a UTF-16 character. TMP1 contains the first half
5276
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
5277
undefined for invalid characters. */
/* On failure TMP1 is set to INVALID_UTF_CHAR. Failure means one of: the
first unit is >= 0xdc00, no code unit is left before STR_END, or the
second unit is not in 0xdc00-0xdfff. On success STR_PTR has been advanced
past the second unit. */
5278
DEFINE_COMPILER;
5279
struct sljit_jump *exit_invalid[3];
5280
5281
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5282
5283
/* TMP2 contains the high surrogate. */
/* NOTE(review): the arithmetic below (TMP2 << 10, + 0x10000) only yields
the code point if TMP2 holds the first unit minus 0xd800 - confirm that the
caller prepares TMP2 that way. */
5284
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5285
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5286
5287
/* Load the second unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5288
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5289
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5290
5291
/* The second unit must be a low surrogate: (unit - 0xdc00) < 0x400. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5292
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5293
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5294
5295
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5296
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5297
5298
/* Common invalid exit. */
JUMPHERE(exit_invalid[0]);
5299
JUMPHERE(exit_invalid[1]);
5300
JUMPHERE(exit_invalid[2]);
5301
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5302
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5303
}
5304
5305
static void do_utfreadnewline_invalid(compiler_common *common)
5306
{
5307
/* Slow decoding a UTF-16 character, specialized for newlines.
5308
TMP1 contains the first half of the character (>= 0xd800). Return
5309
char value in TMP1. */
/* The character is not actually decoded. On the normal path TMP1 is set to
the constant 0x10000 and STR_PTR is advanced by one code unit only when the
next unit is a low surrogate (0xdc00-0xdfff). TMP1 becomes INVALID_UTF_CHAR
when no unit is left before STR_END or when the first unit is >= 0xdc00.
NOTE(review): presumably any value that is not a newline character is good
enough for the callers - confirm. */
5310
5311
DEFINE_COMPILER;
5312
struct sljit_jump *exit_invalid[2];
5313
5314
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5315
5316
/* TMP2 is not read on entry in this helper: it is overwritten by the load
of the second code unit below. */
5317
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5318
5319
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5320
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5321
5322
/* TMP2 = 1 if the second unit is a low surrogate, 0 otherwise; scaled to a
byte offset and added to STR_PTR without branching. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
5323
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
5324
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5325
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5326
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5327
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5328
5329
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5330
5331
/* Common invalid exit. */
JUMPHERE(exit_invalid[0]);
5332
JUMPHERE(exit_invalid[1]);
5333
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5334
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5335
}
5336
5337
static void do_utfmoveback_invalid(compiler_common *common)
5338
{
5339
/* Goes one character back.
UTF-16 variant for input that may be malformed. Returns TMP1 = 1 with
STR_PTR moved back by one more code unit on success, or TMP1 = 0 with
STR_PTR moved forward by one code unit on failure.
NOTE(review): presumably the caller has already stepped STR_PTR back over
the last unit, left (unit - 0xd800) in TMP1 and the lower buffer limit in
TMP2 - confirm at the call sites. */
5340
DEFINE_COMPILER;
5341
struct sljit_jump *exit_invalid[3];
5342
5343
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5344
5345
/* Fail when TMP1 is below 0x400 or when TMP2 >= STR_PTR, i.e. there is no
earlier code unit that may be read. */
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5346
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5347
5348
/* The preceding unit must be a high surrogate (0xd800-0xdbff). */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5349
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5350
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5351
5352
/* Success: step over the high surrogate as well. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5353
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5354
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5355
5356
JUMPHERE(exit_invalid[0]);
5357
JUMPHERE(exit_invalid[1]);
5358
JUMPHERE(exit_invalid[2]);
5359
5360
/* Failure: advance STR_PTR by one code unit and report 0. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5361
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5362
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5363
}
5364
5365
static void do_utfpeakcharback_invalid(compiler_common *common)
5366
{
5367
/* Peek a character back. Does not modify STR_PTR.
UTF-16 variant for input that may be malformed. Leaves the decoded character
or INVALID_UTF_CHAR in TMP1; TMP2 is clobbered.
NOTE(review): on entry TMP1 presumably holds the code unit at STR_PTR[-1]
(>= 0xd800) and TMP2 the lowest address that may be read - confirm at the
call sites. */
5368
DEFINE_COMPILER;
5369
struct sljit_jump *jump;
5370
struct sljit_jump *exit_invalid[3];
5371
5372
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5373
5374
/* Units >= 0xe000 are returned unchanged. Otherwise the last unit must be
a low surrogate (>= 0xdc00) and two code units must be readable
(TMP2 + 1 unit < STR_PTR). */
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
5375
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5376
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5377
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5378
5379
/* Combine: 0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00). The
preceding unit must be a high surrogate, i.e. (unit - 0xd800) < 0x400. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5380
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5381
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5382
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5383
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5384
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5385
5386
JUMPHERE(jump);
5387
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5388
5389
/* Common invalid exit. */
JUMPHERE(exit_invalid[0]);
5390
JUMPHERE(exit_invalid[1]);
5391
JUMPHERE(exit_invalid[2]);
5392
5393
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5394
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5395
}
5396
5397
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5398
5399
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5400
/* Mask that keeps the low 7 bits of a character: its offset inside a
128-entry block. */
#define UCD_BLOCK_MASK 127
5401
/* Shift count that converts between a character and its block number
(128 == 1 << 7). */
#define UCD_BLOCK_SHIFT 7
5402
5403
static void do_getucd(compiler_common *common)
5404
{
5405
/* Search the UCD record for the character that comes in TMP1.
5406
Returns the 16-bit entry loaded from ucd_stage2 in TMP2 (presumably the
index of the character's ucd_record - confirm against GET_UCD). TMP1 is
clobbered: on return it holds the index that was used into ucd_stage2.
Unlike do_getucdtype, no chartype is loaded here. */
5407
DEFINE_COMPILER;
5408
#if PCRE2_CODE_UNIT_WIDTH == 32
5409
struct sljit_jump *jump;
5410
#endif
5411
5412
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5413
/* dummy_ucd_record */
5414
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5415
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5416
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5417
#endif
5418
5419
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5420
5421
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5422
5423
#if PCRE2_CODE_UNIT_WIDTH == 32
5424
if (!common->utf)
5425
{
5426
/* Without UTF a 32-bit code unit may exceed MAX_UTF_CODE_POINT; such
values are looked up as UNASSIGNED_UTF_CHAR instead. */
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5427
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5428
JUMPHERE(jump);
5429
}
5430
#endif
5431
5432
/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The index is doubled
because the table is read with 16-bit loads. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5433
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5434
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5435
/* Stage 2 index: (stage-1 value << UCD_BLOCK_SHIFT) + (char & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5436
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5437
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5438
/* TMP2 = 16-bit load from ucd_stage2 + (TMP1 << 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5439
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5440
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5441
}
5442
5443
static void do_getucdtype(compiler_common *common)
5444
{
5445
/* Search the UCD record for the character that comes in TMP1.
5446
Returns chartype in TMP1. TMP2 is clobbered: on return it holds the
ucd_stage2 entry multiplied by 4 (a by-product of the address computation
below), not a plain record index. */
5447
DEFINE_COMPILER;
5448
#if PCRE2_CODE_UNIT_WIDTH == 32
5449
struct sljit_jump *jump;
5450
#endif
5451
5452
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5453
/* dummy_ucd_record */
5454
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5455
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5456
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5457
#endif
5458
5459
/* The address arithmetic at the end of this function relies on a 12-byte
record size. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5460
5461
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5462
5463
#if PCRE2_CODE_UNIT_WIDTH == 32
5464
if (!common->utf)
5465
{
5466
/* Without UTF a 32-bit code unit may exceed MAX_UTF_CODE_POINT; such
values are looked up as UNASSIGNED_UTF_CHAR instead. */
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5467
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5468
JUMPHERE(jump);
5469
}
5470
#endif
5471
5472
/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5473
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5474
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5475
/* Stage 2 index: (stage-1 value << UCD_BLOCK_SHIFT) + (char & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5476
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5477
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5478
/* TMP2 = 16-bit load from ucd_stage2 + (TMP1 << 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5479
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5480
5481
/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* TMP1 = &ucd_records[0].chartype + 4 * index; the final load adds another
(4 * index) << 1 through the scaled index operand, giving 12 * index. */
5482
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5483
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5484
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5485
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5486
5487
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5488
}
5489
5490
#endif /* SUPPORT_UNICODE */
5491
5492
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5493
{
5494
/* Emits the code that precedes the first match attempt and the code that
advances STR_PTR to the next start position. Returns the label of the
"advance" code (mainloop). Control first reaches the end of the emitted
sequence through the unconditional jump stored in start, so nothing is
skipped before the first attempt. Depending on the options it also stores a
match end limit into the stack slot common->match_end_ptr (PCRE2_FIRSTLINE
or PCRE2_USE_OFFSET_LIMIT). */
DEFINE_COMPILER;
5495
struct sljit_label *mainloop;
5496
struct sljit_label *newlinelabel = NULL;
5497
struct sljit_jump *start;
5498
struct sljit_jump *end = NULL;
5499
struct sljit_jump *end2 = NULL;
5500
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5501
struct sljit_label *loop;
5502
struct sljit_jump *jump;
5503
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5504
jump_list *newline = NULL;
5505
sljit_u32 overall_options = common->re->overall_options;
5506
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5507
BOOL newlinecheck = FALSE;
5508
BOOL readuchar = FALSE;
5509
5510
/* The two-unit newline skip is only emitted when neither PCRE2_HASCRORLF nor
PCRE2_FIRSTLINE is set and the newline convention is "any", "anycrlf" or a
fixed newline whose value is above 255 (two characters). */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5511
&& (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5512
newlinecheck = TRUE;
5513
5514
SLJIT_ASSERT(common->abort_label == NULL);
5515
5516
if ((overall_options & PCRE2_FIRSTLINE) != 0)
5517
{
5518
/* Search for the end of the first line. STR_PTR is saved in TMP3 and
restored afterwards. mainloop is used as a temporary loop label here and
is reassigned further below. */
5519
SLJIT_ASSERT(common->match_end_ptr != 0);
5520
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5521
5522
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5523
{
5524
/* Two-character fixed newline: scan for the pair (high byte of
common->newline, then low byte) and store the position one code unit
before the final STR_PTR. */
mainloop = LABEL();
5525
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5526
end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5527
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5528
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5529
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5530
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5531
JUMPHERE(end);
5532
OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533
}
5534
else
5535
{
5536
/* Other newline conventions: read characters until check_newlinechar
reports a newline (jumps collected in the newline list) or STR_END is
reached. */
end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5537
mainloop = LABEL();
5538
/* Continual stores does not cause data dependency. */
5539
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5540
read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5541
check_newlinechar(common, common->nltype, &newline, TRUE);
5542
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5543
JUMPHERE(end);
5544
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5545
set_jumps(newline, LABEL());
5546
}
5547
5548
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5549
}
5550
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5551
{
5552
/* Check whether offset limit is set and valid. */
5553
SLJIT_ASSERT(common->match_end_ptr != 0);
5554
5555
/* TMP1 = arguments->offset_limit. */
if (HAS_VIRTUAL_REGISTERS)
5556
{
5557
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5558
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5559
}
5560
else
5561
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5562
5563
/* An unset limit stores STR_END (preloaded into TMP2) as the limit. */
OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5564
end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5565
/* TMP2 = arguments->begin (loaded in two steps with virtual registers). */
if (HAS_VIRTUAL_REGISTERS)
5566
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5567
else
5568
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5569
5570
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5571
/* Convert the limit from code units to bytes. */
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5572
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5573
if (HAS_VIRTUAL_REGISTERS)
5574
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5575
5576
/* TMP2 = begin + limit, clamped to STR_END. If the resulting limit lies
before STR_PTR the match is aborted with PCRE2_ERROR_NOMATCH. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5577
end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5578
OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5579
JUMPHERE(end2);
5580
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5581
add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5582
JUMPHERE(end);
5583
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5584
}
5585
5586
/* Skip the advance code below on the way in; bound by JUMPHERE(start). */
start = JUMP(SLJIT_JUMP);
5587
5588
if (newlinecheck)
5589
{
5590
/* Reached from mainloop when the current unit equals the high byte of
common->newline: step over it and, unless STR_END was reached, also step
over the next unit if it equals the low byte. The comparison result (0 or
1) is scaled to a code unit size and added to STR_PTR without branching. */
newlinelabel = LABEL();
5591
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5592
end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5593
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5594
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5595
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5596
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5597
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5598
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5599
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5600
end2 = JUMP(SLJIT_JUMP);
5601
}
5602
5603
/* This is the label returned to the caller. */
mainloop = LABEL();
5604
5605
/* Increasing the STR_PTR here requires one less jump in the most common case. */
5606
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5607
/* In valid-UTF mode the first code unit decides how far to advance. */
if (common->utf && !common->invalid_utf) readuchar = TRUE;
5608
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5609
if (newlinecheck) readuchar = TRUE;
5610
5611
if (readuchar)
5612
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5613
5614
if (newlinecheck)
5615
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5616
5617
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5618
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5619
#if PCRE2_CODE_UNIT_WIDTH == 8
5620
if (common->invalid_utf)
5621
{
5622
/* Skip continuation code units (bytes in 0x80-0xbf) until STR_END or
another kind of byte is found; the final SUB undoes the step over that
byte. */
5623
loop = LABEL();
5624
jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5625
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5626
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5627
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5628
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5629
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5630
JUMPHERE(jump);
5631
}
5632
else if (common->utf)
5633
{
5634
/* Valid UTF-8: for a first byte >= 0xc0 add the value found in
utf8_table4[byte - 0xc0] to STR_PTR (presumably the number of additional
bytes of the character - confirm against the table definition). */
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5635
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5636
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5637
JUMPHERE(jump);
5638
}
5639
#elif PCRE2_CODE_UNIT_WIDTH == 16
5640
if (common->invalid_utf)
5641
{
5642
/* Skip continuation code units (low surrogates, 0xdc00-0xdfff) until
STR_END or another kind of unit is found; the final SUB undoes the step
over that unit. */
5643
loop = LABEL();
5644
jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5645
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5646
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5647
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5648
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5649
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5650
JUMPHERE(jump);
5651
}
5652
else if (common->utf)
5653
{
5654
/* Valid UTF-16: advance one extra unit when the first unit is a high
surrogate, i.e. (unit - 0xd800) < 0x400. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5655
5656
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5657
{
5658
/* Branch-free: choose between STR_PTR + 1 unit and STR_PTR. */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5659
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5660
SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
5661
}
5662
else
5663
{
5664
/* Branch-free without cmov: add the scaled 0/1 comparison result. */
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5665
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5666
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5667
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5668
}
5669
}
5670
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5671
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5672
JUMPHERE(start);
5673
5674
if (newlinecheck)
5675
{
5676
/* Both exits of the newline skip code rejoin here. */
JUMPHERE(end);
5677
JUMPHERE(end2);
5678
}
5679
5680
return mainloop;
5681
}
5682
5683
5684
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
/* Adds chr to the set of characters recorded for one prefix position.
A count of 255 means "any character"; such a position is left untouched.
When last is set, last_count is updated together with the set. */
sljit_u32 used = chars->count;
sljit_u32 idx;

if (used == 255)
  return;

if (used != 0)
  {
  /* Nothing to do when the character is already a member of the set. */
  for (idx = 0; idx < used; idx++)
    {
    if (chars->chars[idx] == chr)
      return;
    }

  /* No room for another distinct character: degrade to "any". */
  if (used >= MAX_DIFF_CHARS)
    {
    chars->count = 255;
    return;
    }

  chars->chars[used] = chr;
  chars->count = used + 1;

  if (last)
    chars->last_count++;
  return;
  }

/* First character recorded for this position. */
chars->chars[0] = chr;
chars->count = 1;

if (last)
  chars->last_count = 1;
}
5717
5718
/* Capacity of the explicit stacks (cc_stack, chars_stack and
next_alternative_stack) used by scan_prefix().

Value can be increased if needed. Patterns
5719
such as /(a|){33}b/ can exhaust the stack.
5720
5721
Note: /(a|){29}b/ already stops scan_prefix()
5722
because it reaches the maximum step_count. */
5723
#define SCAN_PREFIX_STACK_END 32
5724
5725
/*
5726
Scan prefix stores the prefix string in the chars array.
5727
The elements of the chars array are either small character
5728
sets or "any" (count is set to 255).
5729
5730
Examples (the chars array is represented by a simple regex):
5731
5732
/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3)
5733
/a[a-z]b+c/ prefix: a.b (length: 3)
5734
/ab?cd/ prefix: a[bc][cd] (length: 3)
5735
/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2)
5736
5737
The length is returned by scan_prefix(). The length is
5738
less than or equal to the minimum length of the pattern.
5739
*/
5740
5741
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars)
5742
{
5743
fast_forward_char_data *chars_start = chars;
5744
fast_forward_char_data *chars_end = chars + MAX_N_CHARS;
5745
PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END];
5746
fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END];
5747
sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END];
5748
BOOL last, any, class, caseless;
5749
int stack_ptr, step_count, repeat, len, len_save;
5750
sljit_u32 chr; /* Any unicode character. */
5751
sljit_u8 *bytes, *bytes_end, byte;
5752
PCRE2_SPTR alternative, cc_save, oc;
5753
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5754
PCRE2_UCHAR othercase[4];
5755
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5756
PCRE2_UCHAR othercase[2];
5757
#else
5758
PCRE2_UCHAR othercase[1];
5759
#endif
5760
5761
repeat = 1;
5762
stack_ptr = 0;
5763
step_count = 10000;
5764
while (TRUE)
5765
{
5766
if (--step_count == 0)
5767
return 0;
5768
5769
SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS);
5770
5771
if (chars >= chars_end)
5772
{
5773
if (stack_ptr == 0)
5774
return (int)(chars_end - chars_start);
5775
5776
--stack_ptr;
5777
cc = cc_stack[stack_ptr];
5778
chars = chars_stack[stack_ptr];
5779
5780
if (chars >= chars_end)
5781
continue;
5782
5783
if (next_alternative_stack[stack_ptr] != 0)
5784
{
5785
/* When an alternative is processed, the
5786
next alternative is pushed onto the stack. */
5787
SLJIT_ASSERT(*cc == OP_ALT);
5788
alternative = cc + GET(cc, 1);
5789
if (*alternative == OP_ALT)
5790
{
5791
SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END);
5792
SLJIT_ASSERT(chars_stack[stack_ptr] == chars);
5793
SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1);
5794
cc_stack[stack_ptr] = alternative;
5795
stack_ptr++;
5796
}
5797
cc += 1 + LINK_SIZE;
5798
}
5799
}
5800
5801
last = TRUE;
5802
any = FALSE;
5803
class = FALSE;
5804
caseless = FALSE;
5805
5806
switch (*cc)
5807
{
5808
case OP_CHARI:
5809
caseless = TRUE;
5810
/* Fall through */
5811
case OP_CHAR:
5812
last = FALSE;
5813
cc++;
5814
break;
5815
5816
case OP_SOD:
5817
case OP_SOM:
5818
case OP_SET_SOM:
5819
case OP_NOT_WORD_BOUNDARY:
5820
case OP_WORD_BOUNDARY:
5821
case OP_EODN:
5822
case OP_EOD:
5823
case OP_CIRC:
5824
case OP_CIRCM:
5825
case OP_DOLL:
5826
case OP_DOLLM:
5827
case OP_NOT_UCP_WORD_BOUNDARY:
5828
case OP_UCP_WORD_BOUNDARY:
5829
/* Zero width assertions. */
5830
cc++;
5831
continue;
5832
5833
case OP_ASSERT:
5834
case OP_ASSERT_NOT:
5835
case OP_ASSERTBACK:
5836
case OP_ASSERTBACK_NOT:
5837
case OP_ASSERT_NA:
5838
case OP_ASSERTBACK_NA:
5839
case OP_ASSERT_SCS:
5840
cc = bracketend(cc);
5841
continue;
5842
5843
case OP_PLUSI:
5844
case OP_MINPLUSI:
5845
case OP_POSPLUSI:
5846
caseless = TRUE;
5847
/* Fall through */
5848
case OP_PLUS:
5849
case OP_MINPLUS:
5850
case OP_POSPLUS:
5851
cc++;
5852
break;
5853
5854
case OP_EXACTI:
5855
caseless = TRUE;
5856
/* Fall through */
5857
case OP_EXACT:
5858
repeat = GET2(cc, 1);
5859
last = FALSE;
5860
cc += 1 + IMM2_SIZE;
5861
break;
5862
5863
case OP_QUERYI:
5864
case OP_MINQUERYI:
5865
case OP_POSQUERYI:
5866
caseless = TRUE;
5867
/* Fall through */
5868
case OP_QUERY:
5869
case OP_MINQUERY:
5870
case OP_POSQUERY:
5871
len = 1;
5872
cc++;
5873
#ifdef SUPPORT_UNICODE
5874
if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5875
#endif
5876
if (stack_ptr >= SCAN_PREFIX_STACK_END)
5877
{
5878
chars_end = chars;
5879
continue;
5880
}
5881
5882
cc_stack[stack_ptr] = cc + len;
5883
chars_stack[stack_ptr] = chars;
5884
next_alternative_stack[stack_ptr] = 0;
5885
stack_ptr++;
5886
5887
last = FALSE;
5888
break;
5889
5890
case OP_KET:
5891
cc += 1 + LINK_SIZE;
5892
continue;
5893
5894
case OP_ALT:
5895
cc += GET(cc, 1);
5896
continue;
5897
5898
case OP_ONCE:
5899
case OP_BRA:
5900
case OP_BRAPOS:
5901
case OP_CBRA:
5902
case OP_CBRAPOS:
5903
alternative = cc + GET(cc, 1);
5904
if (*alternative == OP_ALT)
5905
{
5906
if (stack_ptr >= SCAN_PREFIX_STACK_END)
5907
{
5908
chars_end = chars;
5909
continue;
5910
}
5911
5912
cc_stack[stack_ptr] = alternative;
5913
chars_stack[stack_ptr] = chars;
5914
next_alternative_stack[stack_ptr] = 1;
5915
stack_ptr++;
5916
}
5917
5918
if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5919
cc += IMM2_SIZE;
5920
cc += 1 + LINK_SIZE;
5921
continue;
5922
5923
case OP_CLASS:
5924
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5925
if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5926
{
5927
chars_end = chars;
5928
continue;
5929
}
5930
#endif
5931
class = TRUE;
5932
break;
5933
5934
case OP_NCLASS:
5935
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5936
if (common->utf)
5937
{
5938
chars_end = chars;
5939
continue;
5940
}
5941
#endif
5942
class = TRUE;
5943
break;
5944
5945
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5946
case OP_XCLASS:
5947
case OP_ECLASS:
5948
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5949
if (common->utf)
5950
{
5951
chars_end = chars;
5952
continue;
5953
}
5954
#endif
5955
any = TRUE;
5956
cc += GET(cc, 1);
5957
break;
5958
#endif
5959
5960
case OP_DIGIT:
5961
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5962
if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5963
{
5964
chars_end = chars;
5965
continue;
5966
}
5967
#endif
5968
any = TRUE;
5969
cc++;
5970
break;
5971
5972
case OP_WHITESPACE:
5973
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5974
if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5975
{
5976
chars_end = chars;
5977
continue;
5978
}
5979
#endif
5980
any = TRUE;
5981
cc++;
5982
break;
5983
5984
case OP_WORDCHAR:
5985
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5986
if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5987
{
5988
chars_end = chars;
5989
continue;
5990
}
5991
#endif
5992
any = TRUE;
5993
cc++;
5994
break;
5995
5996
case OP_NOT:
5997
case OP_NOTI:
5998
cc++;
5999
/* Fall through. */
6000
case OP_NOT_DIGIT:
6001
case OP_NOT_WHITESPACE:
6002
case OP_NOT_WORDCHAR:
6003
case OP_ANY:
6004
case OP_ALLANY:
6005
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6006
if (common->utf)
6007
{
6008
chars_end = chars;
6009
continue;
6010
}
6011
#endif
6012
any = TRUE;
6013
cc++;
6014
break;
6015
6016
#ifdef SUPPORT_UNICODE
6017
case OP_NOTPROP:
6018
case OP_PROP:
6019
#if PCRE2_CODE_UNIT_WIDTH != 32
6020
if (common->utf)
6021
{
6022
chars_end = chars;
6023
continue;
6024
}
6025
#endif
6026
any = TRUE;
6027
cc += 1 + 2;
6028
break;
6029
#endif
6030
6031
case OP_TYPEEXACT:
6032
repeat = GET2(cc, 1);
6033
cc += 1 + IMM2_SIZE;
6034
continue;
6035
6036
case OP_NOTEXACT:
6037
case OP_NOTEXACTI:
6038
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6039
if (common->utf)
6040
{
6041
chars_end = chars;
6042
continue;
6043
}
6044
#endif
6045
any = TRUE;
6046
repeat = GET2(cc, 1);
6047
cc += 1 + IMM2_SIZE + 1;
6048
break;
6049
6050
default:
6051
chars_end = chars;
6052
continue;
6053
}
6054
6055
SLJIT_ASSERT(chars < chars_end);
6056
6057
if (any)
6058
{
6059
do
6060
{
6061
chars->count = 255;
6062
chars++;
6063
}
6064
while (--repeat > 0 && chars < chars_end);
6065
6066
repeat = 1;
6067
continue;
6068
}
6069
6070
if (class)
6071
{
6072
bytes = (sljit_u8*) (cc + 1);
6073
cc += 1 + 32 / sizeof(PCRE2_UCHAR);
6074
6075
SLJIT_ASSERT(last == TRUE && repeat == 1);
6076
switch (*cc)
6077
{
6078
case OP_CRQUERY:
6079
case OP_CRMINQUERY:
6080
case OP_CRPOSQUERY:
6081
last = FALSE;
6082
/* Fall through */
6083
case OP_CRSTAR:
6084
case OP_CRMINSTAR:
6085
case OP_CRPOSSTAR:
6086
if (stack_ptr >= SCAN_PREFIX_STACK_END)
6087
{
6088
chars_end = chars;
6089
continue;
6090
}
6091
6092
cc_stack[stack_ptr] = ++cc;
6093
chars_stack[stack_ptr] = chars;
6094
next_alternative_stack[stack_ptr] = 0;
6095
stack_ptr++;
6096
break;
6097
6098
default:
6099
case OP_CRPLUS:
6100
case OP_CRMINPLUS:
6101
case OP_CRPOSPLUS:
6102
break;
6103
6104
case OP_CRRANGE:
6105
case OP_CRMINRANGE:
6106
case OP_CRPOSRANGE:
6107
repeat = GET2(cc, 1);
6108
if (repeat <= 0)
6109
{
6110
chars_end = chars;
6111
continue;
6112
}
6113
6114
last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE));
6115
cc += 1 + 2 * IMM2_SIZE;
6116
break;
6117
}
6118
6119
do
6120
{
6121
if (bytes[31] & 0x80)
6122
chars->count = 255;
6123
else if (chars->count != 255)
6124
{
6125
bytes_end = bytes + 32;
6126
chr = 0;
6127
do
6128
{
6129
byte = *bytes++;
6130
SLJIT_ASSERT((chr & 0x7) == 0);
6131
if (byte == 0)
6132
chr += 8;
6133
else
6134
{
6135
do
6136
{
6137
if ((byte & 0x1) != 0)
6138
add_prefix_char(chr, chars, TRUE);
6139
byte >>= 1;
6140
chr++;
6141
}
6142
while (byte != 0);
6143
chr = (chr + 7) & (sljit_u32)(~7);
6144
}
6145
}
6146
while (chars->count != 255 && bytes < bytes_end);
6147
bytes = bytes_end - 32;
6148
}
6149
6150
chars++;
6151
}
6152
while (--repeat > 0 && chars < chars_end);
6153
6154
repeat = 1;
6155
if (last)
6156
chars_end = chars;
6157
continue;
6158
}
6159
6160
len = 1;
6161
#ifdef SUPPORT_UNICODE
6162
if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
6163
#endif
6164
6165
if (caseless && char_has_othercase(common, cc))
6166
{
6167
#ifdef SUPPORT_UNICODE
6168
if (common->utf)
6169
{
6170
GETCHAR(chr, cc);
6171
if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
6172
{
6173
chars_end = chars;
6174
continue;
6175
}
6176
}
6177
else
6178
#endif
6179
{
6180
chr = *cc;
6181
#ifdef SUPPORT_UNICODE
6182
if (common->ucp && chr > 127)
6183
{
6184
chr = UCD_OTHERCASE(chr);
6185
othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
6186
}
6187
else
6188
#endif
6189
othercase[0] = TABLE_GET(chr, common->fcc, chr);
6190
}
6191
}
6192
else
6193
{
6194
caseless = FALSE;
6195
othercase[0] = 0; /* Stops compiler warning - PH */
6196
}
6197
6198
len_save = len;
6199
cc_save = cc;
6200
while (TRUE)
6201
{
6202
oc = othercase;
6203
do
6204
{
6205
len--;
6206
6207
chr = *cc;
6208
add_prefix_char(*cc, chars, len == 0);
6209
6210
if (caseless)
6211
add_prefix_char(*oc, chars, len == 0);
6212
6213
chars++;
6214
cc++;
6215
oc++;
6216
}
6217
while (len > 0 && chars < chars_end);
6218
6219
if (--repeat == 0 || chars >= chars_end)
6220
break;
6221
6222
len = len_save;
6223
cc = cc_save;
6224
}
6225
6226
repeat = 1;
6227
if (last)
6228
chars_end = chars;
6229
}
6230
}
6231
6232
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a masked test of the code unit held in reg, followed by a jump to
label when the unit cannot be the first unit of a character:

  8 bit units:  (reg & 0xc0) == 0x80      (UTF-8 continuation byte pattern)
  16 bit units: (reg & 0xfc00) == 0xdc00  (UTF-16 low surrogate range)

The AND is emitted with reg as its destination, so the generated code
overwrites the previous content of reg. Any other code unit width is
rejected at compile time. The parameter must be called "compiler" because
the OP2 / CMPTO emitter macros refer to it by that name. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
#else
#error "Unknown code width"
#endif
}
#endif
6246
6247
#include "pcre2_jit_simd_inc.h"
6248
6249
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD

/* Picks the best pair of prefix positions for the SIMD "character pair"
fast forward and, when one exists, emits that search.

Arguments:
  common   compiler state, passed on to fast_forward_char_pair_simd()
  chars    per-position prefix data; last_count must already hold the
           priority value and chars[0] / chars[1] the candidate code units
  max      number of valid entries in chars

A pair (i, j) qualifies when j < i, i - j does not exceed
max_fast_forward_char_pair_offset(), both positions have a priority greater
than 2, and none of the two code units of position i equals any of the two
code units of position j. Among the qualifying pairs the one with the largest
priority sum is used. Since the comparison is ">=", a pair examined later
replaces an earlier one with the same sum (i is scanned downwards, j upwards).

Returns TRUE when a pair was found and the search code was emitted, FALSE
otherwise (nothing is emitted in that case). */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
  sljit_s32 i, j, max_i = 0, max_j = 0;
  sljit_u32 max_pri = 0;
  sljit_s32 max_offset = max_fast_forward_char_pair_offset();
  PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;

  for (i = max - 1; i >= 1; i--)
    {
    if (chars[i].last_count > 2)
      {
      a1 = chars[i].chars[0];
      a2 = chars[i].chars[1];
      a_pri = chars[i].last_count;

      /* Only partners at most max_offset positions to the left are allowed. */
      j = i - max_offset;
      if (j < 0)
        j = 0;

      while (j < i)
        {
        b_pri = chars[j].last_count;
        if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
          {
          b1 = chars[j].chars[0];
          b2 = chars[j].chars[1];

          /* The two positions must not share any code unit. */
          if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
            {
            max_pri = a_pri + b_pri;
            max_i = i;
            max_j = j;
            }
          }
        j++;
        }
      }
    }

/* Every accepted pair has a priority sum of at least 6, so zero means that
no pair was accepted. */
if (max_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
return TRUE;
}

#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6298
6299
/* Emits a forward scan which moves STR_PTR to a position where the code unit
located offset units further on equals char1 or char2. Passing the same
value for both searches for a single code unit.

Arguments:
  common   compiler state
  char1    first accepted code unit
  char2    second accepted code unit (may be equal to char1)
  offset   distance, in code units, between the match start and the tested
           code unit; must be 0 unless the mode is PCRE2_JIT_COMPLETE

When common->match_end_ptr is non-zero, STR_END is saved in TMP3, replaced
for the duration of the scan by a limit computed from the value stored in
that stack slot, and restored before leaving. The generated code uses TMP1
(and TMP3 in the case above) as scratch registers. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here on STR_PTR addresses the tested code unit, not the match start. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* TMP1 = stored limit + offset + 1 code units; STR_END takes that value
  when it is currently greater than it. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the initial shift of STR_PTR and restore STR_END. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic loop: read one code unit, step over it, compare. */
start = LABEL();

/* Running out of input is a failed match in complete mode; in the other
modes the jump lands after the final STR_PTR adjustment below. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The two values differ in one bit only: force that bit on and use a
    single comparison. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
    }
  else
    {
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
    JUMPHERE(match);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* STR_PTR is now offset + 1 units past the candidate match start. Keep
  searching when the unit at that start cannot begin a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
  }
#endif

/* Move STR_PTR back from "one past the tested unit" to the match start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6384
6385
/* Collects the possible code units of the first pattern positions with
scan_prefix() and emits a fast forward search based on them.

Strategies, in the order they are tried:
  1. the SIMD character pair search (when available and a pair qualifies);
  2. a skip table built from the longest run of at least four consecutive
     positions that all have a restricted code unit set, optionally combined
     with a direct test of one further position;
  3. a plain one or two code unit search at the best single position.

Returns FALSE when no search code was emitted (scan_prefix() found nothing,
or no position is selective enough). Returns TRUE otherwise. TRUE is also
returned when allocate_read_only_data() fails.
NOTE(review): presumably the allocation failure is recorded elsewhere and
aborts the compilation - confirm against allocate_read_only_data(). */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

max = scan_prefix(common, common->start, chars);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. */
/* Resulting values (count -> priority):
     0 or 255           -> 0 (count 0 is rewritten to 255)
     1                  -> 7 when last_count was 1, otherwise 5
     2, one bit differs -> 6 when last_count was 2, otherwise 4
     2, otherwise       -> 3 when last_count was 2, otherwise 2
     anything else      -> 1 */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].last_count <= chars[i].count);

  switch (chars[i].count)
    {
    case 0:
    chars[i].count = 255;
    chars[i].last_count = 0;
    break;

    case 1:
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    chars[i].chars[1] = chars[i].chars[0];
    break;

    case 2:
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    break;

    default:
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
    break;
    }
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of consecutive positions whose count is below 255.
A run is only recorded when it is longer than the current range_len, which
starts at 3, so at least four positions are required and the first of
several equally long runs wins. The loop goes one step past the last entry
so that a run reaching the end is closed as well. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Skip table indexed by the low 8 bits of a code unit. The default entry
  is the byte length of the whole run; a code unit that may occur i positions
  left of range_right lowers its entry to the byte length of i units. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Select the position (other than range_right) with the highest priority,
provided it is at least 2. The first of equal candidates is kept. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* Lower STR_END by max code units for the duration of the search; the
match fails immediately when the subtraction reports "less". With a match
end limit, the original STR_END is kept in TMP3 and STR_END additionally
takes the stored limit when that is smaller. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

/* Without virtual registers the table address is kept in RETURN_ADDR. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load one byte of the code unit at position range_right. For wider code
units on a target that is not little endian the last byte of the unit is
addressed instead of the first one. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Replace the byte by its skip distance. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance by the skip distance and repeat until the distance is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Additionally test the selected position. STR_PTR is advanced by one
  unit here, so a mismatch restarts the search one unit further on. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* Make sure the candidate match start is the first unit of a character.
  When no extra position was tested (offset < 0), STR_PTR has not been
  advanced yet, so the unit is read at STR_PTR and the pointer is stepped
  over it first; otherwise the start is one unit behind STR_PTR. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the one unit advance made by the extra position test. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6613
6614
/* Emits a fast forward search for the first code unit recorded in the
compiled pattern (common->re->first_codeunit).

When the PCRE2_FIRSTCASELESS flag is set, the other case of the code unit is
accepted as well. It is taken from the common->fcc table, and for values
above 127 in UTF or UCP mode (with SUPPORT_UNICODE) from UCD_OTHERCASE().
Without the flag both arguments passed on are the same value. The search
itself is emitted by fast_forward_first_char2() with offset 0. */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
PCRE2_UCHAR oc;

oc = first_char;
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
  {
  oc = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UNICODE
  if (first_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(first_char);
#endif
  }

fast_forward_first_char2(common, first_char, oc, 0);
}
6631
6632
/* Emits code which advances STR_PTR to the position following the next
newline sequence. No search is done when STR_PTR is not past the "str" field
of the jit arguments (see the firstchar jump).

Two cases are handled separately:
  - a fixed newline with common->newline > 255, whose two code units are
    taken from the high and low byte of common->newline;
  - every other newline type (single fixed unit, ANYCRLF, ANY).

When common->match_end_ptr is non-zero, STR_END is saved in TMP3, replaced
by the value of that stack slot during the search, and restored at the end.
In the second case the loop label is also stored in
common->ff_newline_shortcut. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar = NULL;
struct sljit_jump *firstchar;
struct sljit_jump *quit = NULL;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
    {
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit, then one more unless STR_PTR has reached
    "begin" (TMP1 becomes 1 or 0, scaled to the code unit size). */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

    /* Search for the low byte value at offset 1 and the high byte value at
    offset 0, then move past the two units. */
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
  else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
    {
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit when STR_PTR is at least two units past
    "begin", because the loop below reads the units at -2 and -1. */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance until the two units before STR_PTR are the newline pair or
    STR_PTR reaches STR_END. */
    loop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

    JUMPHERE(quit);
    JUMPHERE(lastchar);
    }

  JUMPHERE(firstchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* TMP2 = arguments->str. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

/* Go back by one character (NLTYPE_ANY) or one code unit (other types). */
if (common->nltype == NLTYPE_ANY)
  move_back(common, NULL, FALSE);
else
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

loop = LABEL();
common->ff_newline_shortcut = loop;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  {
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    /* Find CR or LF, read it and step over it. Anything but CR skips the
    "LF after CR" handling below. */
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
    if (common->mode != PCRE2_JIT_COMPLETE)
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    }
  else
    {
    fast_forward_char_simd(common, common->newline, common->newline, 0);

    /* Step over the newline; outside complete mode STR_PTR is set to
    STR_END when the step went beyond it. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
      }
    }
  }
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  {
  /* Generic loop: read a character, leave when the end of the subject is
  reached, branch out on CR for the ANY / ANYCRLF types, and bind the jumps
  collected by check_newlinechar() to the loop label. */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &newline, FALSE);
  set_jumps(newline, loop);
  }

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  /* quit is still NULL only when the generic loop above was emitted. */
  if (quit == NULL)
    {
    quit = JUMP(SLJIT_JUMP);
    JUMPHERE(foundcr);
    }

  /* A CR was found: also step over a directly following NL, if any
  (TMP1 becomes 1 or 0, scaled to the code unit size). */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }

if (lastchar)
  JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6804
6805
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6806
6807
/* Emits a forward scan which advances STR_PTR to the next code unit accepted
by the 256 bit start bitmap of the pattern (common->re->start_bitmap).

optimize_class() is tried first; when it succeeds, the jumps it collected
are bound to the loop start. Otherwise an explicit bitmap test is emitted:
byte index = unit >> 3, bit index = unit & 7.

Running out of input jumps to common->failed_match in complete mode and
simply ends the scan in the other modes. When common->match_end_ptr is
non-zero, STR_END is saved in RETURN_ADDR, limited during the scan by the
stored value plus one code unit, and restored at the end. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Read one code unit into TMP1 and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* (start_bits[31] & 0x80) is the bitmap bit of code unit 255. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* The bitmap only covers 0..255: units >= 255 are accepted or rejected
  as a whole, depending on the bit of unit 255. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  /* 8 bit UTF mode: units above 127 restart the loop without a bitmap
  lookup when is_char7_bitset() holds for the bitmap. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  /* Test bit (1 << TMP2) of the loaded bitmap byte; the shifted mask goes
  to TMP3 without virtual registers and to TMP2 with them. */
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    }
  JUMPTO(SLJIT_ZERO, start);
  }
else
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Step back onto the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6878
6879
/* Emits a search for the required code unit of the pattern in the rest of
the subject and caches the position where it was found.

Arguments:
  common         compiler state; common->req_char_ptr must be non-zero
  req_char       the required code unit
  caseless       TRUE when the other case of req_char is accepted as well
  has_firstchar  TRUE to start one code unit after STR_PTR instead of at
                 STR_PTR

The search is skipped when STR_PTR + IN_UCHARS(REQ_CU_MAX) * 100 is below
STR_END, or when STR_PTR is below the position stored in the
common->req_char_ptr stack slot. Otherwise the subject is scanned up to
STR_END and the address of the hit is written to that slot.

Returns the list of jumps taken when the code unit is not found; binding
them is left to the caller. The generated code uses TMP1 and TMP2. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
struct sljit_jump *already_found;
struct sljit_jump *found;
struct sljit_jump *found_oc = NULL;
jump_list *not_found = NULL;
sljit_u32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

/* TMP1 is the scan position from here on. */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

/* Other case of req_char: from the fcc table, or from UCD_OTHERCASE() for
values above 127 in UTF / UCP mode. */
oc = req_char;
if (caseless)
  {
  oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE
  if (req_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(req_char);
#endif
  }

#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  {
  not_found = fast_requested_char_simd(common, req_char, oc);
  }
else
#endif
  {
  loop = LABEL();
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);

  if (req_char == oc)
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  else
    {
    bit = req_char ^ oc;
    if (is_powerof2(bit))
      {
       /* The two cases differ in one bit only: force that bit on and use
       a single comparison. */
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
      }
    else
      {
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
      }
    }
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, loop);

  JUMPHERE(found);
  if (found_oc)
    JUMPHERE(found_oc);
  }

/* Remember where the code unit was found. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);

JUMPHERE(already_found);
JUMPHERE(toolong);
return not_found;
}
6954
6955
/* Emits the fast-call helper which restores saved local values from the
frame records on top of the backtracking stack.

The generated loop looks at the word just below STACK_TOP:
  > 0   two values follow below it. They are copied to the local area at
        that offset and at the next word, and three words are popped.
  < 0   one value follows below it. It is copied to the local area at the
        negated offset, and two words are popped.
  == 0  end of the records: return to the caller through RETURN_ADDR.

TMP1 holds the base address of the locals (GET_LOCAL_BASE). Without
virtual registers TMP1 is also used as a scratch register in the two value
case, therefore the base is reloaded there. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive offset: TMP2 becomes the address of the first local. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* Tell the compiler which flags of the comparison above are still valid at
this jump target, so that they can be tested again. */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative offset: TMP2 = base - offset, the address of the single local. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
7009
7010
#ifdef SUPPORT_UNICODE
7011
/* Bit masks over category indexes: bit n of a mask stands for the category
whose index is n. All shifts are done on the int constant 1, so the macros
are only meant for indexes that keep the shifted value inside a signed int. */

/* Mask with the single bit of one category. */
#define UCPCAT(bit) (1 << (bit))
/* Mask with the bits of two categories. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
/* Mask with the bits of three categories. */
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
/* Mask with every bit from start to end, both included (start <= end). */
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
/* Bits ucp_Ll .. ucp_Lu. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
/* Bits ucp_Nd .. ucp_No. */
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
/* Bits 0 .. ucp_Zs. */
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
7018
#endif
7019
7020
static void check_wordboundary(compiler_common *common, BOOL ucp)
7021
{
7022
DEFINE_COMPILER;
7023
struct sljit_jump *skipread;
7024
jump_list *skipread_list = NULL;
7025
#ifdef SUPPORT_UNICODE
7026
struct sljit_label *valid_utf;
7027
jump_list *invalid_utf1 = NULL;
7028
#endif /* SUPPORT_UNICODE */
7029
jump_list *invalid_utf2 = NULL;
7030
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
7031
struct sljit_jump *jump;
7032
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
7033
7034
SLJIT_UNUSED_ARG(ucp);
7035
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
7036
7037
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
7038
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7039
/* Get type of the previous char, and put it to TMP3. */
7040
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7041
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7042
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
7043
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
7044
7045
#ifdef SUPPORT_UNICODE
7046
if (common->invalid_utf)
7047
{
7048
peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
7049
7050
if (common->mode != PCRE2_JIT_COMPLETE)
7051
{
7052
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7053
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
7054
move_back(common, NULL, TRUE);
7055
check_start_used_ptr(common);
7056
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7057
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
7058
}
7059
}
7060
else
7061
#endif /* SUPPORT_UNICODE */
7062
{
7063
if (common->mode == PCRE2_JIT_COMPLETE)
7064
peek_char_back(common, READ_CHAR_MAX, NULL);
7065
else
7066
{
7067
move_back(common, NULL, TRUE);
7068
check_start_used_ptr(common);
7069
read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
7070
}
7071
}
7072
7073
/* Testing char type. */
7074
#ifdef SUPPORT_UNICODE
7075
if (ucp)
7076
{
7077
add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
7078
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
7079
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
7080
OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
7081
}
7082
else
7083
#endif /* SUPPORT_UNICODE */
7084
{
7085
#if PCRE2_CODE_UNIT_WIDTH != 8
7086
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7087
#elif defined SUPPORT_UNICODE
7088
/* Here TMP3 has already been zeroed. */
7089
jump = NULL;
7090
if (common->utf)
7091
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7092
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7093
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
7094
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
7095
OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
7096
#if PCRE2_CODE_UNIT_WIDTH != 8
7097
JUMPHERE(jump);
7098
#elif defined SUPPORT_UNICODE
7099
if (jump != NULL)
7100
JUMPHERE(jump);
7101
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7102
}
7103
JUMPHERE(skipread);
7104
7105
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7106
check_str_end(common, &skipread_list);
7107
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2);
7108
7109
/* Testing char type. This is a code duplication. */
7110
#ifdef SUPPORT_UNICODE
7111
7112
valid_utf = LABEL();
7113
7114
if (ucp)
7115
{
7116
add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
7117
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
7118
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
7119
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
7120
}
7121
else
7122
#endif /* SUPPORT_UNICODE */
7123
{
7124
#if PCRE2_CODE_UNIT_WIDTH != 8
7125
/* TMP2 may be destroyed by peek_char. */
7126
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7127
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7128
#elif defined SUPPORT_UNICODE
7129
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7130
jump = NULL;
7131
if (common->utf)
7132
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7133
#endif
7134
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
7135
OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
7136
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7137
#if PCRE2_CODE_UNIT_WIDTH != 8
7138
JUMPHERE(jump);
7139
#elif defined SUPPORT_UNICODE
7140
if (jump != NULL)
7141
JUMPHERE(jump);
7142
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7143
}
7144
set_jumps(skipread_list, LABEL());
7145
7146
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7147
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
7148
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7149
7150
#ifdef SUPPORT_UNICODE
7151
if (common->invalid_utf)
7152
{
7153
set_jumps(invalid_utf1, LABEL());
7154
7155
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL);
7156
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
7157
7158
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7159
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
7160
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7161
7162
set_jumps(invalid_utf2, LABEL());
7163
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7164
OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
7165
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7166
}
7167
#endif /* SUPPORT_UNICODE */
7168
}
7169
7170
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7171
{
7172
/* May destroy TMP1. */
7173
DEFINE_COMPILER;
7174
int ranges[MAX_CLASS_RANGE_SIZE];
7175
sljit_u8 bit, cbit, all;
7176
int i, byte, length = 0;
7177
7178
bit = bits[0] & 0x1;
7179
/* All bits will be zero or one (since bit is zero or one). */
7180
all = (sljit_u8)-bit;
7181
7182
for (i = 0; i < 256; )
7183
{
7184
byte = i >> 3;
7185
if ((i & 0x7) == 0 && bits[byte] == all)
7186
i += 8;
7187
else
7188
{
7189
cbit = (bits[byte] >> (i & 0x7)) & 0x1;
7190
if (cbit != bit)
7191
{
7192
if (length >= MAX_CLASS_RANGE_SIZE)
7193
return FALSE;
7194
ranges[length] = i;
7195
length++;
7196
bit = cbit;
7197
all = (sljit_u8)-cbit; /* sign extend bit into byte */
7198
}
7199
i++;
7200
}
7201
}
7202
7203
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
7204
{
7205
if (length >= MAX_CLASS_RANGE_SIZE)
7206
return FALSE;
7207
ranges[length] = 256;
7208
length++;
7209
}
7210
7211
if (length < 0 || length > 4)
7212
return FALSE;
7213
7214
bit = bits[0] & 0x1;
7215
if (invert) bit ^= 0x1;
7216
7217
/* No character is accepted. */
7218
if (length == 0 && bit == 0)
7219
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7220
7221
switch(length)
7222
{
7223
case 0:
7224
/* When bit != 0, all characters are accepted. */
7225
return TRUE;
7226
7227
case 1:
7228
add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7229
return TRUE;
7230
7231
case 2:
7232
if (ranges[0] + 1 != ranges[1])
7233
{
7234
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7235
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7236
}
7237
else
7238
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7239
return TRUE;
7240
7241
case 3:
7242
if (bit != 0)
7243
{
7244
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7245
if (ranges[0] + 1 != ranges[1])
7246
{
7247
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7248
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7249
}
7250
else
7251
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7252
return TRUE;
7253
}
7254
7255
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
7256
if (ranges[1] + 1 != ranges[2])
7257
{
7258
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
7259
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7260
}
7261
else
7262
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
7263
return TRUE;
7264
7265
case 4:
7266
if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
7267
&& (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
7268
&& (ranges[1] & (ranges[2] - ranges[0])) == 0
7269
&& is_powerof2(ranges[2] - ranges[0]))
7270
{
7271
SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
7272
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
7273
if (ranges[2] + 1 != ranges[3])
7274
{
7275
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
7276
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7277
}
7278
else
7279
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7280
return TRUE;
7281
}
7282
7283
if (bit != 0)
7284
{
7285
i = 0;
7286
if (ranges[0] + 1 != ranges[1])
7287
{
7288
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7289
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7290
i = ranges[0];
7291
}
7292
else
7293
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7294
7295
if (ranges[2] + 1 != ranges[3])
7296
{
7297
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
7298
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7299
}
7300
else
7301
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
7302
return TRUE;
7303
}
7304
7305
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7306
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
7307
if (ranges[1] + 1 != ranges[2])
7308
{
7309
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
7310
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7311
}
7312
else
7313
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7314
return TRUE;
7315
7316
default:
7317
SLJIT_UNREACHABLE();
7318
return FALSE;
7319
}
7320
}
7321
7322
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Collects the characters selected by 'bits' (complemented when 'nclass' is
set) into a short list and emits one compare + conditional select per entry,
accumulating the result in TMP2. Two characters that differ only in bit 0x20
share a single entry. Returns FALSE, before emitting anything, when the CPU
has no conditional move or when the list would exceed MAX_CLASS_CHARS_SIZE.
May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
int count = 0;
int paired = 0;
int idx, bitpos, chr, slot;
BOOL negated;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

for (idx = 0; idx < 32; idx++)
  {
  mask = nclass ? (sljit_u8)~bits[idx] : bits[idx];

  for (bitpos = 0; mask != 0; mask >>= 1, bitpos++)
    {
    if ((mask & 0x1) == 0)
      continue;

    chr = idx * 8 + bitpos;
    slot = count;

    /* A character with bit 0x20 set may pair up with an already listed
    character that lacks this bit; the entry is then flagged with 0x100. */
    if ((chr & 0x20) != 0)
      {
      for (slot = 0; slot < count; slot++)
        if (char_list[slot] == chr - 0x20)
          {
          char_list[slot] |= 0x120;
          break;
          }
      }

    if (slot != count)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    char_list[count++] = (uint16_t) chr;
    }
  }

if (count == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */

/* Character zero cannot use the select trick below (the selected value
would be zero), so it is handled through the flags directly. */
idx = 0;
if (char_list[0] == 0)
  {
  idx = 1;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* Unpaired entries first. */
for (; idx < count; idx++)
  {
  if ((char_list[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[idx]);
  SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
  }

/* Paired entries are compared after forcing bit 0x20 in TMP1. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((char_list[idx] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[idx] & 0xff);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  }

negated = invert ? !nclass : nclass;

if (negated)
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
else
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7422
7423
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries the range based matcher first and falls back to the character
list based one. Returns TRUE when either of them emitted code.
May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
7430
7431
static void check_anynewline(compiler_common *common)
{
/* Fast-call helper. Tests the character in TMP1 against 0x0a-0x0d and 0x85,
plus 0x2028 and 0x2029 when wide characters are possible. On return TMP2 is
non-zero for a match and the zero flag reflects TMP2. TMP1 is modified. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SUPPORT_UNICODE
/* In 8 bit mode characters above 255 exist only when UTF is enabled. */
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = FALSE;
#endif
#elif defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL wide_chars = TRUE;
#else
const BOOL wide_chars = FALSE;
#endif

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase TMP1 so that 0x0a maps to zero: one unsigned compare covers 0x0a-0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit folds 0x2028 and 0x2029 into one compare. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7457
7458
static void check_hspace(compiler_common *common)
{
/* Fast-call helper. Tests the character in TMP1 against 0x09, 0x20 and 0xa0,
plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 when wide
characters are possible. On return TMP2 is non-zero for a match and the zero
flag reflects TMP2. TMP1 is modified on the wide character path. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SUPPORT_UNICODE
/* In 8 bit mode characters above 255 exist only when UTF is enabled. */
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = FALSE;
#endif
#elif defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL wide_chars = TRUE;
#else
const BOOL wide_chars = FALSE;
#endif

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Each compare is merged into TMP2 by the OP_FLAGS that follows it. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  /* Rebase TMP1 to 0x2000: the remaining values are tested relative to it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }

/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7496
7497
static void check_vspace(compiler_common *common)
{
/* Fast-call helper. Tests the character in TMP1 against 0x0a-0x0d and 0x85,
plus 0x2028 and 0x2029 when wide characters are possible. On return TMP2 is
non-zero for a match and the zero flag reflects TMP2. TMP1 is modified. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SUPPORT_UNICODE
/* In 8 bit mode characters above 255 exist only when UTF is enabled. */
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = FALSE;
#endif
#elif defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL wide_chars = TRUE;
#else
const BOOL wide_chars = FALSE;
#endif

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase TMP1 so that 0x0a maps to zero: one unsigned compare covers 0x0a-0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit folds 0x2028 and 0x2029 into one compare. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7524
7525
static void do_casefulcmp(compiler_common *common)
{
/* Fast-call helper that compares code units one by one. On entry the return
address is saved in LOCAL0 and STR_PTR is moved back by TMP2. The loop reads
one unit through TMP1 and one through STR_PTR, leaves on the first difference,
and otherwise decreases TMP2 by IN_UCHARS(1) until it reaches zero. The return
address is reloaded into TMP1 before returning. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int char1_reg = TMP3;
int char2_reg = RETURN_ADDR;
int update_mode;
BOOL borrowed;

if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }

/* When STR_END / STACK_TOP are borrowed as character registers their
values are parked in TMP3 / RETURN_ADDR for the duration of the loop. */
borrowed = (char1_reg == STR_END);

/* Update ref_update_local_size() when this changes. */
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (borrowed)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* 1: post-update loads, 2: pre-update loads, 0: plain loads. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 2;
else
  update_mode = 0;

if (update_mode == 2)
  {
  /* Pre-update addressing advances before the load. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (update_mode)
  {
  case 1:
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);

if (update_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (borrowed)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7606
7607
static void do_caselesscmp(compiler_common *common)
{
/* Fast-call helper that compares code units one by one after mapping each of
them through the common->lcc table (units above 255 are left unmapped when
the code unit width is not 8). On entry the return address is saved in LOCAL0
and STR_PTR is moved back by TMP2. The loop leaves on the first difference,
and otherwise decreases TMP2 by IN_UCHARS(1) until it reaches zero. STR_END
is used as a character register and is preserved through LOCAL1. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *above_table;
#endif
struct sljit_label *loop;
int char1_reg = STR_END;
int char2_reg = RETURN_ADDR;
int lcc_table = TMP3;
int update_mode = 0;
BOOL borrowed;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }

/* When STACK_TOP / STACK_LIMIT are borrowed their values are parked in
TMP3 / RETURN_ADDR for the duration of the loop. */
borrowed = (char2_reg == STACK_TOP);

/* 1: post-update loads, 2: pre-update loads, 0: plain loads. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 2;

/* Update ref_update_local_size() when this changes. */
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0);

if (borrowed)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

switch (update_mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update addressing advances before the load. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map the first character through the table. */
#if PCRE2_CODE_UNIT_WIDTH != 8
above_table = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(above_table);
#endif

/* Map the second character through the table. */
#if PCRE2_CODE_UNIT_WIDTH != 8
above_table = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(above_table);
#endif

if (update_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);

if (update_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (borrowed)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7706
7707
#include "pcre2_jit_char_inc.h"
7708
7709
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
/* Emits the matching path of the zero width assertion selected by 'type'
(start / end of subject, word boundaries, ^ and $ variants). Every failing
check is appended to 'backtracks'. 'cc' is returned unchanged. TMP1, TMP2 and,
on some paths, TMP3 are used as scratch registers. */
DEFINE_COMPILER;
struct sljit_jump *to_end;
struct sljit_jump *branch;
struct sljit_jump *lone_cr;
struct sljit_jump *crlf;
/* A fixed newline above 255 is stored as two code units: high byte first. */
const BOOL two_unit_newline = (common->nltype == NLTYPE_FIXED && common->newline > 255);
const sljit_sw nl_first = (common->newline >> 8) & 0xff;
const sljit_sw nl_second = common->newline & 0xff;
const BOOL complete = (common->mode == PCRE2_JIT_COMPLETE);
/* Base register for jit_arguments accesses; replaced by a temporary
register holding a copy when virtual registers are in use. */
sljit_s32 args_reg = ARGUMENTS;
sljit_sw field;
BOOL negated;

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* STR_PTR must be equal to the 'begin' (OP_SOD) or 'str' (OP_SOM) argument. */
  field = SLJIT_OFFSETOF(jit_arguments, str);
  if (type == OP_SOD)
    field = SLJIT_OFFSETOF(jit_arguments, begin);

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), field);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  negated = (type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY);

  if (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY)
    add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  else
    add_jump(compiler, &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));

#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* With invalid UTF support the result is tested through the value of TMP2. */
    add_jump(compiler, backtracks, CMP(negated ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */

  /* Otherwise the result is tested through the zero flag. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. */
  to_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  if (two_unit_newline)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (complete)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      branch = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, nl_first);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    branch = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    /* Greater: the CR is the last code unit of the subject. */
    lone_cr = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: exactly two code units remain, the second must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    crlf = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The first code unit is not CR. */
    JUMPHERE(branch);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in TMP3 and restored after the character is checked. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(lone_cr);
    JUMPHERE(crlf);
    }

  JUMPHERE(to_end);
  if (!complete)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (!complete)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fails when the PCRE2_NOTEOL option bit is set. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  else
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  return cc;

  case OP_DOLLM:
  branch = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

  /* At the end of the subject: fails when the PCRE2_NOTEOL option bit is set. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  to_end = JUMP(SLJIT_JUMP);

  /* Not at the end of the subject: a newline must follow. */
  JUMPHERE(branch);

  if (two_unit_newline)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (complete)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      branch = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }

  JUMPHERE(to_end);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is after 'begin' or the PCRE2_NOTBOL option bit is set. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  branch = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);

  /* At 'begin': fails when the PCRE2_NOTBOL option bit is set. */
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  to_end = JUMP(SLJIT_JUMP);

  /* After 'begin': a newline must precede the current position. */
  JUMPHERE(branch);

  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (two_unit_newline)
    {
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }

  JUMPHERE(to_end);
  return cc;
  }

SLJIT_UNREACHABLE();
return cc;
}
7951
7952
/* Forward definitions. */
7953
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
7954
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
7955
7956
/* Obtains 'size' bytes through sljit_alloc_memory, stores the zero filled
block in 'backtrack', sets its 'cc' member to 'ccstart' and links it as the
new top of 'parent'. When the compiler reports an error, the enclosing
function returns 'error'. The names 'compiler', 'backtrack' and 'parent'
must be in scope at the point of use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
  } while (0)
7968
7969
/* Obtains 'size' bytes through sljit_alloc_memory, stores the zero filled
block in 'backtrack', sets its 'cc' member to 'ccstart' and links it as the
new top of 'parent'. For functions returning void: when the compiler reports
an error, the enclosing function simply returns. The names 'compiler',
'backtrack' and 'parent' must be in scope at the point of use. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
  } while (0)
7981
7982
/* Views the in-scope 'backtrack' pointer as a pointer to 'type'. */
#define BACKTRACK_AS(type) ((type *)(backtrack))
7983
7984
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
/* Walks the name table entries of a duplicate named reference and emits code
that leaves in TMP2 the address of the first OVECTOR slot whose value differs
from OVECTOR(1). When all entries but the last are equal to it, TMP2 refers to
the last entry, and a backtrack is emitted for an equal last entry unless
'backtracks' is NULL or unset backreferences are allowed. TMP1 is used to hold
the OVECTOR(1) value. */
DEFINE_COMPILER;
PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining;
unsigned int offset;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries except the last one: leave as soon as one differs. */
for (remaining = GET2(cc, 1 + IMM2_SIZE) - 1; remaining > 0; remaining--)
  {
  offset = GET2(slot, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  slot += common->name_entry_size;
  }

/* The last entry is the result whenever the loop above did not leave. */
offset = GET2(slot, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
8013
8014
/* Emits the code that matches one occurrence of a back reference (OP_REF,
OP_REFI, OP_DNREF or OP_DNREFI at cc) at STR_PTR.

For OP_REF / OP_REFI the group's start is loaded from OVECTOR; for the
duplicate-name opcodes TMP2 must already hold the address of the group's
OVECTOR slot on entry.

withchecks: emit the unset-group check (numbered references only, and only
  when common->unset_backref is not set) and the empty-reference check.
emptyfail:  when the empty-reference check fires, backtrack instead of
  continuing.

With SUPPORT_UNICODE, caseless references in utf/ucp mode are compared
character by character using the UCD records; every other case is delegated
to the shared casefulcmp / caselesscmp routines. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *compared;
#if defined SUPPORT_UNICODE
struct sljit_label *next_char;
struct sljit_label *scan_set;
struct sljit_jump *turkish_ascii_i = NULL;
struct sljit_jump *turkish_non_ascii_i = NULL;
jump_list *mismatch = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
PCRE2_UCHAR refi_flag = 0;
BOOL turkish_casing;

/* The caseless opcodes carry their flags in the last code unit. */
if (*cc == OP_REFI || *cc == OP_DNREFI)
  refi_flag = cc[PRIV(OP_lengths)[*cc] - 1];

/* Turkish casing applies only when caseless restriction is not requested. */
turkish_casing = (refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==
  REFI_FLAG_TURKISH_CASING;
#endif /* SUPPORT_UNICODE */

if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI))
  {
  /* Update ref_update_local_size() when this changes. */
  SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));

  /* TMP2 = end of the referenced text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their values are parked in
  LOCAL0..LOCAL2 and reloaded on every exit path. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  next_char = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read the character of the referenced text. It must be a valid UTF
  character. STR_PTR is lent to read_char and restored from TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read the character at the current subject position. */
  read_char(common, 0, READ_CHAR_MAX, &mismatch, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, next_char);

  if (turkish_casing)
    {
    /* (c | 0x20) == 0x69 selects 'I' and 'i'. */
    OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20);
    turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69);

    /* (c | 0x1) == 0x131 selects U+0130 and U+0131. */
    OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1);
    turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131);
    }

  /* Keep the subject character in TMP3 across the UCD lookup. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = ucd_records + TMP2 * 12 (computed as TMP2 * 4 + TMP2 * 8). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Other case of the subject character = character + other_case. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, next_char);

  /* A zero caseset leaves nothing else to try. */
  add_jump(compiler, &mismatch, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  if (refi_flag & REFI_FLAG_CASELESS_RESTRICT)
    add_jump(compiler, &mismatch, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128));

  /* Walk the caseless set while its members are below the reference
  character; an equal member is a match. */
  scan_set = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, next_char);
  JUMPTO(SLJIT_LESS, scan_set);

  if (turkish_casing)
    {
    add_jump(compiler, &mismatch, JUMP(SLJIT_JUMP));
    JUMPHERE(turkish_ascii_i);

    /* 'i' becomes U+0130 and 'I' becomes U+0131. */
    OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
    OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
    OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130);
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, next_char);

    add_jump(compiler, &mismatch, JUMP(SLJIT_JUMP));
    JUMPHERE(turkish_non_ascii_i);

    /* U+0130 becomes 'i' and U+0131 becomes 'I'. */
    OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
    OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
    OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
    OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49);
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, next_char);
    }

  /* Mismatch: reload the borrowed registers and backtrack. In complete
  mode, running out of subject is handled the same way. */
  set_jumps(mismatch, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Subject exhausted in a partial matching mode. */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* The whole referenced text has been matched. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* The shared comparison routine that matches the opcode's case mode. */
  jump_list **cmp_calls = (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp;

  /* TMP2 = length of the referenced text (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    compared = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(compared);
    }
  }

/* Empty reference: either a failure or simply the end of the match code. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
8220
8221
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE). A ref_iterator_backtrack record is
pushed onto parent's list and its matchingpath label is set.

Odd repeat opcodes are the minimizing ones. In the generated code max == 0 is
handled as "no upper bound".

Returns the position after the repeat opcode, or NULL when the backtrack
record cannot be allocated. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR repeat_op;
int local_start = LOCAL2;
int offset = 0;
struct sljit_label *repeat;
struct sljit_jump *empty_ref;
struct sljit_jump *skip = NULL;
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* Move cc so that cc[1 + IMM2_SIZE] is the repeat opcode for all four
reference opcodes. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;

if (*ccbegin == OP_REFI || *ccbegin == OP_DNREFI)
  {
  cc += 1;
#ifdef SUPPORT_UNICODE
  /* The Unicode caseless comparison uses LOCAL0..LOCAL2 itself. */
  if (common->utf || common->ucp)
    local_start = LOCAL3;
#endif
  }

repeat_op = cc[1 + IMM2_SIZE];

SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
/* Update ref_update_local_size() when this changes. */
SLJIT_ASSERT(local_start + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size);
minimize = (repeat_op & 0x1) != 0;

switch(repeat_op)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

if (!minimize)
  {
  /* Greedy repeat. */
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both the invalid and the empty case. Since the minimum repeat
    is zero, the invalid case is basically the same as the empty case. */
    if (ref)
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);

    if (ref)
      {
      if (!common->unset_backref)
        add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* The local at local_start counts the repeats when a bound needs it. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, SLJIT_IMM, 0);

  repeat = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw));
  compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local_start);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, TMP1, 0);
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, repeat);
    if (max > 1)
      {
      skip = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, repeat);
      JUMPHERE(skip);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, repeat);
    }

  JUMPHERE(empty_ref);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. Duplicate-name references need one more stack slot,
STACK(2), which stores the selected OVECTOR slot address. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (repeat_op != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both the invalid and the empty case. Since the minimum repeat
  is zero, the invalid case is basically the same as the empty case. */
  if (ref)
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  skip = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    if (!common->unset_backref)
      add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The repeat counter of the minimizing form is kept in STACK(1). */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (skip != NULL)
  JUMPHERE(skip);
JUMPHERE(empty_ref);

count_match(common);
return cc;
}
8443
8444
/* Emits the matching path of a recursion whose target offset is stored at
cc + 1. A recurse_backtrack record is pushed onto parent's list.

When get_framesize() reports no_stack for the target group, the group's body
is compiled in place and the record is flagged as inlined. Otherwise the
recurse_entry for the target offset is looked up in common->entries (created
and appended when missing) and a fast call to its entry code is emitted,
followed by a backtrack when TMP1 is zero after the call.

Returns cc + 1 + LINK_SIZE, or NULL when a memory allocation fails. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *entry;
recurse_entry *last = NULL;
sljit_sw start = GET(cc, 1);
PCRE2_SPTR start_cc;
BOOL needs_control_head;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);

/* Inlining simple patterns. */
if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
  {
  start_cc = common->start + start;
  compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return cc + 1 + LINK_SIZE;
  }

/* Find the entry of this recursion target; last ends up as the list tail
when there is no such entry. */
for (entry = common->entries; entry != NULL; last = entry, entry = entry->next)
  {
  if (entry->start == start)
    break;
  }

if (entry == NULL)
  {
  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  entry->start = start;
  entry->next = NULL;
  entry->entry_label = NULL;
  entry->backtrack_label = NULL;
  entry->entry_calls = NULL;
  entry->backtrack_calls = NULL;

  if (last == NULL)
    common->entries = entry;
  else
    last->next = entry;
  }

BACKTRACK_AS(recurse_backtrack)->entry = entry;

/* Call the entry directly if its label exists, otherwise queue the call. */
if (entry->entry_label != NULL)
  JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
else
  add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));

/* Leave if the match is failed. */
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
8502
8503
/* Runtime helper called from JIT code to run the user's callout function.

arguments:     the JIT argument block (supplies callout, callout_data and the
               subject bounds).
callout_block: partially filled callout block. On entry capture_top holds the
               number of offset pairs and offset_vector holds the current
               subject pointer; the memory directly after the block is used
               as the offset vector handed to the user.
jit_ovector:   the JIT offset vector (subject pointers, two per group).

Returns 0 when no callout function is set, otherwise the value returned by
the callout function. */
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR subject;
PCRE2_SIZE *offsets;
sljit_u32 remaining, group;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

subject = arguments->begin;
offsets = (PCRE2_SIZE*)(callout_block + 1);
remaining = callout_block->capture_top;

SLJIT_ASSERT(remaining >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Offsets in subject. The current position must be computed before
offset_vector is overwritten below. */
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = jit_ovector[0] - subject;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - subject;
callout_block->subject = subject;

/* Convert and copy the JIT offset vector to the ovector array. */
callout_block->capture_top = 1;
callout_block->offset_vector = offsets;

/* The first pair is always reported as unset. */
offsets[0] = PCRE2_UNSET;
offsets[1] = PCRE2_UNSET;
offsets += 2;
jit_ovector += 2;
group = 1;

/* Convert pointers to sizes. capture_top ends up one above the highest
pair whose start offset is set. */
while (--remaining != 0)
  {
  group++;

  offsets[0] = (PCRE2_SIZE)(jit_ovector[0] - subject);
  offsets[1] = (PCRE2_SIZE)(jit_ovector[1] - subject);

  if (offsets[0] != PCRE2_UNSET)
    callout_block->capture_top = group;

  offsets += 2;
  jit_ovector += 2;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}
8556
8557
/* Byte offset of member `arg` inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
8559
8560
/* Emits the matching path of a callout (OP_CALLOUT or OP_CALLOUT_STR at cc).

The generated code reserves stack space for a pcre2_callout_block followed by
an offset vector of top_bracket + 1 pairs, fills in the fields known at
compile time, calls do_callout_jit() and then releases the space. A positive
return value backtracks, any other non-zero value jumps to the abort code,
and zero continues matching.

Returns the position after the callout opcode, or NULL when the backtrack
record cannot be allocated. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL numbered = (*cc == OP_CALLOUT);
sljit_s32 size_mov;
unsigned int callout_length = numbered
  ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw number = numbered ? cc[1 + 2 * LINK_SIZE] : 0;
sljit_sw string_ptr = 0;
sljit_sw string_length = 0;
sljit_sw string_offset = 0;
sljit_uw ovector_bytes = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
sljit_uw stack_words;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Block plus offset vector, rounded up to whole machine words. */
stack_words = (sizeof(pcre2_callout_block) + ovector_bytes + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

allocate_stack(common, stack_words);

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, number);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

/* This pointer sized field temporarily stores an internal variable: the
current subject pointer, which do_callout_jit() reads back. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));

/* PCRE2_SIZE fields are stored with a move of matching width. */
size_mov = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

/* Only OP_CALLOUT_STR carries a string; the fields stay zero otherwise. */
if (!numbered)
  {
  string_ptr = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  string_length = (callout_length - (1 + 4*LINK_SIZE + 2));
  string_offset = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, string_ptr);
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, string_length);
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, string_offset);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* Needed to save important temporary registers. */
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
free_stack(common, stack_words);

/* Check return value. */
OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
if (common->abort_label != NULL)
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
else
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
return cc + callout_length;
}
8634
8635
#undef CALLOUT_ARG_SIZE
8636
#undef CALLOUT_ARG_OFFSET
8637
8638
/* Emits the code that moves STR_PTR backwards in the subject for OP_REVERSE
(a fixed count) or OP_VREVERSE (a count between lmin and lmax).

The generated code first steps back lmin characters and fails if the subject
start would be passed. For OP_VREVERSE the position reached is stored in
STACK(3), then up to lmax - lmin further characters are stepped back, stopping
at the subject start, and the final position is stored in STACK(2). In UTF
mode the stepping is done one character at a time with move_back().

OP_REVERSE failures go to parent's own_backtracks; OP_VREVERSE pushes a
vreverse_backtrack record and uses that record's list.

Returns the position after the opcode, or NULL when the backtrack record
cannot be allocated. */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **reverse_failed;
unsigned int lmin, lmax;
#ifdef SUPPORT_UNICODE
struct sljit_jump *at_start;
struct sljit_label *step;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc != OP_REVERSE)
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);

  reverse_failed = &backtrack->own_backtracks;
  lmin = GET2(cc, 1);
  lmax = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(lmin < lmax);
  }
else
  {
  reverse_failed = &parent->own_backtracks;
  lmin = GET2(cc, 1);
  lmax = lmin;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(lmin > 0);
  }

/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  if (lmin > 0)
    {
    /* Mandatory part: TMP3 counts down the characters still to skip. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);
    step = LABEL();
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, reverse_failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, step);
    }

  if (lmin < lmax)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Optional part: reaching the subject start ends the loop. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
    step = LABEL();
    at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, reverse_failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, step);

    JUMPHERE(at_start);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  if (lmin > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (lmin < lmax)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* STR_PTR = max(STR_PTR - (lmax - lmin), subject start). */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

if (lmin < lmax)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
8737
8738
/* Scans the opcodes starting at cc. Callouts, the ^ and $ anchors, word
boundary checks and OP_ALT are stepped over. Returns FALSE if OP_KET is
reached having seen nothing else, and TRUE as soon as any other opcode is
found. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
for (;;)
  {
  switch (*cc)
    {
    case OP_KET:
    return FALSE;

    /* Variable length item: its total length is stored in the opcode. */
    case OP_CALLOUT_STR:
    cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    /* Items whose length comes from the opcode length table. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    cc += PRIV(OP_lengths)[*cc];
    break;

    default:
    return TRUE;
    }
  }
}
8769
8770
/* Generates the matching path of an assertion bracket whose opcode is in the
OP_ASSERT ... OP_ASSERTBACK_NOT range, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO (the prefix is not allowed when 'conditional' is set).

Arguments:
  common       compiler state; several of its fields (accept / quit lists and
               labels, then_trap, restore_end_ptr, in_positive_assertion) are
               saved on entry and restored before every return
  cc           points to the assertion opcode, or to its BRAZERO/BRAMINZERO prefix
  backtrack    receives framesize, private_data_ptr and the generated jump lists
  conditional  when TRUE, failure jumps are collected on backtrack->condfailed
               instead of backtrack->common.own_backtracks

Returns:       the address after the closing bracket item (cc + 1 + LINK_SIZE),
               or NULL when the sljit compiler reports an error
*/
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of extra stack slots, not a flag: it is set to 3 below and takes
part in stack offset computations, so it must be an int. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
sljit_s32 save_restore_end_ptr = common->restore_end_ptr;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive assertions the "alternative matched" jumps are collected
locally in tmp, for negative ones they go directly to the failure list. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Save the current STR_END and use the current position as the new end. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->restore_end_ptr = 0;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before reporting the failure. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->restore_end_ptr = save_restore_end_ptr;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before reporting the failure. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->restore_end_ptr = save_restore_end_ptr;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert has failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    SLJIT_ASSERT(framesize != 0);
    if (framesize > 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Restore the compiler state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }

common->in_positive_assertion = save_in_positive_assertion;
common->restore_end_ptr = save_restore_end_ptr;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
9247
9248
/* Emits the stack handling code shared by the users of this helper: it
releases or rewinds the stack according to framesize, reloads the saved
control head when needs_control_head is set, and loads the value needed by
the OP_KETRMAX / OP_KETRMIN variants. */
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* TRUE when the bracket is repeated or has alternatives. */
const BOOL repeated_or_alt = (ket != OP_KET || has_alternatives);
int slots;

if (framesize >= 0)
  {
  slots = repeated_or_alt ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + slots) * sizeof(sljit_sw));
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  /* TMP2 which is set here used by OP_KETRMAX below. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
else
  {
  if (framesize == no_frame)
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  else
    {
    slots = 0;
    if (needs_control_head)
      slots++;
    if (repeated_or_alt)
      slots++;

    if (slots > 0)
      free_stack(common, slots);
    }

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), repeated_or_alt ? STACK(-2) : STACK(-1));

  switch (ket)
    {
    case OP_KETRMAX:
    /* TMP2 which is set here used by OP_KETRMAX below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    case OP_KETRMIN:
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    default:
    break;
    }
  }

/* TMP1 was loaded with the saved control head in both branches above. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
9295
9296
/* Emits code which pushes the previous capture state to the stack, starting
at slot 'stacksize', and then updates it: the last-capture index (only when
capture_last_ptr is in use) and, for a capture which is not marked in
optimized_cbracket, the OVECTOR(offset) / OVECTOR(offset + 1) pair.

Returns: the index of the first stack slot which was not written. */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;
const int capture_index = offset >> 1;
int next_slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, capture_index);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
  next_slot++;
  }

if (common->optimized_cbracket[capture_index] != 0)
  return next_slot;

/* Save the old pair, then store the value kept in private_data_ptr as the
new start and the current STR_PTR as the new end. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
return next_slot + 2;
}
9320
9321
/* Helper called from the generated code: runs the script run check on the
[ptr, endptr) range with the UTF flag off. Returns endptr when the check
succeeds and NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
return PRIV(script_run)(ptr, endptr, FALSE) ? endptr : NULL;
}
9327
9328
#ifdef SUPPORT_UNICODE
9329
9330
/* Same as do_script_run(), but the script run check is performed with the
UTF flag on. Returns endptr when the check succeeds and NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}
9336
9337
#endif /* SUPPORT_UNICODE */
9338
9339
/* Emits a call to the script run helper. The value stored at private_data_ptr
is passed in TMP1 and the current STR_PTR as the second argument. The returned
value becomes the new STR_PTR; a zero (NULL) result jumps to the backtrack
list of the parent. */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
jump_list **failed;

/* The call below relies on this register assignment for its two arguments. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

if (parent->top != NULL)
  failed = &parent->top->simple_backtracks;
else
  failed = &parent->own_backtracks;

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
#ifdef SUPPORT_UNICODE
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
  common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
#endif

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
add_jump(compiler, failed, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
9356
9357
/*
9358
Handling bracketed expressions is probably the most complex part.
9359
9360
Stack layout naming characters:
9361
S - Push the current STR_PTR
9362
0 - Push a 0 (NULL)
9363
A - Push the current STR_PTR. Needed for restoring the STR_PTR
9364
before the next alternative. Not pushed if there are no alternatives.
9365
M - Any values pushed by the current alternative. Can be empty, or anything.
9366
C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
9367
L - Push the previous local (pointed by localptr) to the stack
9368
() - optional values stored on the stack
9369
()* - optional, can be stored multiple times
9370
9371
The following list shows the regular expression templates, their PCRE byte codes
9372
and stack layout supported by pcre-sljit.
9373
9374
(?:) OP_BRA | OP_KET A M
9375
() OP_CBRA | OP_KET C M
9376
(?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
9377
OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
9378
(?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
9379
OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
9380
()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
9381
OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
9382
()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
9383
OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
9384
(?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
9385
(?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
9386
()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
9387
()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
9388
(?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
9389
OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
9390
(?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
9391
OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
9392
()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
9393
OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
9394
()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
9395
OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
9396
9397
9398
Stack layout naming characters:
9399
A - Push the alternative index (starting from 0) on the stack.
9400
Not pushed if there are no alternatives.
9401
M - Any values pushed by the current alternative. Can be empty, or anything.
9402
9403
The next list shows the possible content of a bracket:
9404
(|) OP_*BRA | OP_ALT ... M A
9405
(?()|) OP_*COND | OP_ALT M A
9406
(?>|) OP_ONCE | OP_ALT ... [stack trace] M A
9407
Or nothing, if trace is unnecessary
9408
*/
9409
9410
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9411
{
9412
DEFINE_COMPILER;
9413
backtrack_common *backtrack;
9414
PCRE2_UCHAR opcode;
9415
int private_data_ptr = 0;
9416
int offset = 0;
9417
int i, stacksize;
9418
int repeat_ptr = 0, repeat_length = 0;
9419
int repeat_type = 0, repeat_count = 0;
9420
PCRE2_SPTR ccbegin;
9421
PCRE2_SPTR matchingpath;
9422
PCRE2_SPTR slot;
9423
PCRE2_UCHAR bra = OP_BRA;
9424
PCRE2_UCHAR ket;
9425
assert_backtrack *assert;
9426
BOOL has_alternatives;
9427
BOOL needs_control_head = FALSE;
9428
BOOL has_vreverse = FALSE;
9429
struct sljit_jump *jump;
9430
struct sljit_jump *skip;
9431
jump_list *jumplist;
9432
struct sljit_label *rmax_label = NULL;
9433
struct sljit_jump *braminzero = NULL;
9434
9435
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
9436
9437
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9438
{
9439
bra = *cc;
9440
cc++;
9441
opcode = *cc;
9442
}
9443
9444
opcode = *cc;
9445
ccbegin = cc;
9446
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
9447
ket = *matchingpath;
9448
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
9449
{
9450
repeat_ptr = PRIVATE_DATA(matchingpath);
9451
repeat_length = PRIVATE_DATA(matchingpath + 1);
9452
repeat_type = PRIVATE_DATA(matchingpath + 2);
9453
repeat_count = PRIVATE_DATA(matchingpath + 3);
9454
SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
9455
if (repeat_type == OP_UPTO)
9456
ket = OP_KETRMAX;
9457
if (repeat_type == OP_MINUPTO)
9458
ket = OP_KETRMIN;
9459
}
9460
9461
matchingpath = ccbegin + 1 + LINK_SIZE;
9462
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
9463
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
9464
cc += GET(cc, 1);
9465
9466
has_alternatives = *cc == OP_ALT;
9467
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
9468
{
9469
SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
9470
compile_time_checks_must_be_grouped_together);
9471
has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
9472
}
9473
9474
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
9475
opcode = OP_SCOND;
9476
9477
if (opcode == OP_CBRA || opcode == OP_SCBRA)
9478
{
9479
/* Capturing brackets has a pre-allocated space. */
9480
offset = GET2(ccbegin, 1 + LINK_SIZE);
9481
if (common->optimized_cbracket[offset] == 0)
9482
{
9483
private_data_ptr = OVECTOR_PRIV(offset);
9484
offset <<= 1;
9485
}
9486
else
9487
{
9488
offset <<= 1;
9489
private_data_ptr = OVECTOR(offset);
9490
}
9491
BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9492
matchingpath += IMM2_SIZE;
9493
}
9494
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE
9495
|| opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9496
{
9497
/* Other brackets simply allocate the next entry. */
9498
private_data_ptr = PRIVATE_DATA(ccbegin);
9499
SLJIT_ASSERT(private_data_ptr != 0);
9500
BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9501
if (opcode == OP_ONCE)
9502
BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
9503
}
9504
9505
/* Instructions before the first alternative. */
9506
stacksize = 0;
9507
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9508
stacksize++;
9509
if (bra == OP_BRAZERO)
9510
stacksize++;
9511
9512
if (stacksize > 0)
9513
allocate_stack(common, stacksize);
9514
9515
stacksize = 0;
9516
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9517
{
9518
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9519
stacksize++;
9520
}
9521
9522
if (bra == OP_BRAZERO)
9523
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9524
9525
if (bra == OP_BRAMINZERO)
9526
{
9527
/* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
9528
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9529
if (ket != OP_KETRMIN)
9530
{
9531
free_stack(common, 1);
9532
braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9533
}
9534
else if (opcode == OP_ONCE || opcode >= OP_SBRA)
9535
{
9536
jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9537
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9538
/* Nothing stored during the first run. */
9539
skip = JUMP(SLJIT_JUMP);
9540
JUMPHERE(jump);
9541
/* Checking zero-length iteration. */
9542
if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9543
{
9544
/* When we come from outside, private_data_ptr contains the previous STR_PTR. */
9545
braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9546
}
9547
else
9548
{
9549
/* Except when the whole stack frame must be saved. */
9550
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9551
braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
9552
}
9553
JUMPHERE(skip);
9554
}
9555
else
9556
{
9557
jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9558
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9559
JUMPHERE(jump);
9560
}
9561
}
9562
9563
if (repeat_type != 0)
9564
{
9565
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
9566
if (repeat_type == OP_EXACT)
9567
rmax_label = LABEL();
9568
}
9569
9570
if (ket == OP_KETRMIN)
9571
BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
9572
9573
if (ket == OP_KETRMAX)
9574
{
9575
rmax_label = LABEL();
9576
if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
9577
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
9578
}
9579
9580
/* Handling capturing brackets and alternatives. */
9581
if (opcode == OP_ONCE)
9582
{
9583
stacksize = 0;
9584
if (needs_control_head)
9585
{
9586
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9587
stacksize++;
9588
}
9589
9590
if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9591
{
9592
/* Neither capturing brackets nor recursions are found in the block. */
9593
if (ket == OP_KETRMIN)
9594
{
9595
stacksize += 2;
9596
if (!needs_control_head)
9597
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9598
}
9599
else
9600
{
9601
if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9602
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9603
if (ket == OP_KETRMAX || has_alternatives)
9604
stacksize++;
9605
}
9606
9607
if (stacksize > 0)
9608
allocate_stack(common, stacksize);
9609
9610
stacksize = 0;
9611
if (needs_control_head)
9612
{
9613
stacksize++;
9614
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9615
}
9616
9617
if (ket == OP_KETRMIN)
9618
{
9619
if (needs_control_head)
9620
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9621
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9622
if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9623
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
9624
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
9625
}
9626
else if (ket == OP_KETRMAX || has_alternatives)
9627
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9628
}
9629
else
9630
{
9631
if (ket != OP_KET || has_alternatives)
9632
stacksize++;
9633
9634
stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
9635
allocate_stack(common, stacksize);
9636
9637
if (needs_control_head)
9638
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9639
9640
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9641
OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
9642
9643
stacksize = needs_control_head ? 1 : 0;
9644
if (ket != OP_KET || has_alternatives)
9645
{
9646
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9647
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9648
stacksize++;
9649
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9650
}
9651
else
9652
{
9653
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9654
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9655
}
9656
init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
9657
}
9658
}
9659
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
9660
{
9661
/* Saving the previous values. */
9662
if (common->optimized_cbracket[offset >> 1] != 0)
9663
{
9664
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
9665
allocate_stack(common, 2);
9666
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9667
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9668
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9669
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9670
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9671
}
9672
else
9673
{
9674
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9675
allocate_stack(common, 1);
9676
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9677
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9678
}
9679
}
9680
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
9681
{
9682
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9683
allocate_stack(common, 4);
9684
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9685
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9686
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9687
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9688
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9689
OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
9690
9691
has_vreverse = (*matchingpath == OP_VREVERSE);
9692
if (*matchingpath == OP_REVERSE || has_vreverse)
9693
matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9694
}
9695
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9696
{
9697
/* Saving the previous value. */
9698
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9699
allocate_stack(common, 1);
9700
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9701
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9702
9703
if (*matchingpath == OP_REVERSE)
9704
matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9705
}
9706
else if (opcode == OP_ASSERT_SCS)
9707
{
9708
/* Nested scs blocks will not update this variable. */
9709
if (common->restore_end_ptr == 0)
9710
common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
9711
9712
if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF))
9713
{
9714
/* Optimized case for a single capture reference. */
9715
i = OVECTOR(GET2(matchingpath, 1) << 1);
9716
9717
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i);
9718
9719
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9720
matchingpath += 1 + IMM2_SIZE;
9721
9722
allocate_stack(common, has_alternatives ? 3 : 2);
9723
9724
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9725
OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9726
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9727
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw));
9728
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9729
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
9730
}
9731
else
9732
{
9733
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9734
jumplist = NULL;
9735
9736
while (TRUE)
9737
{
9738
if (*matchingpath == OP_CREF)
9739
{
9740
sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1));
9741
matchingpath += 1 + IMM2_SIZE;
9742
}
9743
else
9744
{
9745
SLJIT_ASSERT(*matchingpath == OP_DNCREF);
9746
9747
i = GET2(matchingpath, 1 + IMM2_SIZE);
9748
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9749
9750
while (i-- > 1)
9751
{
9752
sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9753
add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9754
slot += common->name_entry_size;
9755
}
9756
9757
sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9758
matchingpath += 1 + 2 * IMM2_SIZE;
9759
}
9760
9761
if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF)
9762
break;
9763
9764
add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9765
}
9766
9767
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9768
CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9769
9770
set_jumps(jumplist, LABEL());
9771
9772
allocate_stack(common, has_alternatives ? 3 : 2);
9773
9774
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9775
OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9776
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9777
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0);
9778
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9779
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9780
}
9781
9782
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9783
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0);
9784
9785
if (has_alternatives)
9786
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
9787
}
9788
else if (has_alternatives)
9789
{
9790
/* Pushing the starting string pointer. */
9791
allocate_stack(common, 1);
9792
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9793
}
9794
9795
/* Generating code for the first alternative. */
9796
if (opcode == OP_COND || opcode == OP_SCOND)
9797
{
9798
if (*matchingpath == OP_CREF)
9799
{
9800
SLJIT_ASSERT(has_alternatives);
9801
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9802
CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9803
matchingpath += 1 + IMM2_SIZE;
9804
}
9805
else if (*matchingpath == OP_DNCREF)
9806
{
9807
SLJIT_ASSERT(has_alternatives);
9808
9809
i = GET2(matchingpath, 1 + IMM2_SIZE);
9810
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9811
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9812
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9813
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9814
slot += common->name_entry_size;
9815
i--;
9816
while (i-- > 0)
9817
{
9818
OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9819
OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
9820
slot += common->name_entry_size;
9821
}
9822
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9823
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO));
9824
matchingpath += 1 + 2 * IMM2_SIZE;
9825
}
9826
else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
9827
{
9828
/* Never has other case. */
9829
BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL;
9830
SLJIT_ASSERT(!has_alternatives);
9831
9832
if (*matchingpath == OP_TRUE)
9833
{
9834
stacksize = 1;
9835
matchingpath++;
9836
}
9837
else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
9838
stacksize = 0;
9839
else if (*matchingpath == OP_RREF)
9840
{
9841
stacksize = GET2(matchingpath, 1);
9842
if (common->currententry == NULL)
9843
stacksize = 0;
9844
else if (stacksize == RREF_ANY)
9845
stacksize = 1;
9846
else if (common->currententry->start == 0)
9847
stacksize = stacksize == 0;
9848
else
9849
stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
9850
9851
if (stacksize != 0)
9852
matchingpath += 1 + IMM2_SIZE;
9853
}
9854
else
9855
{
9856
if (common->currententry == NULL || common->currententry->start == 0)
9857
stacksize = 0;
9858
else
9859
{
9860
stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
9861
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9862
i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
9863
while (stacksize > 0)
9864
{
9865
if ((int)GET2(slot, 0) == i)
9866
break;
9867
slot += common->name_entry_size;
9868
stacksize--;
9869
}
9870
}
9871
9872
if (stacksize != 0)
9873
matchingpath += 1 + 2 * IMM2_SIZE;
9874
}
9875
9876
/* The stacksize == 0 is a common "else" case. */
9877
if (stacksize == 0)
9878
{
9879
if (*cc == OP_ALT)
9880
{
9881
matchingpath = cc + 1 + LINK_SIZE;
9882
cc += GET(cc, 1);
9883
}
9884
else
9885
matchingpath = cc;
9886
}
9887
}
9888
else
9889
{
9890
SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
9891
/* Similar code as PUSH_BACKTRACK macro. */
9892
assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
9893
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9894
return NULL;
9895
memset(assert, 0, sizeof(assert_backtrack));
9896
assert->common.cc = matchingpath;
9897
BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
9898
matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
9899
}
9900
}
9901
9902
compile_matchingpath(common, matchingpath, cc, backtrack);
9903
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9904
return NULL;
9905
9906
switch (opcode)
9907
{
9908
case OP_ASSERTBACK_NA:
9909
if (has_vreverse)
9910
{
9911
SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
9912
add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
9913
}
9914
9915
if (PRIVATE_DATA(ccbegin + 1))
9916
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9917
break;
9918
case OP_ONCE:
9919
match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9920
break;
9921
case OP_SCRIPT_RUN:
9922
match_script_run_common(common, private_data_ptr, backtrack);
9923
break;
9924
}
9925
9926
stacksize = 0;
9927
if (repeat_type == OP_MINUPTO)
9928
{
9929
/* We need to preserve the counter. TMP2 will be used below. */
9930
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9931
stacksize++;
9932
}
9933
if (ket != OP_KET || bra != OP_BRA)
9934
stacksize++;
9935
if (offset != 0)
9936
{
9937
if (common->capture_last_ptr != 0)
9938
stacksize++;
9939
if (common->optimized_cbracket[offset >> 1] == 0)
9940
stacksize += 2;
9941
}
9942
if (has_alternatives && opcode != OP_ONCE)
9943
stacksize++;
9944
9945
if (stacksize > 0)
9946
allocate_stack(common, stacksize);
9947
9948
stacksize = 0;
9949
if (repeat_type == OP_MINUPTO)
9950
{
9951
/* TMP2 was set above. */
9952
OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9953
stacksize++;
9954
}
9955
9956
if (ket != OP_KET || bra != OP_BRA)
9957
{
9958
if (ket != OP_KET)
9959
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9960
else
9961
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9962
stacksize++;
9963
}
9964
9965
if (offset != 0)
9966
stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9967
9968
/* Skip and count the other alternatives. */
9969
i = 1;
9970
while (*cc == OP_ALT)
9971
{
9972
cc += GET(cc, 1);
9973
i++;
9974
}
9975
9976
if (has_alternatives)
9977
{
9978
if (opcode != OP_ONCE)
9979
{
9980
if (i <= 3)
9981
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9982
else
9983
BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
9984
}
9985
if (ket != OP_KETRMAX)
9986
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
9987
}
9988
9989
/* Must be after the matchingpath label. */
9990
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
9991
{
9992
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9993
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9994
}
9995
else switch (opcode)
9996
{
9997
case OP_ASSERT_NA:
9998
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9999
break;
10000
case OP_ASSERT_SCS:
10001
OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
10002
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10003
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10004
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
10005
10006
/* Nested scs blocks will not update this variable. */
10007
if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
10008
common->restore_end_ptr = 0;
10009
break;
10010
}
10011
10012
if (ket == OP_KETRMAX)
10013
{
10014
if (repeat_type != 0)
10015
{
10016
if (has_alternatives)
10017
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10018
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10019
JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10020
/* Drop STR_PTR for greedy plus quantifier. */
10021
if (opcode != OP_ONCE)
10022
free_stack(common, 1);
10023
}
10024
else if (opcode < OP_BRA || opcode >= OP_SBRA)
10025
{
10026
if (has_alternatives)
10027
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10028
10029
/* Checking zero-length iteration. */
10030
if (opcode != OP_ONCE)
10031
{
10032
/* This case includes opcodes such as OP_SCRIPT_RUN. */
10033
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10034
/* Drop STR_PTR for greedy plus quantifier. */
10035
if (bra != OP_BRAZERO)
10036
free_stack(common, 1);
10037
}
10038
else
10039
/* TMP2 must contain the starting STR_PTR. */
10040
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10041
}
10042
else
10043
JUMPTO(SLJIT_JUMP, rmax_label);
10044
BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10045
}
10046
10047
if (repeat_type == OP_EXACT)
10048
{
10049
count_match(common);
10050
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10051
JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10052
}
10053
else if (repeat_type == OP_UPTO)
10054
{
10055
/* We need to preserve the counter. */
10056
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10057
allocate_stack(common, 1);
10058
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10059
}
10060
10061
if (bra == OP_BRAZERO)
10062
BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10063
10064
if (bra == OP_BRAMINZERO)
10065
{
10066
/* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10067
JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10068
if (braminzero != NULL)
10069
{
10070
JUMPHERE(braminzero);
10071
/* We need to release the end pointer to perform the
10072
backtrack for the zero-length iteration. When
10073
framesize is < 0, OP_ONCE will do the release itself. */
10074
if (opcode == OP_ONCE)
10075
{
10076
int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10077
10078
SLJIT_ASSERT(framesize != 0);
10079
if (framesize > 0)
10080
{
10081
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10082
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10083
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10084
}
10085
}
10086
else if (ket == OP_KETRMIN)
10087
free_stack(common, 1);
10088
}
10089
/* Continue to the normal backtrack. */
10090
}
10091
10092
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
10093
count_match(common);
10094
10095
cc += 1 + LINK_SIZE;
10096
10097
if (opcode == OP_ONCE)
10098
{
10099
int data;
10100
int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10101
10102
SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
10103
/* We temporarily encode the needs_control_head in the lowest bit.
10104
The real value should be short enough for this operation to work
10105
without triggering Undefined Behaviour. */
10106
data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
10107
BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
10108
}
10109
return cc + repeat_length;
10110
}
10111
10112
/* Emits the matching path for an OP_BRAPOS / OP_SBRAPOS / OP_CBRAPOS /
OP_SCBRAPOS group, optionally introduced by OP_BRAPOSZERO.

A bracketpos_backtrack record is pushed onto parent, and the private data
pointer, frame size and stack size chosen here are stored in it. Each
alternative of the group is compiled in turn; the code emitted after a
successful alternative jumps back to a common loop label, and the code
emitted for a failed alternative restores STR_PTR from the saved position.

Returns the code position following the closing OP_KETRPOS (and its link),
or NULL when the sljit compiler reports an error. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL allow_zero;
BOOL capturing;
BOOL checks_empty;
PCRE2_SPTR ccbegin = NULL;
int stack_pos; /* Ends up as the stack index of the saved control head. */
struct sljit_label *loop_start = NULL;
struct jump_list *empty_match = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);

/* A leading OP_BRAPOSZERO is consumed and only remembered as a flag. */
allow_zero = (*cc == OP_BRAPOSZERO);
if (allow_zero)
  cc++;

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;

if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
  {
  ccbegin = cc + 1 + LINK_SIZE;
  }
else if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
  {
  offset = GET2(cc, 1 + LINK_SIZE);
  /* These brackets are never handled as optimized capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  }
else
  {
  SLJIT_UNREACHABLE();
  }

/* A non-zero offset identifies the capturing variants from here on. */
capturing = (offset != 0);
checks_empty = (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS);

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;

if (framesize < 0)
  {
  /* Slots: saved capture pair (plus capture_last) or the start position,
  then an optional control head and an optional flag slot. */
  stacksize = 1;
  if (capturing)
    stacksize = (common->capture_last_ptr != 0) ? 3 : 2;
  if (needs_control_head)
    stacksize++;
  if (!allow_zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (capturing)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    /* TMP2 is free again, so it can carry the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stack_pos = 2;
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack_pos = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack_pos = 1;
    }

  /* The flag slot (initialised to 1) sits after the control head slot
  when both are present. */
  if (!allow_zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? stack_pos + 1 : stack_pos), SLJIT_IMM, 1);
  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack_pos), TMP2, 0);
  }
else
  {
  stacksize = framesize + 1
    + (allow_zero ? 0 : 1)
    + (needs_control_head ? 1 : 0)
    + (capturing ? 0 : 1);
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack_pos = 0;
  if (!allow_zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack_pos = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack_pos), TMP2, 0);
    stack_pos++;
    }
  if (!capturing)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack_pos), STR_PTR, 0);
    stack_pos++;
    }
  /* Previous content of the private data slot, followed by the frame. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack_pos), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step back to the index where the control head was stored. */
  stack_pos -= capturing ? 1 : 2;
  }

if (capturing)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop_start = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->own_backtracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the new position. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (capturing)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    }
  else if (capturing)
    {
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
    if (opcode == OP_SBRAPOS)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
    }

  /* The control head is reset even when the match was empty. */
  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack_pos));

  /* TMP1 holds the previous position here; leave the loop if nothing was consumed. */
  if (checks_empty)
    add_jump(compiler, &empty_match, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

  /* Clear the flag slot that was initialised to 1. */
  if (!allow_zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK((framesize < 0) ? stacksize - 1 : 0), SLJIT_IMM, 0);

  JUMPTO(SLJIT_JUMP, loop_start);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->own_backtracks, LABEL());

  /* The alternative failed: go back to the last saved position. */
  if (capturing)
    {
    /* After the last alternative TMP2 is needed by the flag test below. */
    if (framesize >= 0 && *cc == OP_KETRPOS)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    }
  else if (framesize < 0)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* The control head does not have to be restored after a failed match. */

backtrack->own_backtracks = NULL;
if (!allow_zero)
  {
  /* Backtrack when the flag slot still holds its initial non-zero value. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 was loaded from [private_data_ptr] in the loop above. */
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* Empty-match exits from the loop continue here. */
set_jumps(empty_match, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
10389
10390
/* Decodes the repeat that starts at cc into a normalised description.

Arguments:
  common    compiler state (used for next_opcode() and the utf flag)
  cc        points to the repeat opcode, or to the class opcode when a
            character class is repeated
  opcode    receives the repeat kind rebased onto the OP_STAR .. OP_POSUPTO
            family, or OP_EXACT; PLUS variants are reported as the matching
            STAR variant with *exact set to 1
  type      receives what is repeated: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI,
            the opcode that follows a TYPE repeat, or the class opcode
  max       written only for UPTO style repeats and class ranges; for a
            bounded class range it holds the maximum minus *exact
  exact     receives the number of mandatory iterations (0 if none)
  end       receives the position following the whole repeated item

Returns:    the position following the decoded repeat header: the repeated
            character for character repeats, one past the type opcode for
            TYPE repeats, and one past the class opcode for classes */
static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
{
int class_len;

*opcode = *cc;
*exact = 0;

if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
  {
  cc++;
  *type = OP_CHAR;
  }
else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
  {
  cc++;
  *type = OP_CHARI;
  *opcode -= OP_STARI - OP_STAR;
  }
else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
  {
  cc++;
  *type = OP_NOT;
  *opcode -= OP_NOTSTAR - OP_STAR;
  }
else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
  {
  cc++;
  *type = OP_NOTI;
  *opcode -= OP_NOTSTARI - OP_STAR;
  }
else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
  {
  cc++;
  *opcode -= OP_TYPESTAR - OP_STAR;
  /* Placeholder: the real type is read once the count has been skipped. */
  *type = OP_END;
  }
else
  {
  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS || *opcode == OP_ECLASS);
  *type = *opcode;
  /* Opcodes below OP_XCLASS have a fixed length (the opcode plus 32 bytes,
  counted in code units); the others store their length in the code. */
  class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 1);
  *opcode = cc[class_len];
  cc++;

  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
    {
    *opcode -= OP_CRSTAR - OP_STAR;
    *end = cc + class_len;

    if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
      {
      *exact = 1;
      *opcode -= OP_PLUS - OP_STAR;
      }
    return cc;
    }

  if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
    {
    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
    *end = cc + class_len;

    if (*opcode == OP_POSPLUS)
      {
      *exact = 1;
      *opcode = OP_POSSTAR;
      }
    return cc;
    }

  SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
  *max = GET2(cc, (class_len + IMM2_SIZE));
  *exact = GET2(cc, class_len);
  *end = cc + class_len + 2 * IMM2_SIZE;

  if (*max == 0)
    {
    /* A zero maximum is an open-ended repeat, so every variant must map
    to the STAR family (as the lazy case does); an UPTO opcode here would
    describe a bounded repeat whose bound is zero. */
    SLJIT_ASSERT(*exact > 1);
    if (*opcode == OP_CRRANGE)
      *opcode = OP_STAR;
    else if (*opcode == OP_CRPOSRANGE)
      *opcode = OP_POSSTAR;
    else
      *opcode = OP_MINSTAR;
    return cc;
    }

  /* Bounded range: keep only the optional part in *max. */
  *max -= *exact;
  if (*max == 0)
    *opcode = OP_EXACT;
  else
    {
    SLJIT_ASSERT(*exact > 0 || *max > 1);
    if (*opcode == OP_CRRANGE)
      *opcode = OP_UPTO;
    else if (*opcode == OP_CRPOSRANGE)
      *opcode = OP_POSUPTO;
    else if (*max == 1)
      *opcode = OP_MINQUERY;
    else
      *opcode = OP_MINUPTO;
    }
  return cc;
  }

/* Character and TYPE repeats: consume the count that follows the opcode. */
switch(*opcode)
  {
  case OP_EXACT:
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;

  case OP_PLUS:
  case OP_MINPLUS:
  *exact = 1;
  *opcode -= OP_PLUS - OP_STAR;
  break;

  case OP_POSPLUS:
  *exact = 1;
  *opcode = OP_POSSTAR;
  break;

  case OP_UPTO:
  case OP_MINUPTO:
  case OP_POSUPTO:
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;
  }

if (*type == OP_END)
  {
  /* TYPE repeat: the repeated opcode follows the (optional) count. */
  *type = *cc;
  *end = next_opcode(common, cc);
  cc++;
  return cc;
  }

*end = cc + 1;
#ifdef SUPPORT_UNICODE
/* In UTF mode the character may occupy extra code units. */
if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
#endif
return cc;
}
10535
10536
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks)
10537
{
10538
DEFINE_COMPILER;
10539
backtrack_common *backtrack = NULL;
10540
PCRE2_SPTR begin = cc;
10541
PCRE2_UCHAR opcode;
10542
PCRE2_UCHAR type;
10543
sljit_u32 max = 0, exact;
10544
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
10545
sljit_s32 early_fail_type;
10546
BOOL charpos_enabled, use_tmp;
10547
PCRE2_UCHAR charpos_char;
10548
unsigned int charpos_othercasebit;
10549
PCRE2_SPTR end;
10550
jump_list *no_match = NULL;
10551
jump_list *no_char1_match = NULL;
10552
struct sljit_jump *jump = NULL;
10553
struct sljit_label *label;
10554
int private_data_ptr = PRIVATE_DATA(cc);
10555
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
10556
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
10557
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
10558
int tmp_base, tmp_offset;
10559
10560
early_fail_type = (early_fail_ptr & 0x7);
10561
early_fail_ptr >>= 3;
10562
10563
/* During recursion, these optimizations are disabled. */
10564
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
10565
{
10566
early_fail_ptr = 0;
10567
early_fail_type = type_skip;
10568
}
10569
10570
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
10571
|| (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
10572
10573
if (early_fail_type == type_fail)
10574
add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
10575
10576
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
10577
10578
if (type != OP_EXTUNI)
10579
{
10580
tmp_base = TMP3;
10581
tmp_offset = 0;
10582
}
10583
else
10584
{
10585
tmp_base = SLJIT_MEM1(SLJIT_SP);
10586
tmp_offset = LOCAL2;
10587
}
10588
10589
if (opcode == OP_EXACT)
10590
{
10591
SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2);
10592
10593
if (common->mode == PCRE2_JIT_COMPLETE
10594
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10595
&& !common->utf
10596
#endif
10597
&& type != OP_ANYNL && type != OP_EXTUNI)
10598
{
10599
OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);
10600
add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));
10601
10602
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
10603
if (type == OP_ALLANY && !common->invalid_utf)
10604
#else
10605
if (type == OP_ALLANY)
10606
#endif
10607
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
10608
else
10609
{
10610
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10611
label = LABEL();
10612
compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE);
10613
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10614
JUMPTO(SLJIT_NOT_ZERO, label);
10615
}
10616
}
10617
else
10618
{
10619
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
10620
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10621
label = LABEL();
10622
compile_char1_matchingpath(common, type, cc, prev_backtracks, TRUE);
10623
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10624
JUMPTO(SLJIT_NOT_ZERO, label);
10625
}
10626
}
10627
10628
if (early_fail_type == type_fail_range)
10629
{
10630
/* Range end first, followed by range start. */
10631
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
10632
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
10633
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
10634
OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
10635
add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
10636
10637
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10638
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
10639
}
10640
10641
if (opcode < OP_EXACT)
10642
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL);
10643
10644
switch(opcode)
10645
{
10646
case OP_STAR:
10647
case OP_UPTO:
10648
SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR));
10649
max += exact;
10650
10651
if (type == OP_EXTUNI)
10652
{
10653
SLJIT_ASSERT(private_data_ptr == 0);
10654
SLJIT_ASSERT(early_fail_ptr == 0);
10655
10656
if (exact == 1)
10657
{
10658
SLJIT_ASSERT(opcode == OP_STAR);
10659
allocate_stack(common, 1);
10660
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10661
}
10662
else
10663
{
10664
/* If OP_EXTUNI is present, it has a separate EXACT opcode. */
10665
SLJIT_ASSERT(exact == 0);
10666
10667
allocate_stack(common, 2);
10668
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10669
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
10670
}
10671
10672
if (opcode == OP_UPTO)
10673
{
10674
SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
10675
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max);
10676
}
10677
10678
label = LABEL();
10679
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
10680
if (opcode == OP_UPTO)
10681
{
10682
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
10683
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10684
jump = JUMP(SLJIT_ZERO);
10685
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0);
10686
}
10687
10688
/* We cannot use TMP3 because of allocate_stack. */
10689
allocate_stack(common, 1);
10690
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10691
JUMPTO(SLJIT_JUMP, label);
10692
if (jump != NULL)
10693
JUMPHERE(jump);
10694
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10695
break;
10696
}
10697
#ifdef SUPPORT_UNICODE
10698
else if (type == OP_ALLANY && !common->invalid_utf)
10699
#else
10700
else if (type == OP_ALLANY)
10701
#endif
10702
{
10703
if (opcode == OP_STAR)
10704
{
10705
if (exact == 1)
10706
detect_partial_match(common, prev_backtracks);
10707
10708
if (private_data_ptr == 0)
10709
allocate_stack(common, 2);
10710
10711
OP1(SLJIT_MOV, base, offset0, STR_END, 0);
10712
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10713
10714
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
10715
process_partial_match(common);
10716
10717
if (early_fail_ptr != 0)
10718
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
10719
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10720
break;
10721
}
10722
#ifdef SUPPORT_UNICODE
10723
else if (!common->utf)
10724
#else
10725
else
10726
#endif
10727
{
10728
/* If OP_ALLANY is present, it has a separate EXACT opcode. */
10729
SLJIT_ASSERT(exact == 0);
10730
10731
if (private_data_ptr == 0)
10732
allocate_stack(common, 2);
10733
10734
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10735
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
10736
10737
if (common->mode == PCRE2_JIT_COMPLETE)
10738
{
10739
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
10740
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
10741
}
10742
else
10743
{
10744
jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
10745
process_partial_match(common);
10746
JUMPHERE(jump);
10747
}
10748
10749
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10750
10751
if (early_fail_ptr != 0)
10752
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10753
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10754
break;
10755
}
10756
}
10757
10758
charpos_enabled = FALSE;
10759
charpos_char = 0;
10760
charpos_othercasebit = 0;
10761
10762
SLJIT_ASSERT(tmp_base == TMP3);
10763
if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
10764
{
10765
#ifdef SUPPORT_UNICODE
10766
charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
10767
#else
10768
charpos_enabled = TRUE;
10769
#endif
10770
if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
10771
{
10772
charpos_othercasebit = char_get_othercase_bit(common, end + 1);
10773
if (charpos_othercasebit == 0)
10774
charpos_enabled = FALSE;
10775
}
10776
10777
if (charpos_enabled)
10778
{
10779
charpos_char = end[1];
10780
/* Consume the OP_CHAR opcode. */
10781
end += 2;
10782
#if PCRE2_CODE_UNIT_WIDTH == 8
10783
SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
10784
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
10785
SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
10786
if ((charpos_othercasebit & 0x100) != 0)
10787
charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
10788
#endif
10789
if (charpos_othercasebit != 0)
10790
charpos_char |= charpos_othercasebit;
10791
10792
BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE;
10793
BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char;
10794
BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit;
10795
10796
if (private_data_ptr == 0)
10797
allocate_stack(common, 2);
10798
10799
use_tmp = (opcode == OP_STAR);
10800
10801
if (use_tmp)
10802
{
10803
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10804
OP1(SLJIT_MOV, base, offset0, TMP3, 0);
10805
}
10806
else
10807
{
10808
OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
10809
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0);
10810
OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
10811
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1));
10812
}
10813
10814
/* Search the first instance of charpos_char. */
10815
if (exact > 0)
10816
detect_partial_match(common, &no_match);
10817
else
10818
jump = JUMP(SLJIT_JUMP);
10819
10820
label = LABEL();
10821
10822
if (opcode == OP_UPTO)
10823
{
10824
if (exact == max)
10825
OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10826
else
10827
{
10828
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10829
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
10830
}
10831
}
10832
10833
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
10834
10835
if (early_fail_ptr != 0)
10836
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10837
10838
if (exact == 0)
10839
JUMPHERE(jump);
10840
10841
detect_partial_match(common, &no_match);
10842
10843
if (opcode == OP_UPTO && exact > 0)
10844
{
10845
if (exact == max)
10846
CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label);
10847
else
10848
CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label);
10849
}
10850
10851
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
10852
if (charpos_othercasebit != 0)
10853
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
10854
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
10855
10856
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10857
if (use_tmp)
10858
{
10859
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0);
10860
SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3);
10861
}
10862
else
10863
{
10864
OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0);
10865
SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH);
10866
}
10867
JUMPTO(SLJIT_JUMP, label);
10868
10869
set_jumps(no_match, LABEL());
10870
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
10871
if (use_tmp)
10872
OP1(SLJIT_MOV, base, offset1, TMP3, 0);
10873
else
10874
{
10875
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
10876
OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
10877
OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0);
10878
}
10879
10880
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
10881
10882
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10883
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10884
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10885
break;
10886
}
10887
}
10888
10889
if (private_data_ptr == 0)
10890
allocate_stack(common, 2);
10891
10892
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10893
use_tmp = (opcode == OP_STAR);
10894
10895
if (common->utf)
10896
{
10897
if (!use_tmp)
10898
OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
10899
10900
OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
10901
}
10902
#endif
10903
10904
if (opcode == OP_UPTO)
10905
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max);
10906
10907
if (opcode == OP_UPTO && exact > 0)
10908
{
10909
label = LABEL();
10910
detect_partial_match(common, &no_match);
10911
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
10912
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10913
if (common->utf)
10914
OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
10915
#endif
10916
10917
if (exact == max)
10918
{
10919
OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10920
JUMPTO(SLJIT_NOT_ZERO, label);
10921
}
10922
else
10923
{
10924
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10925
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
10926
CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label);
10927
}
10928
10929
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10930
JUMPTO(SLJIT_JUMP, label);
10931
}
10932
else
10933
{
10934
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10935
10936
detect_partial_match(common, &no_match);
10937
label = LABEL();
10938
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
10939
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10940
if (common->utf)
10941
OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
10942
#endif
10943
10944
if (opcode == OP_UPTO)
10945
{
10946
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10947
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
10948
}
10949
10950
detect_partial_match_to(common, label);
10951
}
10952
10953
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10954
if (common->utf)
10955
{
10956
set_jumps(no_char1_match, LABEL());
10957
set_jumps(no_match, LABEL());
10958
if (use_tmp)
10959
{
10960
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10961
OP1(SLJIT_MOV, base, offset0, TMP3, 0);
10962
}
10963
else
10964
{
10965
OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0);
10966
OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0);
10967
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10968
}
10969
}
10970
else
10971
#endif
10972
{
10973
if (opcode != OP_UPTO || exact == 0)
10974
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10975
set_jumps(no_char1_match, LABEL());
10976
10977
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10978
set_jumps(no_match, LABEL());
10979
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10980
}
10981
10982
if (opcode == OP_UPTO)
10983
{
10984
if (exact > 0)
10985
{
10986
if (max == exact)
10987
jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact);
10988
else
10989
jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
10990
10991
add_jump(compiler, &backtrack->own_backtracks, jump);
10992
}
10993
}
10994
else if (exact == 1)
10995
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0));
10996
10997
if (early_fail_ptr != 0)
10998
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10999
11000
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11001
break;
11002
11003
case OP_QUERY:
11004
SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11005
if (private_data_ptr == 0)
11006
allocate_stack(common, 1);
11007
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11008
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11009
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11010
break;
11011
11012
case OP_MINSTAR:
11013
case OP_MINQUERY:
11014
SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0));
11015
if (private_data_ptr == 0)
11016
allocate_stack(common, 1);
11017
11018
if (exact >= 1)
11019
{
11020
if (exact >= 2)
11021
{
11022
/* Extuni has a separate exact opcode. */
11023
SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0);
11024
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11025
}
11026
11027
if (opcode == OP_MINQUERY)
11028
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1);
11029
11030
label = LABEL();
11031
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11032
11033
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11034
11035
if (exact >= 2)
11036
{
11037
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11038
JUMPTO(SLJIT_NOT_ZERO, label);
11039
}
11040
11041
if (opcode == OP_MINQUERY)
11042
OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0);
11043
else
11044
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11045
}
11046
else
11047
{
11048
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11049
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11050
}
11051
11052
if (early_fail_ptr != 0)
11053
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11054
break;
11055
11056
case OP_MINUPTO:
11057
SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11058
if (private_data_ptr == 0)
11059
allocate_stack(common, 2);
11060
11061
OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11062
11063
if (exact == 0)
11064
{
11065
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11066
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11067
break;
11068
}
11069
11070
if (exact >= 2)
11071
{
11072
/* Extuni has a separate exact opcode. */
11073
SLJIT_ASSERT(tmp_base == TMP3);
11074
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11075
}
11076
11077
label = LABEL();
11078
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11079
11080
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11081
11082
if (exact >= 2)
11083
{
11084
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11085
JUMPTO(SLJIT_NOT_ZERO, label);
11086
}
11087
11088
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11089
break;
11090
11091
case OP_EXACT:
11092
SLJIT_ASSERT(backtrack == NULL);
11093
break;
11094
11095
case OP_POSSTAR:
11096
SLJIT_ASSERT(backtrack == NULL);
11097
#if defined SUPPORT_UNICODE
11098
if (type == OP_ALLANY && !common->invalid_utf)
11099
#else
11100
if (type == OP_ALLANY)
11101
#endif
11102
{
11103
if (exact == 1)
11104
detect_partial_match(common, prev_backtracks);
11105
11106
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11107
process_partial_match(common);
11108
if (early_fail_ptr != 0)
11109
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11110
break;
11111
}
11112
11113
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11114
if (common->utf)
11115
{
11116
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11117
11118
if (tmp_base != TMP3)
11119
{
11120
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11121
tmp_base = COUNT_MATCH;
11122
}
11123
11124
OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? SLJIT_IMM : STR_PTR, 0);
11125
detect_partial_match(common, &no_match);
11126
label = LABEL();
11127
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11128
OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0);
11129
detect_partial_match_to(common, label);
11130
11131
set_jumps(no_match, LABEL());
11132
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0);
11133
11134
if (tmp_base != TMP3)
11135
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11136
11137
if (exact == 1)
11138
add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
11139
11140
if (early_fail_ptr != 0)
11141
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11142
break;
11143
}
11144
#endif
11145
11146
if (exact == 1)
11147
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11148
11149
detect_partial_match(common, &no_match);
11150
label = LABEL();
11151
/* Extuni never fails, so no_char1_match is not used in that case.
11152
Anynl optionally reads an extra character on success. */
11153
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11154
detect_partial_match_to(common, label);
11155
if (type != OP_EXTUNI)
11156
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11157
11158
set_jumps(no_char1_match, LABEL());
11159
if (type != OP_EXTUNI)
11160
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11161
11162
set_jumps(no_match, LABEL());
11163
11164
if (exact == 1)
11165
add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0));
11166
11167
if (early_fail_ptr != 0)
11168
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11169
break;
11170
11171
case OP_POSUPTO:
11172
SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11173
max += exact;
11174
11175
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11176
if (type == OP_EXTUNI || common->utf)
11177
#else
11178
if (type == OP_EXTUNI)
11179
#endif
11180
{
11181
SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
11182
11183
/* Count match is not modified by compile_char1_matchingpath. */
11184
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11185
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 0 : max);
11186
11187
label = LABEL();
11188
/* Extuni only modifies TMP3 on successful match. */
11189
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11190
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11191
11192
if (exact == max)
11193
{
11194
OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11195
JUMPTO(SLJIT_JUMP, label);
11196
}
11197
else
11198
{
11199
OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11200
JUMPTO(SLJIT_NOT_ZERO, label);
11201
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11202
}
11203
11204
set_jumps(no_match, LABEL());
11205
11206
if (exact > 0)
11207
{
11208
if (exact == max)
11209
OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact);
11210
else
11211
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact);
11212
}
11213
11214
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11215
11216
if (exact > 0)
11217
add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER));
11218
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11219
break;
11220
}
11221
11222
SLJIT_ASSERT(tmp_base == TMP3);
11223
11224
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : max);
11225
11226
detect_partial_match(common, &no_match);
11227
label = LABEL();
11228
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11229
11230
if (exact == max)
11231
OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11232
else
11233
{
11234
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11235
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11236
}
11237
detect_partial_match_to(common, label);
11238
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11239
11240
set_jumps(no_char1_match, LABEL());
11241
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11242
set_jumps(no_match, LABEL());
11243
11244
if (exact > 0)
11245
{
11246
if (exact == max)
11247
jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact);
11248
else
11249
jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
11250
11251
add_jump(compiler, prev_backtracks, jump);
11252
}
11253
break;
11254
11255
case OP_POSQUERY:
11256
SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11257
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11258
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11259
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11260
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11261
set_jumps(no_match, LABEL());
11262
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11263
break;
11264
11265
default:
11266
SLJIT_UNREACHABLE();
11267
break;
11268
}
11269
11270
count_match(common);
11271
return end;
11272
}
11273
11274
/* Emits the matching path for the one-unit opcode at cc. The opcodes tested
for here are OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

OP_FAIL emits an unconditional jump queued on the new backtrack's
own_backtracks list. For the other opcodes a jump to the accept target is
emitted: it is bound to common->accept_label when that label exists, and is
queued on the common->accept list otherwise. Unless the notempty checks can
be skipped, the accept jump is guarded by run-time tests of the
PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART bits of jit_arguments.options.

Returns cc + 1. PUSH_BACKTRACK is given NULL as its error value. */
static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_SPTR next_cc = cc + 1;
BOOL has_entry;
/* Register used as the base address of the jit_arguments structure. */
int args_reg = ARGUMENTS;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (*cc == OP_FAIL)
  {
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
  return next_cc;
  }

has_entry = (common->currententry != NULL);

/* OP_ACCEPT with no current entry and PCRE2_ENDANCHORED set: queue a jump on
restart_match that is taken when STR_PTR differs from STR_END. */
if (!has_entry && *cc == OP_ACCEPT && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
  {
  add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
  }

if (has_entry || *cc == OP_ASSERT_ACCEPT || !common->might_be_empty)
  {
  /* No need to check notempty conditions. */
  if (common->accept_label != NULL)
    {
    JUMPTO(SLJIT_JUMP, common->accept_label);
    }
  else
    {
    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
    }
  return next_cc;
  }

/* Accept immediately when STR_PTR differs from the value stored in the
OVECTOR(0) slot. */
if (common->accept_label != NULL)
  {
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
  }
else
  {
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
  }

/* Load the run-time options into TMP2. With virtual registers the arguments
pointer is first copied into TMP1, and TMP1 is used as the base from here on. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  args_reg = TMP1;
  }
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options));

/* PCRE2_NOTEMPTY set: backtrack. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));

/* PCRE2_NOTEMPTY_ATSTART clear: accept. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
if (common->accept_label != NULL)
  {
  JUMPTO(SLJIT_ZERO, common->accept_label);
  }
else
  {
  add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
  }

/* Accept when the str field of the arguments differs from STR_PTR; backtrack
otherwise. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, str));
if (common->accept_label != NULL)
  {
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
  }
else
  {
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
  }
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
return next_cc;
}
11329
11330
/* Emits the matching path for the opcode at cc, whose 2-unit immediate
operand (read with GET2) is a group index.

STR_PTR is stored into the OVECTOR slot at index (2 * group + 1). When
common->optimized_cbracket[group] is zero, the value kept in
OVECTOR_PRIV(group) is also copied into the OVECTOR slot at index
(2 * group). Nothing is emitted when common->currententry is set.

Returns the address following the opcode and its immediate operand. */
static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
{
DEFINE_COMPILER;
const int group = GET2(cc, 1);
const int start_slot = group << 1;
const int end_slot = start_slot + 1;
PCRE2_SPTR next_cc = cc + 1 + IMM2_SIZE;

/* Data will be discarded anyway... */
if (common->currententry != NULL)
  {
  return next_cc;
  }

if (common->optimized_cbracket[group] != 0)
  {
  /* Only the end slot needs to be written. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(end_slot), STR_PTR, 0);
  }
else
  {
  /* Fetch the private value first, then write the end and start slots. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(group));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(end_slot), STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(start_slot), TMP1, 0);
  }

return next_cc;
}
11348
11349
/* Emits the matching path for the control verb opcode at cc and registers a
backtrack_common entry for it.

For OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG and OP_THEN_ARG the opcode is
followed by an argument, and the returned pointer skips 2 + cc[1] further
code units. OP_SKIP saves STR_PTR on the stack. OP_COMMIT_ARG, OP_PRUNE_ARG
and OP_THEN_ARG store the address cc + 2 into both the common->mark_ptr local
and the mark_ptr field of the jit_arguments structure.

Returns the address of the next opcode. PUSH_BACKTRACK is given NULL as its
error value. */
static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
const PCRE2_UCHAR verb = *cc;
const BOOL stores_mark = (verb == OP_COMMIT_ARG || verb == OP_PRUNE_ARG || verb == OP_THEN_ARG);
const BOOL has_argument = (stores_mark || verb == OP_SKIP_ARG);
PCRE2_SPTR next_cc = cc + 1;

if (has_argument)
  {
  next_cc += 2 + cc[1];
  }

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (verb == OP_SKIP)
  {
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  return next_cc;
  }

if (stores_mark)
  {
  /* Base register for the jit_arguments structure: TMP1 holds a copy of
  ARGUMENTS when virtual registers are in use. */
  int args_reg = ARGUMENTS;

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  }

return next_cc;
}
11380
11381
/* One-element opcode buffer containing OP_THEN_TRAP. The common.cc field of a
then_trap_backtrack is pointed at this array by compile_then_trap_matchingpath(). */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11382
11383
/* Registers a then_trap_backtrack for the code range cc .. ccend, makes it the
current common->then_trap, and emits the code that pushes the trap record.

The record occupies three stack slots, written at STACK(slots - 1) down to
STACK(slots - 3): the start offset (cc - common->start), the type_then_trap
marker, and the previous value of the control head local. When
get_framesize() returns a non-negative size, that many extra slots are
allocated below the record and initialised with init_frame(). The control
head local is updated to the address of the record's lowest slot. */
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *trap;
BOOL control_head_needed;
int framesize;
int slots;

PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);

trap = BACKTRACK_AS(then_trap_backtrack);
common->then_trap = trap;
trap->common.cc = then_trap_opcode;
trap->start = (sljit_sw)(cc - common->start);
trap->framesize = get_framesize(common, cc, ccend, FALSE, &control_head_needed);

/* Three slots for the trap record plus the frame, if there is one. */
framesize = trap->framesize;
slots = 3;
if (framesize >= 0)
  {
  slots += framesize;
  }

/* Keep the previous control head in TMP2 while the stack is extended. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, slots);

if (slots <= 3)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
  }
else
  {
  /* Skip over the frame slots so the control head addresses the record. */
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (slots - 3) * sizeof(sljit_sw));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 1), SLJIT_IMM, trap->start);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 2), SLJIT_IMM, type_then_trap);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 3), TMP2, 0);

if (framesize >= 0)
  {
  init_frame(common, cc, ccend, framesize - 1, 0);
  }
}
11413
11414
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11415
{
11416
DEFINE_COMPILER;
11417
backtrack_common *backtrack;
11418
BOOL has_then_trap = FALSE;
11419
then_trap_backtrack *save_then_trap = NULL;
11420
size_t op_len;
11421
11422
SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11423
11424
if (common->has_then && common->then_offsets[cc - common->start] != 0)
11425
{
11426
SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11427
has_then_trap = TRUE;
11428
save_then_trap = common->then_trap;
11429
/* Tail item on backtrack. */
11430
compile_then_trap_matchingpath(common, cc, ccend, parent);
11431
}
11432
11433
while (cc < ccend)
11434
{
11435
switch(*cc)
11436
{
11437
case OP_SOD:
11438
case OP_SOM:
11439
case OP_NOT_WORD_BOUNDARY:
11440
case OP_WORD_BOUNDARY:
11441
case OP_EODN:
11442
case OP_EOD:
11443
case OP_DOLL:
11444
case OP_DOLLM:
11445
case OP_CIRC:
11446
case OP_CIRCM:
11447
case OP_NOT_UCP_WORD_BOUNDARY:
11448
case OP_UCP_WORD_BOUNDARY:
11449
cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11450
break;
11451
11452
case OP_NOT_DIGIT:
11453
case OP_DIGIT:
11454
case OP_NOT_WHITESPACE:
11455
case OP_WHITESPACE:
11456
case OP_NOT_WORDCHAR:
11457
case OP_WORDCHAR:
11458
case OP_ANY:
11459
case OP_ALLANY:
11460
case OP_ANYBYTE:
11461
case OP_NOTPROP:
11462
case OP_PROP:
11463
case OP_ANYNL:
11464
case OP_NOT_HSPACE:
11465
case OP_HSPACE:
11466
case OP_NOT_VSPACE:
11467
case OP_VSPACE:
11468
case OP_EXTUNI:
11469
case OP_NOT:
11470
case OP_NOTI:
11471
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11472
break;
11473
11474
case OP_SET_SOM:
11475
PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11476
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11477
allocate_stack(common, 1);
11478
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11479
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11480
cc++;
11481
break;
11482
11483
case OP_CHAR:
11484
case OP_CHARI:
11485
if (common->mode == PCRE2_JIT_COMPLETE)
11486
cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11487
else
11488
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11489
break;
11490
11491
case OP_STAR:
11492
case OP_MINSTAR:
11493
case OP_PLUS:
11494
case OP_MINPLUS:
11495
case OP_QUERY:
11496
case OP_MINQUERY:
11497
case OP_UPTO:
11498
case OP_MINUPTO:
11499
case OP_EXACT:
11500
case OP_POSSTAR:
11501
case OP_POSPLUS:
11502
case OP_POSQUERY:
11503
case OP_POSUPTO:
11504
case OP_STARI:
11505
case OP_MINSTARI:
11506
case OP_PLUSI:
11507
case OP_MINPLUSI:
11508
case OP_QUERYI:
11509
case OP_MINQUERYI:
11510
case OP_UPTOI:
11511
case OP_MINUPTOI:
11512
case OP_EXACTI:
11513
case OP_POSSTARI:
11514
case OP_POSPLUSI:
11515
case OP_POSQUERYI:
11516
case OP_POSUPTOI:
11517
case OP_NOTSTAR:
11518
case OP_NOTMINSTAR:
11519
case OP_NOTPLUS:
11520
case OP_NOTMINPLUS:
11521
case OP_NOTQUERY:
11522
case OP_NOTMINQUERY:
11523
case OP_NOTUPTO:
11524
case OP_NOTMINUPTO:
11525
case OP_NOTEXACT:
11526
case OP_NOTPOSSTAR:
11527
case OP_NOTPOSPLUS:
11528
case OP_NOTPOSQUERY:
11529
case OP_NOTPOSUPTO:
11530
case OP_NOTSTARI:
11531
case OP_NOTMINSTARI:
11532
case OP_NOTPLUSI:
11533
case OP_NOTMINPLUSI:
11534
case OP_NOTQUERYI:
11535
case OP_NOTMINQUERYI:
11536
case OP_NOTUPTOI:
11537
case OP_NOTMINUPTOI:
11538
case OP_NOTEXACTI:
11539
case OP_NOTPOSSTARI:
11540
case OP_NOTPOSPLUSI:
11541
case OP_NOTPOSQUERYI:
11542
case OP_NOTPOSUPTOI:
11543
case OP_TYPESTAR:
11544
case OP_TYPEMINSTAR:
11545
case OP_TYPEPLUS:
11546
case OP_TYPEMINPLUS:
11547
case OP_TYPEQUERY:
11548
case OP_TYPEMINQUERY:
11549
case OP_TYPEUPTO:
11550
case OP_TYPEMINUPTO:
11551
case OP_TYPEEXACT:
11552
case OP_TYPEPOSSTAR:
11553
case OP_TYPEPOSPLUS:
11554
case OP_TYPEPOSQUERY:
11555
case OP_TYPEPOSUPTO:
11556
cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11557
break;
11558
11559
case OP_CLASS:
11560
case OP_NCLASS:
11561
if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
11562
cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11563
else
11564
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11565
break;
11566
11567
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11568
case OP_XCLASS:
11569
case OP_ECLASS:
11570
op_len = GET(cc, 1);
11571
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11572
cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11573
else
11574
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11575
break;
11576
#endif
11577
11578
case OP_REF:
11579
case OP_REFI:
11580
op_len = PRIV(OP_lengths)[*cc];
11581
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11582
cc = compile_ref_iterator_matchingpath(common, cc, parent);
11583
else
11584
{
11585
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
11586
cc += op_len;
11587
}
11588
break;
11589
11590
case OP_DNREF:
11591
case OP_DNREFI:
11592
op_len = PRIV(OP_lengths)[*cc];
11593
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11594
cc = compile_ref_iterator_matchingpath(common, cc, parent);
11595
else
11596
{
11597
compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11598
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
11599
cc += op_len;
11600
}
11601
break;
11602
11603
case OP_RECURSE:
11604
cc = compile_recurse_matchingpath(common, cc, parent);
11605
break;
11606
11607
case OP_CALLOUT:
11608
case OP_CALLOUT_STR:
11609
cc = compile_callout_matchingpath(common, cc, parent);
11610
break;
11611
11612
case OP_ASSERT:
11613
case OP_ASSERT_NOT:
11614
case OP_ASSERTBACK:
11615
case OP_ASSERTBACK_NOT:
11616
PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
11617
cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
11618
break;
11619
11620
case OP_BRAMINZERO:
11621
PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
11622
cc = bracketend(cc + 1);
11623
if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
11624
{
11625
allocate_stack(common, 1);
11626
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11627
}
11628
else
11629
{
11630
allocate_stack(common, 2);
11631
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11632
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
11633
}
11634
BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
11635
count_match(common);
11636
break;
11637
11638
case OP_ASSERT_NA:
11639
case OP_ASSERTBACK_NA:
11640
case OP_ASSERT_SCS:
11641
case OP_ONCE:
11642
case OP_SCRIPT_RUN:
11643
case OP_BRA:
11644
case OP_CBRA:
11645
case OP_COND:
11646
case OP_SBRA:
11647
case OP_SCBRA:
11648
case OP_SCOND:
11649
cc = compile_bracket_matchingpath(common, cc, parent);
11650
break;
11651
11652
case OP_BRAZERO:
11653
if (cc[1] > OP_ASSERTBACK_NOT)
11654
cc = compile_bracket_matchingpath(common, cc, parent);
11655
else
11656
{
11657
PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
11658
cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
11659
}
11660
break;
11661
11662
case OP_BRAPOS:
11663
case OP_CBRAPOS:
11664
case OP_SBRAPOS:
11665
case OP_SCBRAPOS:
11666
case OP_BRAPOSZERO:
11667
cc = compile_bracketpos_matchingpath(common, cc, parent);
11668
break;
11669
11670
case OP_MARK:
11671
PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11672
SLJIT_ASSERT(common->mark_ptr != 0);
11673
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
11674
allocate_stack(common, common->has_skip_arg ? 5 : 1);
11675
if (HAS_VIRTUAL_REGISTERS)
11676
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11677
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
11678
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11679
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11680
OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11681
if (common->has_skip_arg)
11682
{
11683
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11684
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11685
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
11686
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
11687
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
11688
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11689
}
11690
cc += 1 + 2 + cc[1];
11691
break;
11692
11693
case OP_PRUNE:
11694
case OP_PRUNE_ARG:
11695
case OP_SKIP:
11696
case OP_SKIP_ARG:
11697
case OP_THEN:
11698
case OP_THEN_ARG:
11699
case OP_COMMIT:
11700
case OP_COMMIT_ARG:
11701
cc = compile_control_verb_matchingpath(common, cc, parent);
11702
break;
11703
11704
case OP_FAIL:
11705
case OP_ACCEPT:
11706
case OP_ASSERT_ACCEPT:
11707
cc = compile_fail_accept_matchingpath(common, cc, parent);
11708
break;
11709
11710
case OP_CLOSE:
11711
cc = compile_close_matchingpath(common, cc);
11712
break;
11713
11714
case OP_SKIPZERO:
11715
cc = bracketend(cc + 1);
11716
break;
11717
11718
default:
11719
SLJIT_UNREACHABLE();
11720
return;
11721
}
11722
if (cc == NULL)
11723
return;
11724
}
11725
11726
if (has_then_trap)
11727
{
11728
/* Head item on backtrack. */
11729
PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11730
BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11731
BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
11732
common->then_trap = save_then_trap;
11733
}
11734
SLJIT_ASSERT(cc == ccend);
11735
}
11736
11737
/* The matching path helpers are not needed past this point. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Generates the backtracking path for the given backtrack item and leaves
the calling function (which must return void) as soon as the sljit compiler
reports an error. Expects "common" and "compiler" to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Reinterprets the local "current" backtrack pointer as the given type. */
#define CURRENT_AS(type) ((type *)current)
11751
11752
/* Emits code that moves STR_PTR back over the CR of a CR NL pair.

The caller is expected to have loaded a lower bound for the subject position
into TMP2. If the position one character before STR_PTR is not above that
bound, nothing is changed. Otherwise the two code units preceding STR_PTR are
combined and compared against CR followed by NL; on a match STR_PTR is
decreased by one character. TMP1 and SLJIT_TMP_DEST_REG are overwritten. */
static void compile_newline_move_back(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *at_lower_bound;

OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
at_lower_bound = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0);

/* All newlines are single byte, or their last byte
is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. */
OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

/* Pack the two code units into one value: (previous << 8) | last. */
OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0);

/* TMP1 receives the result of the equality test against the CR NL pair. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (CHAR_CR << 8) | CHAR_NL);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Scale the flag to the size of a code unit. */
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(at_lower_bound);
}
11773
11774
/* Emits the backtracking path of a single character iterator.

The iterator parameters (opcode, type, max, exact) are decoded again from the
byte code stored in the backtrack item. The saved state of the iterator is
read from the top of the backtrack stack when the iterator has no private
data, and from its private data slots otherwise. Each case either resumes
the matching path with a new STR_PTR, or falls through to the end of the
generated code after releasing the stack slots it owns. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iterator = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *skip = NULL;
jump_list *no_match = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int on_stack = (private_data_ptr == 0);
int is_anynl;
int base, offset0, offset1;

/* Select where the two state words of the iterator are stored. */
if (on_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + SSIZE_OF(sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
is_anynl = (type == OP_ANYNL);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_EXTUNI)
    {
    /* This variant always keeps its saved positions on the stack. A zero
    entry ends the list of saved positions. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(current->own_backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
    break;
    }

  if (iterator->charpos.charpos_enabled)
    {
    /* Step back until the character at STR_PTR equals the expected one, or
    the lower bound stored in the second state word is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);

    skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    if (is_anynl)
      compile_newline_move_back(common);
    move_back(common, NULL, TRUE);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (iterator->charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iterator->charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iterator->charpos.chr, iterator->matchingpath);
    /* The range beginning must match, no need to compare. */
    JUMPTO(SLJIT_JUMP, retry);

    set_jumps(current->own_backtracks, LABEL());
    current->own_backtracks = NULL;
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);

    if (opcode == OP_STAR && exact == 1)
      {
      /* Here the position is moved back first and compared against the
      lower bound afterwards. */
      if (is_anynl)
        {
        OP1(SLJIT_MOV, TMP2, 0, base, offset1);
        compile_newline_move_back(common);
        }

      move_back(common, NULL, TRUE);
      skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      }
    else
      {
      /* Here the lower bound is checked before the position is moved. */
      if (is_anynl)
        {
        OP1(SLJIT_MOV, TMP2, 0, base, offset1);
        skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
        compile_newline_move_back(common);
        }
      else
        skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);

      move_back(common, NULL, TRUE);
      }

    /* Remember the new position and continue matching from there. */
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iterator->matchingpath);

    set_jumps(current->own_backtracks, LABEL());
    }

  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* Load the saved position and clear the slot. A non-zero position is
  retried on the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
  skip = JUMP(SLJIT_JUMP);

  /* Entry point of the own backtracks: the same reload and clear sequence,
  followed by an unconditional return to the matching path. */
  set_jumps(current->own_backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);

  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINSTAR:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  if (exact == 0)
    {
    /* Try to consume one more character; failures are collected locally. */
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    }
  else if (exact > 1)
    {
    /* NOTE(review): TMP3 is presumably the repeat counter consumed by the
    matching path when a fixed prefix exists - confirm there. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
    }

  JUMPTO(SLJIT_JUMP, iterator->matchingpath);
  set_jumps(exact != 0 ? current->own_backtracks : no_match, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* The second state word is a counter, it is decreased by one here. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);

  if (exact != 0)
    {
    if (exact > 1)
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
    JUMPTO(SLJIT_NOT_ZERO, iterator->matchingpath);

    set_jumps(current->own_backtracks, LABEL());
    }
  else
    {
    /* A counter which has reached zero ends the iteration. */
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));

    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iterator->matchingpath);

    set_jumps(no_match, LABEL());
    }

  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_MINQUERY:
  /* Load the saved position and clear the slot. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);

  if (exact == 0)
    {
    /* A zero position means the optional character has been tried. */
    skip = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    JUMPTO(SLJIT_JUMP, iterator->matchingpath);
    set_jumps(no_match, LABEL());
    JUMPHERE(skip);
    }
  else
    {
    if (exact > 1)
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
    set_jumps(current->own_backtracks, LABEL());
    }

  if (on_stack)
    free_stack(common, 1);
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }
}
11957
11958
/* Emits the backtracking path of a repeated back reference.

The repeat opcode follows the reference opcode in the byte code. Repeat
opcodes with a clear lowest bit are handled as the maximizing variants, the
others as the minimizing ones. In both cases a non-zero saved position on
the top of the stack resumes the matching path. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
ref_iterator_backtrack *iterator = CURRENT_AS(ref_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL is_numbered_ref = (*cc == OP_REF || *cc == OP_REFI);
PCRE2_UCHAR repeat_op = cc[PRIV(OP_lengths)[*cc]];

if ((repeat_op & 0x1) != 0)
  {
  /* Minimize case. The stack slots are released only when the saved
  position is zero; two slots for OP_REF / OP_REFI, three otherwise. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
  set_jumps(current->own_backtracks, LABEL());
  free_stack(common, is_numbered_ref ? 2 : 3);
  }
else
  {
  /* Maximize case. One saved position is popped per backtrack. */
  set_jumps(current->own_backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
  }
}
11982
11983
/* Emits the backtracking path of a recursion.

An inlined pattern is handled by generating the backtracking path of its
own backtrack list. Otherwise the backtrack entry of the recursed function
is invoked with a fast call, and a non-zero TMP1 after the call resumes the
matching path. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
recurse_backtrack *recurse = CURRENT_AS(recurse_backtrack);
recurse_entry *entry;

if (recurse->inlined_pattern)
  compile_backtrackingpath(common, current->top);
else
  {
  entry = recurse->entry;

  /* The label may not exist yet, in that case the call is recorded in the
  backtrack_calls list of the entry. */
  if (entry->backtrack_label != NULL)
    JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
  else
    add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));

  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, recurse->matchingpath);
  }

set_jumps(current->own_backtracks, LABEL());
}
12002
12003
/* Emits the backtracking path of an assertion, which may be preceded by
OP_BRAZERO (OP_BRAMINZERO is never expected here).

For an OP_BRAZERO prefixed assertion the saved STR_PTR is read from the top
of the stack first. A zero value is stored in that stack slot before the
matching path is resumed. Positive assertions with a non-negative framesize
also revert their frame before the own backtracks are bound. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *backtrack = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL is_brazero;
struct sljit_jump *brajump = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
is_brazero = (*cc == OP_BRAZERO);

if (is_brazero)
  {
  /* Skip the prefix, cc points to the assertion opcode from now on. */
  cc++;
  SLJIT_ASSERT(current->own_backtracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (backtrack->framesize < 0)
  {
  /* No frame needs to be reverted. */
  set_jumps(current->own_backtracks, LABEL());

  if (is_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (is_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    return;
    }

  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Revert the frame of the positive assertion and restore the previous
  value of its private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (backtrack->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr, TMP1, 0);
  }

set_jumps(current->own_backtracks, LABEL());

if (!is_brazero)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
JUMPHERE(brajump);
}
12071
12072
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12073
{
12074
DEFINE_COMPILER;
12075
int opcode, stacksize, alt_count, alt_max;
12076
int offset = 0;
12077
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12078
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12079
PCRE2_SPTR cc = current->cc;
12080
PCRE2_SPTR ccbegin;
12081
PCRE2_SPTR ccprev;
12082
PCRE2_UCHAR bra = OP_BRA;
12083
PCRE2_UCHAR ket;
12084
const assert_backtrack *assert;
12085
BOOL has_alternatives;
12086
BOOL needs_control_head = FALSE;
12087
BOOL has_vreverse;
12088
struct sljit_jump *brazero = NULL;
12089
struct sljit_jump *next_alt = NULL;
12090
struct sljit_jump *once = NULL;
12091
struct sljit_jump *cond = NULL;
12092
struct sljit_label *rmin_label = NULL;
12093
struct sljit_label *exact_label = NULL;
12094
struct sljit_jump *mov_addr = NULL;
12095
12096
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12097
{
12098
bra = *cc;
12099
cc++;
12100
}
12101
12102
opcode = *cc;
12103
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12104
ket = *ccbegin;
12105
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12106
{
12107
repeat_ptr = PRIVATE_DATA(ccbegin);
12108
repeat_type = PRIVATE_DATA(ccbegin + 2);
12109
repeat_count = PRIVATE_DATA(ccbegin + 3);
12110
SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12111
if (repeat_type == OP_UPTO)
12112
ket = OP_KETRMAX;
12113
if (repeat_type == OP_MINUPTO)
12114
ket = OP_KETRMIN;
12115
}
12116
ccbegin = cc;
12117
cc += GET(cc, 1);
12118
has_alternatives = *cc == OP_ALT;
12119
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12120
has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL;
12121
if (opcode == OP_CBRA || opcode == OP_SCBRA)
12122
offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12123
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12124
opcode = OP_SCOND;
12125
12126
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12127
12128
/* Decoding the needs_control_head in framesize. */
12129
if (opcode == OP_ONCE)
12130
{
12131
needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12132
CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12133
}
12134
12135
if (ket != OP_KET && repeat_type != 0)
12136
{
12137
/* TMP1 is used in OP_KETRMIN below. */
12138
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12139
free_stack(common, 1);
12140
if (repeat_type == OP_UPTO)
12141
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12142
else
12143
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12144
}
12145
12146
if (ket == OP_KETRMAX)
12147
{
12148
if (bra == OP_BRAZERO)
12149
{
12150
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12151
free_stack(common, 1);
12152
brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12153
}
12154
}
12155
else if (ket == OP_KETRMIN)
12156
{
12157
if (bra != OP_BRAMINZERO)
12158
{
12159
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12160
if (repeat_type != 0)
12161
{
12162
/* TMP1 was set a few lines above. */
12163
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12164
/* Drop STR_PTR for non-greedy plus quantifier. */
12165
if (opcode != OP_ONCE)
12166
free_stack(common, 1);
12167
}
12168
else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12169
{
12170
/* Checking zero-length iteration. */
12171
if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12172
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12173
else
12174
{
12175
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12176
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12177
}
12178
/* Drop STR_PTR for non-greedy plus quantifier. */
12179
if (opcode != OP_ONCE)
12180
free_stack(common, 1);
12181
}
12182
else
12183
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12184
}
12185
rmin_label = LABEL();
12186
if (repeat_type != 0)
12187
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12188
}
12189
else if (bra == OP_BRAZERO)
12190
{
12191
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12192
free_stack(common, 1);
12193
brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12194
}
12195
else if (repeat_type == OP_EXACT)
12196
{
12197
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12198
exact_label = LABEL();
12199
}
12200
12201
if (offset != 0)
12202
{
12203
if (common->capture_last_ptr != 0)
12204
{
12205
SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12206
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12207
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12208
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12209
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12210
free_stack(common, 3);
12211
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12212
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12213
}
12214
else if (common->optimized_cbracket[offset >> 1] == 0)
12215
{
12216
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12217
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12218
free_stack(common, 2);
12219
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12220
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12221
}
12222
}
12223
else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS))
12224
{
12225
OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
12226
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12227
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
12228
12229
/* Nested scs blocks will not update this variable. */
12230
if (common->restore_end_ptr == 0)
12231
common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
12232
}
12233
12234
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12235
{
12236
int framesize = CURRENT_AS(bracket_backtrack)->u.framesize;
12237
12238
SLJIT_ASSERT(framesize != 0);
12239
if (framesize > 0)
12240
{
12241
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12242
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12243
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
12244
}
12245
once = JUMP(SLJIT_JUMP);
12246
}
12247
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12248
{
12249
if (has_alternatives)
12250
{
12251
/* Always exactly one alternative. */
12252
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12253
free_stack(common, 1);
12254
12255
alt_max = 2;
12256
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12257
}
12258
}
12259
else if (has_alternatives)
12260
{
12261
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12262
free_stack(common, 1);
12263
12264
if (alt_max > 3)
12265
{
12266
sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12267
12268
SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL);
12269
sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL());
12270
sljit_emit_op0(compiler, SLJIT_ENDBR);
12271
}
12272
else
12273
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12274
}
12275
12276
COMPILE_BACKTRACKINGPATH(current->top);
12277
if (current->own_backtracks)
12278
set_jumps(current->own_backtracks, LABEL());
12279
12280
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12281
{
12282
/* Conditional block always has at most one alternative. */
12283
if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12284
{
12285
SLJIT_ASSERT(has_alternatives);
12286
assert = CURRENT_AS(bracket_backtrack)->u.assert;
12287
SLJIT_ASSERT(assert->framesize != 0);
12288
if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12289
{
12290
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12291
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12292
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12293
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12294
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12295
}
12296
cond = JUMP(SLJIT_JUMP);
12297
set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12298
}
12299
else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL)
12300
{
12301
SLJIT_ASSERT(has_alternatives);
12302
cond = JUMP(SLJIT_JUMP);
12303
set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12304
}
12305
else
12306
SLJIT_ASSERT(!has_alternatives);
12307
}
12308
12309
if (has_alternatives)
12310
{
12311
alt_count = 1;
12312
do
12313
{
12314
current->top = NULL;
12315
current->own_backtracks = NULL;
12316
current->simple_backtracks = NULL;
12317
/* Conditional blocks always have an additional alternative, even if it is empty. */
12318
if (*cc == OP_ALT)
12319
{
12320
ccprev = cc + 1 + LINK_SIZE;
12321
cc += GET(cc, 1);
12322
12323
has_vreverse = FALSE;
12324
12325
switch (opcode)
12326
{
12327
case OP_ASSERTBACK:
12328
case OP_ASSERTBACK_NA:
12329
SLJIT_ASSERT(private_data_ptr != 0);
12330
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12331
12332
has_vreverse = (*ccprev == OP_VREVERSE);
12333
if (*ccprev == OP_REVERSE || has_vreverse)
12334
ccprev = compile_reverse_matchingpath(common, ccprev, current);
12335
break;
12336
case OP_ASSERT_SCS:
12337
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12338
break;
12339
case OP_ONCE:
12340
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12341
break;
12342
case OP_COND:
12343
case OP_SCOND:
12344
break;
12345
default:
12346
if (private_data_ptr != 0)
12347
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12348
else
12349
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12350
break;
12351
}
12352
12353
compile_matchingpath(common, ccprev, cc, current);
12354
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12355
return;
12356
12357
switch (opcode)
12358
{
12359
case OP_ASSERTBACK_NA:
12360
if (has_vreverse)
12361
{
12362
SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
12363
add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
12364
}
12365
12366
if (PRIVATE_DATA(ccbegin + 1))
12367
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12368
break;
12369
case OP_ASSERT_NA:
12370
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12371
break;
12372
case OP_SCRIPT_RUN:
12373
match_script_run_common(common, private_data_ptr, current);
12374
break;
12375
}
12376
}
12377
12378
/* Instructions after the current alternative is successfully matched. */
12379
/* There is a similar code in compile_bracket_matchingpath. */
12380
if (opcode == OP_ONCE)
12381
match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12382
12383
stacksize = 0;
12384
if (repeat_type == OP_MINUPTO)
12385
{
12386
/* We need to preserve the counter. TMP2 will be used below. */
12387
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12388
stacksize++;
12389
}
12390
if (ket != OP_KET || bra != OP_BRA)
12391
stacksize++;
12392
if (offset != 0)
12393
{
12394
if (common->capture_last_ptr != 0)
12395
stacksize++;
12396
if (common->optimized_cbracket[offset >> 1] == 0)
12397
stacksize += 2;
12398
}
12399
if (opcode != OP_ONCE)
12400
stacksize++;
12401
12402
if (stacksize > 0)
12403
allocate_stack(common, stacksize);
12404
12405
stacksize = 0;
12406
if (repeat_type == OP_MINUPTO)
12407
{
12408
/* TMP2 was set above. */
12409
OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12410
stacksize++;
12411
}
12412
12413
if (ket != OP_KET || bra != OP_BRA)
12414
{
12415
if (ket != OP_KET)
12416
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12417
else
12418
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12419
stacksize++;
12420
}
12421
12422
if (offset != 0)
12423
stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12424
12425
if (opcode != OP_ONCE)
12426
{
12427
if (alt_max <= 3)
12428
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12429
else
12430
mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12431
}
12432
12433
if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12434
{
12435
/* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12436
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12437
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12438
}
12439
12440
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12441
12442
if (opcode != OP_ONCE)
12443
{
12444
if (alt_max <= 3)
12445
{
12446
JUMPHERE(next_alt);
12447
alt_count++;
12448
if (alt_count < alt_max)
12449
{
12450
SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12451
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12452
}
12453
}
12454
else
12455
{
12456
sljit_set_label(mov_addr, LABEL());
12457
sljit_emit_op0(compiler, SLJIT_ENDBR);
12458
}
12459
}
12460
12461
COMPILE_BACKTRACKINGPATH(current->top);
12462
if (current->own_backtracks)
12463
set_jumps(current->own_backtracks, LABEL());
12464
SLJIT_ASSERT(!current->simple_backtracks);
12465
}
12466
while (*cc == OP_ALT);
12467
12468
if (cond != NULL)
12469
{
12470
SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12471
if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT)
12472
{
12473
assert = CURRENT_AS(bracket_backtrack)->u.assert;
12474
SLJIT_ASSERT(assert->framesize != 0);
12475
if (assert->framesize > 0)
12476
{
12477
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12478
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12479
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12480
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12481
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12482
}
12483
}
12484
JUMPHERE(cond);
12485
}
12486
12487
/* Free the STR_PTR. */
12488
if (private_data_ptr == 0)
12489
free_stack(common, 1);
12490
}
12491
12492
if (offset != 0)
12493
{
12494
/* Using both tmp register is better for instruction scheduling. */
12495
if (common->optimized_cbracket[offset >> 1] != 0)
12496
{
12497
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12498
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12499
free_stack(common, 2);
12500
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12501
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12502
}
12503
else
12504
{
12505
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12506
free_stack(common, 1);
12507
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12508
}
12509
}
12510
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
12511
{
12512
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12513
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12514
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12515
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12516
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12517
free_stack(common, 4);
12518
}
12519
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12520
{
12521
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12522
free_stack(common, 1);
12523
}
12524
else if (opcode == OP_ASSERT_SCS)
12525
{
12526
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12527
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12528
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12529
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12530
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12531
free_stack(common, has_alternatives ? 3 : 2);
12532
12533
set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12534
12535
/* Nested scs blocks will not update this variable. */
12536
if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
12537
common->restore_end_ptr = 0;
12538
}
12539
else if (opcode == OP_ONCE)
12540
{
12541
cc = ccbegin + GET(ccbegin, 1);
12542
stacksize = needs_control_head ? 1 : 0;
12543
12544
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12545
{
12546
/* Reset head and drop saved frame. */
12547
stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12548
}
12549
else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12550
{
12551
/* The STR_PTR must be released. */
12552
stacksize++;
12553
}
12554
12555
if (stacksize > 0)
12556
free_stack(common, stacksize);
12557
12558
JUMPHERE(once);
12559
/* Restore previous private_data_ptr */
12560
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12561
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12562
else if (ket == OP_KETRMIN)
12563
{
12564
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12565
/* See the comment below. */
12566
free_stack(common, 2);
12567
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12568
}
12569
}
12570
12571
if (repeat_type == OP_EXACT)
12572
{
12573
OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12574
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12575
CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12576
}
12577
else if (ket == OP_KETRMAX)
12578
{
12579
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12580
if (bra != OP_BRAZERO)
12581
free_stack(common, 1);
12582
12583
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12584
if (bra == OP_BRAZERO)
12585
{
12586
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12587
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12588
JUMPHERE(brazero);
12589
free_stack(common, 1);
12590
}
12591
}
12592
else if (ket == OP_KETRMIN)
12593
{
12594
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12595
12596
/* OP_ONCE removes everything in case of a backtrack, so we don't
12597
need to explicitly release the STR_PTR. The extra release would
12598
affect badly the free_stack(2) above. */
12599
if (opcode != OP_ONCE)
12600
free_stack(common, 1);
12601
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12602
if (opcode == OP_ONCE)
12603
free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12604
else if (bra == OP_BRAMINZERO)
12605
free_stack(common, 1);
12606
}
12607
else if (bra == OP_BRAZERO)
12608
{
12609
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12610
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12611
JUMPHERE(brazero);
12612
}
12613
}
12614
12615
/* Emits the backtracking path of a possessive bracket (OP_BRAPOS, OP_CBRAPOS,
OP_SBRAPOS, OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO).

  common   compiler state
  current  the bracketpos_backtrack record being unwound

When the record has a frame (framesize >= 0) the frame is reverted through
the shared revertframes routine and the private data slot is reloaded from
the stack. Otherwise the saved capture values (if the bracket is a capturing
one) are written back and the record's stack slots are released. */

static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR bracket;
struct sljit_jump *skip_drop;
int ovector_index;
int framesize = CURRENT_AS(bracketpos_backtrack)->framesize;

if (framesize >= 0)
  {
  /* Reload STACK_TOP from the private data slot and revert the frame. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));

  if (current->own_backtracks != NULL)
    {
    /* The pending backtracks land here and drop the stack frame; the
    fall-through path jumps over the drop. */
    skip_drop = JUMP(SLJIT_JUMP);
    set_jumps(current->own_backtracks, LABEL());
    free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
    JUMPHERE(skip_drop);
    }

  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
  return;
  }

/* No frame was saved: nothing is retried, everything is simply dropped. */
bracket = current->cc;
if (*bracket == OP_BRAPOSZERO)
  bracket++;

if (*bracket == OP_CBRAPOS || *bracket == OP_SCBRAPOS)
  {
  ovector_index = (GET2(bracket, 1 + LINK_SIZE)) << 1;

  /* The loads and stores are interleaved across TMP1 and TMP2. */
  if (common->capture_last_ptr != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1), TMP2, 0);
    }
  }

set_jumps(current->own_backtracks, LABEL());
free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
}
12661
12662
/* Emits the backtracking path of OP_BRAMINZERO.

  common   compiler state
  current  the braminzero_backtrack record; its top, own_backtracks and
           simple_backtracks lists are cleared before use

The opcode following OP_BRAMINZERO selects the work: anything above
OP_ASSERTBACK_NOT is compiled as a bracket (matching path followed by its
backtracking path), everything else as an assertion. */

static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
PCRE2_SPTR cc = current->cc;

current->top = NULL;
current->own_backtracks = NULL;
current->simple_backtracks = NULL;

if (cc[1] <= OP_ASSERTBACK_NOT)
  {
  /* Manual call of compile_assert_matchingpath through a zeroed scratch
  record that targets the matching path stored in the braminzero record. */
  assert_backtrack backtrack;

  memset(&backtrack, 0, sizeof(backtrack));
  backtrack.common.cc = cc;
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  compile_assert_matchingpath(common, cc, &backtrack, FALSE);
  }
else
  {
  /* Manual call of compile_bracket_matchingpath and
  compile_bracket_backtrackingpath. */
  compile_bracket_matchingpath(common, cc, current);
  compile_bracket_backtrackingpath(common, current->top);
  }

SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
}
12685
12686
/* Emits the backtracking path of the control verbs THEN, PRUNE and SKIP
(with or without an argument).

  common   compiler state
  current  backtrack record whose cc points at the verb opcode

Decision order:
  1. THEN with an active then trap: search the control chain for the trap
     and jump to the trap's quit list.
  2. THEN inside a positive assertion without a local quit: jump to the
     positive assertion quit list.
  3. A local quit is available: abort the (local) match with a fail.
  4. SKIP with an argument: call do_search_mark and restart the match at the
     returned position when it is non-zero.
  5. Otherwise: jump to reset_match with STR_PTR loaded from the stack
     (SKIP) or set to zero (every other verb). */

static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_UCHAR verb = *current->cc;
struct sljit_label *scan_next;
struct sljit_jump *enter_scan;
struct sljit_jump *mark_missing;

if (verb == OP_THEN || verb == OP_THEN_ARG)
  {
  if (common->then_trap != NULL)
    {
    SLJIT_ASSERT(common->control_head_ptr != 0);

    /* Follow the chain that starts at control_head_ptr until an entry is
    found whose slot 1 equals type_then_trap and whose slot 2 equals the
    start of the active trap. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
    enter_scan = JUMP(SLJIT_JUMP);

    scan_next = LABEL();
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    JUMPHERE(enter_scan);
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, scan_next);
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, scan_next);
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
    return;
    }

  if (!common->local_quit_available && common->in_positive_assertion)
    {
    add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
    return;
    }
  }

/* For OP_SKIP_ARG the reload of STR_END is emitted further below, only on
the path that really restarts the match. */
if (verb != OP_SKIP_ARG && common->restore_end_ptr != 0)
  {
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);
  }

if (common->local_quit_available)
  {
  /* Abort match with a fail. */
  if (common->quit_label != NULL)
    {
    JUMPTO(SLJIT_JUMP, common->quit_label);
    }
  else
    {
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    }
  return;
  }

if (verb == OP_SKIP_ARG)
  {
  /* do_search_mark receives the control head in R0 (TMP1) and a pointer to
  cc + 2 in R1; its result comes back in R0. */
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));

  if (common->restore_end_ptr != 0)
    {
    /* A zero result skips the restart; otherwise STR_END is reloaded before
    the jump to reset_match. */
    mark_missing = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);
    add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
    JUMPHERE(mark_missing);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
    add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
    }
  return;
  }

if (verb != OP_SKIP)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
  }
else
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}
12760
12761
/* Emits the backtracking path of OP_VREVERSE.

  common   compiler state
  current  the vreverse_backtrack record

STR_PTR is reloaded from stack slot 2. When it is greater than or equal to
the value in stack slot 3 the code falls out to the failure label, which
also receives the record's own backtracks. Otherwise skip_valid_char() is
applied, the new STR_PTR is written back to slot 2 and control returns to
the recorded matching path.
NOTE(review): slots 2 and 3 look like the current position and its limit
for the variable-length lookbehind - confirm against the matching path. */

static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;
struct sljit_label *fail_label;

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
limit_reached = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));

/* Try again from the next position. */
skip_valid_char(common);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);

/* Shared exit for the limit check and for the pending backtracks. */
fail_label = LABEL();
sljit_set_label(limit_reached, fail_label);
set_jumps(current->own_backtracks, fail_label);
}
12777
12778
/* Emits the backtracking path of the virtual OP_THEN_TRAP opcode.

  common   compiler state
  current  the then_trap_backtrack record

A record that carries a then_trap pointer only reinstates that pointer as
the active trap and emits no code. Otherwise two entry paths are emitted
that both end by storing TMP1 into the control head slot:
  - the ordinary backtrack path, which loads TMP1 from above the optional
    frame and releases the frame together with the three trap slots;
  - the path reached through the trap's quit list (STACK_TOP was already
    set by THEN), which reverts the frame when there is one and releases
    the three trap slots. */

static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
struct sljit_jump *skip_quit_path;
int framesize;
int frame_slots;

if (CURRENT_AS(then_trap_backtrack)->then_trap != NULL)
  {
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
  return;
  }

framesize = CURRENT_AS(then_trap_backtrack)->framesize;
/* A negative framesize means that no frame slots were pushed. */
frame_slots = (framesize > 0) ? framesize : 0;

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(frame_slots));
free_stack(common, frame_slots + 3);
skip_quit_path = JUMP(SLJIT_JUMP);

set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());

SLJIT_ASSERT(framesize != 0);

/* STACK_TOP is set by THEN. */
if (framesize > 0)
  {
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);

JUMPHERE(skip_quit_path);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
12815
12816
/* Emits the backtracking code for a chain of backtrack records.

  common   compiler state
  current  first record of the chain; the chain is followed through the
           prev links until it ends

For every record the pending simple_backtracks jumps are bound to the
current code position, then the opcode at the record's cc selects the
generator. The value of common->then_trap on entry is restored on exit,
because OP_THEN_TRAP records may replace it while the chain is processed. */

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *save_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Restore the previous start of match. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless single character iterators. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated single character iterators. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Caseless negated single character iterators. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type iterators. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* A class has no backtracking path of its own, so a record that points
    at a class was pushed by an iterator applied to it. */
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ASSERT_SCS:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO tells an assertion from a bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* Restore the previous mark. When a SKIP with argument is present the
    record is five slots large and also holds the previous control head. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    if (common->restore_end_ptr != 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);

    /* Without a local quit the whole match ends with a no-match result. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);

    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the pending backtracks. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = save_then_trap;
}
13018
13019
/* Emits the shared routine for the recursion target stored in
common->currententry.

  common  compiler state; currententry selects the group to compile

The routine has two entry points, both bound through labels stored in the
entry: entry_label starts a match of the group, backtrack_label re-enters it
to try the next alternative. Both are entered with a fast call and both
leave with a fast return. TMP1 holds 1 when the exit is taken after an
alternative matched and 0 on the failure and accept-exit returns.

(Reversed) stack layout of the recursion frame:
  [private data][return address][optional: str ptr] ...
  [optional: alternative index][recursive_head_ptr]

The function returns early, without finishing the routine, when the sljit
compiler reports an error. */

static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR group = common->start + common->currententry->start;
PCRE2_SPTR body_begin = group + 1 + LINK_SIZE + (*group == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR body_end = bracketend(group) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int data_size = get_recurse_data_length(common, body_begin, body_end, &recurse_flags);
PCRE2_SPTR alt_end;
int alt_index = 0;
int alt_total, local_size, frame_size, return_slot, record_size;
backtrack_common alt_backtrack;
jump_list *matched = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_jump *mov_addr = NULL;
struct sljit_label *fail_label;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*group == OP_BRA || *group == OP_CBRA || *group == OP_CBRAPOS || *group == OP_SCBRA || *group == OP_SCBRAPOS);

alt_total = no_alternatives(group);

/* With several alternatives the frame keeps the subject position next to
the return address. */
local_size = 1;
if (alt_total > 1)
  local_size = 2;
frame_size = data_size + local_size;
return_slot = local_size - 1;

/* Size of the record pushed after an alternative matched: a second slot is
needed when the alternative (or an accept marker) has to be remembered. */
record_size = (alt_total > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1;

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
count_match(common);

allocate_stack(common, frame_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(return_slot), TMP2, 0);

copy_recurse_data(common, body_begin, body_end, recurse_copy_from_global, local_size, frame_size, recurse_flags);

/* This variable is saved and restored all time when we enter or exit from a
recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  }

if (alt_total > 1)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

memset(&alt_backtrack, 0, sizeof(alt_backtrack));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
alt_backtrack.cc = body_begin;
alt_end = group + GET(group, 1);

for (;;)
  {
  alt_backtrack.top = NULL;
  alt_backtrack.own_backtracks = NULL;

  /* Every alternative after the first restarts from the saved position. */
  if (alt_backtrack.cc != body_begin)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }

  compile_matchingpath(common, alt_backtrack.cc, alt_end, &alt_backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  allocate_stack(common, record_size);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (record_size == 2)
    {
    /* More than three alternatives are told apart by a code address, fewer
    by their index. */
    if (alt_total > 3)
      mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_index);
    }

  add_jump(compiler, &matched, JUMP(SLJIT_JUMP));

  if (alt_index == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);

    /* A stored -1 marks a record pushed by the accept exit. */
    if (recurse_flags & recurse_flag_accept_found)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(return_slot), TMP1, 0);

    copy_recurse_data(common, body_begin, body_end, recurse_swap_global, local_size, frame_size, recurse_flags);

    if (alt_total > 1)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_total > 3)
        {
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_label(mov_addr, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    else
      {
      /* With a single alternative record_size is 2 exactly when an accept
      was found, so this releases what was allocated above. */
      free_stack(common, record_size);
      }
    }
  else if (alt_total > 3)
    {
    sljit_set_label(mov_addr, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    JUMPHERE(next_alt);
    if (alt_index + 1 < alt_total)
      {
      SLJIT_ASSERT(alt_index == 1 && alt_total == 3);
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_index++;

  compile_backtrackingpath(common, alt_backtrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(alt_backtrack.own_backtracks, LABEL());

  if (*alt_end != OP_ALT)
    break;

  alt_backtrack.cc = alt_end + 1 + LINK_SIZE;
  alt_end += GET(alt_end, 1);
  }

/* No alternative is matched. */

fail_label = LABEL();

copy_recurse_data(common, body_begin, body_end, recurse_copy_private_to_global, local_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(return_slot));
free_stack(common, frame_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Quit requests rewind to the frame of this recursion and then leave
  through the failure exit. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, body_begin, body_end, recurse_copy_shared_to_global, local_size, frame_size, recurse_flags);
  JUMPTO(SLJIT_JUMP, fail_label);
  }

if (recurse_flags & recurse_flag_accept_found)
  {
  /* Backtracking into a recursion that ended with an accept. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(return_slot), TMP1, 0);

  copy_recurse_data(common, body_begin, body_end, recurse_copy_kept_shared_to_global, local_size, frame_size, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(return_slot));
  free_stack(common, frame_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  set_jumps(common->accept, LABEL());

  /* Rewind to the frame and push a record marked with -1, then fall into
  the common match exit below. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Common exit after a match: TMP2 holds the frame address. */
set_jumps(matched, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, body_begin, body_end, recurse_swap_global, local_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(return_slot));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13227
13228
/* The helper macros of the backtracking path generators end here. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the jit_compile() mode argument that configure the compilation.
jit_compile() reads them and then clears them from the mode value. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13233
13234
static int jit_compile(pcre2_code *code, sljit_u32 mode)
13235
{
13236
pcre2_real_code *re = (pcre2_real_code *)code;
13237
struct sljit_compiler *compiler;
13238
backtrack_common rootbacktrack;
13239
compiler_common common_data;
13240
compiler_common *common = &common_data;
13241
const sljit_u8 *tables = re->tables;
13242
void *allocator_data = &re->memctl;
13243
int private_data_size;
13244
PCRE2_SPTR ccend;
13245
executable_functions *functions;
13246
void *executable_func;
13247
sljit_uw executable_size, private_data_length, total_length;
13248
struct sljit_label *mainloop_label = NULL;
13249
struct sljit_label *continue_match_label;
13250
struct sljit_label *empty_match_found_label = NULL;
13251
struct sljit_label *empty_match_backtrack_label = NULL;
13252
struct sljit_label *reset_match_label;
13253
struct sljit_label *quit_label;
13254
struct sljit_jump *jump;
13255
struct sljit_jump *minlength_check_failed = NULL;
13256
struct sljit_jump *empty_match = NULL;
13257
struct sljit_jump *end_anchor_failed = NULL;
13258
jump_list *reqcu_not_found = NULL;
13259
13260
SLJIT_ASSERT(tables);
13261
13262
#if HAS_VIRTUAL_REGISTERS == 1
13263
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
13264
#elif HAS_VIRTUAL_REGISTERS == 0
13265
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
13266
#else
13267
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13268
#endif
13269
13270
memset(&rootbacktrack, 0, sizeof(backtrack_common));
13271
memset(common, 0, sizeof(compiler_common));
13272
common->re = re;
13273
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13274
rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
13275
13276
#ifdef SUPPORT_UNICODE
13277
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13278
#endif /* SUPPORT_UNICODE */
13279
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13280
13281
common->start = rootbacktrack.cc;
13282
common->read_only_data_head = NULL;
13283
common->fcc = tables + fcc_offset;
13284
common->lcc = (sljit_sw)(tables + lcc_offset);
13285
common->mode = mode;
13286
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13287
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13288
common->nltype = NLTYPE_FIXED;
13289
switch(re->newline_convention)
13290
{
13291
case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13292
case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13293
case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13294
case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13295
case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13296
case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13297
default: return PCRE2_ERROR_INTERNAL;
13298
}
13299
common->nlmax = READ_CHAR_MAX;
13300
common->nlmin = 0;
13301
if (re->bsr_convention == PCRE2_BSR_UNICODE)
13302
common->bsr_nltype = NLTYPE_ANY;
13303
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13304
common->bsr_nltype = NLTYPE_ANYCRLF;
13305
else
13306
{
13307
#ifdef BSR_ANYCRLF
13308
common->bsr_nltype = NLTYPE_ANYCRLF;
13309
#else
13310
common->bsr_nltype = NLTYPE_ANY;
13311
#endif
13312
}
13313
common->bsr_nlmax = READ_CHAR_MAX;
13314
common->bsr_nlmin = 0;
13315
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13316
common->ctypes = (sljit_sw)(tables + ctypes_offset);
13317
common->name_count = re->name_count;
13318
common->name_entry_size = re->name_entry_size;
13319
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13320
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13321
#ifdef SUPPORT_UNICODE
13322
/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */
13323
common->utf = (re->overall_options & PCRE2_UTF) != 0;
13324
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13325
if (common->utf)
13326
{
13327
if (common->nltype == NLTYPE_ANY)
13328
common->nlmax = 0x2029;
13329
else if (common->nltype == NLTYPE_ANYCRLF)
13330
common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13331
else
13332
{
13333
/* We only care about the first newline character. */
13334
common->nlmax = common->newline & 0xff;
13335
}
13336
13337
if (common->nltype == NLTYPE_FIXED)
13338
common->nlmin = common->newline & 0xff;
13339
else
13340
common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13341
13342
if (common->bsr_nltype == NLTYPE_ANY)
13343
common->bsr_nlmax = 0x2029;
13344
else
13345
common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13346
common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13347
}
13348
else
13349
common->invalid_utf = FALSE;
13350
#endif /* SUPPORT_UNICODE */
13351
ccend = bracketend(common->start);
13352
13353
/* Calculate the local space size on the stack. */
13354
common->ovector_start = LOCAL0;
13355
/* Allocate space for temporary data structures. */
13356
private_data_length = ccend - common->start;
13357
/* The chance of overflow is very low, but might happen on 32 bit. */
13358
if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32))
13359
return PCRE2_ERROR_NOMEMORY;
13360
13361
private_data_length *= sizeof(sljit_s32);
13362
/* Align to 32 bit. */
13363
total_length = ((re->top_bracket + 1) + (sljit_uw)(sizeof(sljit_s32) - 1)) & ~(sljit_uw)(sizeof(sljit_s32) - 1);
13364
if (~(sljit_uw)0 - private_data_length < total_length)
13365
return PCRE2_ERROR_NOMEMORY;
13366
13367
total_length += private_data_length;
13368
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data);
13369
if (!common->private_data_ptrs)
13370
return PCRE2_ERROR_NOMEMORY;
13371
13372
memset(common->private_data_ptrs, 0, private_data_length);
13373
common->optimized_cbracket = ((sljit_u8 *)common->private_data_ptrs) + private_data_length;
13374
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13375
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13376
#else
13377
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13378
#endif
13379
13380
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13381
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13382
common->capture_last_ptr = common->ovector_start;
13383
common->ovector_start += sizeof(sljit_sw);
13384
#endif
13385
if (!check_opcode_types(common, common->start, ccend))
13386
{
13387
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13388
return PCRE2_ERROR_JIT_UNSUPPORTED;
13389
}
13390
13391
/* Checking flags and updating ovector_start. */
13392
if (mode == PCRE2_JIT_COMPLETE &&
13393
(re->flags & PCRE2_LASTSET) != 0 &&
13394
(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13395
{
13396
common->req_char_ptr = common->ovector_start;
13397
common->ovector_start += sizeof(sljit_sw);
13398
}
13399
13400
if (mode != PCRE2_JIT_COMPLETE)
13401
{
13402
common->start_used_ptr = common->ovector_start;
13403
common->ovector_start += sizeof(sljit_sw);
13404
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13405
{
13406
common->hit_start = common->ovector_start;
13407
common->ovector_start += sizeof(sljit_sw);
13408
}
13409
}
13410
13411
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13412
{
13413
common->match_end_ptr = common->ovector_start;
13414
common->ovector_start += sizeof(sljit_sw);
13415
}
13416
13417
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13418
common->control_head_ptr = 1;
13419
#endif
13420
13421
if (common->control_head_ptr != 0)
13422
{
13423
common->control_head_ptr = common->ovector_start;
13424
common->ovector_start += sizeof(sljit_sw);
13425
}
13426
13427
if (common->has_set_som)
13428
{
13429
/* Saving the real start pointer is necessary. */
13430
common->start_ptr = common->ovector_start;
13431
common->ovector_start += sizeof(sljit_sw);
13432
}
13433
13434
/* Aligning ovector to even number of sljit words. */
13435
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13436
common->ovector_start += sizeof(sljit_sw);
13437
13438
if (common->start_ptr == 0)
13439
common->start_ptr = OVECTOR(0);
13440
13441
/* Capturing brackets cannot be optimized if callouts are allowed. */
13442
if (common->capture_last_ptr != 0)
13443
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13444
13445
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13446
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13447
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13448
13449
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
13450
(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
13451
!common->has_skip_in_assert_back)
13452
detect_early_fail(common, common->start, &private_data_size, 0, 0);
13453
13454
set_private_data_ptrs(common, &private_data_size, ccend);
13455
13456
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13457
13458
if (private_data_size > 65536)
13459
{
13460
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13461
return PCRE2_ERROR_JIT_UNSUPPORTED;
13462
}
13463
13464
if (common->has_then)
13465
{
13466
total_length = ccend - common->start;
13467
common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data);
13468
if (!common->then_offsets)
13469
{
13470
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13471
return PCRE2_ERROR_NOMEMORY;
13472
}
13473
memset(common->then_offsets, 0, total_length);
13474
set_then_offsets(common, common->start, NULL);
13475
}
13476
13477
compiler = sljit_create_compiler(allocator_data);
13478
if (!compiler)
13479
{
13480
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13481
if (common->has_then)
13482
SLJIT_FREE(common->then_offsets, allocator_data);
13483
return PCRE2_ERROR_NOMEMORY;
13484
}
13485
common->compiler = compiler;
13486
13487
/* Main pcre2_jit_exec entry. */
13488
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13489
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size);
13490
13491
/* Register init. */
13492
reset_ovector(common, (re->top_bracket + 1) * 2);
13493
if (common->req_char_ptr != 0)
13494
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13495
13496
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13497
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13498
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13499
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13500
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13501
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13502
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13503
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13504
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13505
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13506
13507
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13508
reset_early_fail(common);
13509
13510
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13511
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13512
if (common->mark_ptr != 0)
13513
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13514
if (common->control_head_ptr != 0)
13515
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13516
13517
/* Main part of the matching */
13518
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13519
{
13520
mainloop_label = mainloop_entry(common);
13521
continue_match_label = LABEL();
13522
/* Forward search if possible. */
13523
if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13524
{
13525
if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13526
;
13527
else if ((re->flags & PCRE2_FIRSTSET) != 0)
13528
fast_forward_first_char(common);
13529
else if ((re->flags & PCRE2_STARTLINE) != 0)
13530
fast_forward_newline(common);
13531
else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13532
fast_forward_start_bits(common);
13533
}
13534
}
13535
else
13536
continue_match_label = LABEL();
13537
13538
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 &&
13539
(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13540
{
13541
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13542
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13543
minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13544
}
13545
if (common->req_char_ptr != 0)
13546
reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13547
13548
/* Store the current STR_PTR in OVECTOR(0). */
13549
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13550
/* Copy the limit of allowed recursions. */
13551
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13552
if (common->capture_last_ptr != 0)
13553
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13554
if (common->fast_forward_bc_ptr != NULL)
13555
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13556
13557
if (common->start_ptr != OVECTOR(0))
13558
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13559
13560
/* Copy the beginning of the string. */
13561
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13562
{
13563
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13564
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13565
JUMPHERE(jump);
13566
}
13567
else if (mode == PCRE2_JIT_PARTIAL_HARD)
13568
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13569
13570
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13571
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13572
{
13573
sljit_free_compiler(compiler);
13574
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13575
if (common->has_then)
13576
SLJIT_FREE(common->then_offsets, allocator_data);
13577
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13578
return PCRE2_ERROR_NOMEMORY;
13579
}
13580
13581
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13582
end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13583
13584
if (common->might_be_empty)
13585
{
13586
empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13587
empty_match_found_label = LABEL();
13588
}
13589
13590
common->accept_label = LABEL();
13591
if (common->accept != NULL)
13592
set_jumps(common->accept, common->accept_label);
13593
13594
/* This means we have a match. Update the ovector. */
13595
copy_ovector(common, re->top_bracket + 1);
13596
common->quit_label = common->abort_label = LABEL();
13597
if (common->quit != NULL)
13598
set_jumps(common->quit, common->quit_label);
13599
if (common->abort != NULL)
13600
set_jumps(common->abort, common->abort_label);
13601
if (minlength_check_failed != NULL)
13602
SET_LABEL(minlength_check_failed, common->abort_label);
13603
13604
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13605
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13606
13607
if (common->failed_match != NULL)
13608
{
13609
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13610
set_jumps(common->failed_match, LABEL());
13611
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13612
JUMPTO(SLJIT_JUMP, common->abort_label);
13613
}
13614
13615
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13616
JUMPHERE(end_anchor_failed);
13617
13618
if (mode != PCRE2_JIT_COMPLETE)
13619
{
13620
common->partialmatchlabel = LABEL();
13621
set_jumps(common->partialmatch, common->partialmatchlabel);
13622
return_with_partial_match(common, common->quit_label);
13623
}
13624
13625
if (common->might_be_empty)
13626
empty_match_backtrack_label = LABEL();
13627
compile_backtrackingpath(common, rootbacktrack.top);
13628
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13629
{
13630
sljit_free_compiler(compiler);
13631
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13632
if (common->has_then)
13633
SLJIT_FREE(common->then_offsets, allocator_data);
13634
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13635
return PCRE2_ERROR_NOMEMORY;
13636
}
13637
13638
SLJIT_ASSERT(rootbacktrack.prev == NULL);
13639
reset_match_label = LABEL();
13640
13641
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13642
{
13643
/* Update hit_start only in the first time. */
13644
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13645
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13646
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13647
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13648
JUMPHERE(jump);
13649
}
13650
13651
/* Check we have remaining characters. */
13652
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13653
{
13654
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13655
}
13656
13657
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13658
(common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13659
13660
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13661
{
13662
if (common->ff_newline_shortcut != NULL)
13663
{
13664
/* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13665
if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13666
{
13667
if (common->match_end_ptr != 0)
13668
{
13669
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13670
OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13671
CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13672
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13673
}
13674
else
13675
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13676
}
13677
}
13678
else
13679
CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13680
}
13681
13682
/* No more remaining characters. */
13683
if (reqcu_not_found != NULL)
13684
set_jumps(reqcu_not_found, LABEL());
13685
13686
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13687
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13688
13689
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13690
JUMPTO(SLJIT_JUMP, common->quit_label);
13691
13692
flush_stubs(common);
13693
13694
if (common->might_be_empty)
13695
{
13696
JUMPHERE(empty_match);
13697
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13698
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13699
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13700
JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13701
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13702
JUMPTO(SLJIT_ZERO, empty_match_found_label);
13703
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13704
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13705
JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13706
}
13707
13708
common->fast_forward_bc_ptr = NULL;
13709
common->early_fail_start_ptr = 0;
13710
common->early_fail_end_ptr = 0;
13711
common->currententry = common->entries;
13712
common->local_quit_available = TRUE;
13713
quit_label = common->quit_label;
13714
SLJIT_ASSERT(common->restore_end_ptr == 0);
13715
13716
if (common->currententry != NULL)
13717
{
13718
/* A free bit for each private data. */
13719
common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
13720
SLJIT_ASSERT(common->recurse_bitset_size > 0);
13721
common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
13722
13723
if (common->recurse_bitset != NULL)
13724
{
13725
do
13726
{
13727
/* Might add new entries. */
13728
compile_recurse(common);
13729
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13730
break;
13731
flush_stubs(common);
13732
common->currententry = common->currententry->next;
13733
}
13734
while (common->currententry != NULL);
13735
13736
SLJIT_FREE(common->recurse_bitset, allocator_data);
13737
}
13738
13739
if (common->currententry != NULL)
13740
{
13741
/* The common->recurse_bitset has been freed. */
13742
SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
13743
13744
sljit_free_compiler(compiler);
13745
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13746
if (common->has_then)
13747
SLJIT_FREE(common->then_offsets, allocator_data);
13748
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13749
return PCRE2_ERROR_NOMEMORY;
13750
}
13751
}
13752
13753
common->local_quit_available = FALSE;
13754
common->quit_label = quit_label;
13755
SLJIT_ASSERT(common->restore_end_ptr == 0);
13756
13757
/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
13758
/* This is a (really) rare case. */
13759
set_jumps(common->stackalloc, LABEL());
13760
/* RETURN_ADDR is not a saved register. */
13761
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
13762
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13763
13764
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13765
13766
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0);
13767
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13768
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13769
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13770
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13771
13772
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
13773
13774
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13775
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13776
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13777
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13778
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
13779
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13780
13781
/* Allocation failed. */
13782
JUMPHERE(jump);
13783
/* We break the return address cache here, but this is a really rare case. */
13784
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13785
JUMPTO(SLJIT_JUMP, common->quit_label);
13786
13787
/* Call limit reached. */
13788
set_jumps(common->calllimit, LABEL());
13789
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13790
JUMPTO(SLJIT_JUMP, common->quit_label);
13791
13792
if (common->revertframes != NULL)
13793
{
13794
set_jumps(common->revertframes, LABEL());
13795
do_revertframes(common);
13796
}
13797
if (common->wordboundary != NULL)
13798
{
13799
set_jumps(common->wordboundary, LABEL());
13800
check_wordboundary(common, FALSE);
13801
}
13802
if (common->ucp_wordboundary != NULL)
13803
{
13804
set_jumps(common->ucp_wordboundary, LABEL());
13805
check_wordboundary(common, TRUE);
13806
}
13807
if (common->anynewline != NULL)
13808
{
13809
set_jumps(common->anynewline, LABEL());
13810
check_anynewline(common);
13811
}
13812
if (common->hspace != NULL)
13813
{
13814
set_jumps(common->hspace, LABEL());
13815
check_hspace(common);
13816
}
13817
if (common->vspace != NULL)
13818
{
13819
set_jumps(common->vspace, LABEL());
13820
check_vspace(common);
13821
}
13822
if (common->casefulcmp != NULL)
13823
{
13824
set_jumps(common->casefulcmp, LABEL());
13825
do_casefulcmp(common);
13826
}
13827
if (common->caselesscmp != NULL)
13828
{
13829
set_jumps(common->caselesscmp, LABEL());
13830
do_caselesscmp(common);
13831
}
13832
if (common->reset_match != NULL || common->restart_match != NULL)
13833
{
13834
if (common->restart_match != NULL)
13835
{
13836
set_jumps(common->restart_match, LABEL());
13837
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13838
}
13839
13840
set_jumps(common->reset_match, LABEL());
13841
do_reset_match(common, (re->top_bracket + 1) * 2);
13842
/* The value of restart_match is in TMP1. */
13843
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
13844
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
13845
JUMPTO(SLJIT_JUMP, reset_match_label);
13846
}
13847
#ifdef SUPPORT_UNICODE
13848
#if PCRE2_CODE_UNIT_WIDTH == 8
13849
if (common->utfreadchar != NULL)
13850
{
13851
set_jumps(common->utfreadchar, LABEL());
13852
do_utfreadchar(common);
13853
}
13854
if (common->utfreadtype8 != NULL)
13855
{
13856
set_jumps(common->utfreadtype8, LABEL());
13857
do_utfreadtype8(common);
13858
}
13859
if (common->utfpeakcharback != NULL)
13860
{
13861
set_jumps(common->utfpeakcharback, LABEL());
13862
do_utfpeakcharback(common);
13863
}
13864
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
13865
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
13866
if (common->utfreadchar_invalid != NULL)
13867
{
13868
set_jumps(common->utfreadchar_invalid, LABEL());
13869
do_utfreadchar_invalid(common);
13870
}
13871
if (common->utfreadnewline_invalid != NULL)
13872
{
13873
set_jumps(common->utfreadnewline_invalid, LABEL());
13874
do_utfreadnewline_invalid(common);
13875
}
13876
if (common->utfmoveback_invalid)
13877
{
13878
set_jumps(common->utfmoveback_invalid, LABEL());
13879
do_utfmoveback_invalid(common);
13880
}
13881
if (common->utfpeakcharback_invalid)
13882
{
13883
set_jumps(common->utfpeakcharback_invalid, LABEL());
13884
do_utfpeakcharback_invalid(common);
13885
}
13886
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
13887
if (common->getucd != NULL)
13888
{
13889
set_jumps(common->getucd, LABEL());
13890
do_getucd(common);
13891
}
13892
if (common->getucdtype != NULL)
13893
{
13894
set_jumps(common->getucdtype, LABEL());
13895
do_getucdtype(common);
13896
}
13897
#endif /* SUPPORT_UNICODE */
13898
13899
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13900
if (common->has_then)
13901
SLJIT_FREE(common->then_offsets, allocator_data);
13902
13903
executable_func = sljit_generate_code(compiler, 0, NULL);
13904
executable_size = sljit_get_generated_code_size(compiler);
13905
sljit_free_compiler(compiler);
13906
13907
if (executable_func == NULL)
13908
{
13909
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13910
return PCRE2_ERROR_NOMEMORY;
13911
}
13912
13913
/* Reuse the function descriptor if possible. */
13914
if (re->executable_jit != NULL)
13915
functions = (executable_functions *)re->executable_jit;
13916
else
13917
{
13918
functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
13919
if (functions == NULL)
13920
{
13921
/* This case is highly unlikely since we just recently
13922
freed a lot of memory. Not impossible though. */
13923
sljit_free_code(executable_func, NULL);
13924
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13925
return PCRE2_ERROR_NOMEMORY;
13926
}
13927
memset(functions, 0, sizeof(executable_functions));
13928
functions->top_bracket = re->top_bracket + 1;
13929
functions->limit_match = re->limit_match;
13930
re->executable_jit = functions;
13931
}
13932
13933
/* Turn mode into an index. */
13934
if (mode == PCRE2_JIT_COMPLETE)
13935
mode = 0;
13936
else
13937
mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
13938
13939
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
13940
functions->executable_funcs[mode] = executable_func;
13941
functions->read_only_data_heads[mode] = common->read_only_data_head;
13942
functions->executable_sizes[mode] = executable_size;
13943
return 0;
13944
}
13945
13946
#endif
13947
13948
/*************************************************
13949
* JIT compile a Regular Expression *
13950
*************************************************/
13951
13952
/* This function uses JIT to convert a previously-compiled pattern into machine
code.

Arguments:
  code          a compiled pattern
  options       JIT option bits

Returns:        0: success or (*NOJIT) was used
               <0: an error code
*/
13962
13963
#define PUBLIC_JIT_COMPILE_OPTIONS \
13964
(PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
13965
13966
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
13967
pcre2_jit_compile(pcre2_code *code, uint32_t options)
13968
{
13969
pcre2_real_code *re = (pcre2_real_code *)code;
13970
#ifdef SUPPORT_JIT
13971
void *exec_memory;
13972
executable_functions *functions;
13973
static int executable_allocator_is_working = -1;
13974
13975
if (executable_allocator_is_working == -1)
13976
{
13977
/* Checks whether the executable allocator is working. This check
13978
might run multiple times in multi-threaded environments, but the
13979
result should not be affected by it. */
13980
exec_memory = SLJIT_MALLOC_EXEC(32, NULL);
13981
if (exec_memory != NULL)
13982
{
13983
SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL);
13984
executable_allocator_is_working = 1;
13985
}
13986
else executable_allocator_is_working = 0;
13987
}
13988
#endif
13989
13990
if (options & PCRE2_JIT_TEST_ALLOC)
13991
{
13992
if (options != PCRE2_JIT_TEST_ALLOC)
13993
return PCRE2_ERROR_JIT_BADOPTION;
13994
13995
#ifdef SUPPORT_JIT
13996
return executable_allocator_is_working ? 0 : PCRE2_ERROR_NOMEMORY;
13997
#else
13998
return PCRE2_ERROR_JIT_UNSUPPORTED;
13999
#endif
14000
}
14001
14002
if (code == NULL)
14003
return PCRE2_ERROR_NULL;
14004
14005
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14006
return PCRE2_ERROR_JIT_BADOPTION;
14007
14008
/* Support for invalid UTF was first introduced in JIT, with the option
14009
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14010
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14011
preferred feature, with the earlier option deprecated. However, for backward
14012
compatibility, if the earlier option is set, it forces the new option so that
14013
if JIT matching falls back to the interpreter, there is still support for
14014
invalid UTF. However, if this function has already been successfully called
14015
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14016
non-invalid-supporting JIT code was compiled), give an error.
14017
14018
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14019
actions are needed:
14020
14021
1. Remove the definition from pcre2.h.in and from the list in
14022
PUBLIC_JIT_COMPILE_OPTIONS above.
14023
14024
2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14025
14026
3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14027
14028
4. Delete the following short block of code. The setting of "re" and
14029
"functions" can be moved into the JIT-only block below, but if that is
14030
done, (void)re and (void)functions will be needed in the non-JIT case, to
14031
avoid compiler warnings.
14032
*/
14033
14034
#ifdef SUPPORT_JIT
14035
functions = (executable_functions *)re->executable_jit;
14036
#endif
14037
14038
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14039
{
14040
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14041
{
14042
#ifdef SUPPORT_JIT
14043
if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14044
#endif
14045
re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14046
}
14047
}
14048
14049
/* The above tests are run with and without JIT support. This means that
14050
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14051
interpreter support) even in the absence of JIT. But now, if there is no JIT
14052
support, give an error return. */
14053
14054
#ifndef SUPPORT_JIT
14055
return PCRE2_ERROR_JIT_BADOPTION;
14056
#else /* SUPPORT_JIT */
14057
14058
/* There is JIT support. Do the necessary. */
14059
14060
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14061
14062
if (!executable_allocator_is_working)
14063
return PCRE2_ERROR_NOMEMORY;
14064
14065
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14066
options |= PCRE2_JIT_INVALID_UTF;
14067
14068
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14069
|| functions->executable_funcs[0] == NULL)) {
14070
uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14071
int result = jit_compile(code, options & ~excluded_options);
14072
if (result != 0)
14073
return result;
14074
}
14075
14076
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14077
|| functions->executable_funcs[1] == NULL)) {
14078
uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14079
int result = jit_compile(code, options & ~excluded_options);
14080
if (result != 0)
14081
return result;
14082
}
14083
14084
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14085
|| functions->executable_funcs[2] == NULL)) {
14086
uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14087
int result = jit_compile(code, options & ~excluded_options);
14088
if (result != 0)
14089
return result;
14090
}
14091
14092
return 0;
14093
14094
#endif /* SUPPORT_JIT */
14095
}
14096
14097
/* JIT compiler uses an all-in-one approach. This improves security,
14098
since the code generator functions are not exported. */
14099
14100
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14101
14102
#include "pcre2_jit_match.c"
14103
#include "pcre2_jit_misc.c"
14104
14105
/* End of pcre2_jit_compile.c */
14106
14107