Path: blob/master/thirdparty/pcre2/src/pcre2_jit_compile.c
9903 views
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8This module by Zoltan Herczeg9Original API code Copyright (c) 1997-2012 University of Cambridge10New API code Copyright (c) 2016-2024 University of Cambridge1112-----------------------------------------------------------------------------13Redistribution and use in source and binary forms, with or without14modification, are permitted provided that the following conditions are met:1516* Redistributions of source code must retain the above copyright notice,17this list of conditions and the following disclaimer.1819* Redistributions in binary form must reproduce the above copyright20notice, this list of conditions and the following disclaimer in the21documentation and/or other materials provided with the distribution.2223* Neither the name of the University of Cambridge nor the names of its24contributors may be used to endorse or promote products derived from25this software without specific prior written permission.2627THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE30ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE37POSSIBILITY OF SUCH DAMAGE.38-----------------------------------------------------------------------------39*/4041#ifdef HAVE_CONFIG_H42#include "config.h"43#endif4445#if defined(__has_feature)46#if __has_feature(memory_sanitizer)47#include <sanitizer/msan_interface.h>48#endif /* __has_feature(memory_sanitizer) */49#endif /* defined(__has_feature) */5051#include "pcre2_internal.h"5253#ifdef SUPPORT_JIT5455/* All-in-one: Since we use the JIT compiler only from here,56we just include it. This way we don't need to touch the build57system files. */5859#define SLJIT_CONFIG_AUTO 160#define SLJIT_CONFIG_STATIC 161#define SLJIT_VERBOSE 06263#ifdef PCRE2_DEBUG64#define SLJIT_DEBUG 165#else66#define SLJIT_DEBUG 067#endif6869#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)70#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)7172static void * pcre2_jit_malloc(size_t size, void *allocator_data)73{74pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);75return allocator->malloc(size, allocator->memory_data);76}7778static void pcre2_jit_free(void *ptr, void *allocator_data)79{80pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);81allocator->free(ptr, allocator->memory_data);82}8384#include "../deps/sljit/sljit_src/sljitLir.c"8586#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED87#error Unsupported architecture88#endif8990/* Defines for debugging purposes. 
*/9192/* 1 - Use unoptimized capturing brackets.932 - Enable capture_last_ptr (includes option 1). */94/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */9596/* 1 - Always have a control head. */97/* #define DEBUG_FORCE_CONTROL_HEAD 1 */9899/* Allocate memory for the regex stack on the real machine stack.100Fast, but limited size. */101#define MACHINE_STACK_SIZE 32768102103/* Growth rate for stack allocated by the OS. Should be the multiply104of page size. */105#define STACK_GROWTH_RATE 8192106107/* Enable to check that the allocation could destroy temporaries. */108#if defined SLJIT_DEBUG && SLJIT_DEBUG109#define DESTROY_REGISTERS 1110#endif111112/*113Short summary about the backtracking mechanism empolyed by the jit code generator:114115The code generator follows the recursive nature of the PERL compatible regular116expressions. The basic blocks of regular expressions are condition checkers117whose execute different commands depending on the result of the condition check.118The relationship between the operators can be horizontal (concatenation) and119vertical (sub-expression) (See struct backtrack_common for more details).120121'ab' - 'a' and 'b' regexps are concatenated122'a+' - 'a' is the sub-expression of the '+' operator123124The condition checkers are boolean (true/false) checkers. Machine code is generated125for the checker itself and for the actions depending on the result of the checker.126The 'true' case is called as the matching path (expected path), and the other is called as127the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken128branches on the matching path.129130Greedy star operator (*) :131Matching path: match happens.132Backtrack path: match failed.133Non-greedy star operator (*?) :134Matching path: no need to perform a match.135Backtrack path: match is required.136137The following example shows how the code generated for a capturing bracket138with two alternatives. 
Let A, B, C, D are arbirary regular expressions, and139we have the following regular expression:140141A(B|C)D142143The generated code will be the following:144145A matching path146'(' matching path (pushing arguments to the stack)147B matching path148')' matching path (pushing arguments to the stack)149D matching path150return with successful match151152D backtrack path153')' backtrack path (If we arrived from "C" jump to the backtrack of "C")154B backtrack path155C expected path156jump to D matching path157C backtrack path158A backtrack path159160Notice, that the order of backtrack code paths are the opposite of the fast161code paths. In this way the topmost value on the stack is always belong162to the current backtrack code path. The backtrack path must check163whether there is a next alternative. If so, it needs to jump back to164the matching path eventually. Otherwise it needs to clear out its own stack165frame and continue the execution on the backtrack code paths.166*/167168/*169Saved stack frames:170171Atomic blocks and asserts require reloading the values of private data172when the backtrack mechanism performed. Because of OP_RECURSE, the data173are not necessarly known in compile time, thus we need a dynamic restore174mechanism.175176The stack frames are stored in a chain list, and have the following format:177([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]178179Thus we can restore the private data to a particular point in the stack.180*/181182typedef struct jit_arguments {183/* Pointers first. */184struct sljit_stack *stack;185PCRE2_SPTR str;186PCRE2_SPTR begin;187PCRE2_SPTR end;188pcre2_match_data *match_data;189PCRE2_SPTR startchar_ptr;190PCRE2_UCHAR *mark_ptr;191int (*callout)(pcre2_callout_block *, void *);192void *callout_data;193/* Everything else after. 
*/194sljit_uw offset_limit;195sljit_u32 limit_match;196sljit_u32 oveccount;197sljit_u32 options;198} jit_arguments;199200#define JIT_NUMBER_OF_COMPILE_MODES 3201202typedef struct executable_functions {203void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];204void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];205sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];206sljit_u32 top_bracket;207sljit_u32 limit_match;208} executable_functions;209210typedef struct jump_list {211struct sljit_jump *jump;212struct jump_list *next;213} jump_list;214215typedef struct stub_list {216struct sljit_jump *start;217struct sljit_label *quit;218struct stub_list *next;219} stub_list;220221enum frame_types {222no_frame = -1,223no_stack = -2224};225226enum control_types {227type_mark = 0,228type_then_trap = 1229};230231enum early_fail_types {232type_skip = 0,233type_fail = 1,234type_fail_range = 2235};236237typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);238239/* The following structure is the key data type for the recursive240code generator. It is allocated by compile_matchingpath, and contains241the arguments for compile_backtrackingpath. Must be the first member242of its descendants. */243typedef struct backtrack_common {244/* Backtracking path of an opcode, which falls back245to our opcode, if it cannot resume matching. */246struct backtrack_common *prev;247/* Backtracks for opcodes without backtracking path.248These opcodes are between 'prev' and the current249opcode, and they never resume the match. */250jump_list *simple_backtracks;251/* Internal backtracking list for block constructs252which contains other opcodes, such as brackets,253asserts, conditionals, etc. */254struct backtrack_common *top;255/* Backtracks used internally by the opcode. For component256opcodes, this list is also used by those opcodes without257backtracking path which follows the 'top' backtrack. */258jump_list *own_backtracks;259/* Opcode pointer. 
*/260PCRE2_SPTR cc;261} backtrack_common;262263typedef struct assert_backtrack {264backtrack_common common;265jump_list *condfailed;266/* Less than 0 if a frame is not needed. */267int framesize;268/* Points to our private memory word on the stack. */269int private_data_ptr;270/* For iterators. */271struct sljit_label *matchingpath;272} assert_backtrack;273274typedef struct bracket_backtrack {275backtrack_common common;276/* Where to coninue if an alternative is successfully matched. */277struct sljit_label *alternative_matchingpath;278/* For rmin and rmax iterators. */279struct sljit_label *recursive_matchingpath;280/* For greedy ? operator. */281struct sljit_label *zero_matchingpath;282/* Contains the branches of a failed condition. */283union {284/* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */285jump_list *no_capture;286assert_backtrack *assert;287/* For OP_ONCE. Less than 0 if not needed. */288int framesize;289} u;290/* For brackets with >3 alternatives. */291struct sljit_jump *matching_mov_addr;292/* Points to our private memory word on the stack. */293int private_data_ptr;294} bracket_backtrack;295296typedef struct bracketpos_backtrack {297backtrack_common common;298/* Points to our private memory word on the stack. */299int private_data_ptr;300/* Reverting stack is needed. */301int framesize;302/* Allocated stack size. */303int stacksize;304} bracketpos_backtrack;305306typedef struct braminzero_backtrack {307backtrack_common common;308struct sljit_label *matchingpath;309} braminzero_backtrack;310311typedef struct char_iterator_backtrack {312backtrack_common common;313/* Next iteration. */314struct sljit_label *matchingpath;315/* Creating a range based on the next character. */316struct {317unsigned int othercasebit;318PCRE2_UCHAR chr;319BOOL charpos_enabled;320} charpos;321} char_iterator_backtrack;322323typedef struct ref_iterator_backtrack {324backtrack_common common;325/* Next iteration. 
*/326struct sljit_label *matchingpath;327} ref_iterator_backtrack;328329typedef struct recurse_entry {330struct recurse_entry *next;331/* Contains the function entry label. */332struct sljit_label *entry_label;333/* Contains the function entry label. */334struct sljit_label *backtrack_label;335/* Collects the entry calls until the function is not created. */336jump_list *entry_calls;337/* Collects the backtrack calls until the function is not created. */338jump_list *backtrack_calls;339/* Points to the starting opcode. */340sljit_sw start;341} recurse_entry;342343typedef struct recurse_backtrack {344backtrack_common common;345/* Return to the matching path. */346struct sljit_label *matchingpath;347/* Recursive pattern. */348recurse_entry *entry;349/* Pattern is inlined. */350BOOL inlined_pattern;351} recurse_backtrack;352353typedef struct vreverse_backtrack {354backtrack_common common;355/* Return to the matching path. */356struct sljit_label *matchingpath;357} vreverse_backtrack;358359#define OP_THEN_TRAP OP_TABLE_LENGTH360361typedef struct then_trap_backtrack {362backtrack_common common;363/* If then_trap is not NULL, this structure contains the real364then_trap for the backtracking path. */365struct then_trap_backtrack *then_trap;366/* Points to the starting opcode. */367sljit_sw start;368/* Exit point for the then opcodes of this alternative. */369jump_list *quit;370/* Frame size of the current alternative. */371int framesize;372} then_trap_backtrack;373374#define MAX_N_CHARS 12375#define MAX_DIFF_CHARS 5376377typedef struct fast_forward_char_data {378/* Number of characters in the chars array, 255 for any character. */379sljit_u8 count;380/* Number of last UTF-8 characters in the chars array. */381sljit_u8 last_count;382/* Available characters in the current position. 
*/383PCRE2_UCHAR chars[MAX_DIFF_CHARS];384} fast_forward_char_data;385386#define MAX_CLASS_RANGE_SIZE 4387#define MAX_CLASS_CHARS_SIZE 3388389typedef struct compiler_common {390/* The sljit ceneric compiler. */391struct sljit_compiler *compiler;392/* Compiled regular expression. */393pcre2_real_code *re;394/* First byte code. */395PCRE2_SPTR start;396/* Maps private data offset to each opcode. */397sljit_s32 *private_data_ptrs;398/* Chain list of read-only data ptrs. */399void *read_only_data_head;400/* Tells whether the capturing bracket is optimized. */401sljit_u8 *optimized_cbracket;402/* Tells whether the starting offset is a target of then. */403sljit_u8 *then_offsets;404/* Current position where a THEN must jump. */405then_trap_backtrack *then_trap;406/* Starting offset of private data for capturing brackets. */407sljit_s32 cbra_ptr;408#if defined SLJIT_DEBUG && SLJIT_DEBUG409/* End offset of locals for assertions. */410sljit_s32 locals_size;411#endif412/* Output vector starting point. Must be divisible by 2. */413sljit_s32 ovector_start;414/* Points to the starting character of the current match. */415sljit_s32 start_ptr;416/* Last known position of the requested byte. */417sljit_s32 req_char_ptr;418/* Head of the last recursion. */419sljit_s32 recursive_head_ptr;420/* First inspected character for partial matching.421(Needed for avoiding zero length partial matches.) */422sljit_s32 start_used_ptr;423/* Starting pointer for partial soft matches. */424sljit_s32 hit_start;425/* Pointer of the match end position. */426sljit_s32 match_end_ptr;427/* Points to the marked string. */428sljit_s32 mark_ptr;429/* Head of the recursive control verb management chain.430Each item must have a previous offset and type431(see control_types) values. See do_search_mark. */432sljit_s32 control_head_ptr;433/* The offset of the saved STR_END in the outermost434scan substring block. 
Since scan substring restores435STR_END after a match, it is enough to restore436STR_END inside a scan substring block. */437sljit_s32 restore_end_ptr;438/* Points to the last matched capture block index. */439sljit_s32 capture_last_ptr;440/* Fast forward skipping byte code pointer. */441PCRE2_SPTR fast_forward_bc_ptr;442/* Locals used by fast fail optimization. */443sljit_s32 early_fail_start_ptr;444sljit_s32 early_fail_end_ptr;445/* Variables used by recursive call generator. */446sljit_s32 recurse_bitset_size;447uint8_t *recurse_bitset;448449/* Flipped and lower case tables. */450const sljit_u8 *fcc;451sljit_sw lcc;452/* Mode can be PCRE2_JIT_COMPLETE and others. */453int mode;454/* TRUE, when empty match is accepted for partial matching. */455BOOL allow_empty_partial;456/* TRUE, when minlength is greater than 0. */457BOOL might_be_empty;458/* \K is found in the pattern. */459BOOL has_set_som;460/* (*SKIP:arg) is found in the pattern. */461BOOL has_skip_arg;462/* (*THEN) is found in the pattern. */463BOOL has_then;464/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */465BOOL has_skip_in_assert_back;466/* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */467BOOL local_quit_available;468/* Currently in a positive assertion. */469BOOL in_positive_assertion;470/* Newline control. */471int nltype;472sljit_u32 nlmax;473sljit_u32 nlmin;474int newline;475int bsr_nltype;476sljit_u32 bsr_nlmax;477sljit_u32 bsr_nlmin;478/* Dollar endonly. */479int endonly;480/* Tables. */481sljit_sw ctypes;482/* Named capturing brackets. */483PCRE2_SPTR name_table;484sljit_sw name_count;485sljit_sw name_entry_size;486487/* Labels and jump lists. 
*/488struct sljit_label *partialmatchlabel;489struct sljit_label *quit_label;490struct sljit_label *abort_label;491struct sljit_label *accept_label;492struct sljit_label *ff_newline_shortcut;493stub_list *stubs;494recurse_entry *entries;495recurse_entry *currententry;496jump_list *partialmatch;497jump_list *quit;498jump_list *positive_assertion_quit;499jump_list *abort;500jump_list *failed_match;501jump_list *accept;502jump_list *calllimit;503jump_list *stackalloc;504jump_list *revertframes;505jump_list *wordboundary;506jump_list *ucp_wordboundary;507jump_list *anynewline;508jump_list *hspace;509jump_list *vspace;510jump_list *casefulcmp;511jump_list *caselesscmp;512jump_list *reset_match;513/* Same as reset_match, but resets the STR_PTR as well. */514jump_list *restart_match;515BOOL unset_backref;516BOOL alt_circumflex;517#ifdef SUPPORT_UNICODE518BOOL utf;519BOOL invalid_utf;520BOOL ucp;521/* Points to saving area for iref. */522jump_list *getucd;523jump_list *getucdtype;524#if PCRE2_CODE_UNIT_WIDTH == 8525jump_list *utfreadchar;526jump_list *utfreadtype8;527jump_list *utfpeakcharback;528#endif529#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16530jump_list *utfreadchar_invalid;531jump_list *utfreadnewline_invalid;532jump_list *utfmoveback_invalid;533jump_list *utfpeakcharback_invalid;534#endif535#endif /* SUPPORT_UNICODE */536} compiler_common;537538/* For byte_sequence_compare. 
*/539540typedef struct compare_context {541int length;542int sourcereg;543#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED544int ucharptr;545union {546sljit_s32 asint;547sljit_u16 asushort;548#if PCRE2_CODE_UNIT_WIDTH == 8549sljit_u8 asbyte;550sljit_u8 asuchars[4];551#elif PCRE2_CODE_UNIT_WIDTH == 16552sljit_u16 asuchars[2];553#elif PCRE2_CODE_UNIT_WIDTH == 32554sljit_u32 asuchars[1];555#endif556} c;557union {558sljit_s32 asint;559sljit_u16 asushort;560#if PCRE2_CODE_UNIT_WIDTH == 8561sljit_u8 asbyte;562sljit_u8 asuchars[4];563#elif PCRE2_CODE_UNIT_WIDTH == 16564sljit_u16 asuchars[2];565#elif PCRE2_CODE_UNIT_WIDTH == 32566sljit_u32 asuchars[1];567#endif568} oc;569#endif570} compare_context;571572/* Undefine sljit macros. */573#undef CMP574575/* Used for accessing the elements of the stack. */576#define STACK(i) ((i) * SSIZE_OF(sw))577578#ifdef SLJIT_PREF_SHIFT_REG579#if SLJIT_PREF_SHIFT_REG == SLJIT_R2580/* Nothing. */581#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3582#define SHIFT_REG_IS_R3583#else584#error "Unsupported shift register"585#endif586#endif587588#define TMP1 SLJIT_R0589#ifdef SHIFT_REG_IS_R3590#define TMP2 SLJIT_R3591#define TMP3 SLJIT_R2592#else593#define TMP2 SLJIT_R2594#define TMP3 SLJIT_R3595#endif596#define STR_PTR SLJIT_R1597#define STR_END SLJIT_S0598#define STACK_TOP SLJIT_S1599#define STACK_LIMIT SLJIT_S2600#define COUNT_MATCH SLJIT_S3601#define ARGUMENTS SLJIT_S4602#define RETURN_ADDR SLJIT_R4603604#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)605#define HAS_VIRTUAL_REGISTERS 1606#else607#define HAS_VIRTUAL_REGISTERS 0608#endif609610/* Local space layout. */611/* Max limit of recursions. */612#define LIMIT_MATCH (0 * sizeof(sljit_sw))613/* Local variables. Their number is computed by check_opcode_types. 
*/614#define LOCAL0 (1 * sizeof(sljit_sw))615#define LOCAL1 (2 * sizeof(sljit_sw))616#define LOCAL2 (3 * sizeof(sljit_sw))617#define LOCAL3 (4 * sizeof(sljit_sw))618#define LOCAL4 (5 * sizeof(sljit_sw))619/* The output vector is stored on the stack, and contains pointers620to characters. The vector data is divided into two groups: the first621group contains the start / end character pointers, and the second is622the start pointers when the end of the capturing group has not yet reached. */623#define OVECTOR_START (common->ovector_start)624#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))625#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))626#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])627628#if PCRE2_CODE_UNIT_WIDTH == 8629#define MOV_UCHAR SLJIT_MOV_U8630#define IN_UCHARS(x) (x)631#elif PCRE2_CODE_UNIT_WIDTH == 16632#define MOV_UCHAR SLJIT_MOV_U16633#define UCHAR_SHIFT (1)634#define IN_UCHARS(x) ((x) * 2)635#elif PCRE2_CODE_UNIT_WIDTH == 32636#define MOV_UCHAR SLJIT_MOV_U32637#define UCHAR_SHIFT (2)638#define IN_UCHARS(x) ((x) * 4)639#else640#error Unsupported compiling mode641#endif642643/* Shortcuts. 
*/644#define DEFINE_COMPILER \645struct sljit_compiler *compiler = common->compiler646#define OP1(op, dst, dstw, src, srcw) \647sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))648#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \649sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))650#define OP2U(op, src1, src1w, src2, src2w) \651sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))652#define OP_SRC(op, src, srcw) \653sljit_emit_op_src(compiler, (op), (src), (srcw))654#define LABEL() \655sljit_emit_label(compiler)656#define JUMP(type) \657sljit_emit_jump(compiler, (type))658#define JUMPTO(type, label) \659sljit_set_label(sljit_emit_jump(compiler, (type)), (label))660#define JUMPHERE(jump) \661sljit_set_label((jump), sljit_emit_label(compiler))662#define SET_LABEL(jump, label) \663sljit_set_label((jump), (label))664#define CMP(type, src1, src1w, src2, src2w) \665sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))666#define CMPTO(type, src1, src1w, src2, src2w, label) \667sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))668#define OP_FLAGS(op, dst, dstw, type) \669sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))670#define SELECT(type, dst_reg, src1, src1w, src2_reg) \671sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))672#define GET_LOCAL_BASE(dst, dstw, offset) \673sljit_get_local_base(compiler, (dst), (dstw), (offset))674675#define READ_CHAR_MAX ((sljit_u32)0xffffffff)676677#define INVALID_UTF_CHAR -1678#define UNASSIGNED_UTF_CHAR 888679680#if defined SUPPORT_UNICODE681#if PCRE2_CODE_UNIT_WIDTH == 8682683#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \684{ \685if (ptr[0] <= 0x7f) \686c = *ptr++; \687else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \688{ \689c = ptr[1] - 0x80; \690\691if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \692{ \693c |= (ptr[0] - 0xc0) << 6; \694ptr += 2; \695} \696else if (ptr + 2 
< end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \697{ \698c = c << 6 | (ptr[2] - 0x80); \699\700if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \701{ \702c |= (ptr[0] - 0xe0) << 12; \703ptr += 3; \704\705if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \706{ \707invalid_action; \708} \709} \710else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \711{ \712c = c << 6 | (ptr[3] - 0x80); \713\714if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \715{ \716c |= (ptr[0] - 0xf0) << 18; \717ptr += 4; \718\719if (c >= 0x110000 || c < 0x10000) \720{ \721invalid_action; \722} \723} \724else \725{ \726invalid_action; \727} \728} \729else \730{ \731invalid_action; \732} \733} \734else \735{ \736invalid_action; \737} \738} \739else \740{ \741invalid_action; \742} \743}744745#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \746{ \747c = ptr[-1]; \748if (c <= 0x7f) \749ptr--; \750else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \751{ \752c -= 0x80; \753\754if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \755{ \756c |= (ptr[-2] - 0xc0) << 6; \757ptr -= 2; \758} \759else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \760{ \761c = c << 6 | (ptr[-2] - 0x80); \762\763if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \764{ \765c |= (ptr[-3] - 0xe0) << 12; \766ptr -= 3; \767\768if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \769{ \770invalid_action; \771} \772} \773else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \774{ \775c = c << 6 | (ptr[-3] - 0x80); \776\777if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \778{ \779c |= (ptr[-4] - 0xf0) << 18; \780ptr -= 4; \781\782if (c >= 0x110000 || c < 0x10000) \783{ \784invalid_action; \785} \786} \787else \788{ \789invalid_action; \790} \791} \792else \793{ \794invalid_action; \795} \796} \797else \798{ \799invalid_action; \800} \801} \802else \803{ \804invalid_action; \805} \806}807808#elif PCRE2_CODE_UNIT_WIDTH == 16809810#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \811{ \812if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \813c = *ptr++; 
\814else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \815{ \816c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \817ptr += 2; \818} \819else \820{ \821invalid_action; \822} \823}824825#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \826{ \827c = ptr[-1]; \828if (c < 0xd800 || c >= 0xe000) \829ptr--; \830else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \831{ \832c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \833ptr -= 2; \834} \835else \836{ \837invalid_action; \838} \839}840841842#elif PCRE2_CODE_UNIT_WIDTH == 32843844#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \845{ \846if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \847c = *ptr++; \848else \849{ \850invalid_action; \851} \852}853854#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \855{ \856c = ptr[-1]; \857if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \858ptr--; \859else \860{ \861invalid_action; \862} \863}864865#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */866#endif /* SUPPORT_UNICODE */867868static PCRE2_SPTR bracketend(PCRE2_SPTR cc)869{870SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));871do cc += GET(cc, 1); while (*cc == OP_ALT);872SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);873cc += 1 + LINK_SIZE;874return cc;875}876877static int no_alternatives(PCRE2_SPTR cc)878{879int count = 0;880SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));881do882{883cc += GET(cc, 1);884count++;885}886while (*cc == OP_ALT);887SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);888return count;889}890891static BOOL find_vreverse(PCRE2_SPTR cc)892{893SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);894895do896{897if (cc[1 + LINK_SIZE] == OP_VREVERSE)898return TRUE;899cc += GET(cc, 1);900}901while (*cc == OP_ALT);902903return 
FALSE;904}905906/* Functions whose might need modification for all new supported opcodes:907next_opcode908check_opcode_types909set_private_data_ptrs910get_framesize911init_frame912get_recurse_data_length913copy_recurse_data914compile_matchingpath915compile_backtrackingpath916*/917918static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)919{920SLJIT_UNUSED_ARG(common);921switch(*cc)922{923case OP_SOD:924case OP_SOM:925case OP_SET_SOM:926case OP_NOT_WORD_BOUNDARY:927case OP_WORD_BOUNDARY:928case OP_NOT_DIGIT:929case OP_DIGIT:930case OP_NOT_WHITESPACE:931case OP_WHITESPACE:932case OP_NOT_WORDCHAR:933case OP_WORDCHAR:934case OP_ANY:935case OP_ALLANY:936case OP_NOTPROP:937case OP_PROP:938case OP_ANYNL:939case OP_NOT_HSPACE:940case OP_HSPACE:941case OP_NOT_VSPACE:942case OP_VSPACE:943case OP_EXTUNI:944case OP_EODN:945case OP_EOD:946case OP_CIRC:947case OP_CIRCM:948case OP_DOLL:949case OP_DOLLM:950case OP_CRSTAR:951case OP_CRMINSTAR:952case OP_CRPLUS:953case OP_CRMINPLUS:954case OP_CRQUERY:955case OP_CRMINQUERY:956case OP_CRRANGE:957case OP_CRMINRANGE:958case OP_CRPOSSTAR:959case OP_CRPOSPLUS:960case OP_CRPOSQUERY:961case OP_CRPOSRANGE:962case OP_CLASS:963case OP_NCLASS:964case OP_REF:965case OP_REFI:966case OP_DNREF:967case OP_DNREFI:968case OP_RECURSE:969case OP_CALLOUT:970case OP_ALT:971case OP_KET:972case OP_KETRMAX:973case OP_KETRMIN:974case OP_KETRPOS:975case OP_REVERSE:976case OP_VREVERSE:977case OP_ASSERT:978case OP_ASSERT_NOT:979case OP_ASSERTBACK:980case OP_ASSERTBACK_NOT:981case OP_ASSERT_NA:982case OP_ASSERTBACK_NA:983case OP_ASSERT_SCS:984case OP_ONCE:985case OP_SCRIPT_RUN:986case OP_BRA:987case OP_BRAPOS:988case OP_CBRA:989case OP_CBRAPOS:990case OP_COND:991case OP_SBRA:992case OP_SBRAPOS:993case OP_SCBRA:994case OP_SCBRAPOS:995case OP_SCOND:996case OP_CREF:997case OP_DNCREF:998case OP_RREF:999case OP_DNRREF:1000case OP_FALSE:1001case OP_TRUE:1002case OP_BRAZERO:1003case OP_BRAMINZERO:1004case OP_BRAPOSZERO:1005case OP_PRUNE:1006case 
OP_SKIP:1007case OP_THEN:1008case OP_COMMIT:1009case OP_FAIL:1010case OP_ACCEPT:1011case OP_ASSERT_ACCEPT:1012case OP_CLOSE:1013case OP_SKIPZERO:1014case OP_NOT_UCP_WORD_BOUNDARY:1015case OP_UCP_WORD_BOUNDARY:1016return cc + PRIV(OP_lengths)[*cc];10171018case OP_CHAR:1019case OP_CHARI:1020case OP_NOT:1021case OP_NOTI:1022case OP_STAR:1023case OP_MINSTAR:1024case OP_PLUS:1025case OP_MINPLUS:1026case OP_QUERY:1027case OP_MINQUERY:1028case OP_UPTO:1029case OP_MINUPTO:1030case OP_EXACT:1031case OP_POSSTAR:1032case OP_POSPLUS:1033case OP_POSQUERY:1034case OP_POSUPTO:1035case OP_STARI:1036case OP_MINSTARI:1037case OP_PLUSI:1038case OP_MINPLUSI:1039case OP_QUERYI:1040case OP_MINQUERYI:1041case OP_UPTOI:1042case OP_MINUPTOI:1043case OP_EXACTI:1044case OP_POSSTARI:1045case OP_POSPLUSI:1046case OP_POSQUERYI:1047case OP_POSUPTOI:1048case OP_NOTSTAR:1049case OP_NOTMINSTAR:1050case OP_NOTPLUS:1051case OP_NOTMINPLUS:1052case OP_NOTQUERY:1053case OP_NOTMINQUERY:1054case OP_NOTUPTO:1055case OP_NOTMINUPTO:1056case OP_NOTEXACT:1057case OP_NOTPOSSTAR:1058case OP_NOTPOSPLUS:1059case OP_NOTPOSQUERY:1060case OP_NOTPOSUPTO:1061case OP_NOTSTARI:1062case OP_NOTMINSTARI:1063case OP_NOTPLUSI:1064case OP_NOTMINPLUSI:1065case OP_NOTQUERYI:1066case OP_NOTMINQUERYI:1067case OP_NOTUPTOI:1068case OP_NOTMINUPTOI:1069case OP_NOTEXACTI:1070case OP_NOTPOSSTARI:1071case OP_NOTPOSPLUSI:1072case OP_NOTPOSQUERYI:1073case OP_NOTPOSUPTOI:1074cc += PRIV(OP_lengths)[*cc];1075#ifdef SUPPORT_UNICODE1076if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1077#endif1078return cc;10791080/* Special cases. 
*/1081case OP_TYPESTAR:1082case OP_TYPEMINSTAR:1083case OP_TYPEPLUS:1084case OP_TYPEMINPLUS:1085case OP_TYPEQUERY:1086case OP_TYPEMINQUERY:1087case OP_TYPEUPTO:1088case OP_TYPEMINUPTO:1089case OP_TYPEEXACT:1090case OP_TYPEPOSSTAR:1091case OP_TYPEPOSPLUS:1092case OP_TYPEPOSQUERY:1093case OP_TYPEPOSUPTO:1094return cc + PRIV(OP_lengths)[*cc] - 1;10951096case OP_ANYBYTE:1097#ifdef SUPPORT_UNICODE1098if (common->utf) return NULL;1099#endif1100return cc + 1;11011102case OP_CALLOUT_STR:1103return cc + GET(cc, 1 + 2*LINK_SIZE);11041105#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 81106case OP_ECLASS:1107case OP_XCLASS:1108SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order);1109return cc + GET(cc, 1);1110#endif11111112case OP_MARK:1113case OP_COMMIT_ARG:1114case OP_PRUNE_ARG:1115case OP_SKIP_ARG:1116case OP_THEN_ARG:1117return cc + 1 + 2 + cc[1];11181119default:1120SLJIT_UNREACHABLE();1121return NULL;1122}1123}11241125static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size)1126{1127/* Depends on do_casefulcmp(), do_caselesscmp(), and compile_ref_matchingpath() */1128int locals_size = 2 * SSIZE_OF(sw);1129SLJIT_UNUSED_ARG(common);11301131#ifdef SUPPORT_UNICODE1132if ((*cc == OP_REFI || *cc == OP_DNREFI) && (common->utf || common->ucp))1133locals_size = 3 * SSIZE_OF(sw);1134#endif11351136cc += PRIV(OP_lengths)[*cc];1137/* Although do_casefulcmp() uses only one local, the allocate_stack()1138calls during the repeat destroys LOCAL1 variables. */1139if (*cc >= OP_CRSTAR && *cc <= OP_CRPOSRANGE)1140locals_size += 2 * SSIZE_OF(sw);11411142return (current_locals_size >= locals_size) ? 
current_locals_size : locals_size;1143}11441145static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)1146{1147int count;1148PCRE2_SPTR slot;1149PCRE2_SPTR assert_back_end = cc - 1;1150PCRE2_SPTR assert_na_end = cc - 1;1151sljit_s32 locals_size = 2 * SSIZE_OF(sw);1152BOOL set_recursive_head = FALSE;1153BOOL set_capture_last = FALSE;1154BOOL set_mark = FALSE;11551156/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */1157while (cc < ccend)1158{1159switch(*cc)1160{1161case OP_SET_SOM:1162common->has_set_som = TRUE;1163common->might_be_empty = TRUE;1164cc += 1;1165break;11661167case OP_TYPEUPTO:1168case OP_TYPEEXACT:1169if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))1170locals_size = 3 * SSIZE_OF(sw);1171cc += (2 + IMM2_SIZE) - 1;1172break;11731174case OP_TYPEPOSSTAR:1175case OP_TYPEPOSPLUS:1176case OP_TYPEPOSQUERY:1177if (cc[1] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))1178locals_size = 3 * SSIZE_OF(sw);1179cc += 2 - 1;1180break;11811182case OP_TYPEPOSUPTO:1183#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 321184if (common->utf && locals_size <= 3 * SSIZE_OF(sw))1185locals_size = 3 * SSIZE_OF(sw);1186#endif1187if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))1188locals_size = 3 * SSIZE_OF(sw);1189cc += (2 + IMM2_SIZE) - 1;1190break;11911192case OP_REFI:1193case OP_REF:1194locals_size = ref_update_local_size(common, cc, locals_size);1195common->optimized_cbracket[GET2(cc, 1)] = 0;1196cc += PRIV(OP_lengths)[*cc];1197break;11981199case OP_ASSERT_NA:1200case OP_ASSERTBACK_NA:1201case OP_ASSERT_SCS:1202slot = bracketend(cc);1203if (slot > assert_na_end)1204assert_na_end = slot;1205cc += 1 + LINK_SIZE;1206break;12071208case OP_CBRAPOS:1209case OP_SCBRAPOS:1210common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;1211cc += 1 + LINK_SIZE + IMM2_SIZE;1212break;12131214case OP_COND:1215case OP_SCOND:1216/* Only AUTO_CALLOUT can insert 
this opcode. We do1217not intend to support this case. */1218if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)1219return FALSE;1220cc += 1 + LINK_SIZE;1221break;12221223case OP_CREF:1224common->optimized_cbracket[GET2(cc, 1)] = 0;1225cc += 1 + IMM2_SIZE;1226break;12271228case OP_DNREFI:1229case OP_DNREF:1230locals_size = ref_update_local_size(common, cc, locals_size);1231/* Fall through */1232case OP_DNCREF:1233count = GET2(cc, 1 + IMM2_SIZE);1234slot = common->name_table + GET2(cc, 1) * common->name_entry_size;1235while (count-- > 0)1236{1237common->optimized_cbracket[GET2(slot, 0)] = 0;1238slot += common->name_entry_size;1239}1240cc += PRIV(OP_lengths)[*cc];1241break;12421243case OP_RECURSE:1244/* Set its value only once. */1245set_recursive_head = TRUE;1246cc += 1 + LINK_SIZE;1247break;12481249case OP_CALLOUT:1250case OP_CALLOUT_STR:1251set_capture_last = TRUE;1252cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);1253break;12541255case OP_ASSERTBACK:1256slot = bracketend(cc);1257if (slot > assert_back_end)1258assert_back_end = slot;1259cc += 1 + LINK_SIZE;1260break;12611262case OP_THEN_ARG:1263common->has_then = TRUE;1264common->control_head_ptr = 1;1265/* Fall through. 
*/12661267case OP_COMMIT_ARG:1268case OP_PRUNE_ARG:1269case OP_MARK:1270set_mark = TRUE;1271cc += 1 + 2 + cc[1];1272break;12731274case OP_THEN:1275common->has_then = TRUE;1276common->control_head_ptr = 1;1277cc += 1;1278break;12791280case OP_SKIP:1281if (cc < assert_back_end)1282common->has_skip_in_assert_back = TRUE;1283cc += 1;1284break;12851286case OP_SKIP_ARG:1287common->control_head_ptr = 1;1288common->has_skip_arg = TRUE;1289if (cc < assert_back_end)1290common->has_skip_in_assert_back = TRUE;1291cc += 1 + 2 + cc[1];1292break;12931294case OP_ASSERT_ACCEPT:1295if (cc < assert_na_end)1296return FALSE;1297cc++;1298break;12991300#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 321301case OP_CRPOSRANGE:1302/* The second value can be 0 for infinite repeats. */1303if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size <= 3 * SSIZE_OF(sw))1304locals_size = 3 * SSIZE_OF(sw);1305cc += 1 + 2 * IMM2_SIZE;1306break;13071308case OP_POSUPTO:1309case OP_POSUPTOI:1310case OP_NOTPOSUPTO:1311case OP_NOTPOSUPTOI:1312if (common->utf && locals_size <= 3 * SSIZE_OF(sw))1313locals_size = 3 * SSIZE_OF(sw);1314#endif1315/* Fall through */1316default:1317cc = next_opcode(common, cc);1318if (cc == NULL)1319return FALSE;1320break;1321}1322}13231324SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0);1325#if defined SLJIT_DEBUG && SLJIT_DEBUG1326common->locals_size = locals_size;1327#endif13281329if (locals_size > 0)1330common->ovector_start += locals_size;13311332if (set_mark)1333{1334SLJIT_ASSERT(common->mark_ptr == 0);1335common->mark_ptr = common->ovector_start;1336common->ovector_start += sizeof(sljit_sw);1337}13381339if (set_recursive_head)1340{1341SLJIT_ASSERT(common->recursive_head_ptr == 0);1342common->recursive_head_ptr = common->ovector_start;1343common->ovector_start += sizeof(sljit_sw);1344}13451346if (set_capture_last)1347{1348SLJIT_ASSERT(common->capture_last_ptr == 0);1349common->capture_last_ptr = common->ovector_start;1350common->ovector_start += 
sizeof(sljit_sw);1351}13521353return TRUE;1354}13551356#define EARLY_FAIL_ENHANCE_MAX (3 + 3)13571358/*1359Start represent the number of allowed early fail enhancements13601361The 0-2 values has a special meaning:13620 - skip is allowed for all iterators13631 - fail is allowed for all iterators13642 - fail is allowed for greedy iterators13653 - only ranged early fail is allowed1366>3 - (start - 3) number of remaining ranged early fails allowed13671368return: the updated value of start1369*/1370static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,1371int *private_data_start, sljit_s32 depth, int start)1372{1373PCRE2_SPTR begin = cc;1374PCRE2_SPTR next_alt;1375PCRE2_SPTR end;1376PCRE2_SPTR accelerated_start;1377int result = 0;1378int count, prev_count;13791380SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);1381SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);1382SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);13831384next_alt = cc + GET(cc, 1);1385if (*next_alt == OP_ALT && start < 1)1386start = 1;13871388do1389{1390count = start;1391cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);13921393while (TRUE)1394{1395accelerated_start = NULL;13961397switch(*cc)1398{1399case OP_SOD:1400case OP_SOM:1401case OP_SET_SOM:1402case OP_NOT_WORD_BOUNDARY:1403case OP_WORD_BOUNDARY:1404case OP_EODN:1405case OP_EOD:1406case OP_CIRC:1407case OP_CIRCM:1408case OP_DOLL:1409case OP_DOLLM:1410case OP_NOT_UCP_WORD_BOUNDARY:1411case OP_UCP_WORD_BOUNDARY:1412/* Zero width assertions. 
*/1413cc++;1414continue;14151416case OP_NOT_DIGIT:1417case OP_DIGIT:1418case OP_NOT_WHITESPACE:1419case OP_WHITESPACE:1420case OP_NOT_WORDCHAR:1421case OP_WORDCHAR:1422case OP_ANY:1423case OP_ALLANY:1424case OP_ANYBYTE:1425case OP_NOT_HSPACE:1426case OP_HSPACE:1427case OP_NOT_VSPACE:1428case OP_VSPACE:1429if (count < 1)1430count = 1;1431cc++;1432continue;14331434case OP_ANYNL:1435case OP_EXTUNI:1436if (count < 3)1437count = 3;1438cc++;1439continue;14401441case OP_NOTPROP:1442case OP_PROP:1443if (count < 1)1444count = 1;1445cc += 1 + 2;1446continue;14471448case OP_CHAR:1449case OP_CHARI:1450case OP_NOT:1451case OP_NOTI:1452if (count < 1)1453count = 1;1454cc += 2;1455#ifdef SUPPORT_UNICODE1456if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1457#endif1458continue;14591460case OP_TYPEMINSTAR:1461case OP_TYPEMINPLUS:1462if (count == 2)1463count = 3;1464/* Fall through */14651466case OP_TYPESTAR:1467case OP_TYPEPLUS:1468case OP_TYPEPOSSTAR:1469case OP_TYPEPOSPLUS:1470/* The type or prop opcode is skipped in the next iteration. */1471cc += 1;14721473if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)1474{1475accelerated_start = cc - 1;1476break;1477}14781479if (count < 3)1480count = 3;1481continue;14821483case OP_TYPEEXACT:1484if (count < 1)1485count = 1;1486cc += 1 + IMM2_SIZE;1487continue;14881489case OP_TYPEUPTO:1490case OP_TYPEMINUPTO:1491case OP_TYPEPOSUPTO:1492cc += IMM2_SIZE;1493/* Fall through */14941495case OP_TYPEQUERY:1496case OP_TYPEMINQUERY:1497case OP_TYPEPOSQUERY:1498/* The type or prop opcode is skipped in the next iteration. 
*/1499if (count < 3)1500count = 3;1501cc += 1;1502continue;15031504case OP_MINSTAR:1505case OP_MINPLUS:1506case OP_MINSTARI:1507case OP_MINPLUSI:1508case OP_NOTMINSTAR:1509case OP_NOTMINPLUS:1510case OP_NOTMINSTARI:1511case OP_NOTMINPLUSI:1512if (count == 2)1513count = 3;1514/* Fall through */15151516case OP_STAR:1517case OP_PLUS:1518case OP_POSSTAR:1519case OP_POSPLUS:15201521case OP_STARI:1522case OP_PLUSI:1523case OP_POSSTARI:1524case OP_POSPLUSI:15251526case OP_NOTSTAR:1527case OP_NOTPLUS:1528case OP_NOTPOSSTAR:1529case OP_NOTPOSPLUS:15301531case OP_NOTSTARI:1532case OP_NOTPLUSI:1533case OP_NOTPOSSTARI:1534case OP_NOTPOSPLUSI:1535accelerated_start = cc;1536cc += 2;1537#ifdef SUPPORT_UNICODE1538if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1539#endif1540break;15411542case OP_EXACT:1543if (count < 1)1544count = 1;1545cc += 2 + IMM2_SIZE;1546#ifdef SUPPORT_UNICODE1547if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1548#endif1549continue;15501551case OP_UPTO:1552case OP_MINUPTO:1553case OP_POSUPTO:1554case OP_UPTOI:1555case OP_MINUPTOI:1556case OP_EXACTI:1557case OP_POSUPTOI:1558case OP_NOTUPTO:1559case OP_NOTMINUPTO:1560case OP_NOTEXACT:1561case OP_NOTPOSUPTO:1562case OP_NOTUPTOI:1563case OP_NOTMINUPTOI:1564case OP_NOTEXACTI:1565case OP_NOTPOSUPTOI:1566cc += IMM2_SIZE;1567/* Fall through */15681569case OP_QUERY:1570case OP_MINQUERY:1571case OP_POSQUERY:1572case OP_QUERYI:1573case OP_MINQUERYI:1574case OP_POSQUERYI:1575case OP_NOTQUERY:1576case OP_NOTMINQUERY:1577case OP_NOTPOSQUERY:1578case OP_NOTQUERYI:1579case OP_NOTMINQUERYI:1580case OP_NOTPOSQUERYI:1581if (count < 3)1582count = 3;1583cc += 2;1584#ifdef SUPPORT_UNICODE1585if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1586#endif1587continue;15881589case OP_CLASS:1590case OP_NCLASS:1591#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 81592case OP_XCLASS:1593case OP_ECLASS:1594accelerated_start = cc;1595cc += (*cc >= OP_XCLASS) ? 
GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)));1596#else1597accelerated_start = cc;1598cc += (1 + (32 / sizeof(PCRE2_UCHAR)));1599#endif16001601switch (*cc)1602{1603case OP_CRMINSTAR:1604case OP_CRMINPLUS:1605if (count == 2)1606count = 3;1607/* Fall through */16081609case OP_CRSTAR:1610case OP_CRPLUS:1611case OP_CRPOSSTAR:1612case OP_CRPOSPLUS:1613cc++;1614break;16151616case OP_CRRANGE:1617case OP_CRMINRANGE:1618case OP_CRPOSRANGE:1619if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))1620{1621/* Exact repeat. */1622cc += 1 + 2 * IMM2_SIZE;1623if (count < 1)1624count = 1;1625continue;1626}16271628cc += 2 * IMM2_SIZE;1629/* Fall through */1630case OP_CRQUERY:1631case OP_CRMINQUERY:1632case OP_CRPOSQUERY:1633cc++;1634if (count < 3)1635count = 3;1636continue;16371638default:1639/* No repeat. */1640if (count < 1)1641count = 1;1642continue;1643}1644break;16451646case OP_BRA:1647case OP_CBRA:1648prev_count = count;1649if (count < 1)1650count = 1;16511652if (depth >= 4)1653break;16541655if (count < 3 && cc[GET(cc, 1)] == OP_ALT)1656count = 3;16571658end = bracketend(cc);1659if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))1660break;16611662prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);16631664if (prev_count > count)1665count = prev_count;16661667if (PRIVATE_DATA(cc) != 0)1668common->private_data_ptrs[begin - common->start] = 1;16691670if (count < EARLY_FAIL_ENHANCE_MAX)1671{1672cc = end;1673continue;1674}1675break;16761677case OP_KET:1678SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);1679if (cc >= next_alt)1680break;1681cc += 1 + LINK_SIZE;1682continue;1683}16841685if (accelerated_start == NULL)1686break;16871688if (count == 0)1689{1690common->fast_forward_bc_ptr = accelerated_start;1691common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;1692*private_data_start += sizeof(sljit_sw);1693count = 4;1694}1695else if (count 
< 3)1696{1697common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;16981699if (common->early_fail_start_ptr == 0)1700common->early_fail_start_ptr = *private_data_start;17011702*private_data_start += sizeof(sljit_sw);1703common->early_fail_end_ptr = *private_data_start;17041705if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)1706return EARLY_FAIL_ENHANCE_MAX;17071708count = 4;1709}1710else1711{1712common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;17131714if (common->early_fail_start_ptr == 0)1715common->early_fail_start_ptr = *private_data_start;17161717*private_data_start += 2 * sizeof(sljit_sw);1718common->early_fail_end_ptr = *private_data_start;17191720if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)1721return EARLY_FAIL_ENHANCE_MAX;17221723count++;1724}17251726/* Cannot be part of a repeat. */1727common->private_data_ptrs[begin - common->start] = 1;17281729if (count >= EARLY_FAIL_ENHANCE_MAX)1730break;1731}17321733if (*cc != OP_ALT && *cc != OP_KET)1734result = EARLY_FAIL_ENHANCE_MAX;1735else if (result < count)1736result = count;17371738cc = next_alt;1739next_alt = cc + GET(cc, 1);1740}1741while (*cc == OP_ALT);17421743return result;1744}17451746static int get_class_iterator_size(PCRE2_SPTR cc)1747{1748sljit_u32 min;1749sljit_u32 max;1750switch(*cc)1751{1752case OP_CRSTAR:1753case OP_CRPLUS:1754return 2;17551756case OP_CRMINSTAR:1757case OP_CRMINPLUS:1758case OP_CRQUERY:1759case OP_CRMINQUERY:1760return 1;17611762case OP_CRRANGE:1763case OP_CRMINRANGE:1764min = GET2(cc, 1);1765max = GET2(cc, 1 + IMM2_SIZE);1766if (max == 0)1767return (*cc == OP_CRRANGE) ? 2 : 1;1768max -= min;1769if (max > (sljit_u32)(*cc == OP_CRRANGE ? 
0 : 1))1770max = 2;1771return max;17721773default:1774return 0;1775}1776}17771778static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)1779{1780PCRE2_SPTR end = bracketend(begin);1781PCRE2_SPTR next;1782PCRE2_SPTR next_end;1783PCRE2_SPTR max_end;1784PCRE2_UCHAR type;1785sljit_sw length = end - begin;1786sljit_s32 min, max, i;17871788/* Detect fixed iterations first. */1789if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)1790return FALSE;17911792/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/1793* Skip the check of the second part. */1794if (PRIVATE_DATA(end - LINK_SIZE) != 0)1795return TRUE;17961797next = end;1798min = 1;1799while (1)1800{1801if (*next != *begin)1802break;1803next_end = bracketend(next);1804if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)1805break;1806next = next_end;1807min++;1808}18091810if (min == 2)1811return FALSE;18121813max = 0;1814max_end = next;1815if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)1816{1817type = *next;1818while (1)1819{1820if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)1821break;1822next_end = bracketend(next + 2 + LINK_SIZE);1823if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)1824break;1825next = next_end;1826max++;1827}18281829if (next[0] == type && next[1] == *begin && max >= 1)1830{1831next_end = bracketend(next + 1);1832if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)1833{1834for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)1835if (*next_end != OP_KET)1836break;18371838if (i == max)1839{1840common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;1841common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;1842/* +2 the original and the last. 
*/1843common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;1844if (min == 1)1845return TRUE;1846min--;1847max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);1848}1849}1850}1851}18521853if (min >= 3)1854{1855common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;1856common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;1857common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;1858return TRUE;1859}18601861return FALSE;1862}18631864#define CASE_ITERATOR_PRIVATE_DATA_1 \1865case OP_MINSTAR: \1866case OP_MINPLUS: \1867case OP_QUERY: \1868case OP_MINQUERY: \1869case OP_MINSTARI: \1870case OP_MINPLUSI: \1871case OP_QUERYI: \1872case OP_MINQUERYI: \1873case OP_NOTMINSTAR: \1874case OP_NOTMINPLUS: \1875case OP_NOTQUERY: \1876case OP_NOTMINQUERY: \1877case OP_NOTMINSTARI: \1878case OP_NOTMINPLUSI: \1879case OP_NOTQUERYI: \1880case OP_NOTMINQUERYI:18811882#define CASE_ITERATOR_PRIVATE_DATA_2A \1883case OP_STAR: \1884case OP_PLUS: \1885case OP_STARI: \1886case OP_PLUSI: \1887case OP_NOTSTAR: \1888case OP_NOTPLUS: \1889case OP_NOTSTARI: \1890case OP_NOTPLUSI:18911892#define CASE_ITERATOR_PRIVATE_DATA_2B \1893case OP_UPTO: \1894case OP_MINUPTO: \1895case OP_UPTOI: \1896case OP_MINUPTOI: \1897case OP_NOTUPTO: \1898case OP_NOTMINUPTO: \1899case OP_NOTUPTOI: \1900case OP_NOTMINUPTOI:19011902#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \1903case OP_TYPEMINSTAR: \1904case OP_TYPEMINPLUS: \1905case OP_TYPEQUERY: \1906case OP_TYPEMINQUERY:19071908#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \1909case OP_TYPESTAR: \1910case OP_TYPEPLUS:19111912#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \1913case OP_TYPEUPTO: \1914case OP_TYPEMINUPTO:19151916static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)1917{1918PCRE2_SPTR cc = common->start;1919PCRE2_SPTR alternative;1920PCRE2_SPTR end = NULL;1921int private_data_ptr = *private_data_start;1922int space, size, 
bracketlen;1923BOOL repeat_check = TRUE;19241925while (cc < ccend)1926{1927space = 0;1928size = 0;1929bracketlen = 0;1930if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)1931break;19321933/* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */1934if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))1935{1936if (detect_repeat(common, cc))1937{1938/* These brackets are converted to repeats, so no global1939based single character repeat is allowed. */1940if (cc >= end)1941end = bracketend(cc);1942}1943}1944repeat_check = TRUE;19451946switch(*cc)1947{1948case OP_KET:1949if (common->private_data_ptrs[cc + 1 - common->start] != 0)1950{1951common->private_data_ptrs[cc - common->start] = private_data_ptr;1952private_data_ptr += sizeof(sljit_sw);1953cc += common->private_data_ptrs[cc + 1 - common->start];1954}1955cc += 1 + LINK_SIZE;1956break;19571958case OP_ASSERT:1959case OP_ASSERT_NOT:1960case OP_ASSERTBACK:1961case OP_ASSERTBACK_NOT:1962case OP_ASSERT_NA:1963case OP_ONCE:1964case OP_SCRIPT_RUN:1965case OP_BRAPOS:1966case OP_SBRA:1967case OP_SBRAPOS:1968case OP_SCOND:1969common->private_data_ptrs[cc - common->start] = private_data_ptr;1970private_data_ptr += sizeof(sljit_sw);1971bracketlen = 1 + LINK_SIZE;1972break;19731974case OP_ASSERTBACK_NA:1975common->private_data_ptrs[cc - common->start] = private_data_ptr;1976private_data_ptr += sizeof(sljit_sw);19771978if (find_vreverse(cc))1979{1980common->private_data_ptrs[cc + 1 - common->start] = 1;1981private_data_ptr += sizeof(sljit_sw);1982}19831984bracketlen = 1 + LINK_SIZE;1985break;19861987case OP_ASSERT_SCS:1988common->private_data_ptrs[cc - common->start] = private_data_ptr;1989private_data_ptr += 2 * sizeof(sljit_sw);1990bracketlen = 1 + LINK_SIZE;1991break;19921993case OP_CBRAPOS:1994case OP_SCBRAPOS:1995common->private_data_ptrs[cc - common->start] = private_data_ptr;1996private_data_ptr += sizeof(sljit_sw);1997bracketlen = 1 + LINK_SIZE + 
IMM2_SIZE;1998break;19992000case OP_COND:2001/* Might be a hidden SCOND. */2002common->private_data_ptrs[cc - common->start] = 0;2003alternative = cc + GET(cc, 1);2004if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)2005{2006common->private_data_ptrs[cc - common->start] = private_data_ptr;2007private_data_ptr += sizeof(sljit_sw);2008}2009bracketlen = 1 + LINK_SIZE;2010break;20112012case OP_BRA:2013bracketlen = 1 + LINK_SIZE;2014break;20152016case OP_CBRA:2017case OP_SCBRA:2018bracketlen = 1 + LINK_SIZE + IMM2_SIZE;2019break;20202021case OP_BRAZERO:2022case OP_BRAMINZERO:2023case OP_BRAPOSZERO:2024size = 1;2025repeat_check = FALSE;2026break;20272028CASE_ITERATOR_PRIVATE_DATA_12029size = -2;2030space = 1;2031break;20322033CASE_ITERATOR_PRIVATE_DATA_2A2034size = -2;2035space = 2;2036break;20372038CASE_ITERATOR_PRIVATE_DATA_2B2039size = -(2 + IMM2_SIZE);2040space = 2;2041break;20422043CASE_ITERATOR_TYPE_PRIVATE_DATA_12044size = 1;2045space = 1;2046break;20472048CASE_ITERATOR_TYPE_PRIVATE_DATA_2A2049size = 1;2050if (cc[1] != OP_EXTUNI)2051space = 2;2052break;20532054case OP_TYPEUPTO:2055size = 1 + IMM2_SIZE;2056if (cc[1 + IMM2_SIZE] != OP_EXTUNI)2057space = 2;2058break;20592060case OP_TYPEMINUPTO:2061size = 1 + IMM2_SIZE;2062space = 2;2063break;20642065case OP_CLASS:2066case OP_NCLASS:2067size = 1 + 32 / sizeof(PCRE2_UCHAR);2068space = get_class_iterator_size(cc + size);2069break;20702071#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 82072case OP_XCLASS:2073case OP_ECLASS:2074size = GET(cc, 1);2075space = get_class_iterator_size(cc + size);2076break;2077#endif20782079default:2080cc = next_opcode(common, cc);2081SLJIT_ASSERT(cc != NULL);2082break;2083}20842085/* Character iterators, which are not inside a repeated bracket,2086gets a private slot instead of allocating it on the stack. 
*/2087if (space > 0 && cc >= end)2088{2089common->private_data_ptrs[cc - common->start] = private_data_ptr;2090private_data_ptr += sizeof(sljit_sw) * space;2091}20922093if (size != 0)2094{2095if (size < 0)2096{2097cc += -size;2098#ifdef SUPPORT_UNICODE2099if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);2100#endif2101}2102else2103cc += size;2104}21052106if (bracketlen > 0)2107{2108if (cc >= end)2109{2110end = bracketend(cc);2111if (end[-1 - LINK_SIZE] == OP_KET)2112end = NULL;2113}2114cc += bracketlen;2115}2116}2117*private_data_start = private_data_ptr;2118}21192120/* Returns with a frame_types (always < 0) if no need for frame. */2121static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)2122{2123int length = 0;2124int possessive = 0;2125BOOL stack_restore = FALSE;2126BOOL setsom_found = recursive;2127BOOL setmark_found = recursive;2128/* The last capture is a local variable even for recursions. */2129BOOL capture_last_found = FALSE;21302131#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD2132SLJIT_ASSERT(common->control_head_ptr != 0);2133*needs_control_head = TRUE;2134#else2135*needs_control_head = FALSE;2136#endif21372138if (ccend == NULL)2139{2140ccend = bracketend(cc) - (1 + LINK_SIZE);2141if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))2142{2143possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;2144/* This is correct regardless of common->capture_last_ptr. 
*/2145capture_last_found = TRUE;2146}2147cc = next_opcode(common, cc);2148}21492150SLJIT_ASSERT(cc != NULL);2151while (cc < ccend)2152switch(*cc)2153{2154case OP_SET_SOM:2155SLJIT_ASSERT(common->has_set_som);2156stack_restore = TRUE;2157if (!setsom_found)2158{2159length += 2;2160setsom_found = TRUE;2161}2162cc += 1;2163break;21642165case OP_MARK:2166case OP_COMMIT_ARG:2167case OP_PRUNE_ARG:2168case OP_THEN_ARG:2169SLJIT_ASSERT(common->mark_ptr != 0);2170stack_restore = TRUE;2171if (!setmark_found)2172{2173length += 2;2174setmark_found = TRUE;2175}2176if (common->control_head_ptr != 0)2177*needs_control_head = TRUE;2178cc += 1 + 2 + cc[1];2179break;21802181case OP_RECURSE:2182stack_restore = TRUE;2183if (common->has_set_som && !setsom_found)2184{2185length += 2;2186setsom_found = TRUE;2187}2188if (common->mark_ptr != 0 && !setmark_found)2189{2190length += 2;2191setmark_found = TRUE;2192}2193if (common->capture_last_ptr != 0 && !capture_last_found)2194{2195length += 2;2196capture_last_found = TRUE;2197}2198cc += 1 + LINK_SIZE;2199break;22002201case OP_CBRA:2202case OP_CBRAPOS:2203case OP_SCBRA:2204case OP_SCBRAPOS:2205stack_restore = TRUE;2206if (common->capture_last_ptr != 0 && !capture_last_found)2207{2208length += 2;2209capture_last_found = TRUE;2210}2211length += 3;2212cc += 1 + LINK_SIZE + IMM2_SIZE;2213break;22142215case OP_THEN:2216stack_restore = TRUE;2217if (common->control_head_ptr != 0)2218*needs_control_head = TRUE;2219cc ++;2220break;22212222default:2223stack_restore = TRUE;2224/* Fall through. 
*/22252226case OP_NOT_WORD_BOUNDARY:2227case OP_WORD_BOUNDARY:2228case OP_NOT_DIGIT:2229case OP_DIGIT:2230case OP_NOT_WHITESPACE:2231case OP_WHITESPACE:2232case OP_NOT_WORDCHAR:2233case OP_WORDCHAR:2234case OP_ANY:2235case OP_ALLANY:2236case OP_ANYBYTE:2237case OP_NOTPROP:2238case OP_PROP:2239case OP_ANYNL:2240case OP_NOT_HSPACE:2241case OP_HSPACE:2242case OP_NOT_VSPACE:2243case OP_VSPACE:2244case OP_EXTUNI:2245case OP_EODN:2246case OP_EOD:2247case OP_CIRC:2248case OP_CIRCM:2249case OP_DOLL:2250case OP_DOLLM:2251case OP_CHAR:2252case OP_CHARI:2253case OP_NOT:2254case OP_NOTI:22552256case OP_EXACT:2257case OP_POSSTAR:2258case OP_POSPLUS:2259case OP_POSQUERY:2260case OP_POSUPTO:22612262case OP_EXACTI:2263case OP_POSSTARI:2264case OP_POSPLUSI:2265case OP_POSQUERYI:2266case OP_POSUPTOI:22672268case OP_NOTEXACT:2269case OP_NOTPOSSTAR:2270case OP_NOTPOSPLUS:2271case OP_NOTPOSQUERY:2272case OP_NOTPOSUPTO:22732274case OP_NOTEXACTI:2275case OP_NOTPOSSTARI:2276case OP_NOTPOSPLUSI:2277case OP_NOTPOSQUERYI:2278case OP_NOTPOSUPTOI:22792280case OP_TYPEEXACT:2281case OP_TYPEPOSSTAR:2282case OP_TYPEPOSPLUS:2283case OP_TYPEPOSQUERY:2284case OP_TYPEPOSUPTO:22852286case OP_CLASS:2287case OP_NCLASS:2288case OP_XCLASS:2289case OP_ECLASS:22902291case OP_CALLOUT:2292case OP_CALLOUT_STR:22932294case OP_NOT_UCP_WORD_BOUNDARY:2295case OP_UCP_WORD_BOUNDARY:22962297cc = next_opcode(common, cc);2298SLJIT_ASSERT(cc != NULL);2299break;2300}23012302/* Possessive quantifiers can use a special case. */2303if (SLJIT_UNLIKELY(possessive == length))2304return stack_restore ? no_frame : no_stack;23052306if (length > 0)2307return length + 1;2308return stack_restore ? no_frame : no_stack;2309}23102311static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)2312{2313DEFINE_COMPILER;2314BOOL setsom_found = FALSE;2315BOOL setmark_found = FALSE;2316/* The last capture is a local variable even for recursions. 
*/2317BOOL capture_last_found = FALSE;2318int offset;23192320/* >= 1 + shortest item size (2) */2321SLJIT_UNUSED_ARG(stacktop);2322SLJIT_ASSERT(stackpos >= stacktop + 2);23232324stackpos = STACK(stackpos);2325if (ccend == NULL)2326{2327ccend = bracketend(cc) - (1 + LINK_SIZE);2328if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)2329cc = next_opcode(common, cc);2330}23312332/* The data is restored by do_revertframes(). */2333SLJIT_ASSERT(cc != NULL);2334while (cc < ccend)2335switch(*cc)2336{2337case OP_SET_SOM:2338SLJIT_ASSERT(common->has_set_som);2339if (!setsom_found)2340{2341OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));2342OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));2343stackpos -= SSIZE_OF(sw);2344OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2345stackpos -= SSIZE_OF(sw);2346setsom_found = TRUE;2347}2348cc += 1;2349break;23502351case OP_MARK:2352case OP_COMMIT_ARG:2353case OP_PRUNE_ARG:2354case OP_THEN_ARG:2355SLJIT_ASSERT(common->mark_ptr != 0);2356if (!setmark_found)2357{2358OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);2359OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);2360stackpos -= SSIZE_OF(sw);2361OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2362stackpos -= SSIZE_OF(sw);2363setmark_found = TRUE;2364}2365cc += 1 + 2 + cc[1];2366break;23672368case OP_RECURSE:2369if (common->has_set_som && !setsom_found)2370{2371OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));2372OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));2373stackpos -= SSIZE_OF(sw);2374OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2375stackpos -= SSIZE_OF(sw);2376setsom_found = TRUE;2377}2378if (common->mark_ptr != 0 && !setmark_found)2379{2380OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);2381OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);2382stackpos -= SSIZE_OF(sw);2383OP1(SLJIT_MOV, 
SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2384stackpos -= SSIZE_OF(sw);2385setmark_found = TRUE;2386}2387if (common->capture_last_ptr != 0 && !capture_last_found)2388{2389OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);2390OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);2391stackpos -= SSIZE_OF(sw);2392OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2393stackpos -= SSIZE_OF(sw);2394capture_last_found = TRUE;2395}2396cc += 1 + LINK_SIZE;2397break;23982399case OP_CBRA:2400case OP_CBRAPOS:2401case OP_SCBRA:2402case OP_SCBRAPOS:2403if (common->capture_last_ptr != 0 && !capture_last_found)2404{2405OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);2406OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);2407stackpos -= SSIZE_OF(sw);2408OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2409stackpos -= SSIZE_OF(sw);2410capture_last_found = TRUE;2411}2412offset = (GET2(cc, 1 + LINK_SIZE)) << 1;2413OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));2414stackpos -= SSIZE_OF(sw);2415OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));2416OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));2417OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2418stackpos -= SSIZE_OF(sw);2419OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);2420stackpos -= SSIZE_OF(sw);24212422cc += 1 + LINK_SIZE + IMM2_SIZE;2423break;24242425default:2426cc = next_opcode(common, cc);2427SLJIT_ASSERT(cc != NULL);2428break;2429}24302431OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);2432SLJIT_ASSERT(stackpos == STACK(stacktop));2433}24342435#define RECURSE_TMP_REG_COUNT 324362437typedef struct delayed_mem_copy_status {2438struct sljit_compiler *compiler;2439int store_bases[RECURSE_TMP_REG_COUNT];2440int store_offsets[RECURSE_TMP_REG_COUNT];2441int tmp_regs[RECURSE_TMP_REG_COUNT];2442int 
saved_tmp_regs[RECURSE_TMP_REG_COUNT];2443int next_tmp_reg;2444} delayed_mem_copy_status;24452446static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)2447{2448int i;24492450for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)2451{2452SLJIT_ASSERT(status->tmp_regs[i] >= 0);2453SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);24542455status->store_bases[i] = -1;2456}2457status->next_tmp_reg = 0;2458status->compiler = common->compiler;2459}24602461static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,2462int store_base, sljit_sw store_offset)2463{2464struct sljit_compiler *compiler = status->compiler;2465int next_tmp_reg = status->next_tmp_reg;2466int tmp_reg = status->tmp_regs[next_tmp_reg];24672468SLJIT_ASSERT(load_base > 0 && store_base > 0);24692470if (status->store_bases[next_tmp_reg] == -1)2471{2472/* Preserve virtual registers. 
*/2473if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0)2474OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);2475}2476else2477OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);24782479OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);2480status->store_bases[next_tmp_reg] = store_base;2481status->store_offsets[next_tmp_reg] = store_offset;24822483status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;2484}24852486static void delayed_mem_copy_finish(delayed_mem_copy_status *status)2487{2488struct sljit_compiler *compiler = status->compiler;2489int next_tmp_reg = status->next_tmp_reg;2490int tmp_reg, saved_tmp_reg, i;24912492for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)2493{2494if (status->store_bases[next_tmp_reg] != -1)2495{2496tmp_reg = status->tmp_regs[next_tmp_reg];2497saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];24982499OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);25002501/* Restore virtual registers. 
*/2502if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0)2503OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);2504}25052506next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;2507}2508}25092510#undef RECURSE_TMP_REG_COUNT25112512static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)2513{2514uint8_t *byte;2515uint8_t mask;25162517SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);25182519bit_index >>= SLJIT_WORD_SHIFT;25202521SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);25222523mask = 1 << (bit_index & 0x7);2524byte = common->recurse_bitset + (bit_index >> 3);25252526if (*byte & mask)2527return FALSE;25282529*byte |= mask;2530return TRUE;2531}25322533enum get_recurse_flags {2534recurse_flag_quit_found = (1 << 0),2535recurse_flag_accept_found = (1 << 1),2536recurse_flag_setsom_found = (1 << 2),2537recurse_flag_setmark_found = (1 << 3),2538recurse_flag_control_head_found = (1 << 4),2539};25402541static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)2542{2543int length = 1;2544int size, offset;2545PCRE2_SPTR alternative;2546uint32_t recurse_flags = 0;25472548memset(common->recurse_bitset, 0, common->recurse_bitset_size);25492550#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD2551SLJIT_ASSERT(common->control_head_ptr != 0);2552recurse_flags |= recurse_flag_control_head_found;2553#endif25542555/* Calculate the sum of the private machine words. 
*/2556while (cc < ccend)2557{2558size = 0;2559switch(*cc)2560{2561case OP_SET_SOM:2562SLJIT_ASSERT(common->has_set_som);2563recurse_flags |= recurse_flag_setsom_found;2564cc += 1;2565break;25662567case OP_RECURSE:2568if (common->has_set_som)2569recurse_flags |= recurse_flag_setsom_found;2570if (common->mark_ptr != 0)2571recurse_flags |= recurse_flag_setmark_found;2572if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))2573length++;2574cc += 1 + LINK_SIZE;2575break;25762577case OP_KET:2578offset = PRIVATE_DATA(cc);2579if (offset != 0)2580{2581if (recurse_check_bit(common, offset))2582length++;2583SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);2584cc += PRIVATE_DATA(cc + 1);2585}2586cc += 1 + LINK_SIZE;2587break;25882589case OP_ASSERT:2590case OP_ASSERT_NOT:2591case OP_ASSERTBACK:2592case OP_ASSERTBACK_NOT:2593case OP_ASSERT_NA:2594case OP_ASSERTBACK_NA:2595case OP_ONCE:2596case OP_SCRIPT_RUN:2597case OP_BRAPOS:2598case OP_SBRA:2599case OP_SBRAPOS:2600case OP_SCOND:2601SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);2602if (recurse_check_bit(common, PRIVATE_DATA(cc)))2603length++;2604cc += 1 + LINK_SIZE;2605break;26062607case OP_ASSERT_SCS:2608SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);2609if (recurse_check_bit(common, PRIVATE_DATA(cc)))2610length += 2;2611cc += 1 + LINK_SIZE;2612break;26132614case OP_CBRA:2615case OP_SCBRA:2616offset = GET2(cc, 1 + LINK_SIZE);2617if (recurse_check_bit(common, OVECTOR(offset << 1)))2618{2619SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));2620length += 2;2621}2622if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))2623length++;2624if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))2625length++;2626cc += 1 + LINK_SIZE + IMM2_SIZE;2627break;26282629case OP_CBRAPOS:2630case OP_SCBRAPOS:2631offset = GET2(cc, 1 + LINK_SIZE);2632if (recurse_check_bit(common, OVECTOR(offset << 1)))2633{2634SLJIT_ASSERT(recurse_check_bit(common, 
OVECTOR((offset << 1) + 1)));2635length += 2;2636}2637if (recurse_check_bit(common, OVECTOR_PRIV(offset)))2638length++;2639if (recurse_check_bit(common, PRIVATE_DATA(cc)))2640length++;2641if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))2642length++;2643cc += 1 + LINK_SIZE + IMM2_SIZE;2644break;26452646case OP_COND:2647/* Might be a hidden SCOND. */2648alternative = cc + GET(cc, 1);2649if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))2650length++;2651cc += 1 + LINK_SIZE;2652break;26532654CASE_ITERATOR_PRIVATE_DATA_12655offset = PRIVATE_DATA(cc);2656if (offset != 0 && recurse_check_bit(common, offset))2657length++;2658cc += 2;2659#ifdef SUPPORT_UNICODE2660if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);2661#endif2662break;26632664CASE_ITERATOR_PRIVATE_DATA_2A2665offset = PRIVATE_DATA(cc);2666if (offset != 0 && recurse_check_bit(common, offset))2667{2668SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));2669length += 2;2670}2671cc += 2;2672#ifdef SUPPORT_UNICODE2673if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);2674#endif2675break;26762677CASE_ITERATOR_PRIVATE_DATA_2B2678offset = PRIVATE_DATA(cc);2679if (offset != 0 && recurse_check_bit(common, offset))2680{2681SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));2682length += 2;2683}2684cc += 2 + IMM2_SIZE;2685#ifdef SUPPORT_UNICODE2686if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);2687#endif2688break;26892690CASE_ITERATOR_TYPE_PRIVATE_DATA_12691offset = PRIVATE_DATA(cc);2692if (offset != 0 && recurse_check_bit(common, offset))2693length++;2694cc += 1;2695break;26962697CASE_ITERATOR_TYPE_PRIVATE_DATA_2A2698offset = PRIVATE_DATA(cc);2699if (offset != 0 && recurse_check_bit(common, offset))2700{2701SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));2702length += 2;2703}2704cc += 
1;2705break;27062707CASE_ITERATOR_TYPE_PRIVATE_DATA_2B2708offset = PRIVATE_DATA(cc);2709if (offset != 0 && recurse_check_bit(common, offset))2710{2711SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));2712length += 2;2713}2714cc += 1 + IMM2_SIZE;2715break;27162717case OP_CLASS:2718case OP_NCLASS:2719#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 82720case OP_XCLASS:2721case OP_ECLASS:2722size = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);2723#else2724size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);2725#endif27262727offset = PRIVATE_DATA(cc);2728if (offset != 0 && recurse_check_bit(common, offset))2729length += get_class_iterator_size(cc + size);2730cc += size;2731break;27322733case OP_MARK:2734case OP_COMMIT_ARG:2735case OP_PRUNE_ARG:2736case OP_THEN_ARG:2737SLJIT_ASSERT(common->mark_ptr != 0);2738recurse_flags |= recurse_flag_setmark_found;2739if (common->control_head_ptr != 0)2740recurse_flags |= recurse_flag_control_head_found;2741if (*cc != OP_MARK)2742recurse_flags |= recurse_flag_quit_found;27432744cc += 1 + 2 + cc[1];2745break;27462747case OP_PRUNE:2748case OP_SKIP:2749case OP_COMMIT:2750recurse_flags |= recurse_flag_quit_found;2751cc++;2752break;27532754case OP_SKIP_ARG:2755recurse_flags |= recurse_flag_quit_found;2756cc += 1 + 2 + cc[1];2757break;27582759case OP_THEN:2760SLJIT_ASSERT(common->control_head_ptr != 0);2761recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;2762cc++;2763break;27642765case OP_ACCEPT:2766case OP_ASSERT_ACCEPT:2767recurse_flags |= recurse_flag_accept_found;2768cc++;2769break;27702771default:2772cc = next_opcode(common, cc);2773SLJIT_ASSERT(cc != NULL);2774break;2775}2776}2777SLJIT_ASSERT(cc == ccend);27782779if (recurse_flags & recurse_flag_control_head_found)2780length++;2781if (recurse_flags & recurse_flag_quit_found)2782{2783if (recurse_flags & recurse_flag_setsom_found)2784length++;2785if (recurse_flags & 
recurse_flag_setmark_found)2786length++;2787}27882789*result_flags = recurse_flags;2790return length;2791}27922793enum copy_recurse_data_types {2794recurse_copy_from_global,2795recurse_copy_private_to_global,2796recurse_copy_shared_to_global,2797recurse_copy_kept_shared_to_global,2798recurse_swap_global2799};28002801static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,2802int type, int stackptr, int stacktop, uint32_t recurse_flags)2803{2804delayed_mem_copy_status status;2805PCRE2_SPTR alternative;2806sljit_sw private_srcw[2];2807sljit_sw shared_srcw[3];2808sljit_sw kept_shared_srcw[2];2809int private_count, shared_count, kept_shared_count;2810int from_sp, base_reg, offset, i;28112812memset(common->recurse_bitset, 0, common->recurse_bitset_size);28132814#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD2815SLJIT_ASSERT(common->control_head_ptr != 0);2816recurse_check_bit(common, common->control_head_ptr);2817#endif28182819switch (type)2820{2821case recurse_copy_from_global:2822from_sp = TRUE;2823base_reg = STACK_TOP;2824break;28252826case recurse_copy_private_to_global:2827case recurse_copy_shared_to_global:2828case recurse_copy_kept_shared_to_global:2829from_sp = FALSE;2830base_reg = STACK_TOP;2831break;28322833default:2834SLJIT_ASSERT(type == recurse_swap_global);2835from_sp = FALSE;2836base_reg = TMP2;2837break;2838}28392840stackptr = STACK(stackptr);2841stacktop = STACK(stacktop);28422843status.tmp_regs[0] = TMP1;2844status.saved_tmp_regs[0] = TMP1;28452846if (base_reg != TMP2)2847{2848status.tmp_regs[1] = TMP2;2849status.saved_tmp_regs[1] = TMP2;2850}2851else2852{2853status.saved_tmp_regs[1] = RETURN_ADDR;2854if (HAS_VIRTUAL_REGISTERS)2855status.tmp_regs[1] = STR_PTR;2856else2857status.tmp_regs[1] = RETURN_ADDR;2858}28592860status.saved_tmp_regs[2] = TMP3;2861if (HAS_VIRTUAL_REGISTERS)2862status.tmp_regs[2] = STR_END;2863else2864status.tmp_regs[2] = TMP3;28652866delayed_mem_copy_init(&status, common);28672868if 
(type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)2869{2870SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);28712872if (!from_sp)2873delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);28742875if (from_sp || type == recurse_swap_global)2876delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);2877}28782879stackptr += sizeof(sljit_sw);28802881#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD2882if (type != recurse_copy_shared_to_global)2883{2884if (!from_sp)2885delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);28862887if (from_sp || type == recurse_swap_global)2888delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);2889}28902891stackptr += sizeof(sljit_sw);2892#endif28932894while (cc < ccend)2895{2896private_count = 0;2897shared_count = 0;2898kept_shared_count = 0;28992900switch(*cc)2901{2902case OP_SET_SOM:2903SLJIT_ASSERT(common->has_set_som);2904if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))2905{2906kept_shared_srcw[0] = OVECTOR(0);2907kept_shared_count = 1;2908}2909cc += 1;2910break;29112912case OP_RECURSE:2913if (recurse_flags & recurse_flag_quit_found)2914{2915if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))2916{2917kept_shared_srcw[0] = OVECTOR(0);2918kept_shared_count = 1;2919}2920if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))2921{2922kept_shared_srcw[kept_shared_count] = common->mark_ptr;2923kept_shared_count++;2924}2925}2926if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))2927{2928shared_srcw[0] = common->capture_last_ptr;2929shared_count = 1;2930}2931cc += 1 + LINK_SIZE;2932break;29332934case OP_KET:2935private_srcw[0] = PRIVATE_DATA(cc);2936if (private_srcw[0] != 
0)2937{2938if (recurse_check_bit(common, private_srcw[0]))2939private_count = 1;2940SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);2941cc += PRIVATE_DATA(cc + 1);2942}2943cc += 1 + LINK_SIZE;2944break;29452946case OP_ASSERT:2947case OP_ASSERT_NOT:2948case OP_ASSERTBACK:2949case OP_ASSERTBACK_NOT:2950case OP_ASSERT_NA:2951case OP_ASSERTBACK_NA:2952case OP_ONCE:2953case OP_SCRIPT_RUN:2954case OP_BRAPOS:2955case OP_SBRA:2956case OP_SBRAPOS:2957case OP_SCOND:2958private_srcw[0] = PRIVATE_DATA(cc);2959if (recurse_check_bit(common, private_srcw[0]))2960private_count = 1;2961cc += 1 + LINK_SIZE;2962break;29632964case OP_ASSERT_SCS:2965private_srcw[0] = PRIVATE_DATA(cc);2966private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);2967if (recurse_check_bit(common, private_srcw[0]))2968private_count = 2;2969cc += 1 + LINK_SIZE;2970break;29712972case OP_CBRA:2973case OP_SCBRA:2974offset = GET2(cc, 1 + LINK_SIZE);2975shared_srcw[0] = OVECTOR(offset << 1);2976if (recurse_check_bit(common, shared_srcw[0]))2977{2978shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);2979SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));2980shared_count = 2;2981}29822983if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))2984{2985shared_srcw[shared_count] = common->capture_last_ptr;2986shared_count++;2987}29882989if (common->optimized_cbracket[offset] == 0)2990{2991private_srcw[0] = OVECTOR_PRIV(offset);2992if (recurse_check_bit(common, private_srcw[0]))2993private_count = 1;2994}29952996cc += 1 + LINK_SIZE + IMM2_SIZE;2997break;29982999case OP_CBRAPOS:3000case OP_SCBRAPOS:3001offset = GET2(cc, 1 + LINK_SIZE);3002shared_srcw[0] = OVECTOR(offset << 1);3003if (recurse_check_bit(common, shared_srcw[0]))3004{3005shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);3006SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));3007shared_count = 2;3008}30093010if (common->capture_last_ptr != 0 && recurse_check_bit(common, 
common->capture_last_ptr))3011{3012shared_srcw[shared_count] = common->capture_last_ptr;3013shared_count++;3014}30153016private_srcw[0] = PRIVATE_DATA(cc);3017if (recurse_check_bit(common, private_srcw[0]))3018private_count = 1;30193020offset = OVECTOR_PRIV(offset);3021if (recurse_check_bit(common, offset))3022{3023private_srcw[private_count] = offset;3024private_count++;3025}3026cc += 1 + LINK_SIZE + IMM2_SIZE;3027break;30283029case OP_COND:3030/* Might be a hidden SCOND. */3031alternative = cc + GET(cc, 1);3032if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)3033{3034private_srcw[0] = PRIVATE_DATA(cc);3035if (recurse_check_bit(common, private_srcw[0]))3036private_count = 1;3037}3038cc += 1 + LINK_SIZE;3039break;30403041CASE_ITERATOR_PRIVATE_DATA_13042private_srcw[0] = PRIVATE_DATA(cc);3043if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))3044private_count = 1;3045cc += 2;3046#ifdef SUPPORT_UNICODE3047if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);3048#endif3049break;30503051CASE_ITERATOR_PRIVATE_DATA_2A3052private_srcw[0] = PRIVATE_DATA(cc);3053if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))3054{3055private_count = 2;3056private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);3057SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));3058}3059cc += 2;3060#ifdef SUPPORT_UNICODE3061if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);3062#endif3063break;30643065CASE_ITERATOR_PRIVATE_DATA_2B3066private_srcw[0] = PRIVATE_DATA(cc);3067if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))3068{3069private_count = 2;3070private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);3071SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));3072}3073cc += 2 + IMM2_SIZE;3074#ifdef SUPPORT_UNICODE3075if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);3076#endif3077break;30783079CASE_ITERATOR_TYPE_PRIVATE_DATA_13080private_srcw[0] = PRIVATE_DATA(cc);3081if 
(private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))3082private_count = 1;3083cc += 1;3084break;30853086CASE_ITERATOR_TYPE_PRIVATE_DATA_2A3087private_srcw[0] = PRIVATE_DATA(cc);3088if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))3089{3090private_count = 2;3091private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);3092SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));3093}3094cc += 1;3095break;30963097CASE_ITERATOR_TYPE_PRIVATE_DATA_2B3098private_srcw[0] = PRIVATE_DATA(cc);3099if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))3100{3101private_count = 2;3102private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);3103SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));3104}3105cc += 1 + IMM2_SIZE;3106break;31073108case OP_CLASS:3109case OP_NCLASS:3110#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 83111case OP_XCLASS:3112case OP_ECLASS:3113i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);3114#else3115i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);3116#endif3117if (PRIVATE_DATA(cc) != 0)3118{3119private_count = 1;3120private_srcw[0] = PRIVATE_DATA(cc);3121switch(get_class_iterator_size(cc + i))3122{3123case 1:3124break;31253126case 2:3127if (recurse_check_bit(common, private_srcw[0]))3128{3129private_count = 2;3130private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);3131SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));3132}3133break;31343135default:3136SLJIT_UNREACHABLE();3137break;3138}3139}3140cc += i;3141break;31423143case OP_MARK:3144case OP_COMMIT_ARG:3145case OP_PRUNE_ARG:3146case OP_THEN_ARG:3147SLJIT_ASSERT(common->mark_ptr != 0);3148if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))3149{3150kept_shared_srcw[0] = common->mark_ptr;3151kept_shared_count = 1;3152}3153if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))3154{3155private_srcw[0] = common->control_head_ptr;3156private_count = 
1;3157}3158cc += 1 + 2 + cc[1];3159break;31603161case OP_THEN:3162SLJIT_ASSERT(common->control_head_ptr != 0);3163if (recurse_check_bit(common, common->control_head_ptr))3164{3165private_srcw[0] = common->control_head_ptr;3166private_count = 1;3167}3168cc++;3169break;31703171default:3172cc = next_opcode(common, cc);3173SLJIT_ASSERT(cc != NULL);3174continue;3175}31763177if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)3178{3179SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);31803181for (i = 0; i < private_count; i++)3182{3183SLJIT_ASSERT(private_srcw[i] != 0);31843185if (!from_sp)3186delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);31873188if (from_sp || type == recurse_swap_global)3189delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);31903191stackptr += sizeof(sljit_sw);3192}3193}3194else3195stackptr += sizeof(sljit_sw) * private_count;31963197if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)3198{3199SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);32003201for (i = 0; i < shared_count; i++)3202{3203SLJIT_ASSERT(shared_srcw[i] != 0);32043205if (!from_sp)3206delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);32073208if (from_sp || type == recurse_swap_global)3209delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);32103211stackptr += sizeof(sljit_sw);3212}3213}3214else3215stackptr += sizeof(sljit_sw) * shared_count;32163217if (type != recurse_copy_private_to_global && type != recurse_swap_global)3218{3219SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);32203221for (i = 0; i < kept_shared_count; i++)3222{3223SLJIT_ASSERT(kept_shared_srcw[i] != 0);32243225if 
(!from_sp)3226delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);32273228if (from_sp || type == recurse_swap_global)3229delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);32303231stackptr += sizeof(sljit_sw);3232}3233}3234else3235stackptr += sizeof(sljit_sw) * kept_shared_count;3236}32373238SLJIT_ASSERT(cc == ccend && stackptr == stacktop);32393240delayed_mem_copy_finish(&status);3241}32423243static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)3244{3245PCRE2_SPTR end = bracketend(cc);3246BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;32473248/* Assert captures *THEN verb even if it has no alternatives. */3249if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)3250current_offset = NULL;3251else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS)3252has_alternatives = TRUE;3253/* Conditional block does never capture. */3254else if (*cc == OP_COND || *cc == OP_SCOND)3255has_alternatives = FALSE;32563257cc = next_opcode(common, cc);32583259if (has_alternatives)3260{3261switch (*cc)3262{3263case OP_REVERSE:3264case OP_CREF:3265cc += 1 + IMM2_SIZE;3266break;3267case OP_VREVERSE:3268case OP_DNCREF:3269cc += 1 + 2 * IMM2_SIZE;3270break;3271}32723273current_offset = common->then_offsets + (cc - common->start);3274}32753276while (cc < end)3277{3278if (*cc >= OP_ASSERT && *cc <= OP_SCOND)3279{3280cc = set_then_offsets(common, cc, current_offset);3281continue;3282}32833284if (*cc == OP_ALT && has_alternatives)3285{3286cc += 1 + LINK_SIZE;32873288if (*cc == OP_REVERSE)3289cc += 1 + IMM2_SIZE;3290else if (*cc == OP_VREVERSE)3291cc += 1 + 2 * IMM2_SIZE;32923293current_offset = common->then_offsets + (cc - common->start);3294continue;3295}32963297if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)3298*current_offset = 1;3299cc = next_opcode(common, cc);3300}33013302cc = end - 1 - LINK_SIZE;33033304/* Ignore repeats. 
*/3305if (*cc == OP_KET && PRIVATE_DATA(cc) != 0)3306end += PRIVATE_DATA(cc + 1);33073308return end;3309}33103311#undef CASE_ITERATOR_PRIVATE_DATA_13312#undef CASE_ITERATOR_PRIVATE_DATA_2A3313#undef CASE_ITERATOR_PRIVATE_DATA_2B3314#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_13315#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A3316#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B33173318static SLJIT_INLINE BOOL is_powerof2(unsigned int value)3319{3320return (value & (value - 1)) == 0;3321}33223323static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)3324{3325while (list != NULL)3326{3327/* sljit_set_label is clever enough to do nothing3328if either the jump or the label is NULL. */3329SET_LABEL(list->jump, label);3330list = list->next;3331}3332}33333334static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)3335{3336jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));3337if (list_item)3338{3339list_item->next = *list;3340list_item->jump = jump;3341*list = list_item;3342}3343}33443345static void add_stub(compiler_common *common, struct sljit_jump *start)3346{3347DEFINE_COMPILER;3348stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));33493350if (list_item)3351{3352list_item->start = start;3353list_item->quit = LABEL();3354list_item->next = common->stubs;3355common->stubs = list_item;3356}3357}33583359static void flush_stubs(compiler_common *common)3360{3361DEFINE_COMPILER;3362stub_list *list_item = common->stubs;33633364while (list_item)3365{3366JUMPHERE(list_item->start);3367add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));3368JUMPTO(SLJIT_JUMP, list_item->quit);3369list_item = list_item->next;3370}3371common->stubs = NULL;3372}33733374static SLJIT_INLINE void count_match(compiler_common *common)3375{3376DEFINE_COMPILER;33773378OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);3379add_jump(compiler, &common->calllimit, 
JUMP(SLJIT_ZERO));3380}33813382static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)3383{3384/* May destroy all locals and registers except TMP2. */3385DEFINE_COMPILER;33863387SLJIT_ASSERT(size > 0);3388OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));3389#ifdef DESTROY_REGISTERS3390OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);3391OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);3392OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);3393#if defined SLJIT_DEBUG && SLJIT_DEBUG3394SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));3395/* These two are also used by the stackalloc calls. */3396OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0);3397OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0);3398#endif3399#endif3400add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));3401}34023403static SLJIT_INLINE void free_stack(compiler_common *common, int size)3404{3405DEFINE_COMPILER;34063407SLJIT_ASSERT(size > 0);3408OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));3409}34103411static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)3412{3413DEFINE_COMPILER;3414sljit_uw *result;34153416if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))3417return NULL;34183419result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);3420if (SLJIT_UNLIKELY(result == NULL))3421{3422sljit_set_compiler_memory_error(compiler);3423return NULL;3424}34253426*(void**)result = common->read_only_data_head;3427common->read_only_data_head = (void *)result;3428return result + 1;3429}34303431static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)3432{3433DEFINE_COMPILER;3434struct sljit_label *loop;3435sljit_s32 i;34363437/* At this point we can freely use all temporary registers. */3438SLJIT_ASSERT(length > 1);3439/* TMP1 returns with begin - 1. 
*/3440OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));3441if (length < 8)3442{3443for (i = 1; i < length; i++)3444OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);3445}3446else3447{3448if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)3449{3450GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);3451OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);3452loop = LABEL();3453sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));3454OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);3455JUMPTO(SLJIT_NOT_ZERO, loop);3456}3457else3458{3459GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));3460OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);3461loop = LABEL();3462OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);3463OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));3464OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);3465JUMPTO(SLJIT_NOT_ZERO, loop);3466}3467}3468}34693470static SLJIT_INLINE void reset_early_fail(compiler_common *common)3471{3472DEFINE_COMPILER;3473sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);3474sljit_u32 uncleared_size;3475sljit_s32 src = SLJIT_IMM;3476sljit_s32 i;3477struct sljit_label *loop;34783479SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);34803481if (size == sizeof(sljit_sw))3482{3483OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);3484return;3485}34863487if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))3488{3489OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);3490src = TMP3;3491}34923493if (size <= 6 * sizeof(sljit_sw))3494{3495for (i = common->early_fail_start_ptr; i < 
common->early_fail_end_ptr; i += sizeof(sljit_sw))3496OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);3497return;3498}34993500GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);35013502uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);35033504OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);35053506loop = LABEL();3507OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);3508OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));3509OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);3510OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);3511CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);35123513if (uncleared_size >= sizeof(sljit_sw))3514OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);35153516if (uncleared_size >= 2 * sizeof(sljit_sw))3517OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);3518}35193520static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)3521{3522DEFINE_COMPILER;3523struct sljit_label *loop;3524int i;35253526SLJIT_ASSERT(length > 1);3527/* OVECTOR(1) contains the "string begin - 1" constant. 
*/3528if (length > 2)3529OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));3530if (length < 8)3531{3532for (i = 2; i < length; i++)3533OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);3534}3535else3536{3537if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)3538{3539GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));3540OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);3541loop = LABEL();3542sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));3543OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);3544JUMPTO(SLJIT_NOT_ZERO, loop);3545}3546else3547{3548GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));3549OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);3550loop = LABEL();3551OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);3552OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));3553OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);3554JUMPTO(SLJIT_NOT_ZERO, loop);3555}3556}35573558if (!HAS_VIRTUAL_REGISTERS)3559OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));3560else3561OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);35623563if (common->mark_ptr != 0)3564OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);3565if (common->control_head_ptr != 0)3566OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);3567if (HAS_VIRTUAL_REGISTERS)3568OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));35693570OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);3571OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));3572}35733574static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)3575{3576while (current != NULL)3577{3578switch 
(current[1])3579{3580case type_then_trap:3581break;35823583case type_mark:3584if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)3585return current[3];3586break;35873588default:3589SLJIT_UNREACHABLE();3590break;3591}3592SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);3593current = (sljit_sw*)current[0];3594}3595return 0;3596}35973598static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)3599{3600DEFINE_COMPILER;3601struct sljit_label *loop;3602BOOL has_pre;36033604/* At this point we can freely use all registers. */3605OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));3606OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);36073608if (HAS_VIRTUAL_REGISTERS)3609{3610OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);3611OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);3612if (common->mark_ptr != 0)3613OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);3614OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));3615OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);3616if (common->mark_ptr != 0)3617OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);3618OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),3619SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));3620}3621else3622{3623OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);3624OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));3625if (common->mark_ptr != 0)3626OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);3627OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));3628OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);3629if (common->mark_ptr != 0)3630OP1(SLJIT_MOV, 
SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);3631OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));3632}36333634has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;36353636GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));3637OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));36383639loop = LABEL();36403641if (has_pre)3642sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));3643else3644{3645OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);3646OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));3647}36483649OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));3650OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);3651/* Copy the integer value to the output buffer */3652#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 323653OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);3654#endif36553656SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);3657OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);36583659OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);3660JUMPTO(SLJIT_NOT_ZERO, loop);36613662/* Calculate the return value, which is the maximum ovector value. */3663if (topbracket > 1)3664{3665if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)3666{3667GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));3668OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);36693670/* OVECTOR(0) is never equal to SLJIT_S2. 
*/3671loop = LABEL();3672sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));3673OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);3674CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);3675OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);3676}3677else3678{3679GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));3680OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);36813682/* OVECTOR(0) is never equal to SLJIT_S2. */3683loop = LABEL();3684OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);3685OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));3686OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);3687CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);3688OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);3689}3690}3691else3692OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);3693}36943695static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)3696{3697DEFINE_COMPILER;3698sljit_s32 mov_opcode;3699sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;37003701SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);3702SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 03703&& (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));37043705if (arguments_reg != ARGUMENTS)3706OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);3707OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),3708common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);3709OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);37103711/* Store match begin and end. 
*/3712OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));3713OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);3714OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));37153716mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;37173718OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);3719#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 323720OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);3721#endif3722OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);37233724OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);3725#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 323726OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);3727#endif3728OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);37293730JUMPTO(SLJIT_JUMP, quit);3731}37323733static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)3734{3735/* May destroy TMP1. */3736DEFINE_COMPILER;3737struct sljit_jump *jump;37383739if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3740{3741/* The value of -1 must be kept for start_used_ptr! */3742OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);3743/* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting3744is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. 
*/3745jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);3746OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3747JUMPHERE(jump);3748}3749else if (common->mode == PCRE2_JIT_PARTIAL_HARD)3750{3751jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3752OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3753JUMPHERE(jump);3754}3755}37563757static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)3758{3759/* Detects if the character has an othercase. */3760unsigned int c;37613762#ifdef SUPPORT_UNICODE3763if (common->utf || common->ucp)3764{3765if (common->utf)3766{3767GETCHAR(c, cc);3768}3769else3770c = *cc;37713772if (c > 127)3773return c != UCD_OTHERCASE(c);37743775return common->fcc[c] != c;3776}3777else3778#endif3779c = *cc;3780return MAX_255(c) ? common->fcc[c] != c : FALSE;3781}37823783static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)3784{3785/* Returns with the othercase. */3786#ifdef SUPPORT_UNICODE3787if ((common->utf || common->ucp) && c > 127)3788return UCD_OTHERCASE(c);3789#endif3790return TABLE_GET(c, common->fcc, c);3791}37923793static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)3794{3795/* Detects if the character and its othercase has only 1 bit difference. */3796unsigned int c, oc, bit;3797#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 83798int n;3799#endif38003801#ifdef SUPPORT_UNICODE3802if (common->utf || common->ucp)3803{3804if (common->utf)3805{3806GETCHAR(c, cc);3807}3808else3809c = *cc;38103811if (c <= 127)3812oc = common->fcc[c];3813else3814oc = UCD_OTHERCASE(c);3815}3816else3817{3818c = *cc;3819oc = TABLE_GET(c, common->fcc, c);3820}3821#else3822c = *cc;3823oc = TABLE_GET(c, common->fcc, c);3824#endif38253826SLJIT_ASSERT(c != oc);38273828bit = c ^ oc;3829/* Optimized for English alphabet. 
*/3830if (c <= 127 && bit == 0x20)3831return (0 << 8) | 0x20;38323833/* Since c != oc, they must have at least 1 bit difference. */3834if (!is_powerof2(bit))3835return 0;38363837#if PCRE2_CODE_UNIT_WIDTH == 838383839#ifdef SUPPORT_UNICODE3840if (common->utf && c > 127)3841{3842n = GET_EXTRALEN(*cc);3843while ((bit & 0x3f) == 0)3844{3845n--;3846bit >>= 6;3847}3848return (n << 8) | bit;3849}3850#endif /* SUPPORT_UNICODE */3851return (0 << 8) | bit;38523853#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3238543855#ifdef SUPPORT_UNICODE3856if (common->utf && c > 65535)3857{3858if (bit >= (1u << 10))3859bit >>= 10;3860else3861return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));3862}3863#endif /* SUPPORT_UNICODE */3864return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));38653866#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */3867}38683869static void check_partial(compiler_common *common, BOOL force)3870{3871/* Checks whether a partial matching is occurred. Does not modify registers. */3872DEFINE_COMPILER;3873struct sljit_jump *jump = NULL;38743875SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);38763877if (common->mode == PCRE2_JIT_COMPLETE)3878return;38793880if (!force && !common->allow_empty_partial)3881jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3882else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3883jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);38843885if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3886OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);3887else3888{3889if (common->partialmatchlabel != NULL)3890JUMPTO(SLJIT_JUMP, common->partialmatchlabel);3891else3892add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));3893}38943895if (jump != NULL)3896JUMPHERE(jump);3897}38983899static void check_str_end(compiler_common *common, jump_list **end_reached)3900{3901/* Does not affect registers. 
Usually used in a tight spot. */3902DEFINE_COMPILER;3903struct sljit_jump *jump;39043905if (common->mode == PCRE2_JIT_COMPLETE)3906{3907add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));3908return;3909}39103911jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);3912if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3913{3914add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));3915OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);3916add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));3917}3918else3919{3920add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));3921if (common->partialmatchlabel != NULL)3922JUMPTO(SLJIT_JUMP, common->partialmatchlabel);3923else3924add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));3925}3926JUMPHERE(jump);3927}39283929static void detect_partial_match(compiler_common *common, jump_list **backtracks)3930{3931DEFINE_COMPILER;3932struct sljit_jump *jump;39333934if (common->mode == PCRE2_JIT_COMPLETE)3935{3936add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));3937return;3938}39393940/* Partial matching mode. 
*/3941jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);3942if (!common->allow_empty_partial)3943add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));3944else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3945add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));39463947if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3948{3949OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);3950add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));3951}3952else3953{3954if (common->partialmatchlabel != NULL)3955JUMPTO(SLJIT_JUMP, common->partialmatchlabel);3956else3957add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));3958}3959JUMPHERE(jump);3960}39613962static void process_partial_match(compiler_common *common)3963{3964DEFINE_COMPILER;3965struct sljit_jump *jump;39663967/* Partial matching mode. */3968if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3969{3970jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3971OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);3972JUMPHERE(jump);3973}3974else if (common->mode == PCRE2_JIT_PARTIAL_HARD)3975{3976if (common->partialmatchlabel != NULL)3977CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);3978else3979add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));3980}3981}39823983static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)3984{3985DEFINE_COMPILER;39863987CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);3988process_partial_match(common);3989}39903991static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)3992{3993/* Reads the character into TMP1, keeps STR_PTR.3994Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. 
*/3995DEFINE_COMPILER;3996#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 323997struct sljit_jump *jump;3998#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */39994000SLJIT_UNUSED_ARG(max);4001SLJIT_UNUSED_ARG(dst);4002SLJIT_UNUSED_ARG(dstw);4003SLJIT_UNUSED_ARG(backtracks);40044005OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));40064007#ifdef SUPPORT_UNICODE4008#if PCRE2_CODE_UNIT_WIDTH == 84009if (common->utf)4010{4011if (max < 128) return;40124013jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);4014OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);4015OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4016add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));4017OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);4018if (backtracks && common->invalid_utf)4019add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4020JUMPHERE(jump);4021}4022#elif PCRE2_CODE_UNIT_WIDTH == 164023if (common->utf)4024{4025if (max < 0xd800) return;40264027OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);40284029if (common->invalid_utf)4030{4031jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4032OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);4033OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4034add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));4035OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);4036if (backtracks && common->invalid_utf)4037add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4038}4039else4040{4041/* TMP2 contains the high surrogate. 
*/4042jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);4043OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4044OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);4045OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);4046OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4047}40484049JUMPHERE(jump);4050}4051#elif PCRE2_CODE_UNIT_WIDTH == 324052if (common->invalid_utf)4053{4054if (max < 0xd800) return;40554056if (backtracks != NULL)4057{4058OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4059add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4060add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));4061}4062else4063{4064OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4065OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);4066SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4067OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4068SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4069}4070}4071#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4072#endif /* SUPPORT_UNICODE */4073}40744075static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)4076{4077/* Reads one character back without moving STR_PTR. TMP2 must4078contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. 
*/4079DEFINE_COMPILER;40804081#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 324082struct sljit_jump *jump;4083#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */40844085SLJIT_UNUSED_ARG(max);4086SLJIT_UNUSED_ARG(backtracks);40874088OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));40894090#ifdef SUPPORT_UNICODE4091#if PCRE2_CODE_UNIT_WIDTH == 84092if (common->utf)4093{4094if (max < 128) return;40954096jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);4097if (common->invalid_utf)4098{4099add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));4100if (backtracks != NULL)4101add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4102}4103else4104add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));4105JUMPHERE(jump);4106}4107#elif PCRE2_CODE_UNIT_WIDTH == 164108if (common->utf)4109{4110if (max < 0xd800) return;41114112if (common->invalid_utf)4113{4114jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);4115add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));4116if (backtracks != NULL)4117add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4118}4119else4120{4121OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);4122jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);4123/* TMP2 contains the low surrogate. 
*/4124OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));4125OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);4126OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);4127OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);4128OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4129}4130JUMPHERE(jump);4131}4132#elif PCRE2_CODE_UNIT_WIDTH == 324133if (common->invalid_utf)4134{4135OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4136add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4137add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));4138}4139#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4140#endif /* SUPPORT_UNICODE */4141}41424143#define READ_CHAR_UPDATE_STR_PTR 0x14144#define READ_CHAR_UTF8_NEWLINE 0x24145#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)4146#define READ_CHAR_VALID_UTF 0x441474148static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,4149jump_list **backtracks, sljit_u32 options)4150{4151/* Reads the precise value of a character into TMP1, if the character is4152between min and max (c >= min && c <= max). Otherwise it returns with a value4153outside the range. Does not check STR_END. 
*/4154DEFINE_COMPILER;4155#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 324156struct sljit_jump *jump;4157#endif4158#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 84159struct sljit_jump *jump2;4160#endif41614162SLJIT_UNUSED_ARG(min);4163SLJIT_UNUSED_ARG(max);4164SLJIT_UNUSED_ARG(backtracks);4165SLJIT_UNUSED_ARG(options);4166SLJIT_ASSERT(min <= max);41674168OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4169OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));41704171#ifdef SUPPORT_UNICODE4172#if PCRE2_CODE_UNIT_WIDTH == 84173if (common->utf)4174{4175if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;41764177if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))4178{4179jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);41804181if (options & READ_CHAR_UTF8_NEWLINE)4182add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));4183else4184add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));41854186if (backtracks != NULL)4187add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4188JUMPHERE(jump);4189return;4190}41914192jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);4193if (min >= 0x10000)4194{4195OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);4196if (options & READ_CHAR_UPDATE_STR_PTR)4197OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4198OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4199jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);4200OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4201OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4202OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4203OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4204OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4205OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4206OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4207OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));4208if (!(options & 
READ_CHAR_UPDATE_STR_PTR))4209OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));4210OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4211OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4212OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4213JUMPHERE(jump2);4214if (options & READ_CHAR_UPDATE_STR_PTR)4215OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);4216}4217else if (min >= 0x800 && max <= 0xffff)4218{4219OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);4220if (options & READ_CHAR_UPDATE_STR_PTR)4221OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4222OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4223jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);4224OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4225OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4226OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4227OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4228if (!(options & READ_CHAR_UPDATE_STR_PTR))4229OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));4230OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4231OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4232OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4233JUMPHERE(jump2);4234if (options & READ_CHAR_UPDATE_STR_PTR)4235OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);4236}4237else if (max >= 0x800)4238{4239add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));4240}4241else if (max < 128)4242{4243OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4244OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);4245}4246else4247{4248OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4249if (!(options & READ_CHAR_UPDATE_STR_PTR))4250OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4251else4252OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4253OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4254OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 
6);4255OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4256OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4257if (options & READ_CHAR_UPDATE_STR_PTR)4258OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);4259}4260JUMPHERE(jump);4261}4262#elif PCRE2_CODE_UNIT_WIDTH == 164263if (common->utf)4264{4265if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;42664267if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))4268{4269OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4270jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);42714272if (options & READ_CHAR_UTF8_NEWLINE)4273add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));4274else4275add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));42764277if (backtracks != NULL)4278add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4279JUMPHERE(jump);4280return;4281}42824283if (max >= 0x10000)4284{4285OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4286jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);4287/* TMP2 contains the high surrogate. */4288OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4289OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);4290OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4291OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);4292OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4293JUMPHERE(jump);4294return;4295}42964297/* Skip low surrogate if necessary. 
*/4298OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);42994300if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)4301{4302if (options & READ_CHAR_UPDATE_STR_PTR)4303OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4304OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);4305if (options & READ_CHAR_UPDATE_STR_PTR)4306SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);4307if (max >= 0xd800)4308SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);4309}4310else4311{4312jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);4313if (options & READ_CHAR_UPDATE_STR_PTR)4314OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4315if (max >= 0xd800)4316OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);4317JUMPHERE(jump);4318}4319}4320#elif PCRE2_CODE_UNIT_WIDTH == 324321if (common->invalid_utf)4322{4323if (backtracks != NULL)4324{4325OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4326add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4327add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));4328}4329else4330{4331OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4332OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);4333SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4334OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4335SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4336}4337}4338#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4339#endif /* SUPPORT_UNICODE */4340}43414342static void skip_valid_char(compiler_common *common)4343{4344DEFINE_COMPILER;4345#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)4346struct sljit_jump *jump;4347#endif43484349#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)4350if (common->utf)4351{4352OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);4353OP2(SLJIT_ADD, 
STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4354#if PCRE2_CODE_UNIT_WIDTH == 84355jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);4356OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4357OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4358#elif PCRE2_CODE_UNIT_WIDTH == 164359jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);4360OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);4361OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);4362OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);4363OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);4364OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4365#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */4366JUMPHERE(jump);4367return;4368}4369#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */4370OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4371}43724373#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 843744375static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)4376{4377/* Tells whether the character codes below 128 are enough4378to determine a match. */4379const sljit_u8 value = nclass ? 0xff : 0;4380const sljit_u8 *end = bitset + 32;43814382bitset += 16;4383do4384{4385if (*bitset++ != value)4386return FALSE;4387}4388while (bitset < end);4389return TRUE;4390}43914392static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)4393{4394/* Reads the precise character type of a character into TMP1, if the character4395is less than 128. Otherwise it returns with zero. Does not check STR_END. The4396full_read argument tells whether characters above max are accepted or not. */4397DEFINE_COMPILER;4398struct sljit_jump *jump;43994400SLJIT_ASSERT(common->utf);44014402OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);4403OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));44044405/* All values > 127 are zero in ctypes. 
*/4406OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);44074408if (negated)4409{4410jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);44114412if (common->invalid_utf)4413{4414OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);4415add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));4416add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4417OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4418}4419else4420{4421OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);4422OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);4423}4424JUMPHERE(jump);4425}4426}44274428#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */44294430static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)4431{4432/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */4433DEFINE_COMPILER;4434#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 84435struct sljit_jump *jump;4436#endif4437#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 84438struct sljit_jump *jump2;4439#endif44404441SLJIT_UNUSED_ARG(backtracks);4442SLJIT_UNUSED_ARG(negated);44434444OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);4445OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));44464447#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 84448if (common->utf)4449{4450/* The result of this read may be unused, but saves an "else" part. 
*/4451OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4452jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);44534454if (!negated)4455{4456if (common->invalid_utf)4457add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));44584459OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4460OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4461OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);4462if (common->invalid_utf)4463add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));44644465OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4466OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);4467OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);4468if (common->invalid_utf)4469add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));44704471OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4472jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);4473OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4474JUMPHERE(jump2);4475}4476else if (common->invalid_utf)4477{4478add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));4479OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);4480add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));44814482OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4483jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);4484OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4485JUMPHERE(jump2);4486}4487else4488add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));44894490JUMPHERE(jump);4491return;4492}4493#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */44944495#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 324496if (common->invalid_utf && negated)4497add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));4498#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */44994500#if PCRE2_CODE_UNIT_WIDTH != 84501/* The ctypes array contains only 256 values. 
*/4502OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4503jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);4504#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */4505OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4506#if PCRE2_CODE_UNIT_WIDTH != 84507JUMPHERE(jump);4508#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */45094510#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 164511if (common->utf && negated)4512{4513/* Skip low surrogate if necessary. */4514if (!common->invalid_utf)4515{4516OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);45174518if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)4519{4520OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4521OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);4522SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);4523}4524else4525{4526jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);4527OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4528JUMPHERE(jump);4529}4530return;4531}45324533OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);4534jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4535add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));4536add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));45374538OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4539OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4540OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);4541add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));45424543JUMPHERE(jump);4544return;4545}4546#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */4547}45484549static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)4550{4551/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,4552TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,4553and it is destroyed. 
Does not modify STR_PTR for invalid character sequences. */4554DEFINE_COMPILER;45554556#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 324557struct sljit_jump *jump;4558#endif45594560#ifdef SUPPORT_UNICODE4561#if PCRE2_CODE_UNIT_WIDTH == 84562struct sljit_label *label;45634564if (common->utf)4565{4566if (!must_be_valid && common->invalid_utf)4567{4568OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4569OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4570jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);4571add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));4572if (backtracks != NULL)4573add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));4574JUMPHERE(jump);4575return;4576}45774578label = LABEL();4579OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4580OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4581OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);4582CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);4583return;4584}4585#elif PCRE2_CODE_UNIT_WIDTH == 164586if (common->utf)4587{4588OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4589OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));45904591if (!must_be_valid && common->invalid_utf)4592{4593OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);4594jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);4595add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));4596if (backtracks != NULL)4597add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));4598JUMPHERE(jump);4599return;4600}46014602/* Skip low surrogate if necessary. 
*/4603OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);4604OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);4605OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);4606OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);4607OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4608return;4609}4610#elif PCRE2_CODE_UNIT_WIDTH == 324611if (common->invalid_utf && !must_be_valid)4612{4613OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4614if (backtracks != NULL)4615{4616add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4617OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4618return;4619}46204621OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);4622OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);4623OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);4624OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4625return;4626}4627#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4628#endif /* SUPPORT_UNICODE */46294630SLJIT_UNUSED_ARG(backtracks);4631SLJIT_UNUSED_ARG(must_be_valid);46324633OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4634}46354636static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)4637{4638/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */4639DEFINE_COMPILER;4640struct sljit_jump *jump;46414642if (nltype == NLTYPE_ANY)4643{4644add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));4645sljit_set_current_flags(compiler, SLJIT_SET_Z);4646add_jump(compiler, backtracks, JUMP(jumpifmatch ? 
SLJIT_NOT_ZERO : SLJIT_ZERO));4647}4648else if (nltype == NLTYPE_ANYCRLF)4649{4650if (jumpifmatch)4651{4652add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));4653add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));4654}4655else4656{4657jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);4658add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));4659JUMPHERE(jump);4660}4661}4662else4663{4664SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);4665add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));4666}4667}46684669#ifdef SUPPORT_UNICODE46704671#if PCRE2_CODE_UNIT_WIDTH == 84672static void do_utfreadchar(compiler_common *common)4673{4674/* Fast decoding a UTF-8 character. TMP1 contains the first byte4675of the character (>= 0xc0). Return char value in TMP1. */4676DEFINE_COMPILER;4677struct sljit_jump *jump;46784679sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);4680OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4681OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4682OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4683OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);46844685/* Searching for the first zero. */4686OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);4687jump = JUMP(SLJIT_NOT_ZERO);4688/* Two byte sequence. */4689OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);4690OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4691OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);46924693JUMPHERE(jump);4694OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4695OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4696OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4697OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);46984699OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);4700jump = JUMP(SLJIT_NOT_ZERO);4701/* Three byte sequence. 
*/4702OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);4703OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));4704OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);47054706/* Four byte sequence. */4707JUMPHERE(jump);4708OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));4709OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);4710OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));4711OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4712OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4713OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4714OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);4715}47164717static void do_utfreadtype8(compiler_common *common)4718{4719/* Fast decoding a UTF-8 character type. TMP2 contains the first byte4720of the character (>= 0xc0). Return value in TMP1. */4721DEFINE_COMPILER;4722struct sljit_jump *jump;4723struct sljit_jump *compare;47244725sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);47264727OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);4728jump = JUMP(SLJIT_NOT_ZERO);4729/* Two byte sequence. */4730OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4731OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4732OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);4733/* The upper 5 bits are known at this point. */4734compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);4735OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4736OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4737OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);4738OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4739OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);47404741JUMPHERE(compare);4742OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4743OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);47444745/* We only have types for characters less than 256. 
*/4746JUMPHERE(jump);4747OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);4748OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4749OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);4750OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);4751}47524753static void do_utfreadchar_invalid(compiler_common *common)4754{4755/* Slow decoding a UTF-8 character. TMP1 contains the first byte4756of the character (>= 0xc0). Return char value in TMP1. STR_PTR is4757undefined for invalid characters. */4758DEFINE_COMPILER;4759sljit_s32 i;4760sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);4761struct sljit_jump *jump;4762struct sljit_jump *buffer_end_close;4763struct sljit_label *three_byte_entry;4764struct sljit_label *exit_invalid_label;4765struct sljit_jump *exit_invalid[11];47664767sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);47684769OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);47704771/* Usually more than 3 characters remained in the subject buffer. */4772OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));47734774/* Not a valid start of a multi-byte sequence, no more bytes read. */4775exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);47764777buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);47784779OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));4780OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4781/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */4782OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4783OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4784exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);47854786OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);4787jump = JUMP(SLJIT_NOT_ZERO);47884789OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));4790OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);47914792JUMPHERE(jump);47934794/* Three-byte sequence. 
*/4795OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));4796OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4797OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4798OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4799if (has_cmov)4800{4801OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);4802SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);4803exit_invalid[2] = NULL;4804}4805else4806exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);48074808OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);4809jump = JUMP(SLJIT_NOT_ZERO);48104811three_byte_entry = LABEL();48124813OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);4814if (has_cmov)4815{4816OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);4817SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);4818exit_invalid[3] = NULL;4819}4820else4821exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);4822OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);4823OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));48244825if (has_cmov)4826{4827OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);4828SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4829exit_invalid[4] = NULL;4830}4831else4832exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);4833OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);48344835JUMPHERE(jump);48364837/* Four-byte sequence. 
*/4838OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));4839OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4840OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4841OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4842if (has_cmov)4843{4844OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);4845SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);4846exit_invalid[5] = NULL;4847}4848else4849exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);48504851OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);4852if (has_cmov)4853{4854OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);4855SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);4856exit_invalid[6] = NULL;4857}4858else4859exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);48604861OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);4862OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);48634864JUMPHERE(buffer_end_close);4865OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));4866exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);48674868/* Two-byte sequence. */4869OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));4870OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4871/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */4872OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4873OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4874exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);48754876OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);4877jump = JUMP(SLJIT_NOT_ZERO);48784879OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);48804881/* Three-byte sequence. 
*/4882JUMPHERE(jump);4883exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);48844885OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4886OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4887OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4888OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4889if (has_cmov)4890{4891OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);4892SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4893exit_invalid[10] = NULL;4894}4895else4896exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);48974898/* One will be substracted from STR_PTR later. */4899OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));49004901/* Four byte sequences are not possible. */4902CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);49034904exit_invalid_label = LABEL();4905for (i = 0; i < 11; i++)4906sljit_set_label(exit_invalid[i], exit_invalid_label);49074908OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);4909OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);4910}49114912static void do_utfreadnewline_invalid(compiler_common *common)4913{4914/* Slow decoding a UTF-8 character, specialized for newlines.4915TMP1 contains the first byte of the character (>= 0xc0). Return4916char value in TMP1. */4917DEFINE_COMPILER;4918struct sljit_label *loop;4919struct sljit_label *skip_start;4920struct sljit_label *three_byte_exit;4921struct sljit_jump *jump[5];49224923sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);49244925if (common->nltype != NLTYPE_ANY)4926{4927SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);49284929/* All newlines are ascii, just skip intermediate octets. 
*/4930jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);4931loop = LABEL();4932if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)4933sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4934else4935{4936OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4937OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4938}49394940OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);4941CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);4942OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));49434944JUMPHERE(jump[0]);49454946OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);4947OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);4948return;4949}49504951jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);4952OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4953OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));49544955jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);4956jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);49574958skip_start = LABEL();4959OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);4960jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);49614962/* Skip intermediate octets. */4963loop = LABEL();4964jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);4965OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4966OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4967OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);4968CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);49694970JUMPHERE(jump[3]);4971OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));49724973three_byte_exit = LABEL();4974JUMPHERE(jump[0]);4975JUMPHERE(jump[4]);49764977OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);4978OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);49794980/* Two byte long newline: 0x85. 
*/4981JUMPHERE(jump[1]);4982CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);49834984OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);4985OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);49864987/* Three byte long newlines: 0x2028 and 0x2029. */4988JUMPHERE(jump[2]);4989CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);4990CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);49914992OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4993OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));49944995OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);4996CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);49974998OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);4999OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);5000OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5001}50025003static void do_utfmoveback_invalid(compiler_common *common)5004{5005/* Goes one character back. */5006DEFINE_COMPILER;5007sljit_s32 i;5008struct sljit_jump *jump;5009struct sljit_jump *buffer_start_close;5010struct sljit_label *exit_ok_label;5011struct sljit_label *exit_invalid_label;5012struct sljit_jump *exit_invalid[7];50135014sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);50155016OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));5017exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);50185019/* Two-byte sequence. */5020buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);50215022OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));50235024OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);5025jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);50265027OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5028OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));5029OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);50305031/* Three-byte sequence. 
*/5032JUMPHERE(jump);5033exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);50345035OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));50365037OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);5038jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);50395040OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5041OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5042OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);50435044/* Four-byte sequence. */5045JUMPHERE(jump);5046OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);5047exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);50485049OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5050OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);5051exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);50525053exit_ok_label = LABEL();5054OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5055OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);50565057/* Two-byte sequence. */5058JUMPHERE(buffer_start_close);5059OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));50605061exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);50625063OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));50645065OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);5066CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);50675068/* Three-byte sequence. */5069OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5070exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);5071exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);50725073OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));50745075OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);5076CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);50775078/* Four-byte sequences are not possible. 
*/50795080exit_invalid_label = LABEL();5081sljit_set_label(exit_invalid[5], exit_invalid_label);5082sljit_set_label(exit_invalid[6], exit_invalid_label);5083OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);5084OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));5085OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);50865087JUMPHERE(exit_invalid[4]);5088/* -2 + 4 = 2 */5089OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));50905091exit_invalid_label = LABEL();5092for (i = 0; i < 4; i++)5093sljit_set_label(exit_invalid[i], exit_invalid_label);5094OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);5095OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));5096OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5097}50985099static void do_utfpeakcharback(compiler_common *common)5100{5101/* Peak a character back. Does not modify STR_PTR. */5102DEFINE_COMPILER;5103struct sljit_jump *jump[2];51045105sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);51065107OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5108OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);5109jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);51105111OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));5112OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);5113jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);51145115OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));5116OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);5117OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);5118OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5119OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);51205121JUMPHERE(jump[1]);5122OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5123OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);5124OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);5125OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);51265127JUMPHERE(jump[0]);5128OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));5129OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);5130OP2(SLJIT_SUB, TMP2, 
0, TMP2, 0, SLJIT_IMM, 0x80);5131OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);51325133OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5134}51355136static void do_utfpeakcharback_invalid(compiler_common *common)5137{5138/* Peak a character back. Does not modify STR_PTR. */5139DEFINE_COMPILER;5140sljit_s32 i;5141sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);5142struct sljit_jump *jump[2];5143struct sljit_label *two_byte_entry;5144struct sljit_label *three_byte_entry;5145struct sljit_label *exit_invalid_label;5146struct sljit_jump *exit_invalid[8];51475148sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);51495150OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));5151exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);5152jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);51535154/* Two-byte sequence. */5155OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5156OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);5157jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);51585159two_byte_entry = LABEL();5160OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5161/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */5162OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);5163OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);51645165JUMPHERE(jump[1]);5166OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);5167OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);5168exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);5169OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5170OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);51715172/* Three-byte sequence. 
*/5173OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));5174OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);5175jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);51765177three_byte_entry = LABEL();5178OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);5179OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);51805181OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);5182if (has_cmov)5183{5184OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);5185SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);5186exit_invalid[2] = NULL;5187}5188else5189exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);51905191OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);5192if (has_cmov)5193{5194OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);5195SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);5196exit_invalid[3] = NULL;5197}5198else5199exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);52005201OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);52025203JUMPHERE(jump[1]);5204OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);5205exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);5206OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);5207OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);52085209/* Four-byte sequence. */5210OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));5211OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);5212OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);5213OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);5214/* ADD is used instead of OR because of the SUB 0x10000 above. 
*/5215OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);52165217if (has_cmov)5218{5219OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);5220SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);5221exit_invalid[5] = NULL;5222}5223else5224exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);52255226OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);5227OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);52285229JUMPHERE(jump[0]);5230OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));5231jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);52325233/* Two-byte sequence. */5234OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5235OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);5236CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);52375238OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);5239OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);5240exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);5241OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5242OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);52435244/* Three-byte sequence. */5245OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));5246OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);5247CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);52485249OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5250OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);52515252JUMPHERE(jump[0]);5253exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);52545255/* Two-byte sequence. 
*/5256OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5257OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);5258CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);52595260exit_invalid_label = LABEL();5261for (i = 0; i < 8; i++)5262sljit_set_label(exit_invalid[i], exit_invalid_label);52635264OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5265OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5266}52675268#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */52695270#if PCRE2_CODE_UNIT_WIDTH == 1652715272static void do_utfreadchar_invalid(compiler_common *common)5273{5274/* Slow decoding a UTF-16 character. TMP1 contains the first half5275of the character (>= 0xd800). Return char value in TMP1. STR_PTR is5276undefined for invalid characters. */5277DEFINE_COMPILER;5278struct sljit_jump *exit_invalid[3];52795280sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);52815282/* TMP2 contains the high surrogate. */5283exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);5284exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);52855286OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5287OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);5288OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));52895290OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);5291OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);5292exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);52935294OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);5295OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);52965297JUMPHERE(exit_invalid[0]);5298JUMPHERE(exit_invalid[1]);5299JUMPHERE(exit_invalid[2]);5300OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5301OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5302}53035304static void do_utfreadnewline_invalid(compiler_common *common)5305{5306/* Slow decoding a UTF-16 character, specialized for newlines.5307TMP1 contains the first half of the character (>= 0xd800). Return5308char value in TMP1. 
*/53095310DEFINE_COMPILER;5311struct sljit_jump *exit_invalid[2];53125313sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);53145315/* TMP2 contains the high surrogate. */5316exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);53175318OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5319exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);53205321OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);5322OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);5323OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);5324OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);5325OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);5326OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);53275328OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);53295330JUMPHERE(exit_invalid[0]);5331JUMPHERE(exit_invalid[1]);5332OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5333OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5334}53355336static void do_utfmoveback_invalid(compiler_common *common)5337{5338/* Goes one character back. 
*/5339DEFINE_COMPILER;5340struct sljit_jump *exit_invalid[3];53415342sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);53435344exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);5345exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);53465347OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));5348OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);5349exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);53505351OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5352OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5353OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);53545355JUMPHERE(exit_invalid[0]);5356JUMPHERE(exit_invalid[1]);5357JUMPHERE(exit_invalid[2]);53585359OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5360OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);5361OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5362}53635364static void do_utfpeakcharback_invalid(compiler_common *common)5365{5366/* Peak a character back. Does not modify STR_PTR. 
*/5367DEFINE_COMPILER;5368struct sljit_jump *jump;5369struct sljit_jump *exit_invalid[3];53705371sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);53725373jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);5374OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));5375exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);5376exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);53775378OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5379OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);5380OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);5381exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);5382OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);5383OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);53845385JUMPHERE(jump);5386OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);53875388JUMPHERE(exit_invalid[0]);5389JUMPHERE(exit_invalid[1]);5390JUMPHERE(exit_invalid[2]);53915392OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5393OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5394}53955396#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */53975398/* UCD_BLOCK_SIZE must be 128 (see the assert below). */5399#define UCD_BLOCK_MASK 1275400#define UCD_BLOCK_SHIFT 754015402static void do_getucd(compiler_common *common)5403{5404/* Search the UCD record for the character comes in TMP1.5405Returns chartype in TMP1 and UCD offset in TMP2. 
*/5406DEFINE_COMPILER;5407#if PCRE2_CODE_UNIT_WIDTH == 325408struct sljit_jump *jump;5409#endif54105411#if defined SLJIT_DEBUG && SLJIT_DEBUG5412/* dummy_ucd_record */5413const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);5414SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);5415SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);5416#endif54175418SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);54195420sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);54215422#if PCRE2_CODE_UNIT_WIDTH == 325423if (!common->utf)5424{5425jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);5426OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);5427JUMPHERE(jump);5428}5429#endif54305431OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);5432OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);5433OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));5434OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);5435OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);5436OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);5437OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));5438OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);5439OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5440}54415442static void do_getucdtype(compiler_common *common)5443{5444/* Search the UCD record for the character comes in TMP1.5445Returns chartype in TMP1 and UCD offset in TMP2. 
*/5446DEFINE_COMPILER;5447#if PCRE2_CODE_UNIT_WIDTH == 325448struct sljit_jump *jump;5449#endif54505451#if defined SLJIT_DEBUG && SLJIT_DEBUG5452/* dummy_ucd_record */5453const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);5454SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);5455SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);5456#endif54575458SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);54595460sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);54615462#if PCRE2_CODE_UNIT_WIDTH == 325463if (!common->utf)5464{5465jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);5466OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);5467JUMPHERE(jump);5468}5469#endif54705471OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);5472OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);5473OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));5474OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);5475OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);5476OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);5477OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));5478OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);54795480/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). 
/* Emits the set-up code that runs before the first match attempt and the
"advance to the next start position" loop, and returns the label of that
loop.

Emitted parts, in order:
  1. PCRE2_FIRSTLINE: locate the end of the first line and store it in the
     stack slot common->match_end_ptr (STR_PTR is preserved through TMP3).
  2. Otherwise, PCRE2_USE_OFFSET_LIMIT: compute begin + offset_limit,
     limit it to STR_END, and store it in the same slot; jump to the abort
     list with PCRE2_ERROR_NOMATCH when the limit is before STR_PTR.
  3. A jump (start) over the loop body, so that the first attempt does not
     advance STR_PTR.
  4. The loop body: advance STR_PTR by one character, with optional
     handling for a two-unit newline and for UTF-8 / UTF-16 sequences.

Arguments:
  common   the compiler state

Returns:   the label at the top of the advance loop */
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The newline skip is only emitted when the pattern has no explicit CR or
LF, PCRE2_FIRSTLINE is not set, and the newline type is "any", "anycrlf"
or a fixed value above 255. */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is used as the scan cursor; keep the real value in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two-unit newline: compare the units at STR_PTR - 1 and
    STR_PTR with the high and low bytes of common->newline. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* The stored value is one unit before STR_PTR. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* Reached when the subject end is hit: store the current STR_PTR.
    The jumps collected in the newline list land after this store. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  /* TMP1 = arguments->offset_limit */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* Default: when the limit is PCRE2_UNSET, STR_END is stored (via end). */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  /* TMP2 = arguments->begin. With virtual registers the load is done in
  two steps; the second step follows the shift below. */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Convert the limit from code units to bytes. */
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + limit, replaced by STR_END when it is beyond STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* The return value is set before the conditional jump to the abort list,
  which is taken when the limit lies before the current STR_PTR. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* Skips the whole advance code; resolved by JUMPHERE(start) below. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Entered from the loop body when the unit at STR_PTR equals the high
  byte of common->newline. Advances one unit, then one more when the next
  unit equals the low byte. Note that end and end2 are reused here; they
  are resolved at the bottom of the function. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

/* This label is the return value of the function. */
mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (newlinecheck) readuchar = TRUE;

/* The current unit is only loaded when a later step needs it in TMP1. */
if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Units in the range 0x80..0xbf are stepped over; the final SUB undoes
  the increment made for the first unit outside that range. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* For a lead byte >= 0xc0, add the value found in utf8_table4
  (indexed by lead byte - 0xc0) to STR_PTR. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Same scheme as the 8-bit case, for units in 0xdc00..0xdfff. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* When the unit just passed is in 0xd800..0xdbff, advance one more
  unit; done either with a conditional select or with flag arithmetic. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
    }
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
JUMPHERE(start);

if (newlinecheck)
  {
  /* Both exits of the newline skip continue here. */
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}


/* Adds a code unit to the character set of one prefix position.

A count of 255 marks the position as "any character" and is never changed
again. Duplicates are ignored. When the set already holds MAX_DIFF_CHARS
different values, the position is turned into "any" instead of growing.

Arguments:
  chr     the code unit to add
  chars   the set of the prefix position
  last    when TRUE, last_count is updated as well, but only if chr is
            actually added to the set */
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
sljit_u32 i, count = chars->count;

/* Already "any": nothing to record. */
if (count == 255)
  return;

if (count == 0)
  {
  /* First value of this position. */
  chars->count = 1;
  chars->chars[0] = chr;

  if (last)
    chars->last_count = 1;
  return;
  }

/* Ignore values that are already present. */
for (i = 0; i < count; i++)
  if (chars->chars[i] == chr)
    return;

/* No room left: give up on this position. */
if (count >= MAX_DIFF_CHARS)
  {
  chars->count = 255;
  return;
  }

chars->chars[count] = chr;
chars->count = count + 1;

if (last)
  chars->last_count++;
}
/* Value can be increased if needed. Patterns
such as /(a|){33}b/ can exhaust the stack.

Note: /(a|){29}b/ already stops scan_prefix()
because it reaches the maximum step_count. */
#define SCAN_PREFIX_STACK_END 32

/*
Scan prefix stores the prefix string in the chars array.
The elements of the chars array are either small character
sets or "any" (count is set to 255).

Examples (the chars array is represented by a simple regex):

/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3)
/a[a-z]b+c/ prefix: a.b (length: 3)
/ab?cd/ prefix: a[bc][cd] (length: 3)
/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2)

The length is returned by scan_prefix(). The length is
less than or equal to the minimum length of the pattern.

Arguments:
  common   the compiler state
  cc       the compiled pattern position where the scan starts
  chars    array of MAX_N_CHARS entries that receives the prefix

Returns:   the prefix length, or 0 when the step budget (10000
           iterations of the main loop) is used up

Alternative paths (other alternatives of a group, the "absent" case of an
optional item) are kept on a fixed-size stack of SCAN_PREFIX_STACK_END
entries. A path is finished by setting chars_end to the current position;
the result length is the final value of chars_end.
*/

static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars)
{
fast_forward_char_data *chars_start = chars;
fast_forward_char_data *chars_end = chars + MAX_N_CHARS;
/* The three arrays below together form the stack of pending paths. */
PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END];
fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END];
sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END];
BOOL last, any, class, caseless;
int stack_ptr, step_count, repeat, len, len_save;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
stack_ptr = 0;
step_count = 10000;
while (TRUE)
  {
  /* Bound the total work; running out of steps means "no prefix". */
  if (--step_count == 0)
    return 0;

  SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS);

  if (chars >= chars_end)
    {
    /* The current path is finished. Return when nothing is pending,
    otherwise resume the most recently pushed path. */
    if (stack_ptr == 0)
      return (int)(chars_end - chars_start);

    --stack_ptr;
    cc = cc_stack[stack_ptr];
    chars = chars_stack[stack_ptr];

    /* The pending path starts at or after the current limit. */
    if (chars >= chars_end)
      continue;

    if (next_alternative_stack[stack_ptr] != 0)
      {
      /* When an alternative is processed, the
      next alternative is pushed onto the stack. */
      SLJIT_ASSERT(*cc == OP_ALT);
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_ALT)
        {
        /* The slot that was just popped is reused, so only the
        pattern position needs to be written. */
        SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END);
        SLJIT_ASSERT(chars_stack[stack_ptr] == chars);
        SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1);
        cc_stack[stack_ptr] = alternative;
        stack_ptr++;
        }
      cc += 1 + LINK_SIZE;
      }
    }

  /* Per-item state. "last" set means the path ends after this item;
  "any" and "class" select the handling after the switch. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  /* A "break" leaves cc at the item data and continues below the switch;
  a "continue" restarts the main loop at the next opcode. */
  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ASSERT_SCS:
    /* The content of these groups is skipped entirely. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; "last" stays TRUE, so the path
    ends after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    if (stack_ptr >= SCAN_PREFIX_STACK_END)
      {
      chars_end = chars;
      continue;
      }

    /* Push the path on which the optional character is absent: it
    resumes after the character, at the same prefix position. */
    cc_stack[stack_ptr] = cc + len;
    chars_stack[stack_ptr] = chars;
    next_alternative_stack[stack_ptr] = 0;
    stack_ptr++;

    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Follow the link stored in the opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_ALT)
      {
      if (stack_ptr >= SCAN_PREFIX_STACK_END)
        {
        chars_end = chars;
        continue;
        }

      /* Remember the second alternative; it is scanned later from
      the same prefix position. */
      cc_stack[stack_ptr] = alternative;
      chars_stack[stack_ptr] = chars;
      next_alternative_stack[stack_ptr] = 1;
      stack_ptr++;
      }

    /* Step over the group header (capturing groups carry an extra
    IMM2_SIZE field) and scan the first alternative. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      {
      chars_end = chars;
      continue;
      }
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      chars_end = chars;
      continue;
      }
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      chars_end = chars;
      continue;
      }
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      {
      chars_end = chars;
      continue;
      }
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      {
      chars_end = chars;
      continue;
      }
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      {
      chars_end = chars;
      continue;
      }
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra increment for the character that follows the opcode. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      chars_end = chars;
      continue;
      }
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      chars_end = chars;
      continue;
      }
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only sets the repeat count; the type opcode that follows is
    handled by the next iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      chars_end = chars;
      continue;
      }
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Anything else ends the current path. */
    chars_end = chars;
    continue;
    }

  SLJIT_ASSERT(chars < chars_end);

  if (any)
    {
    /* Mark "repeat" positions as "any character". */
    do
      {
      chars->count = 255;
      chars++;
      }
    while (--repeat > 0 && chars < chars_end);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    SLJIT_ASSERT(last == TRUE && repeat == 1);
    /* Optional repeat specifier after the bitmap. */
    switch (*cc)
      {
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      last = FALSE;
      /* Fall through */
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      if (stack_ptr >= SCAN_PREFIX_STACK_END)
        {
        chars_end = chars;
        continue;
        }

      /* Push the path on which the class matches zero times. */
      cc_stack[stack_ptr] = ++cc;
      chars_stack[stack_ptr] = chars;
      next_alternative_stack[stack_ptr] = 0;
      stack_ptr++;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        {
        chars_end = chars;
        continue;
        }

      /* The path may continue only when minimum and maximum are equal. */
      last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE));
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    do
      {
      /* A set top bit of the bitmap (character 255) turns the position
      into "any". NOTE(review): presumably because such a class may also
      match characters above 255 - confirm. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every character whose bit is set. chr is the character
        code of bit 0 of the byte that is being processed. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first character of the next byte. */
            chr = (chr + 7) & (sljit_u32)(~7);
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      chars++;
      }
    while (--repeat > 0 && chars < chars_end);

    repeat = 1;
    if (last)
      chars_end = chars;
    continue;
    }

  /* A literal character; len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is usable only when it is encoded in the same
      number of code units. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        {
        chars_end = chars;
        continue;
        }
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        {
        /* Fall back to the character itself when the other case does
        not fit into one code unit. */
        chr = UCD_OTHERCASE(chr);
        othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
        }
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the character "repeat" times, one prefix position per code
  unit; cc and len are restored before each repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;

      /* NOTE(review): the value stored in chr here does not appear to be
      read before chr is assigned again - confirm whether it is needed. */
      chr = *cc;
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      chars++;
      cc++;
      oc++;
      }
    while (len > 0 && chars < chars_end);

    if (--repeat == 0 || chars >= chars_end)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    chars_end = chars;
  }
}

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a test of the code unit held in reg and a jump to label when the
unit cannot start a character: in 8-bit mode when (unit & 0xc0) == 0x80,
in 16-bit mode when (unit & 0xfc00) == 0xdc00. The content of reg is
overwritten by the masked value. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
#else
#error "Unknown code width"
#endif
}
#endif
/* Emits code that moves STR_PTR forward to the next position at which the
code unit found "offset" units further on equals char1 or char2.

Arguments:
  common   the compiler state
  char1    first accepted code unit
  char2    second accepted code unit (may be equal to char1)
  offset   distance, in code units, between the start position and the
             tested unit; must be 0 unless the mode is PCRE2_JIT_COMPLETE

When common->match_end_ptr is set, STR_END is temporarily replaced by a
limit derived from that stack slot; the original value is kept in TMP3 and
restored on every exit of the emitted code. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* Scan from the position of the tested unit, not from the start. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* TMP1 = stored match end + offset + 1. The SELECT presumably leaves
  the smaller of STR_END and TMP1 in STR_END - confirm against the SELECT
  macro. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  /* The search itself is delegated to the SIMD helper; only the
  adjustments made above are undone here. */
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

start = LABEL();

/* End of the scanned range: in complete mode this is a failed match, in
the other modes the jump is resolved near the end of this function. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Load the unit and step past it before it is tested. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The two values differ in a single bit: set that bit in the loaded
    unit and compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
    }
  else
    {
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
    JUMPHERE(match);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The unit at the would-be start position (offset + 1 units behind
  STR_PTR) must be able to start a character, otherwise keep searching. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
  }
#endif

/* Undo both the initial offset and the post-load increment. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

/* Outside complete mode the end-of-range jump lands here, after the
subtraction above (offset is 0 in that case, see the assert). */
if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
*/6424chars[i].chars[1] = chars[i].chars[0];6425break;64266427case 2:6428SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);64296430if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))6431chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;6432else6433chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;6434break;64356436default:6437chars[i].last_count = (chars[i].count == 255) ? 0 : 1;6438break;6439}6440}64416442#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD6443if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))6444return TRUE;6445#endif64466447in_range = FALSE;6448/* Prevent compiler "uninitialized" warning */6449from = 0;6450range_len = 4 /* minimum length */ - 1;6451for (i = 0; i <= max; i++)6452{6453if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))6454{6455range_len = i - from;6456range_right = i - 1;6457}64586459if (i < max && chars[i].count < 255)6460{6461SLJIT_ASSERT(chars[i].count > 0);6462if (!in_range)6463{6464in_range = TRUE;6465from = i;6466}6467}6468else6469in_range = FALSE;6470}64716472if (range_right >= 0)6473{6474update_table = (sljit_u8 *)allocate_read_only_data(common, 256);6475if (update_table == NULL)6476return TRUE;6477memset(update_table, IN_UCHARS(range_len), 256);64786479for (i = 0; i < range_len; i++)6480{6481SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);64826483char_set = chars[range_right - i].chars;6484char_set_end = char_set + chars[range_right - i].count;6485do6486{6487if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))6488update_table[(*char_set) & 0xff] = IN_UCHARS(i);6489char_set++;6490}6491while (char_set < char_set_end);6492}6493}64946495offset = -1;6496/* Scan forward. 
*/6497for (i = 0; i < max; i++)6498{6499if (range_right == i)6500continue;65016502if (offset == -1)6503{6504if (chars[i].last_count >= 2)6505offset = i;6506}6507else if (chars[offset].last_count < chars[i].last_count)6508offset = i;6509}65106511SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));65126513if (range_right < 0)6514{6515if (offset < 0)6516return FALSE;6517/* Works regardless the value is 1 or 2. */6518fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);6519return TRUE;6520}65216522SLJIT_ASSERT(range_right != offset);65236524if (common->match_end_ptr != 0)6525{6526OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);6527OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);6528OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));6529add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));6530OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);6531SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);6532}6533else6534{6535OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));6536add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));6537}65386539SLJIT_ASSERT(range_right >= 0);65406541if (!HAS_VIRTUAL_REGISTERS)6542OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);65436544start = LABEL();6545add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));65466547#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)6548OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));6549#else6550OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);6551#endif65526553if (!HAS_VIRTUAL_REGISTERS)6554OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);6555else6556OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);65576558OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);6559CMPTO(SLJIT_NOT_EQUAL, 
TMP1, 0, SLJIT_IMM, 0, start);65606561if (offset >= 0)6562{6563OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));6564OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));65656566if (chars[offset].count == 1)6567CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);6568else6569{6570mask = chars[offset].chars[0] ^ chars[offset].chars[1];6571if (is_powerof2(mask))6572{6573OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);6574CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);6575}6576else6577{6578match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);6579CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);6580JUMPHERE(match);6581}6582}6583}65846585#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326586if (common->utf && offset != 0)6587{6588if (offset < 0)6589{6590OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6591OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6592}6593else6594OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));65956596jumpto_if_not_utf_char_start(compiler, TMP1, start);65976598if (offset < 0)6599OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6600}6601#endif66026603if (offset >= 0)6604OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));66056606if (common->match_end_ptr != 0)6607OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6608else6609OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));6610return TRUE;6611}66126613static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)6614{6615PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);6616PCRE2_UCHAR oc;66176618oc = first_char;6619if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)6620{6621oc = TABLE_GET(first_char, common->fcc, first_char);6622#if defined SUPPORT_UNICODE6623if (first_char > 127 && (common->utf || common->ucp))6624oc = 
UCD_OTHERCASE(first_char);6625#endif6626}66276628fast_forward_first_char2(common, first_char, oc, 0);6629}66306631static SLJIT_INLINE void fast_forward_newline(compiler_common *common)6632{6633DEFINE_COMPILER;6634struct sljit_label *loop;6635struct sljit_jump *lastchar = NULL;6636struct sljit_jump *firstchar;6637struct sljit_jump *quit = NULL;6638struct sljit_jump *foundcr = NULL;6639struct sljit_jump *notfoundnl;6640jump_list *newline = NULL;66416642if (common->match_end_ptr != 0)6643{6644OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);6645OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);6646}66476648if (common->nltype == NLTYPE_FIXED && common->newline > 255)6649{6650#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD6651if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)6652{6653if (HAS_VIRTUAL_REGISTERS)6654{6655OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);6656OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));6657OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));6658}6659else6660{6661OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));6662OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));6663}6664firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);66656666OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6667OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);6668OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);6669#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 326670OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);6671#endif6672OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);66736674fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);6675OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));6676}6677else6678#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */6679{6680lastchar = 
CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6681if (HAS_VIRTUAL_REGISTERS)6682{6683OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);6684OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));6685OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));6686}6687else6688{6689OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));6690OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));6691}6692firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);66936694OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));6695OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);6696OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);6697#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 326698OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);6699#endif6700OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);67016702loop = LABEL();6703OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6704quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6705OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));6706OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));6707CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);6708CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);67096710JUMPHERE(quit);6711JUMPHERE(lastchar);6712}67136714JUMPHERE(firstchar);67156716if (common->match_end_ptr != 0)6717OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6718return;6719}67206721if (HAS_VIRTUAL_REGISTERS)6722{6723OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);6724OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));6725}6726else6727OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));67286729/* Example: match /^/ to \r\n from offset 1. 
*/6730firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);67316732if (common->nltype == NLTYPE_ANY)6733move_back(common, NULL, FALSE);6734else6735OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));67366737loop = LABEL();6738common->ff_newline_shortcut = loop;67396740#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD6741if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))6742{6743if (common->nltype == NLTYPE_ANYCRLF)6744{6745fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);6746if (common->mode != PCRE2_JIT_COMPLETE)6747lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);67486749OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6750OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6751quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);6752}6753else6754{6755fast_forward_char_simd(common, common->newline, common->newline, 0);67566757OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6758if (common->mode != PCRE2_JIT_COMPLETE)6759{6760OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);6761SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);6762}6763}6764}6765else6766#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */6767{6768read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);6769lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6770if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)6771foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);6772check_newlinechar(common, common->nltype, &newline, FALSE);6773set_jumps(newline, loop);6774}67756776if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)6777{6778if (quit == NULL)6779{6780quit = JUMP(SLJIT_JUMP);6781JUMPHERE(foundcr);6782}67836784notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6785OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6786OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);6787OP_FLAGS(SLJIT_MOV, TMP1, 0, 
SLJIT_EQUAL);6788#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 326789OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);6790#endif6791OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);6792JUMPHERE(notfoundnl);6793JUMPHERE(quit);6794}67956796if (lastchar)6797JUMPHERE(lastchar);6798JUMPHERE(firstchar);67996800if (common->match_end_ptr != 0)6801OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6802}68036804static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);68056806static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)6807{6808DEFINE_COMPILER;6809const sljit_u8 *start_bits = common->re->start_bitmap;6810struct sljit_label *start;6811struct sljit_jump *partial_quit;6812#if PCRE2_CODE_UNIT_WIDTH != 86813struct sljit_jump *found = NULL;6814#endif6815jump_list *matches = NULL;68166817if (common->match_end_ptr != 0)6818{6819OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);6820OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);6821OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));6822OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);6823SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);6824}68256826start = LABEL();68276828partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6829if (common->mode == PCRE2_JIT_COMPLETE)6830add_jump(compiler, &common->failed_match, partial_quit);68316832OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6833OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));68346835if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))6836{6837#if PCRE2_CODE_UNIT_WIDTH != 86838if ((start_bits[31] & 0x80) != 0)6839found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);6840else6841CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);6842#elif defined SUPPORT_UNICODE6843if (common->utf && is_char7_bitset(start_bits, FALSE))6844CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, 
start);6845#endif6846OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);6847OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);6848OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);6849if (!HAS_VIRTUAL_REGISTERS)6850{6851OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);6852OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);6853}6854else6855{6856OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);6857OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);6858}6859JUMPTO(SLJIT_ZERO, start);6860}6861else6862set_jumps(matches, start);68636864#if PCRE2_CODE_UNIT_WIDTH != 86865if (found != NULL)6866JUMPHERE(found);6867#endif68686869OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));68706871if (common->mode != PCRE2_JIT_COMPLETE)6872JUMPHERE(partial_quit);68736874if (common->match_end_ptr != 0)6875OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);6876}68776878static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)6879{6880DEFINE_COMPILER;6881struct sljit_label *loop;6882struct sljit_jump *toolong;6883struct sljit_jump *already_found;6884struct sljit_jump *found;6885struct sljit_jump *found_oc = NULL;6886jump_list *not_found = NULL;6887sljit_u32 oc, bit;68886889SLJIT_ASSERT(common->req_char_ptr != 0);6890OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);6891OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);6892toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);6893already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);68946895if (has_firstchar)6896OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6897else6898OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);68996900oc = req_char;6901if (caseless)6902{6903oc = TABLE_GET(req_char, common->fcc, req_char);6904#if defined SUPPORT_UNICODE6905if (req_char > 127 && (common->utf || common->ucp))6906oc = UCD_OTHERCASE(req_char);6907#endif6908}69096910#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD6911if 
(JIT_HAS_FAST_REQUESTED_CHAR_SIMD)6912{6913not_found = fast_requested_char_simd(common, req_char, oc);6914}6915else6916#endif6917{6918loop = LABEL();6919add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));69206921OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);69226923if (req_char == oc)6924found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);6925else6926{6927bit = req_char ^ oc;6928if (is_powerof2(bit))6929{6930OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);6931found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);6932}6933else6934{6935found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);6936found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);6937}6938}6939OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));6940JUMPTO(SLJIT_JUMP, loop);69416942JUMPHERE(found);6943if (found_oc)6944JUMPHERE(found_oc);6945}69466947OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);69486949JUMPHERE(already_found);6950JUMPHERE(toolong);6951return not_found;6952}69536954static void do_revertframes(compiler_common *common)6955{6956DEFINE_COMPILER;6957struct sljit_jump *jump;6958struct sljit_label *mainloop;69596960sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);6961GET_LOCAL_BASE(TMP1, 0, 0);69626963/* Drop frames until we reach STACK_TOP. 
*/6964mainloop = LABEL();6965OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));6966OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);6967jump = JUMP(SLJIT_SIG_LESS_EQUAL);69686969OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);6970if (HAS_VIRTUAL_REGISTERS)6971{6972OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));6973OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));6974OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));6975}6976else6977{6978OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));6979OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));6980OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));6981OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);6982GET_LOCAL_BASE(TMP1, 0, 0);6983OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);6984}6985JUMPTO(SLJIT_JUMP, mainloop);69866987JUMPHERE(jump);6988sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);6989jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);6990/* End of reverting values. 
*/6991OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);69926993JUMPHERE(jump);6994OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0);6995if (HAS_VIRTUAL_REGISTERS)6996{6997OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));6998OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));6999}7000else7001{7002OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));7003OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));7004OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);7005}7006JUMPTO(SLJIT_JUMP, mainloop);7007}70087009#ifdef SUPPORT_UNICODE7010#define UCPCAT(bit) (1 << (bit))7011#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))7012#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))7013#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))7014#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)7015#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)7016#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)7017#endif70187019static void check_wordboundary(compiler_common *common, BOOL ucp)7020{7021DEFINE_COMPILER;7022struct sljit_jump *skipread;7023jump_list *skipread_list = NULL;7024#ifdef SUPPORT_UNICODE7025struct sljit_label *valid_utf;7026jump_list *invalid_utf1 = NULL;7027#endif /* SUPPORT_UNICODE */7028jump_list *invalid_utf2 = NULL;7029#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE7030struct sljit_jump *jump;7031#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */70327033SLJIT_UNUSED_ARG(ucp);7034SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);70357036SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));7037sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);7038/* Get type of the previous char, and put it to TMP3. 
*/7039OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);7040OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));7041OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);7042skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);70437044#ifdef SUPPORT_UNICODE7045if (common->invalid_utf)7046{7047peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);70487049if (common->mode != PCRE2_JIT_COMPLETE)7050{7051OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);7052OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);7053move_back(common, NULL, TRUE);7054check_start_used_ptr(common);7055OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);7056OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);7057}7058}7059else7060#endif /* SUPPORT_UNICODE */7061{7062if (common->mode == PCRE2_JIT_COMPLETE)7063peek_char_back(common, READ_CHAR_MAX, NULL);7064else7065{7066move_back(common, NULL, TRUE);7067check_start_used_ptr(common);7068read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);7069}7070}70717072/* Testing char type. */7073#ifdef SUPPORT_UNICODE7074if (ucp)7075{7076add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));7077OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);7078OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);7079OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);7080}7081else7082#endif /* SUPPORT_UNICODE */7083{7084#if PCRE2_CODE_UNIT_WIDTH != 87085jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7086#elif defined SUPPORT_UNICODE7087/* Here TMP3 has already been zeroed. 
*/7088jump = NULL;7089if (common->utf)7090jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7091#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */7092OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);7093OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);7094OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);7095#if PCRE2_CODE_UNIT_WIDTH != 87096JUMPHERE(jump);7097#elif defined SUPPORT_UNICODE7098if (jump != NULL)7099JUMPHERE(jump);7100#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */7101}7102JUMPHERE(skipread);71037104OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);7105check_str_end(common, &skipread_list);7106peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2);71077108/* Testing char type. This is a code duplication. */7109#ifdef SUPPORT_UNICODE71107111valid_utf = LABEL();71127113if (ucp)7114{7115add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));7116OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);7117OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);7118OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);7119}7120else7121#endif /* SUPPORT_UNICODE */7122{7123#if PCRE2_CODE_UNIT_WIDTH != 87124/* TMP2 may be destroyed by peek_char. 
*/7125OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);7126jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7127#elif defined SUPPORT_UNICODE7128OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);7129jump = NULL;7130if (common->utf)7131jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7132#endif7133OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);7134OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);7135OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);7136#if PCRE2_CODE_UNIT_WIDTH != 87137JUMPHERE(jump);7138#elif defined SUPPORT_UNICODE7139if (jump != NULL)7140JUMPHERE(jump);7141#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */7142}7143set_jumps(skipread_list, LABEL());71447145OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7146OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);7147OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);71487149#ifdef SUPPORT_UNICODE7150if (common->invalid_utf)7151{7152set_jumps(invalid_utf1, LABEL());71537154peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL);7155CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);71567157OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7158OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);7159OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);71607161set_jumps(invalid_utf2, LABEL());7162OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7163OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);7164OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);7165}7166#endif /* SUPPORT_UNICODE */7167}71687169static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)7170{7171/* May destroy TMP1. */7172DEFINE_COMPILER;7173int ranges[MAX_CLASS_RANGE_SIZE];7174sljit_u8 bit, cbit, all;7175int i, byte, length = 0;71767177bit = bits[0] & 0x1;7178/* All bits will be zero or one (since bit is zero or one). 
*/7179all = (sljit_u8)-bit;71807181for (i = 0; i < 256; )7182{7183byte = i >> 3;7184if ((i & 0x7) == 0 && bits[byte] == all)7185i += 8;7186else7187{7188cbit = (bits[byte] >> (i & 0x7)) & 0x1;7189if (cbit != bit)7190{7191if (length >= MAX_CLASS_RANGE_SIZE)7192return FALSE;7193ranges[length] = i;7194length++;7195bit = cbit;7196all = (sljit_u8)-cbit; /* sign extend bit into byte */7197}7198i++;7199}7200}72017202if (((bit == 0) && nclass) || ((bit == 1) && !nclass))7203{7204if (length >= MAX_CLASS_RANGE_SIZE)7205return FALSE;7206ranges[length] = 256;7207length++;7208}72097210if (length < 0 || length > 4)7211return FALSE;72127213bit = bits[0] & 0x1;7214if (invert) bit ^= 0x1;72157216/* No character is accepted. */7217if (length == 0 && bit == 0)7218add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));72197220switch(length)7221{7222case 0:7223/* When bit != 0, all characters are accepted. */7224return TRUE;72257226case 1:7227add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));7228return TRUE;72297230case 2:7231if (ranges[0] + 1 != ranges[1])7232{7233OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7234add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7235}7236else7237add_jump(compiler, backtracks, CMP(bit != 0 ? 
SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));7238return TRUE;72397240case 3:7241if (bit != 0)7242{7243add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));7244if (ranges[0] + 1 != ranges[1])7245{7246OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7247add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7248}7249else7250add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));7251return TRUE;7252}72537254add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));7255if (ranges[1] + 1 != ranges[2])7256{7257OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);7258add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));7259}7260else7261add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));7262return TRUE;72637264case 4:7265if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])7266&& (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]7267&& (ranges[1] & (ranges[2] - ranges[0])) == 07268&& is_powerof2(ranges[2] - ranges[0]))7269{7270SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);7271OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);7272if (ranges[2] + 1 != ranges[3])7273{7274OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);7275add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));7276}7277else7278add_jump(compiler, backtracks, CMP(bit != 0 ? 
SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));7279return TRUE;7280}72817282if (bit != 0)7283{7284i = 0;7285if (ranges[0] + 1 != ranges[1])7286{7287OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7288add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7289i = ranges[0];7290}7291else7292add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));72937294if (ranges[2] + 1 != ranges[3])7295{7296OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);7297add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));7298}7299else7300add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));7301return TRUE;7302}73037304OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7305add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));7306if (ranges[1] + 1 != ranges[2])7307{7308OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);7309add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));7310}7311else7312add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7313return TRUE;73147315default:7316SLJIT_UNREACHABLE();7317return FALSE;7318}7319}73207321static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)7322{7323/* May destroy TMP1. 
*/7324DEFINE_COMPILER;7325uint16_t char_list[MAX_CLASS_CHARS_SIZE];7326uint8_t byte;7327sljit_s32 type;7328int i, j, k, len, c;73297330if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))7331return FALSE;73327333len = 0;73347335for (i = 0; i < 32; i++)7336{7337byte = bits[i];73387339if (nclass)7340byte = (sljit_u8)~byte;73417342j = 0;7343while (byte != 0)7344{7345if (byte & 0x1)7346{7347c = i * 8 + j;73487349k = len;73507351if ((c & 0x20) != 0)7352{7353for (k = 0; k < len; k++)7354if (char_list[k] == c - 0x20)7355{7356char_list[k] |= 0x120;7357break;7358}7359}73607361if (k == len)7362{7363if (len >= MAX_CLASS_CHARS_SIZE)7364return FALSE;73657366char_list[len++] = (uint16_t) c;7367}7368}73697370byte >>= 1;7371j++;7372}7373}73747375if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */73767377i = 0;7378j = 0;73797380if (char_list[0] == 0)7381{7382i++;7383OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);7384OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);7385}7386else7387OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);73887389while (i < len)7390{7391if ((char_list[i] & 0x100) != 0)7392j++;7393else7394{7395OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);7396SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);7397}7398i++;7399}74007401if (j != 0)7402{7403OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);74047405for (i = 0; i < len; i++)7406if ((char_list[i] & 0x100) != 0)7407{7408j--;7409OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);7410SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);7411}7412}74137414if (invert)7415nclass = !nclass;74167417type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;7418add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));7419return TRUE;7420}74217422static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)7423{7424/* May destroy TMP1. 
*/7425if (optimize_class_ranges(common, bits, nclass, invert, backtracks))7426return TRUE;7427return optimize_class_chars(common, bits, nclass, invert, backtracks);7428}74297430static void check_anynewline(compiler_common *common)7431{7432/* Check whether TMP1 contains a newline character. TMP2 destroyed. */7433DEFINE_COMPILER;74347435sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);74367437OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);7438OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);7439OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);7440OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);7441#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 327442#if PCRE2_CODE_UNIT_WIDTH == 87443if (common->utf)7444{7445#endif7446OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7447OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);7448OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);7449#if PCRE2_CODE_UNIT_WIDTH == 87450}7451#endif7452#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */7453OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);7454OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);7455}74567457static void check_hspace(compiler_common *common)7458{7459/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. 
*/7460DEFINE_COMPILER;74617462sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);74637464OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);7465OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);7466OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);7467OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7468OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);7469#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 327470#if PCRE2_CODE_UNIT_WIDTH == 87471if (common->utf)7472{7473#endif7474OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7475OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);7476OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7477OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);7478OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7479OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);7480OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);7481OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);7482OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);7483OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7484OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);7485OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7486OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);7487#if PCRE2_CODE_UNIT_WIDTH == 87488}7489#endif7490#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */7491OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);74927493OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);7494}74957496static void check_vspace(compiler_common *common)7497{7498/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. 
*/7499DEFINE_COMPILER;75007501sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);75027503OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);7504OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);7505OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);7506OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);7507#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 327508#if PCRE2_CODE_UNIT_WIDTH == 87509if (common->utf)7510{7511#endif7512OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7513OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);7514OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);7515#if PCRE2_CODE_UNIT_WIDTH == 87516}7517#endif7518#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */7519OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);75207521OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);7522}75237524static void do_casefulcmp(compiler_common *common)7525{7526DEFINE_COMPILER;7527struct sljit_jump *jump;7528struct sljit_label *label;7529int char1_reg;7530int char2_reg;75317532if (HAS_VIRTUAL_REGISTERS)7533{7534char1_reg = STR_END;7535char2_reg = STACK_TOP;7536}7537else7538{7539char1_reg = TMP3;7540char2_reg = RETURN_ADDR;7541}75427543/* Update ref_update_local_size() when this changes. 
*/7544SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));7545sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);7546OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);75477548if (char1_reg == STR_END)7549{7550OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);7551OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);7552}75537554if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7555{7556label = LABEL();7557sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7558sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7559jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7560OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));7561JUMPTO(SLJIT_NOT_ZERO, label);75627563JUMPHERE(jump);7564OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7565}7566else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7567{7568OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));7569OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));75707571label = LABEL();7572sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7573sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7574jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7575OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));7576JUMPTO(SLJIT_NOT_ZERO, label);75777578JUMPHERE(jump);7579OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7580OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7581}7582else7583{7584label = LABEL();7585OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);7586OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);7587OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, 
SLJIT_IMM, IN_UCHARS(1));7588OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7589jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7590OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));7591JUMPTO(SLJIT_NOT_ZERO, label);75927593JUMPHERE(jump);7594OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7595}75967597if (char1_reg == STR_END)7598{7599OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);7600OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);7601}76027603OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);7604}76057606static void do_caselesscmp(compiler_common *common)7607{7608DEFINE_COMPILER;7609struct sljit_jump *jump;7610struct sljit_label *label;7611int char1_reg = STR_END;7612int char2_reg;7613int lcc_table;7614int opt_type = 0;76157616if (HAS_VIRTUAL_REGISTERS)7617{7618char2_reg = STACK_TOP;7619lcc_table = STACK_LIMIT;7620}7621else7622{7623char2_reg = RETURN_ADDR;7624lcc_table = TMP3;7625}76267627if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7628opt_type = 1;7629else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7630opt_type = 2;76317632/* Update ref_update_local_size() when this changes. 
*/7633SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));7634sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);7635OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);76367637OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0);76387639if (char2_reg == STACK_TOP)7640{7641OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);7642OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);7643}76447645OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);76467647if (opt_type == 1)7648{7649label = LABEL();7650sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7651sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7652}7653else if (opt_type == 2)7654{7655OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));7656OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));76577658label = LABEL();7659sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7660sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7661}7662else7663{7664label = LABEL();7665OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);7666OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);7667OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));7668}76697670#if PCRE2_CODE_UNIT_WIDTH != 87671jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);7672#endif7673OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);7674#if PCRE2_CODE_UNIT_WIDTH != 87675JUMPHERE(jump);7676jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);7677#endif7678OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);7679#if PCRE2_CODE_UNIT_WIDTH != 87680JUMPHERE(jump);7681#endif76827683if (opt_type == 0)7684OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));76857686jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7687OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, 
SLJIT_IMM, IN_UCHARS(1));7688JUMPTO(SLJIT_NOT_ZERO, label);76897690JUMPHERE(jump);7691OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);76927693if (opt_type == 2)7694OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));76957696if (char2_reg == STACK_TOP)7697{7698OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);7699OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);7700}77017702OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);7703OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);7704}77057706#include "pcre2_jit_char_inc.h"77077708static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)7709{7710DEFINE_COMPILER;7711struct sljit_jump *jump[4];77127713switch(type)7714{7715case OP_SOD:7716if (HAS_VIRTUAL_REGISTERS)7717{7718OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);7719OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));7720}7721else7722OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));7723add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));7724return cc;77257726case OP_SOM:7727if (HAS_VIRTUAL_REGISTERS)7728{7729OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);7730OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));7731}7732else7733OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));7734add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));7735return cc;77367737case OP_NOT_WORD_BOUNDARY:7738case OP_WORD_BOUNDARY:7739case OP_NOT_UCP_WORD_BOUNDARY:7740case OP_UCP_WORD_BOUNDARY:7741add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));7742#ifdef SUPPORT_UNICODE7743if (common->invalid_utf)7744{7745add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? 
SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));7746return cc;7747}7748#endif /* SUPPORT_UNICODE */7749sljit_set_current_flags(compiler, SLJIT_SET_Z);7750add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));7751return cc;77527753case OP_EODN:7754/* Requires rather complex checks. */7755jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);7756if (common->nltype == NLTYPE_FIXED && common->newline > 255)7757{7758OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));7759OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));7760if (common->mode == PCRE2_JIT_COMPLETE)7761add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));7762else7763{7764jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);7765OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);7766OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);7767OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);7768OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);7769add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));7770check_partial(common, TRUE);7771add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));7772JUMPHERE(jump[1]);7773}7774OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7775add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));7776add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));7777}7778else if (common->nltype == NLTYPE_FIXED)7779{7780OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7781OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));7782add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));7783add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));7784}7785else7786{7787OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));7788jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, 
SLJIT_IMM, CHAR_CR);7789OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));7790OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);7791jump[2] = JUMP(SLJIT_GREATER);7792add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);7793/* Equal. */7794OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7795jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);7796add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));77977798JUMPHERE(jump[1]);7799if (common->nltype == NLTYPE_ANYCRLF)7800{7801OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7802add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));7803add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));7804}7805else7806{7807OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);7808read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);7809add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));7810add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));7811sljit_set_current_flags(compiler, SLJIT_SET_Z);7812add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));7813OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);7814}7815JUMPHERE(jump[2]);7816JUMPHERE(jump[3]);7817}7818JUMPHERE(jump[0]);7819if (common->mode != PCRE2_JIT_COMPLETE)7820check_partial(common, TRUE);7821return cc;78227823case OP_EOD:7824add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));7825if (common->mode != PCRE2_JIT_COMPLETE)7826check_partial(common, TRUE);7827return cc;78287829case OP_DOLL:7830if (HAS_VIRTUAL_REGISTERS)7831{7832OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);7833OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);7834}7835else7836OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);7837add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));78387839if 
(!common->endonly)7840compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);7841else7842{7843add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));7844check_partial(common, FALSE);7845}7846return cc;78477848case OP_DOLLM:7849jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);7850if (HAS_VIRTUAL_REGISTERS)7851{7852OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);7853OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);7854}7855else7856OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);7857add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));7858check_partial(common, FALSE);7859jump[0] = JUMP(SLJIT_JUMP);7860JUMPHERE(jump[1]);78617862if (common->nltype == NLTYPE_FIXED && common->newline > 255)7863{7864OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));7865OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));7866if (common->mode == PCRE2_JIT_COMPLETE)7867add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));7868else7869{7870jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);7871/* STR_PTR = STR_END - IN_UCHARS(1) */7872add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));7873check_partial(common, TRUE);7874add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));7875JUMPHERE(jump[1]);7876}78777878OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7879add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));7880add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));7881}7882else7883{7884peek_char(common, common->nlmax, TMP3, 0, NULL);7885check_newlinechar(common, common->nltype, backtracks, FALSE);7886}7887JUMPHERE(jump[0]);7888return cc;78897890case OP_CIRC:7891if (HAS_VIRTUAL_REGISTERS)7892{7893OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 
0);7894OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));7895add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));7896OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);7897add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));7898}7899else7900{7901OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));7902add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));7903OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);7904add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));7905}7906return cc;79077908case OP_CIRCM:7909/* TMP2 might be used by peek_char_back. */7910if (HAS_VIRTUAL_REGISTERS)7911{7912OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);7913OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));7914jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);7915OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);7916}7917else7918{7919OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));7920jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);7921OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);7922}7923add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));7924jump[0] = JUMP(SLJIT_JUMP);7925JUMPHERE(jump[1]);79267927if (!common->alt_circumflex)7928add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));79297930if (common->nltype == NLTYPE_FIXED && common->newline > 255)7931{7932OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));7933add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));7934OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));7935OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 
IN_UCHARS(-1));7936add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));7937add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));7938}7939else7940{7941peek_char_back(common, common->nlmax, backtracks);7942check_newlinechar(common, common->nltype, backtracks, FALSE);7943}7944JUMPHERE(jump[0]);7945return cc;7946}7947SLJIT_UNREACHABLE();7948return cc;7949}79507951/* Forward definitions. */7952static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);7953static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);79547955#define PUSH_BACKTRACK(size, ccstart, error) \7956do \7957{ \7958backtrack = sljit_alloc_memory(compiler, (size)); \7959if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \7960return error; \7961memset(backtrack, 0, size); \7962backtrack->prev = parent->top; \7963backtrack->cc = (ccstart); \7964parent->top = backtrack; \7965} \7966while (0)79677968#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \7969do \7970{ \7971backtrack = sljit_alloc_memory(compiler, (size)); \7972if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \7973return; \7974memset(backtrack, 0, size); \7975backtrack->prev = parent->top; \7976backtrack->cc = (ccstart); \7977parent->top = backtrack; \7978} \7979while (0)79807981#define BACKTRACK_AS(type) ((type *)backtrack)79827983static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)7984{7985/* The OVECTOR offset goes to TMP2. 
*/7986DEFINE_COMPILER;7987int count = GET2(cc, 1 + IMM2_SIZE);7988PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;7989unsigned int offset;7990jump_list *found = NULL;79917992SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);79937994OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));79957996count--;7997while (count-- > 0)7998{7999offset = GET2(slot, 0) << 1;8000GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));8001add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));8002slot += common->name_entry_size;8003}80048005offset = GET2(slot, 0) << 1;8006GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));8007if (backtracks != NULL && !common->unset_backref)8008add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));80098010set_jumps(found, LABEL());8011}80128013static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)8014{8015DEFINE_COMPILER;8016BOOL ref = (*cc == OP_REF || *cc == OP_REFI);8017int offset = 0;8018struct sljit_jump *jump = NULL;8019struct sljit_jump *partial;8020struct sljit_jump *nopartial;8021#if defined SUPPORT_UNICODE8022struct sljit_label *loop;8023struct sljit_label *caseless_loop;8024struct sljit_jump *turkish_ascii_i = NULL;8025struct sljit_jump *turkish_non_ascii_i = NULL;8026jump_list *no_match = NULL;8027int source_reg = COUNT_MATCH;8028int source_end_reg = ARGUMENTS;8029int char1_reg = STACK_LIMIT;8030PCRE2_UCHAR refi_flag = 0;80318032if (*cc == OP_REFI || *cc == OP_DNREFI)8033refi_flag = cc[PRIV(OP_lengths)[*cc] - 1];8034#endif /* SUPPORT_UNICODE */80358036if (ref)8037{8038offset = GET2(cc, 1) << 1;8039OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8040/* OVECTOR(1) contains the "string begin - 1" constant. 
*/8041if (withchecks && !common->unset_backref)8042add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));8043}8044else8045OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);80468047#if defined SUPPORT_UNICODE8048if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI))8049{8050/* Update ref_update_local_size() when this changes. */8051SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));80528053if (ref)8054OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8055else8056OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));80578058if (withchecks && emptyfail)8059add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));80608061OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0);8062OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0);8063OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0);80648065OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);8066OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);80678068loop = LABEL();8069jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);8070partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);80718072/* Read original character. It must be a valid UTF character. */8073OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);8074OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);80758076read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);80778078OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);8079OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);8080OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);80818082/* Read second character. 
*/8083read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);80848085CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);80868087if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==8088REFI_FLAG_TURKISH_CASING)8089{8090OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20);8091turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69);80928093OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1);8094turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131);8095}80968097OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);80988099add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));81008101OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);8102OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);8103OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);81048105OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));81068107OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));8108OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));8109OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);8110CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);81118112add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));8113OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);8114OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));81158116if (refi_flag & REFI_FLAG_CASELESS_RESTRICT)8117add_jump(compiler, &no_match, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128));81188119caseless_loop = LABEL();8120OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);8121OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));8122OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);8123JUMPTO(SLJIT_EQUAL, loop);8124JUMPTO(SLJIT_LESS, caseless_loop);81258126if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==8127REFI_FLAG_TURKISH_CASING)8128{8129add_jump(compiler, &no_match, 
JUMP(SLJIT_JUMP));8130JUMPHERE(turkish_ascii_i);81318132OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);8133OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8134OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8135OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130);8136CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);81378138add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));8139JUMPHERE(turkish_non_ascii_i);81408141OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8142OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8143OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);8144OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49);8145CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);8146}81478148set_jumps(no_match, LABEL());8149if (common->mode == PCRE2_JIT_COMPLETE)8150JUMPHERE(partial);81518152OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);8153OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);8154OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);8155add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));81568157if (common->mode != PCRE2_JIT_COMPLETE)8158{8159JUMPHERE(partial);8160OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);8161OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);8162OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);81638164check_partial(common, FALSE);8165add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));8166}81678168JUMPHERE(jump);8169OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);8170OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);8171OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);8172return;8173}8174else8175#endif /* SUPPORT_UNICODE */8176{8177if (ref)8178OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);8179else8180OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);81818182if (withchecks)8183jump = 
JUMP(SLJIT_ZERO);81848185OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);8186partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);8187if (common->mode == PCRE2_JIT_COMPLETE)8188add_jump(compiler, backtracks, partial);81898190add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));8191add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));81928193if (common->mode != PCRE2_JIT_COMPLETE)8194{8195nopartial = JUMP(SLJIT_JUMP);8196JUMPHERE(partial);8197/* TMP2 -= STR_END - STR_PTR */8198OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);8199OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);8200partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);8201OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);8202add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));8203add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));8204JUMPHERE(partial);8205check_partial(common, FALSE);8206add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));8207JUMPHERE(nopartial);8208}8209}82108211if (jump != NULL)8212{8213if (emptyfail)8214add_jump(compiler, backtracks, jump);8215else8216JUMPHERE(jump);8217}8218}82198220static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)8221{8222DEFINE_COMPILER;8223BOOL ref = (*cc == OP_REF || *cc == OP_REFI);8224backtrack_common *backtrack;8225PCRE2_UCHAR type;8226int local_start = LOCAL2;8227int offset = 0;8228struct sljit_label *label;8229struct sljit_jump *zerolength;8230struct sljit_jump *jump = NULL;8231PCRE2_SPTR ccbegin = cc;8232int min = 0, max = 0;8233BOOL minimize;82348235PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);82368237if (ref)8238offset = GET2(cc, 1) << 1;8239else8240cc += IMM2_SIZE;82418242if (*ccbegin == OP_REFI || *ccbegin == OP_DNREFI)8243{8244cc += 1;8245#ifdef SUPPORT_UNICODE8246if (common->utf || 
common->ucp)8247local_start = LOCAL3;8248#endif8249}82508251type = cc[1 + IMM2_SIZE];82528253SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);8254/* Update ref_update_local_size() when this changes. */8255SLJIT_ASSERT(local_start + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size);8256minimize = (type & 0x1) != 0;8257switch(type)8258{8259case OP_CRSTAR:8260case OP_CRMINSTAR:8261min = 0;8262max = 0;8263cc += 1 + IMM2_SIZE + 1;8264break;8265case OP_CRPLUS:8266case OP_CRMINPLUS:8267min = 1;8268max = 0;8269cc += 1 + IMM2_SIZE + 1;8270break;8271case OP_CRQUERY:8272case OP_CRMINQUERY:8273min = 0;8274max = 1;8275cc += 1 + IMM2_SIZE + 1;8276break;8277case OP_CRRANGE:8278case OP_CRMINRANGE:8279min = GET2(cc, 1 + IMM2_SIZE + 1);8280max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);8281cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;8282break;8283default:8284SLJIT_UNREACHABLE();8285break;8286}82878288if (!minimize)8289{8290if (min == 0)8291{8292allocate_stack(common, 2);8293if (ref)8294OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8295OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);8296OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);8297/* Temporary release of STR_PTR. */8298OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));8299/* Handles both invalid and empty cases. Since the minimum repeat,8300is zero the invalid case is basically the same as an empty case. */8301if (ref)8302zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8303else8304{8305compile_dnref_search(common, ccbegin, NULL);8306OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);8307OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);8308zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));8309}8310/* Restore if not zero length. 
*/8311OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));8312}8313else8314{8315allocate_stack(common, 1);8316if (ref)8317OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8318OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);83198320if (ref)8321{8322if (!common->unset_backref)8323add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));8324zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8325}8326else8327{8328compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);8329OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);8330OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);8331zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));8332}8333}83348335if (min > 1 || max > 1)8336OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, SLJIT_IMM, 0);83378338label = LABEL();8339if (!ref)8340OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw));8341compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);83428343if (min > 1 || max > 1)8344{8345OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local_start);8346OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);8347OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, TMP1, 0);8348if (min > 1)8349CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);8350if (max > 1)8351{8352jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);8353allocate_stack(common, 1);8354OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);8355JUMPTO(SLJIT_JUMP, label);8356JUMPHERE(jump);8357}8358}83598360if (max == 0)8361{8362/* Includes min > 1 case as well. 
*/8363allocate_stack(common, 1);8364OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);8365JUMPTO(SLJIT_JUMP, label);8366}83678368JUMPHERE(zerolength);8369BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();83708371count_match(common);8372return cc;8373}83748375allocate_stack(common, ref ? 2 : 3);8376if (ref)8377OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8378OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);8379if (type != OP_CRMINSTAR)8380OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);83818382if (min == 0)8383{8384/* Handles both invalid and empty cases. Since the minimum repeat,8385is zero the invalid case is basically the same as an empty case. */8386if (ref)8387zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8388else8389{8390compile_dnref_search(common, ccbegin, NULL);8391OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);8392OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);8393zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));8394}8395/* Length is non-zero, we can match real repeats. 
*/8396OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);8397jump = JUMP(SLJIT_JUMP);8398}8399else8400{8401if (ref)8402{8403if (!common->unset_backref)8404add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));8405zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8406}8407else8408{8409compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);8410OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);8411OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);8412zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));8413}8414}84158416BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();8417if (max > 0)8418add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));84198420if (!ref)8421OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));8422compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);8423OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);84248425if (min > 1)8426{8427OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));8428OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);8429OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);8430CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);8431}8432else if (max > 0)8433OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);84348435if (jump != NULL)8436JUMPHERE(jump);8437JUMPHERE(zerolength);84388439count_match(common);8440return cc;8441}84428443static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)8444{8445DEFINE_COMPILER;8446backtrack_common *backtrack;8447recurse_entry *entry = common->entries;8448recurse_entry *prev = NULL;8449sljit_sw start = GET(cc, 1);8450PCRE2_SPTR start_cc;8451BOOL 
needs_control_head;84528453PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);84548455/* Inlining simple patterns. */8456if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)8457{8458start_cc = common->start + start;8459compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);8460BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;8461return cc + 1 + LINK_SIZE;8462}84638464while (entry != NULL)8465{8466if (entry->start == start)8467break;8468prev = entry;8469entry = entry->next;8470}84718472if (entry == NULL)8473{8474entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));8475if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))8476return NULL;8477entry->next = NULL;8478entry->entry_label = NULL;8479entry->backtrack_label = NULL;8480entry->entry_calls = NULL;8481entry->backtrack_calls = NULL;8482entry->start = start;84838484if (prev != NULL)8485prev->next = entry;8486else8487common->entries = entry;8488}84898490BACKTRACK_AS(recurse_backtrack)->entry = entry;84918492if (entry->entry_label == NULL)8493add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));8494else8495JUMPTO(SLJIT_FAST_CALL, entry->entry_label);8496/* Leave if the match is failed. 
*/8497add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));8498BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();8499return cc + 1 + LINK_SIZE;8500}85018502static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)8503{8504PCRE2_SPTR begin;8505PCRE2_SIZE *ovector;8506sljit_u32 oveccount, capture_top;85078508if (arguments->callout == NULL)8509return 0;85108511SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);85128513begin = arguments->begin;8514ovector = (PCRE2_SIZE*)(callout_block + 1);8515oveccount = callout_block->capture_top;85168517SLJIT_ASSERT(oveccount >= 1);85188519callout_block->version = 2;8520callout_block->callout_flags = 0;85218522/* Offsets in subject. */8523callout_block->subject_length = arguments->end - arguments->begin;8524callout_block->start_match = jit_ovector[0] - begin;8525callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;8526callout_block->subject = begin;85278528/* Convert and copy the JIT offset vector to the ovector array. */8529callout_block->capture_top = 1;8530callout_block->offset_vector = ovector;85318532ovector[0] = PCRE2_UNSET;8533ovector[1] = PCRE2_UNSET;8534ovector += 2;8535jit_ovector += 2;8536capture_top = 1;85378538/* Convert pointers to sizes. 
*/8539while (--oveccount != 0)8540{8541capture_top++;85428543ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);8544ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);85458546if (ovector[0] != PCRE2_UNSET)8547callout_block->capture_top = capture_top;85488549ovector += 2;8550jit_ovector += 2;8551}85528553return (arguments->callout)(callout_block, arguments->callout_data);8554}8555/* Offset of member `arg` inside pcre2_callout_block, obtained with SLJIT_OFFSETOF. */8556#define CALLOUT_ARG_OFFSET(arg) \8557SLJIT_OFFSETOF(pcre2_callout_block, arg)8558/*
Emits the matching path for a callout item (OP_CALLOUT, or the string form
otherwise) located at cc.

The generated code reserves space on the JIT stack for a pcre2_callout_block
followed by (top_bracket + 1) * 2 machine words, stores the callout number,
capture_last, capture_top, pattern_position, next_item_length, the callout
string fields and the mark into the block, keeps STR_PTR in LOCAL0 across the
call to do_callout_jit(), and then frees the reserved space again.

Handling of the 32-bit return value of do_callout_jit():
  > 0   jump added to this item's own_backtracks list
  != 0  (otherwise) jump to the abort path (common->abort / abort_label)
  == 0  execution falls through to the following code

Returns cc advanced by the length of the callout item.
*/8559static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)8560{8561DEFINE_COMPILER;8562backtrack_common *backtrack;8563sljit_s32 mov_opcode;8564unsigned int callout_length = (*cc == OP_CALLOUT)8565? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);8566sljit_sw value1;8567sljit_sw value2;8568sljit_sw value3;8569sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);85708571PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);8572/* Convert the byte size (callout block + ovector copy) to a word count, rounding up. */8573callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);85748575allocate_stack(common, callout_arg_size);85768577SLJIT_ASSERT(common->capture_last_ptr != 0);8578OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);8579OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);8580value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;8581OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);8582OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);8583OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);85848585/* These pointer sized fields temporarily store internal variables. 
*/8586OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);85878588if (common->mark_ptr != 0)8589OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));8590mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;8591OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));8592OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));8593/* A numeric callout has no string: all three string related fields are zero. */8594if (*cc == OP_CALLOUT)8595{8596value1 = 0;8597value2 = 0;8598value3 = 0;8599}8600else8601{8602value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);8603value2 = (callout_length - (1 + 4*LINK_SIZE + 2));8604value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));8605}86068607OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);8608OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);8609OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);8610OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);86118612SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);86138614/* Needed to save important temporary registers. */8615SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));8616OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);8617/* SLJIT_R0 = arguments */8618OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);8619GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);8620sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));8621OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);8622free_stack(common, callout_arg_size);86238624/* Check return value. 
*/8625OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);8626add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));8627if (common->abort_label == NULL)8628add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);8629else8630JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);8631return cc + callout_length;8632}8633/* NOTE(review): no #define of CALLOUT_ARG_SIZE is visible in this part of the file; confirm whether the following #undef is still needed. */8634#undef CALLOUT_ARG_SIZE8635#undef CALLOUT_ARG_OFFSET8636/*
Emits code for OP_REVERSE (fixed step) or OP_VREVERSE (variable step between
lmin and lmax) which moves STR_PTR backwards in the subject. TMP2 is loaded
with arguments->begin and is used as the lower limit.

OP_REVERSE:  failures are added to parent->own_backtracks.
OP_VREVERSE: a vreverse_backtrack is pushed and failures are added to its
             own_backtracks. STR_PTR after the mandatory lmin step is stored
             in STACK(3), STR_PTR after the additional (at most lmax - lmin)
             step is stored in STACK(2), and the label after the emitted code
             is recorded as the matchingpath of the backtrack.

Returns cc advanced past the opcode and its length argument(s).
*/8637static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)8638{8639DEFINE_COMPILER;8640backtrack_common *backtrack = NULL;8641jump_list **reverse_failed;8642unsigned int lmin, lmax;8643#ifdef SUPPORT_UNICODE8644struct sljit_jump *jump;8645struct sljit_label *label;8646#endif86478648SLJIT_ASSERT(parent->top == NULL);86498650if (*cc == OP_REVERSE)8651{8652reverse_failed = &parent->own_backtracks;8653lmin = GET2(cc, 1);8654lmax = lmin;8655cc += 1 + IMM2_SIZE;86568657SLJIT_ASSERT(lmin > 0);8658}8659else8660{8661SLJIT_ASSERT(*cc == OP_VREVERSE);8662PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);86638664reverse_failed = &backtrack->own_backtracks;8665lmin = GET2(cc, 1);8666lmax = GET2(cc, 1 + IMM2_SIZE);8667cc += 1 + 2 * IMM2_SIZE;86688669SLJIT_ASSERT(lmin < lmax);8670}86718672if (HAS_VIRTUAL_REGISTERS)8673{8674OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);8675OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));8676}8677else8678OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));86798680#ifdef SUPPORT_UNICODE8681if (common->utf)8682{8683if (lmin > 0)8684{/* Step back one character at a time; TMP3 counts the remaining characters. */8685OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);8686label = LABEL();8687add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));8688move_back(common, reverse_failed, FALSE);8689OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);8690JUMPTO(SLJIT_NOT_ZERO, label);8691}86928693if (lmin < lmax)8694{8695OP1(SLJIT_MOV, 
SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);86968697OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);8698label = LABEL();/* Reaching the limit held in TMP2 ends the optional part of the step without failing. */8699jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);8700move_back(common, reverse_failed, FALSE);8701OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);8702JUMPTO(SLJIT_NOT_ZERO, label);87038704JUMPHERE(jump);8705OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);8706}8707}8708else8709#endif8710{8711if (lmin > 0)8712{8713OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));8714add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));8715}87168717if (lmin < lmax)8718{8719OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);87208721OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));/* If the result is below TMP2, use TMP2 instead of failing. */8722OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);8723SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);87248725OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);8726}8727}87288729check_start_used_ptr(common);87308731if (lmin < lmax)8732BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();87338734return cc;8735}8736/*
Scans the opcodes starting at cc. Callouts (numeric and string), the word
boundary opcodes, OP_CIRC/OP_CIRCM, OP_DOLL/OP_DOLLM and OP_ALT are skipped.

Returns FALSE when OP_KET is reached after skipping only such opcodes, and
TRUE as soon as any other opcode is found. compile_assert_matchingpath() uses
the result to decide whether a stack slot for STR_PTR has to be reserved.
*/8737static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)8738{8739while (TRUE)8740{8741switch (*cc)8742{8743case OP_CALLOUT_STR:/* The item length is stored inside the item itself. */8744cc += GET(cc, 1 + 2*LINK_SIZE);8745break;87468747case OP_NOT_WORD_BOUNDARY:8748case OP_WORD_BOUNDARY:8749case OP_CIRC:8750case OP_CIRCM:8751case OP_DOLL:8752case OP_DOLLM:8753case OP_CALLOUT:8754case OP_ALT:8755case OP_NOT_UCP_WORD_BOUNDARY:8756case OP_UCP_WORD_BOUNDARY:8757cc += PRIV(OP_lengths)[*cc];8758break;87598760case OP_KET:8761return FALSE;87628763default:8764return TRUE;8765}8766}8767}8768/*
Emits the matching path of an assertion whose opcode is in the range
OP_ASSERT .. OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO (not allowed when conditional is set).

Every alternative is compiled with compile_matchingpath() followed by
compile_backtrackingpath(). The accept, quit, then_trap, restore_end_ptr and
positive assertion related members of common are saved on entry and restored
before every return.

conditional: when set, failure jumps are collected in backtrack->condfailed
             instead of backtrack->common.own_backtracks.

Returns cc + 1 + LINK_SIZE measured from the item that follows the last
alternative, or NULL when sljit reports a compiler error.

NOTE(review): end_block_size is declared as BOOL but is used as a slot count
(it is set to 3 further below); int looks like the intended type.
*/8769static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)8770{8771DEFINE_COMPILER;8772int framesize;8773int extrasize;8774BOOL local_quit_available = FALSE;8775BOOL needs_control_head;8776BOOL end_block_size = 0;8777BOOL has_vreverse;8778int 
private_data_ptr;8779backtrack_common altbacktrack;8780PCRE2_SPTR ccbegin;8781PCRE2_UCHAR opcode;8782PCRE2_UCHAR bra = OP_BRA;8783jump_list *tmp = NULL;8784jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;8785jump_list **found;8786/* Saving previous accept variables. */8787BOOL save_local_quit_available = common->local_quit_available;8788BOOL save_in_positive_assertion = common->in_positive_assertion;8789sljit_s32 save_restore_end_ptr = common->restore_end_ptr;8790then_trap_backtrack *save_then_trap = common->then_trap;8791struct sljit_label *save_quit_label = common->quit_label;8792struct sljit_label *save_accept_label = common->accept_label;8793jump_list *save_quit = common->quit;8794jump_list *save_positive_assertion_quit = common->positive_assertion_quit;8795jump_list *save_accept = common->accept;8796struct sljit_jump *jump;8797struct sljit_jump *brajump = NULL;87988799/* Assert captures then. */8800common->then_trap = NULL;88018802if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)8803{8804SLJIT_ASSERT(!conditional);8805bra = *cc;8806cc++;8807}88088809private_data_ptr = PRIVATE_DATA(cc);8810SLJIT_ASSERT(private_data_ptr != 0);8811framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);8812backtrack->framesize = framesize;8813backtrack->private_data_ptr = private_data_ptr;8814opcode = *cc;8815SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);8816found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;8817ccbegin = cc;8818cc += GET(cc, 1);88198820if (bra == OP_BRAMINZERO)8821{8822/* This is a braminzero backtrack path. 
*/8823OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));8824free_stack(common, 1);8825brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);8826}88278828if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))8829end_block_size = 3;88308831if (framesize < 0)8832{8833extrasize = 1;8834if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))8835extrasize = 0;88368837extrasize += end_block_size;88388839if (needs_control_head)8840extrasize++;88418842if (framesize == no_frame)8843OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);88448845if (extrasize > 0)8846allocate_stack(common, extrasize);88478848if (needs_control_head)8849OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);88508851if (extrasize > 0)8852OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);88538854if (needs_control_head)8855{8856SLJIT_ASSERT(extrasize == end_block_size + 2);8857OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);8858OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);8859}8860}8861else8862{8863extrasize = (needs_control_head ? 
3 : 2) + end_block_size;88648865OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);8866allocate_stack(common, framesize + extrasize);88678868OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);8869OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);8870if (needs_control_head)8871OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);8872OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);88738874if (needs_control_head)8875{8876OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);8877OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);8878OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);8879}8880else8881OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);88828883init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);8884}88858886if (end_block_size > 0)8887{8888OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);8889OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);8890}88918892memset(&altbacktrack, 0, sizeof(backtrack_common));8893if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))8894{8895/* Control verbs cannot escape from these asserts. 
*/8896local_quit_available = TRUE;8897common->restore_end_ptr = 0;8898common->local_quit_available = TRUE;8899common->quit_label = NULL;8900common->quit = NULL;8901}89028903common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);8904common->positive_assertion_quit = NULL;89058906while (1)8907{8908common->accept_label = NULL;8909common->accept = NULL;8910altbacktrack.top = NULL;8911altbacktrack.own_backtracks = NULL;89128913if (*ccbegin == OP_ALT && extrasize > 0)8914OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));89158916altbacktrack.cc = ccbegin;8917ccbegin += 1 + LINK_SIZE;89188919has_vreverse = (*ccbegin == OP_VREVERSE);8920if (*ccbegin == OP_REVERSE || has_vreverse)8921ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);89228923compile_matchingpath(common, ccbegin, cc, &altbacktrack);8924if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))8925{8926if (local_quit_available)8927{8928common->local_quit_available = save_local_quit_available;8929common->quit_label = save_quit_label;8930common->quit = save_quit;8931}8932common->in_positive_assertion = save_in_positive_assertion;8933common->restore_end_ptr = save_restore_end_ptr;8934common->then_trap = save_then_trap;8935common->accept_label = save_accept_label;8936common->positive_assertion_quit = save_positive_assertion_quit;8937common->accept = save_accept;8938return NULL;8939}89408941if (has_vreverse)8942{8943SLJIT_ASSERT(altbacktrack.top != NULL);8944add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));8945}89468947common->accept_label = LABEL();8948if (common->accept != NULL)8949set_jumps(common->accept, common->accept_label);89508951/* Reset stack. 
*/8952if (framesize < 0)8953{8954if (framesize == no_frame)8955OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);8956else if (extrasize > 0)8957free_stack(common, extrasize);89588959if (end_block_size > 0)8960OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));89618962if (needs_control_head)8963OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));8964}8965else8966{8967if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)8968{8969/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */8970OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));89718972if (end_block_size > 0)8973OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));89748975if (needs_control_head)8976OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));8977}8978else8979{8980OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);89818982if (end_block_size > 0)8983OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));89848985if (needs_control_head)8986OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));8987add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));8988OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));8989}8990}89918992if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)8993{8994/* We know that STR_PTR was stored on the top of the stack. */8995if (conditional)8996{8997if (extrasize > 0)8998OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 
2 : 1)));8999}9000else if (bra == OP_BRAZERO)9001{9002if (framesize < 0)9003OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));9004else9005{9006OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));9007OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));9008OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);9009}9010OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));9011OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9012}9013else if (framesize >= 0)9014{9015/* For OP_BRA and OP_BRAMINZERO. */9016OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));9017}9018}9019add_jump(compiler, found, JUMP(SLJIT_JUMP));90209021compile_backtrackingpath(common, altbacktrack.top);9022if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))9023{9024if (local_quit_available)9025{9026common->local_quit_available = save_local_quit_available;9027common->quit_label = save_quit_label;9028common->quit = save_quit;9029}9030common->in_positive_assertion = save_in_positive_assertion;9031common->restore_end_ptr = save_restore_end_ptr;9032common->then_trap = save_then_trap;9033common->accept_label = save_accept_label;9034common->positive_assertion_quit = save_positive_assertion_quit;9035common->accept = save_accept;9036return NULL;9037}9038set_jumps(altbacktrack.own_backtracks, LABEL());90399040if (*cc != OP_ALT)9041break;90429043ccbegin = cc;9044cc += GET(cc, 1);9045}90469047if (local_quit_available)9048{9049SLJIT_ASSERT(common->positive_assertion_quit == NULL);9050/* Makes the check less complicated below. */9051common->positive_assertion_quit = common->quit;9052}90539054/* None of them matched. 
*/9055if (common->positive_assertion_quit != NULL)9056{9057jump = JUMP(SLJIT_JUMP);9058set_jumps(common->positive_assertion_quit, LABEL());9059SLJIT_ASSERT(framesize != no_stack);9060if (framesize < 0)9061OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));9062else9063{9064OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9065add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));9066OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));9067}9068JUMPHERE(jump);9069}90709071if (end_block_size > 0)9072OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));90739074if (needs_control_head)9075OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));90769077if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)9078{9079/* Assert is failed. */9080if ((conditional && extrasize > 0) || bra == OP_BRAZERO)9081OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));90829083if (framesize < 0)9084{9085/* The topmost item should be 0. */9086if (bra == OP_BRAZERO)9087{9088if (extrasize >= 2)9089free_stack(common, extrasize - 1);9090OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9091}9092else if (extrasize > 0)9093free_stack(common, extrasize);9094}9095else9096{9097OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));9098/* The topmost item should be 0. */9099if (bra == OP_BRAZERO)9100{9101free_stack(common, framesize + extrasize - 1);9102OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9103}9104else9105free_stack(common, framesize + extrasize);9106OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);9107}9108jump = JUMP(SLJIT_JUMP);9109if (bra != OP_BRAZERO)9110add_jump(compiler, target, jump);91119112/* Assert is successful. */9113set_jumps(tmp, LABEL());9114if (framesize < 0)9115{9116/* We know that STR_PTR was stored on the top of the stack. 
*/9117if (extrasize > 0)9118OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));91199120/* Keep the STR_PTR on the top of the stack. */9121if (bra == OP_BRAZERO)9122{9123/* This allocation is always successful. */9124OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));9125if (extrasize >= 2)9126OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);9127}9128else if (bra == OP_BRAMINZERO)9129{9130OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));9131OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9132}9133}9134else9135{9136if (bra == OP_BRA)9137{9138/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */9139OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));9140OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));9141}9142else9143{9144/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */9145OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));91469147if (extrasize == 2 + end_block_size)9148{9149OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));9150if (bra == OP_BRAMINZERO)9151OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9152}9153else9154{9155SLJIT_ASSERT(extrasize == 3 + end_block_size);9156OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));9157OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? 
STR_PTR : SLJIT_IMM, 0);9158}9159}9160}91619162if (bra == OP_BRAZERO)9163{9164backtrack->matchingpath = LABEL();9165SET_LABEL(jump, backtrack->matchingpath);9166}9167else if (bra == OP_BRAMINZERO)9168{9169JUMPTO(SLJIT_JUMP, backtrack->matchingpath);9170JUMPHERE(brajump);9171SLJIT_ASSERT(framesize != 0);9172if (framesize > 0)9173{9174OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9175add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));9176OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));9177OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));9178OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);9179}9180set_jumps(backtrack->common.own_backtracks, LABEL());9181}9182}9183else9184{9185/* AssertNot is successful. */9186if (framesize < 0)9187{9188if (extrasize > 0)9189OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));91909191if (bra != OP_BRA)9192{9193if (extrasize >= 2)9194free_stack(common, extrasize - 1);9195OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9196}9197else if (extrasize > 0)9198free_stack(common, extrasize);9199}9200else9201{9202OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));9203OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));9204/* The topmost item should be 0. 
*/9205if (bra != OP_BRA)9206{9207free_stack(common, framesize + extrasize - 1);9208OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9209}9210else9211free_stack(common, framesize + extrasize);9212OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);9213}92149215if (bra == OP_BRAZERO)9216backtrack->matchingpath = LABEL();9217else if (bra == OP_BRAMINZERO)9218{9219JUMPTO(SLJIT_JUMP, backtrack->matchingpath);9220JUMPHERE(brajump);9221}92229223if (bra != OP_BRA)9224{9225SLJIT_ASSERT(found == &backtrack->common.own_backtracks);9226set_jumps(backtrack->common.own_backtracks, LABEL());9227backtrack->common.own_backtracks = NULL;9228}9229}9230/* Restore the members of common which were saved on entry. */9231if (local_quit_available)9232{9233common->local_quit_available = save_local_quit_available;9234common->quit_label = save_quit_label;9235common->quit = save_quit;9236}92379238common->in_positive_assertion = save_in_positive_assertion;9239common->restore_end_ptr = save_restore_end_ptr;9240common->then_trap = save_then_trap;9241common->accept_label = save_accept_label;9242common->positive_assertion_quit = save_positive_assertion_quit;9243common->accept = save_accept;9244return cc + 1 + LINK_SIZE;9245}9246/*
Emits code which resets STACK_TOP for a bracket described by ket, framesize
and private_data_ptr (the name suggests OP_ONCE handling; the callers are
outside of this part of the file).

framesize < 0:  STACK_TOP is reloaded from private_data_ptr when framesize is
                no_frame; otherwise up to two stack slots are freed (one for
                the control head, one when ket is not OP_KET or the bracket
                has alternatives).
framesize >= 0: STACK_TOP is computed from the value stored at
                private_data_ptr minus (framesize + stacksize) words.

When needs_control_head is set, the saved value is read from the stack into
TMP1 and written back to common->control_head_ptr. For OP_KETRMAX a stack
value is left in TMP2. For OP_KETRMIN with framesize < 0, STACK(-1) is copied
to private_data_ptr.
*/9247static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)9248{9249DEFINE_COMPILER;9250int stacksize;92519252if (framesize < 0)9253{9254if (framesize == no_frame)9255OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9256else9257{9258stacksize = needs_control_head ? 1 : 0;9259if (ket != OP_KET || has_alternatives)9260stacksize++;92619262if (stacksize > 0)9263free_stack(common, stacksize);9264}92659266if (needs_control_head)9267OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));92689269/* TMP2, which is set here, is used by OP_KETRMAX below. 
*/9270if (ket == OP_KETRMAX)9271OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));9272else if (ket == OP_KETRMIN)9273{9274/* Move the STR_PTR to the private_data_ptr. */9275OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));9276}9277}9278else9279{9280stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;9281OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));9282if (needs_control_head)9283OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));92849285if (ket == OP_KETRMAX)9286{9287/* TMP2, which is set here, is used by OP_KETRMAX below. */9288OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));9289}9290}9291if (needs_control_head)9292OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);9293}9294/*
Emits code which saves the previous capture state into the stack slots
starting at STACK(stacksize) and then installs the new state.

- If common->capture_last_ptr is in use, its old value is stored on the stack
  and it is set to offset >> 1 (one slot).
- If the bracket is not optimized (optimized_cbracket[offset >> 1] == 0), the
  old OVECTOR(offset) and OVECTOR(offset + 1) are stored on the stack, then
  OVECTOR(offset) is set from private_data_ptr and OVECTOR(offset + 1) from
  STR_PTR (two slots).

Returns stacksize increased by the number of slots written. TMP1 and TMP2 are
used as scratch registers.
*/9295static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)9296{9297DEFINE_COMPILER;92989299if (common->capture_last_ptr != 0)9300{9301OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);9302OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);9303OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);9304stacksize++;9305}9306if (common->optimized_cbracket[offset >> 1] == 0)9307{9308OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));9309OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));9310OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);9311OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9312OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);9313OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);9314OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);9315stacksize += 2;9316}9317return stacksize;9318}9319/*
Helper called from JIT generated code: runs PRIV(script_run) on the range
ptr .. endptr with its last argument set to FALSE. Returns endptr when the
check succeeds and NULL otherwise.
*/9320static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)9321{9322if 
(PRIV(script_run)(ptr, endptr, FALSE))9323return endptr;9324return NULL;9325}93269327#ifdef SUPPORT_UNICODE9328/*
Same as do_script_run(), except that TRUE is passed as the last argument of
PRIV(script_run); match_script_run_common() selects it when common->utf is
set. Returns endptr when the check succeeds and NULL otherwise.
*/9329static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)9330{9331if (PRIV(script_run)(ptr, endptr, TRUE))9332return endptr;9333return NULL;9334}93359336#endif /* SUPPORT_UNICODE */9337/*
Emits a call to do_script_run() (or do_script_run_utf() when common->utf is
set). The first argument is the value loaded from private_data_ptr into TMP1
and the second one is STR_PTR; the assertion below checks that these are the
first two argument registers.

The returned pointer is copied to STR_PTR. A zero (NULL) result jumps to the
backtrack list: parent->top->simple_backtracks when parent->top is not NULL,
parent->own_backtracks otherwise.
*/9338static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)9339{9340DEFINE_COMPILER;93419342SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);93439344OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9345#ifdef SUPPORT_UNICODE9346sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,9347common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));9348#else9349sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));9350#endif93519352OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);9353add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));9354}93559356/*9357Handling bracketed expressions is probably the most complex part.93589359Stack layout naming characters:9360S - Push the current STR_PTR93610 - Push a 0 (NULL)9362A - Push the current STR_PTR. Needed for restoring the STR_PTR9363before the next alternative. Not pushed if there are no alternatives.9364M - Any values pushed by the current alternative. 
Can be empty, or anything.
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times

  The following list shows the regular expression templates, their PCRE byte codes
  and stack layout supported by pcre-sljit.

  (?:)     OP_BRA                                | OP_KET       A M
  ()       OP_CBRA                               | OP_KET       C M
  (?:)+    OP_BRA                                | OP_KETRMAX   0 A M S ( A M S )*
           OP_SBRA                               | OP_KETRMAX   0 L M S ( L M S )*
  (?:)+?   OP_BRA                                | OP_KETRMIN   0 A M S ( A M S )*
           OP_SBRA                               | OP_KETRMIN   0 L M S ( L M S )*
  ()+      OP_CBRA                               | OP_KETRMAX   0 C M S ( C M S )*
           OP_SCBRA                              | OP_KETRMAX   0 C M S ( C M S )*
  ()+?     OP_CBRA                               | OP_KETRMIN   0 C M S ( C M S )*
           OP_SCBRA                              | OP_KETRMIN   0 C M S ( C M S )*
  (?:)?    OP_BRAZERO    | OP_BRA                | OP_KET       S ( A M 0 )
  (?:)??   OP_BRAMINZERO | OP_BRA                | OP_KET       S ( A M 0 )
  ()?      OP_BRAZERO    | OP_CBRA               | OP_KET       S ( C M 0 )
  ()??     OP_BRAMINZERO | OP_CBRA               | OP_KET       S ( C M 0 )
  (?:)*    OP_BRAZERO    | OP_BRA                | OP_KETRMAX   S 0 ( A M S )*
           OP_BRAZERO    | OP_SBRA               | OP_KETRMAX   S 0 ( L M S )*
  (?:)*?   OP_BRAMINZERO | OP_BRA                | OP_KETRMIN   S 0 ( A M S )*
           OP_BRAMINZERO | OP_SBRA               | OP_KETRMIN   S 0 ( L M S )*
  ()*      OP_BRAZERO    | OP_CBRA               | OP_KETRMAX   S 0 ( C M S )*
           OP_BRAZERO    | OP_SCBRA              | OP_KETRMAX   S 0 ( C M S )*
  ()*?     OP_BRAMINZERO | OP_CBRA               | OP_KETRMIN   S 0 ( C M S )*
           OP_BRAMINZERO | OP_SCBRA              | OP_KETRMIN   S 0 ( C M S )*


  Stack layout naming characters:
    A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.

  The next list shows the possible content of a bracket:
  (|)      OP_*BRA  | OP_ALT ...   M A
  (?()|)   OP_*COND | OP_ALT       M A
  (?>|)    OP_ONCE  | OP_ALT ...   
[stack trace] M A9406Or nothing, if trace is unnecessary9407*/94089409static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)9410{9411DEFINE_COMPILER;9412backtrack_common *backtrack;9413PCRE2_UCHAR opcode;9414int private_data_ptr = 0;9415int offset = 0;9416int i, stacksize;9417int repeat_ptr = 0, repeat_length = 0;9418int repeat_type = 0, repeat_count = 0;9419PCRE2_SPTR ccbegin;9420PCRE2_SPTR matchingpath;9421PCRE2_SPTR slot;9422PCRE2_UCHAR bra = OP_BRA;9423PCRE2_UCHAR ket;9424assert_backtrack *assert;9425BOOL has_alternatives;9426BOOL needs_control_head = FALSE;9427BOOL has_vreverse = FALSE;9428struct sljit_jump *jump;9429struct sljit_jump *skip;9430jump_list *jumplist;9431struct sljit_label *rmax_label = NULL;9432struct sljit_jump *braminzero = NULL;94339434PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);94359436if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)9437{9438bra = *cc;9439cc++;9440opcode = *cc;9441}94429443opcode = *cc;9444ccbegin = cc;9445matchingpath = bracketend(cc) - 1 - LINK_SIZE;9446ket = *matchingpath;9447if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)9448{9449repeat_ptr = PRIVATE_DATA(matchingpath);9450repeat_length = PRIVATE_DATA(matchingpath + 1);9451repeat_type = PRIVATE_DATA(matchingpath + 2);9452repeat_count = PRIVATE_DATA(matchingpath + 3);9453SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);9454if (repeat_type == OP_UPTO)9455ket = OP_KETRMAX;9456if (repeat_type == OP_MINUPTO)9457ket = OP_KETRMIN;9458}94599460matchingpath = ccbegin + 1 + LINK_SIZE;9461SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);9462SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));9463cc += GET(cc, 1);94649465has_alternatives = *cc == OP_ALT;9466if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))9467{9468SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == 
OP_RREF + 3,9469compile_time_checks_must_be_grouped_together);9470has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;9471}94729473if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))9474opcode = OP_SCOND;94759476if (opcode == OP_CBRA || opcode == OP_SCBRA)9477{9478/* Capturing brackets has a pre-allocated space. */9479offset = GET2(ccbegin, 1 + LINK_SIZE);9480if (common->optimized_cbracket[offset] == 0)9481{9482private_data_ptr = OVECTOR_PRIV(offset);9483offset <<= 1;9484}9485else9486{9487offset <<= 1;9488private_data_ptr = OVECTOR(offset);9489}9490BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;9491matchingpath += IMM2_SIZE;9492}9493else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE9494|| opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)9495{9496/* Other brackets simply allocate the next entry. */9497private_data_ptr = PRIVATE_DATA(ccbegin);9498SLJIT_ASSERT(private_data_ptr != 0);9499BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;9500if (opcode == OP_ONCE)9501BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);9502}95039504/* Instructions before the first alternative. */9505stacksize = 0;9506if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))9507stacksize++;9508if (bra == OP_BRAZERO)9509stacksize++;95109511if (stacksize > 0)9512allocate_stack(common, stacksize);95139514stacksize = 0;9515if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))9516{9517OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);9518stacksize++;9519}95209521if (bra == OP_BRAZERO)9522OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);95239524if (bra == OP_BRAMINZERO)9525{9526/* This is a backtrack path! 
(Since the try-path of OP_BRAMINZERO matches to the empty string) */9527OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));9528if (ket != OP_KETRMIN)9529{9530free_stack(common, 1);9531braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);9532}9533else if (opcode == OP_ONCE || opcode >= OP_SBRA)9534{9535jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);9536OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));9537/* Nothing stored during the first run. */9538skip = JUMP(SLJIT_JUMP);9539JUMPHERE(jump);9540/* Checking zero-length iteration. */9541if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)9542{9543/* When we come from outside, private_data_ptr contains the previous STR_PTR. */9544braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9545}9546else9547{9548/* Except when the whole stack frame must be saved. */9549OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9550braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));9551}9552JUMPHERE(skip);9553}9554else9555{9556jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);9557OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));9558JUMPHERE(jump);9559}9560}95619562if (repeat_type != 0)9563{9564OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);9565if (repeat_type == OP_EXACT)9566rmax_label = LABEL();9567}95689569if (ket == OP_KETRMIN)9570BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();95719572if (ket == OP_KETRMAX)9573{9574rmax_label = LABEL();9575if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)9576BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;9577}95789579/* Handling capturing brackets and alternatives. 
*/9580if (opcode == OP_ONCE)9581{9582stacksize = 0;9583if (needs_control_head)9584{9585OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);9586stacksize++;9587}95889589if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)9590{9591/* Neither capturing brackets nor recursions are found in the block. */9592if (ket == OP_KETRMIN)9593{9594stacksize += 2;9595if (!needs_control_head)9596OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9597}9598else9599{9600if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)9601OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);9602if (ket == OP_KETRMAX || has_alternatives)9603stacksize++;9604}96059606if (stacksize > 0)9607allocate_stack(common, stacksize);96089609stacksize = 0;9610if (needs_control_head)9611{9612stacksize++;9613OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);9614}96159616if (ket == OP_KETRMIN)9617{9618if (needs_control_head)9619OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9620OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);9621if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)9622OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));9623OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);9624}9625else if (ket == OP_KETRMAX || has_alternatives)9626OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);9627}9628else9629{9630if (ket != OP_KET || has_alternatives)9631stacksize++;96329633stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;9634allocate_stack(common, stacksize);96359636if (needs_control_head)9637OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);96389639OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9640OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));96419642stacksize = needs_control_head ? 
1 : 0;9643if (ket != OP_KET || has_alternatives)9644{9645OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);9646OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);9647stacksize++;9648OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);9649}9650else9651{9652OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);9653OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);9654}9655init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);9656}9657}9658else if (opcode == OP_CBRA || opcode == OP_SCBRA)9659{9660/* Saving the previous values. */9661if (common->optimized_cbracket[offset >> 1] != 0)9662{9663SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));9664allocate_stack(common, 2);9665OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9666OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9667OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9668OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);9669OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);9670}9671else9672{9673OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9674allocate_stack(common, 1);9675OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9676OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);9677}9678}9679else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))9680{9681OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9682allocate_stack(common, 4);9683OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9684OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9685OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);9686OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);9687OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);9688OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 
0);96899690has_vreverse = (*matchingpath == OP_VREVERSE);9691if (*matchingpath == OP_REVERSE || has_vreverse)9692matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);9693}9694else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)9695{9696/* Saving the previous value. */9697OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9698allocate_stack(common, 1);9699OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9700OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);97019702if (*matchingpath == OP_REVERSE)9703matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);9704}9705else if (opcode == OP_ASSERT_SCS)9706{9707/* Nested scs blocks will not update this variable. */9708if (common->restore_end_ptr == 0)9709common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);97109711if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF))9712{9713/* Optimized case for a single capture reference. */9714i = OVECTOR(GET2(matchingpath, 1) << 1);97159716OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i);97179718add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));9719matchingpath += 1 + IMM2_SIZE;97209721allocate_stack(common, has_alternatives ? 
3 : 2);97229723OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9724OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9725OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);9726OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw));9727OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9728OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);9729}9730else9731{9732OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));9733jumplist = NULL;97349735while (TRUE)9736{9737if (*matchingpath == OP_CREF)9738{9739sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1));9740matchingpath += 1 + IMM2_SIZE;9741}9742else9743{9744SLJIT_ASSERT(*matchingpath == OP_DNCREF);97459746i = GET2(matchingpath, 1 + IMM2_SIZE);9747slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;97489749while (i-- > 1)9750{9751sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));9752add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));9753slot += common->name_entry_size;9754}97559756sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));9757matchingpath += 1 + 2 * IMM2_SIZE;9758}97599760if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF)9761break;97629763add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));9764}97659766add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),9767CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));97689769set_jumps(jumplist, LABEL());97709771allocate_stack(common, has_alternatives ? 
3 : 2);97729773OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9774OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9775OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9776OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0);9777OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);9778OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));9779}97809781OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);9782OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0);97839784if (has_alternatives)9785OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);9786}9787else if (has_alternatives)9788{9789/* Pushing the starting string pointer. */9790allocate_stack(common, 1);9791OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);9792}97939794/* Generating code for the first alternative. */9795if (opcode == OP_COND || opcode == OP_SCOND)9796{9797if (*matchingpath == OP_CREF)9798{9799SLJIT_ASSERT(has_alternatives);9800add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),9801CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));9802matchingpath += 1 + IMM2_SIZE;9803}9804else if (*matchingpath == OP_DNCREF)9805{9806SLJIT_ASSERT(has_alternatives);98079808i = GET2(matchingpath, 1 + IMM2_SIZE);9809slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;9810OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);9811OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));9812OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);9813slot += common->name_entry_size;9814i--;9815while (i-- > 0)9816{9817OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);9818OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);9819slot += common->name_entry_size;9820}9821OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 
0);9822add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO));9823matchingpath += 1 + 2 * IMM2_SIZE;9824}9825else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)9826{9827/* Never has other case. */9828BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL;9829SLJIT_ASSERT(!has_alternatives);98309831if (*matchingpath == OP_TRUE)9832{9833stacksize = 1;9834matchingpath++;9835}9836else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)9837stacksize = 0;9838else if (*matchingpath == OP_RREF)9839{9840stacksize = GET2(matchingpath, 1);9841if (common->currententry == NULL)9842stacksize = 0;9843else if (stacksize == RREF_ANY)9844stacksize = 1;9845else if (common->currententry->start == 0)9846stacksize = stacksize == 0;9847else9848stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);98499850if (stacksize != 0)9851matchingpath += 1 + IMM2_SIZE;9852}9853else9854{9855if (common->currententry == NULL || common->currententry->start == 0)9856stacksize = 0;9857else9858{9859stacksize = GET2(matchingpath, 1 + IMM2_SIZE);9860slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;9861i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);9862while (stacksize > 0)9863{9864if ((int)GET2(slot, 0) == i)9865break;9866slot += common->name_entry_size;9867stacksize--;9868}9869}98709871if (stacksize != 0)9872matchingpath += 1 + 2 * IMM2_SIZE;9873}98749875/* The stacksize == 0 is a common "else" case. */9876if (stacksize == 0)9877{9878if (*cc == OP_ALT)9879{9880matchingpath = cc + 1 + LINK_SIZE;9881cc += GET(cc, 1);9882}9883else9884matchingpath = cc;9885}9886}9887else9888{9889SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);9890/* Similar code as PUSH_BACKTRACK macro. 
*/9891assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));9892if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))9893return NULL;9894memset(assert, 0, sizeof(assert_backtrack));9895assert->common.cc = matchingpath;9896BACKTRACK_AS(bracket_backtrack)->u.assert = assert;9897matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);9898}9899}99009901compile_matchingpath(common, matchingpath, cc, backtrack);9902if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))9903return NULL;99049905switch (opcode)9906{9907case OP_ASSERTBACK_NA:9908if (has_vreverse)9909{9910SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));9911add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));9912}99139914if (PRIVATE_DATA(ccbegin + 1))9915OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9916break;9917case OP_ONCE:9918match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);9919break;9920case OP_SCRIPT_RUN:9921match_script_run_common(common, private_data_ptr, backtrack);9922break;9923}99249925stacksize = 0;9926if (repeat_type == OP_MINUPTO)9927{9928/* We need to preserve the counter. TMP2 will be used below. */9929OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);9930stacksize++;9931}9932if (ket != OP_KET || bra != OP_BRA)9933stacksize++;9934if (offset != 0)9935{9936if (common->capture_last_ptr != 0)9937stacksize++;9938if (common->optimized_cbracket[offset >> 1] == 0)9939stacksize += 2;9940}9941if (has_alternatives && opcode != OP_ONCE)9942stacksize++;99439944if (stacksize > 0)9945allocate_stack(common, stacksize);99469947stacksize = 0;9948if (repeat_type == OP_MINUPTO)9949{9950/* TMP2 was set above. 
*/9951OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);9952stacksize++;9953}99549955if (ket != OP_KET || bra != OP_BRA)9956{9957if (ket != OP_KET)9958OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);9959else9960OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);9961stacksize++;9962}99639964if (offset != 0)9965stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);99669967/* Skip and count the other alternatives. */9968i = 1;9969while (*cc == OP_ALT)9970{9971cc += GET(cc, 1);9972i++;9973}99749975if (has_alternatives)9976{9977if (opcode != OP_ONCE)9978{9979if (i <= 3)9980OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);9981else9982BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));9983}9984if (ket != OP_KETRMAX)9985BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();9986}99879988/* Must be after the matchingpath label. */9989if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)9990{9991SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));9992OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);9993}9994else switch (opcode)9995{9996case OP_ASSERT_NA:9997OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9998break;9999case OP_ASSERT_SCS:10000OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);10001OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10002OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));10003OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);1000410005/* Nested scs blocks will not update this variable. 
*/10006if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))10007common->restore_end_ptr = 0;10008break;10009}1001010011if (ket == OP_KETRMAX)10012{10013if (repeat_type != 0)10014{10015if (has_alternatives)10016BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();10017OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);10018JUMPTO(SLJIT_NOT_ZERO, rmax_label);10019/* Drop STR_PTR for greedy plus quantifier. */10020if (opcode != OP_ONCE)10021free_stack(common, 1);10022}10023else if (opcode < OP_BRA || opcode >= OP_SBRA)10024{10025if (has_alternatives)10026BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();1002710028/* Checking zero-length iteration. */10029if (opcode != OP_ONCE)10030{10031/* This case includes opcodes such as OP_SCRIPT_RUN. */10032CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);10033/* Drop STR_PTR for greedy plus quantifier. */10034if (bra != OP_BRAZERO)10035free_stack(common, 1);10036}10037else10038/* TMP2 must contain the starting STR_PTR. */10039CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);10040}10041else10042JUMPTO(SLJIT_JUMP, rmax_label);10043BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();10044}1004510046if (repeat_type == OP_EXACT)10047{10048count_match(common);10049OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);10050JUMPTO(SLJIT_NOT_ZERO, rmax_label);10051}10052else if (repeat_type == OP_UPTO)10053{10054/* We need to preserve the counter. */10055OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);10056allocate_stack(common, 1);10057OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);10058}1005910060if (bra == OP_BRAZERO)10061BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();1006210063if (bra == OP_BRAMINZERO)10064{10065/* This is a backtrack path! 
(From the viewpoint of OP_BRAMINZERO) */10066JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);10067if (braminzero != NULL)10068{10069JUMPHERE(braminzero);10070/* We need to release the end pointer to perform the10071backtrack for the zero-length iteration. When10072framesize is < 0, OP_ONCE will do the release itself. */10073if (opcode == OP_ONCE)10074{10075int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;1007610077SLJIT_ASSERT(framesize != 0);10078if (framesize > 0)10079{10080OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10081add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));10082OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));10083}10084}10085else if (ket == OP_KETRMIN)10086free_stack(common, 1);10087}10088/* Continue to the normal backtrack. */10089}1009010091if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))10092count_match(common);1009310094cc += 1 + LINK_SIZE;1009510096if (opcode == OP_ONCE)10097{10098int data;10099int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;1010010101SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);10102/* We temporarily encode the needs_control_head in the lowest bit.10103The real value should be short enough for this operation to work10104without triggering Undefined Behaviour. */10105data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 
1 : 0));
  BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
  }
return cc + repeat_length;
}

/* Generates the matching path of a bracket closed by OP_KETRPOS, i.e. one
of OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by
OP_BRAPOSZERO.

Arguments:
  common   the JIT compiler state
  cc       points to the bracket opcode, or to the OP_BRAPOSZERO before it
  parent   not referenced directly in this body; presumably consumed by the
           PUSH_BACKTRACK macro - TODO confirm

Returns:   the position just after the closing OP_KETRPOS
           (cc + 1 + LINK_SIZE), or NULL if sljit reports a compiler error
           after compiling an alternative.

The value of framesize returned by get_framesize() selects between two
stack layouts: a compact one when it is negative, and one that also holds a
frame written by init_frame() otherwise. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* A leading OP_BRAPOSZERO only sets the "zero" flag; the bracket opcode
itself follows it. */
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on offset is the doubled capture number and is used as the
  OVECTOR() index of the capture pair. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* Compact layout. Slots, in order: the saved capture pair (plus the
  saved capture_last value when it is tracked) or the current STR_PTR,
  then the control head if needed, then the marker slot when !zero. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    /* The loads and stores are interleaved: TMP1 and TMP2 are each reused
    for a second value once their first value has been stored. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The marker slot (written as 1 here) lies one slot after the control
  head slot. "stack" is stepped forward to address the marker and then back
  again, so that it ends up as the index of the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* Layout with a frame. Slots, in order: the marker slot when !zero, the
  control head if needed, STR_PTR for non-capturing brackets, the previous
  content of private_data_ptr, and then the frame set up by init_frame(). */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  /* private_data_ptr now holds STACK_TOP plus the size of this allocation;
  the loop below subtracts the same amount to get STACK_TOP back. */
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step "stack" back to the index that the control head slot has when
  needs_control_head is set; it is only read in that case. */
  stack -= 1 + (offset == 0);
  }

/* Capturing brackets keep the start of the current iteration in their
private slot. */
if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

/* Every alternative that matches jumps back to this label to try another
iteration. */
loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->own_backtracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* Record the capture of this iteration: TMP1 is the start kept in
      cbraprivptr, STR_PTR is the end; the current position then becomes
      the start of the next iteration. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* For OP_SBRAPOS, TMP1 keeps the previously stored position so that
      an iteration which consumed nothing can be detected below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration ended where it started. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the marker slot: an iteration has been completed. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      /* Recompute STACK_TOP from private_data_ptr, then record the
      capture exactly as in the compact layout. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* The stored position is addressed relative to the value kept in
      private_data_ptr (loaded into TMP2). */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* In this layout the marker is the first slot of the allocation. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* The code emitted from here on is reached when this alternative fails:
  its backtracking path, followed by restoring STR_PTR from the stored
  position. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->own_backtracks, LABEL());

  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->own_backtracks = NULL;
/* When the bracket is not optional, a marker slot that is still non-zero
means that no iteration was completed, so the whole bracket backtracks. */
if (!zero)
  {
  if (framesize < 0)
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. 
*/10384set_jumps(emptymatch, LABEL());10385count_match(common);10386return cc + 1 + LINK_SIZE;10387}1038810389static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)10390{10391int class_len;1039210393*opcode = *cc;10394*exact = 0;1039510396if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)10397{10398cc++;10399*type = OP_CHAR;10400}10401else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)10402{10403cc++;10404*type = OP_CHARI;10405*opcode -= OP_STARI - OP_STAR;10406}10407else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)10408{10409cc++;10410*type = OP_NOT;10411*opcode -= OP_NOTSTAR - OP_STAR;10412}10413else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)10414{10415cc++;10416*type = OP_NOTI;10417*opcode -= OP_NOTSTARI - OP_STAR;10418}10419else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)10420{10421cc++;10422*opcode -= OP_TYPESTAR - OP_STAR;10423*type = OP_END;10424}10425else10426{10427SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS || *opcode == OP_ECLASS);10428*type = *opcode;10429class_len = (*type < OP_XCLASS) ? 
(int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 1);10430*opcode = cc[class_len];10431cc++;1043210433if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)10434{10435*opcode -= OP_CRSTAR - OP_STAR;10436*end = cc + class_len;1043710438if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)10439{10440*exact = 1;10441*opcode -= OP_PLUS - OP_STAR;10442}10443return cc;10444}1044510446if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)10447{10448*opcode -= OP_CRPOSSTAR - OP_POSSTAR;10449*end = cc + class_len;1045010451if (*opcode == OP_POSPLUS)10452{10453*exact = 1;10454*opcode = OP_POSSTAR;10455}10456return cc;10457}1045810459SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);10460*max = GET2(cc, (class_len + IMM2_SIZE));10461*exact = GET2(cc, class_len);10462*end = cc + class_len + 2 * IMM2_SIZE;1046310464if (*max == 0)10465{10466SLJIT_ASSERT(*exact > 1);10467if (*opcode == OP_CRRANGE)10468*opcode = OP_UPTO;10469else if (*opcode == OP_CRPOSRANGE)10470*opcode = OP_POSUPTO;10471else10472*opcode = OP_MINSTAR;10473return cc;10474}1047510476*max -= *exact;10477if (*max == 0)10478*opcode = OP_EXACT;10479else10480{10481SLJIT_ASSERT(*exact > 0 || *max > 1);10482if (*opcode == OP_CRRANGE)10483*opcode = OP_UPTO;10484else if (*opcode == OP_CRPOSRANGE)10485*opcode = OP_POSUPTO;10486else if (*max == 1)10487*opcode = OP_MINQUERY;10488else10489*opcode = OP_MINUPTO;10490}10491return cc;10492}1049310494switch(*opcode)10495{10496case OP_EXACT:10497*exact = GET2(cc, 0);10498cc += IMM2_SIZE;10499break;1050010501case OP_PLUS:10502case OP_MINPLUS:10503*exact = 1;10504*opcode -= OP_PLUS - OP_STAR;10505break;1050610507case OP_POSPLUS:10508*exact = 1;10509*opcode = OP_POSSTAR;10510break;1051110512case OP_UPTO:10513case OP_MINUPTO:10514case OP_POSUPTO:10515*max = GET2(cc, 0);10516cc += IMM2_SIZE;10517break;10518}1051910520if (*type == OP_END)10521{10522*type = *cc;10523*end = next_opcode(common, cc);10524cc++;10525return cc;10526}1052710528*end = cc + 
1;10529#ifdef SUPPORT_UNICODE10530if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);10531#endif10532return cc;10533}1053410535static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks)10536{10537DEFINE_COMPILER;10538backtrack_common *backtrack = NULL;10539PCRE2_SPTR begin = cc;10540PCRE2_UCHAR opcode;10541PCRE2_UCHAR type;10542sljit_u32 max = 0, exact;10543sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);10544sljit_s32 early_fail_type;10545BOOL charpos_enabled, use_tmp;10546PCRE2_UCHAR charpos_char;10547unsigned int charpos_othercasebit;10548PCRE2_SPTR end;10549jump_list *no_match = NULL;10550jump_list *no_char1_match = NULL;10551struct sljit_jump *jump = NULL;10552struct sljit_label *label;10553int private_data_ptr = PRIVATE_DATA(cc);10554int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);10555int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;10556int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);10557int tmp_base, tmp_offset;1055810559early_fail_type = (early_fail_ptr & 0x7);10560early_fail_ptr >>= 3;1056110562/* During recursion, these optimizations are disabled. 
*/10563if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)10564{10565early_fail_ptr = 0;10566early_fail_type = type_skip;10567}1056810569SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 010570|| (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));1057110572if (early_fail_type == type_fail)10573add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));1057410575cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);1057610577if (type != OP_EXTUNI)10578{10579tmp_base = TMP3;10580tmp_offset = 0;10581}10582else10583{10584tmp_base = SLJIT_MEM1(SLJIT_SP);10585tmp_offset = LOCAL2;10586}1058710588if (opcode == OP_EXACT)10589{10590SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2);1059110592if (common->mode == PCRE2_JIT_COMPLETE10593#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3210594&& !common->utf10595#endif10596&& type != OP_ANYNL && type != OP_EXTUNI)10597{10598OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);10599add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));1060010601#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 3210602if (type == OP_ALLANY && !common->invalid_utf)10603#else10604if (type == OP_ALLANY)10605#endif10606OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));10607else10608{10609OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);10610label = LABEL();10611compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE);10612OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);10613JUMPTO(SLJIT_NOT_ZERO, label);10614}10615}10616else10617{10618SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));10619OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);10620label = LABEL();10621compile_char1_matchingpath(common, type, cc, prev_backtracks, 
TRUE);10622OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);10623JUMPTO(SLJIT_NOT_ZERO, label);10624}10625}1062610627if (early_fail_type == type_fail_range)10628{10629/* Range end first, followed by range start. */10630OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);10631OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));10632OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);10633OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);10634add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));1063510636OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);10637OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);10638}1063910640if (opcode < OP_EXACT)10641PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL);1064210643switch(opcode)10644{10645case OP_STAR:10646case OP_UPTO:10647SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR));10648max += exact;1064910650if (type == OP_EXTUNI)10651{10652SLJIT_ASSERT(private_data_ptr == 0);10653SLJIT_ASSERT(early_fail_ptr == 0);1065410655if (exact == 1)10656{10657SLJIT_ASSERT(opcode == OP_STAR);10658allocate_stack(common, 1);10659OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);10660}10661else10662{10663/* If OP_EXTUNI is present, it has a separate EXACT opcode. 
*/10664SLJIT_ASSERT(exact == 0);1066510666allocate_stack(common, 2);10667OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);10668OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);10669}1067010671if (opcode == OP_UPTO)10672{10673SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));10674OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max);10675}1067610677label = LABEL();10678compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);10679if (opcode == OP_UPTO)10680{10681OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);10682OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);10683jump = JUMP(SLJIT_ZERO);10684OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0);10685}1068610687/* We cannot use TMP3 because of allocate_stack. */10688allocate_stack(common, 1);10689OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);10690JUMPTO(SLJIT_JUMP, label);10691if (jump != NULL)10692JUMPHERE(jump);10693BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();10694break;10695}10696#ifdef SUPPORT_UNICODE10697else if (type == OP_ALLANY && !common->invalid_utf)10698#else10699else if (type == OP_ALLANY)10700#endif10701{10702if (opcode == OP_STAR)10703{10704if (exact == 1)10705detect_partial_match(common, prev_backtracks);1070610707if (private_data_ptr == 0)10708allocate_stack(common, 2);1070910710OP1(SLJIT_MOV, base, offset0, STR_END, 0);10711OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);1071210713OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);10714process_partial_match(common);1071510716if (early_fail_ptr != 0)10717OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);10718BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();10719break;10720}10721#ifdef SUPPORT_UNICODE10722else if (!common->utf)10723#else10724else10725#endif10726{10727/* If OP_ALLANY is present, it has a separate EXACT opcode. 
*/10728SLJIT_ASSERT(exact == 0);1072910730if (private_data_ptr == 0)10731allocate_stack(common, 2);1073210733OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);10734OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));1073510736if (common->mode == PCRE2_JIT_COMPLETE)10737{10738OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);10739SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);10740}10741else10742{10743jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);10744process_partial_match(common);10745JUMPHERE(jump);10746}1074710748OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);1074910750if (early_fail_ptr != 0)10751OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);10752BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();10753break;10754}10755}1075610757charpos_enabled = FALSE;10758charpos_char = 0;10759charpos_othercasebit = 0;1076010761SLJIT_ASSERT(tmp_base == TMP3);10762if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))10763{10764#ifdef SUPPORT_UNICODE10765charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);10766#else10767charpos_enabled = TRUE;10768#endif10769if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))10770{10771charpos_othercasebit = char_get_othercase_bit(common, end + 1);10772if (charpos_othercasebit == 0)10773charpos_enabled = FALSE;10774}1077510776if (charpos_enabled)10777{10778charpos_char = end[1];10779/* Consume the OP_CHAR opcode. 
*/10780end += 2;10781#if PCRE2_CODE_UNIT_WIDTH == 810782SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);10783#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3210784SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);10785if ((charpos_othercasebit & 0x100) != 0)10786charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;10787#endif10788if (charpos_othercasebit != 0)10789charpos_char |= charpos_othercasebit;1079010791BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE;10792BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char;10793BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit;1079410795if (private_data_ptr == 0)10796allocate_stack(common, 2);1079710798use_tmp = (opcode == OP_STAR);1079910800if (use_tmp)10801{10802OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);10803OP1(SLJIT_MOV, base, offset0, TMP3, 0);10804}10805else10806{10807OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);10808OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0);10809OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);10810OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1));10811}1081210813/* Search the first instance of charpos_char. 
*/10814if (exact > 0)10815detect_partial_match(common, &no_match);10816else10817jump = JUMP(SLJIT_JUMP);1081810819label = LABEL();1082010821if (opcode == OP_UPTO)10822{10823if (exact == max)10824OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);10825else10826{10827OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);10828add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));10829}10830}1083110832compile_char1_matchingpath(common, type, cc, &no_match, FALSE);1083310834if (early_fail_ptr != 0)10835OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);1083610837if (exact == 0)10838JUMPHERE(jump);1083910840detect_partial_match(common, &no_match);1084110842if (opcode == OP_UPTO && exact > 0)10843{10844if (exact == max)10845CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label);10846else10847CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label);10848}1084910850OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));10851if (charpos_othercasebit != 0)10852OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);10853CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);1085410855OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);10856if (use_tmp)10857{10858OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0);10859SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3);10860}10861else10862{10863OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0);10864SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH);10865}10866JUMPTO(SLJIT_JUMP, label);1086710868set_jumps(no_match, LABEL());10869OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);10870if (use_tmp)10871OP1(SLJIT_MOV, base, offset1, TMP3, 0);10872else10873{10874OP1(SLJIT_MOV, TMP1, 0, base, offset1);10875OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);10876OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0);10877}1087810879add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));1088010881BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();10882OP1(SLJIT_MOV, base, 
offset0, STR_PTR, 0);10883OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));10884break;10885}10886}1088710888if (private_data_ptr == 0)10889allocate_stack(common, 2);1089010891#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3210892use_tmp = (opcode == OP_STAR);1089310894if (common->utf)10895{10896if (!use_tmp)10897OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);1089810899OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);10900}10901#endif1090210903if (opcode == OP_UPTO)10904OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max);1090510906if (opcode == OP_UPTO && exact > 0)10907{10908label = LABEL();10909detect_partial_match(common, &no_match);10910compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);10911#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3210912if (common->utf)10913OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);10914#endif1091510916if (exact == max)10917{10918OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);10919JUMPTO(SLJIT_NOT_ZERO, label);10920}10921else10922{10923OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);10924add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));10925CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label);10926}1092710928OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);10929JUMPTO(SLJIT_JUMP, label);10930}10931else10932{10933OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);1093410935detect_partial_match(common, &no_match);10936label = LABEL();10937compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);10938#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3210939if (common->utf)10940OP1(SLJIT_MOV, use_tmp ? 
TMP3 : COUNT_MATCH, 0, STR_PTR, 0);10941#endif1094210943if (opcode == OP_UPTO)10944{10945OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);10946add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));10947}1094810949detect_partial_match_to(common, label);10950}1095110952#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3210953if (common->utf)10954{10955set_jumps(no_char1_match, LABEL());10956set_jumps(no_match, LABEL());10957if (use_tmp)10958{10959OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);10960OP1(SLJIT_MOV, base, offset0, TMP3, 0);10961}10962else10963{10964OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0);10965OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0);10966OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);10967}10968}10969else10970#endif10971{10972if (opcode != OP_UPTO || exact == 0)10973OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));10974set_jumps(no_char1_match, LABEL());1097510976OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));10977set_jumps(no_match, LABEL());10978OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);10979}1098010981if (opcode == OP_UPTO)10982{10983if (exact > 0)10984{10985if (max == exact)10986jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact);10987else10988jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);1098910990add_jump(compiler, &backtrack->own_backtracks, jump);10991}10992}10993else if (exact == 1)10994add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0));1099510996if (early_fail_ptr != 0)10997OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);1099810999BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11000break;1100111002case OP_QUERY:11003SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);11004if (private_data_ptr == 0)11005allocate_stack(common, 1);11006OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11007compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, 
TRUE);11008BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11009break;1101011011case OP_MINSTAR:11012case OP_MINQUERY:11013SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0));11014if (private_data_ptr == 0)11015allocate_stack(common, 1);1101611017if (exact >= 1)11018{11019if (exact >= 2)11020{11021/* Extuni has a separate exact opcode. */11022SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0);11023OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);11024}1102511026if (opcode == OP_MINQUERY)11027OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1);1102811029label = LABEL();11030BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;1103111032compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);1103311034if (exact >= 2)11035{11036OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11037JUMPTO(SLJIT_NOT_ZERO, label);11038}1103911040if (opcode == OP_MINQUERY)11041OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0);11042else11043OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11044}11045else11046{11047OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11048BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11049}1105011051if (early_fail_ptr != 0)11052OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);11053break;1105411055case OP_MINUPTO:11056SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);11057if (private_data_ptr == 0)11058allocate_stack(common, 2);1105911060OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);1106111062if (exact == 0)11063{11064OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11065BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11066break;11067}1106811069if (exact >= 2)11070{11071/* Extuni has a separate exact opcode. 
*/11072SLJIT_ASSERT(tmp_base == TMP3);11073OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);11074}1107511076label = LABEL();11077BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;1107811079compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);1108011081if (exact >= 2)11082{11083OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11084JUMPTO(SLJIT_NOT_ZERO, label);11085}1108611087OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11088break;1108911090case OP_EXACT:11091SLJIT_ASSERT(backtrack == NULL);11092break;1109311094case OP_POSSTAR:11095SLJIT_ASSERT(backtrack == NULL);11096#if defined SUPPORT_UNICODE11097if (type == OP_ALLANY && !common->invalid_utf)11098#else11099if (type == OP_ALLANY)11100#endif11101{11102if (exact == 1)11103detect_partial_match(common, prev_backtracks);1110411105OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);11106process_partial_match(common);11107if (early_fail_ptr != 0)11108OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);11109break;11110}1111111112#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211113if (common->utf)11114{11115SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));1111611117if (tmp_base != TMP3)11118{11119OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);11120tmp_base = COUNT_MATCH;11121}1112211123OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? 
SLJIT_IMM : STR_PTR, 0);11124detect_partial_match(common, &no_match);11125label = LABEL();11126compile_char1_matchingpath(common, type, cc, &no_match, FALSE);11127OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0);11128detect_partial_match_to(common, label);1112911130set_jumps(no_match, LABEL());11131OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0);1113211133if (tmp_base != TMP3)11134OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);1113511136if (exact == 1)11137add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));1113811139if (early_fail_ptr != 0)11140OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);11141break;11142}11143#endif1114411145if (exact == 1)11146OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);1114711148detect_partial_match(common, &no_match);11149label = LABEL();11150/* Extuni never fails, so no_char1_match is not used in that case.11151Anynl optionally reads an extra character on success. */11152compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);11153detect_partial_match_to(common, label);11154if (type != OP_EXTUNI)11155OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));1115611157set_jumps(no_char1_match, LABEL());11158if (type != OP_EXTUNI)11159OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));1116011161set_jumps(no_match, LABEL());1116211163if (exact == 1)11164add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0));1116511166if (early_fail_ptr != 0)11167OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);11168break;1116911170case OP_POSUPTO:11171SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);11172max += exact;1117311174#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211175if (type == OP_EXTUNI || common->utf)11176#else11177if (type == OP_EXTUNI)11178#endif11179{11180SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));1118111182/* Count match is not modified by compile_char1_matchingpath. 
*/11183OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);11184OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 0 : max);1118511186label = LABEL();11187/* Extuni only modifies TMP3 on successful match. */11188OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);11189compile_char1_matchingpath(common, type, cc, &no_match, TRUE);1119011191if (exact == max)11192{11193OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);11194JUMPTO(SLJIT_JUMP, label);11195}11196else11197{11198OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);11199JUMPTO(SLJIT_NOT_ZERO, label);11200OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);11201}1120211203set_jumps(no_match, LABEL());1120411205if (exact > 0)11206{11207if (exact == max)11208OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact);11209else11210OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact);11211}1121211213OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);1121411215if (exact > 0)11216add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER));11217OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);11218break;11219}1122011221SLJIT_ASSERT(tmp_base == TMP3);1122211223OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 
0 : max);1122411225detect_partial_match(common, &no_match);11226label = LABEL();11227compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);1122811229if (exact == max)11230OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11231else11232{11233OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11234add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));11235}11236detect_partial_match_to(common, label);11237OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));1123811239set_jumps(no_char1_match, LABEL());11240OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));11241set_jumps(no_match, LABEL());1124211243if (exact > 0)11244{11245if (exact == max)11246jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact);11247else11248jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);1124911250add_jump(compiler, prev_backtracks, jump);11251}11252break;1125311254case OP_POSQUERY:11255SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);11256SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));11257OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);11258compile_char1_matchingpath(common, type, cc, &no_match, TRUE);11259OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);11260set_jumps(no_match, LABEL());11261OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);11262break;1126311264default:11265SLJIT_UNREACHABLE();11266break;11267}1126811269count_match(common);11270return end;11271}1127211273static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)11274{11275DEFINE_COMPILER;11276backtrack_common *backtrack;1127711278PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);1127911280if (*cc == OP_FAIL)11281{11282add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));11283return cc + 1;11284}1128511286if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)11287add_jump(compiler, &common->restart_match, 
CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));1128811289if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)11290{11291/* No need to check notempty conditions. */11292if (common->accept_label == NULL)11293add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));11294else11295JUMPTO(SLJIT_JUMP, common->accept_label);11296return cc + 1;11297}1129811299if (common->accept_label == NULL)11300add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));11301else11302CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);1130311304if (HAS_VIRTUAL_REGISTERS)11305{11306OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);11307OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));11308}11309else11310OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));1131111312OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);11313add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));11314OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);11315if (common->accept_label == NULL)11316add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));11317else11318JUMPTO(SLJIT_ZERO, common->accept_label);1131911320OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? 
TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));11321if (common->accept_label == NULL)11322add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));11323else11324CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);11325add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));11326return cc + 1;11327}1132811329static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)11330{11331DEFINE_COMPILER;11332int offset = GET2(cc, 1);11333BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;1133411335/* Data will be discarded anyway... */11336if (common->currententry != NULL)11337return cc + 1 + IMM2_SIZE;1133811339if (!optimized_cbracket)11340OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));11341offset <<= 1;11342OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);11343if (!optimized_cbracket)11344OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);11345return cc + 1 + IMM2_SIZE;11346}1134711348static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)11349{11350DEFINE_COMPILER;11351backtrack_common *backtrack;11352PCRE2_UCHAR opcode = *cc;11353PCRE2_SPTR ccend = cc + 1;1135411355if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||11356opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)11357ccend += 2 + cc[1];1135811359PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);1136011361if (opcode == OP_SKIP)11362{11363allocate_stack(common, 1);11364OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);11365return ccend;11366}1136711368if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)11369{11370if (HAS_VIRTUAL_REGISTERS)11371OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);11372OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));11373OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);11374OP1(SLJIT_MOV, 
SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);11375}1137611377return ccend;11378}1137911380static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };1138111382static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)11383{11384DEFINE_COMPILER;11385backtrack_common *backtrack;11386BOOL needs_control_head;11387int size;1138811389PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);11390common->then_trap = BACKTRACK_AS(then_trap_backtrack);11391BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;11392BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);11393BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);1139411395size = BACKTRACK_AS(then_trap_backtrack)->framesize;11396size = 3 + (size < 0 ? 0 : size);1139711398OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);11399allocate_stack(common, size);11400if (size > 3)11401OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));11402else11403OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);11404OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);11405OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);11406OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);1140711408size = BACKTRACK_AS(then_trap_backtrack)->framesize;11409if (size >= 0)11410init_frame(common, cc, ccend, size - 1, 0);11411}1141211413static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)11414{11415DEFINE_COMPILER;11416backtrack_common *backtrack;11417BOOL has_then_trap = FALSE;11418then_trap_backtrack *save_then_trap = NULL;11419size_t 
op_len;1142011421SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));1142211423if (common->has_then && common->then_offsets[cc - common->start] != 0)11424{11425SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);11426has_then_trap = TRUE;11427save_then_trap = common->then_trap;11428/* Tail item on backtrack. */11429compile_then_trap_matchingpath(common, cc, ccend, parent);11430}1143111432while (cc < ccend)11433{11434switch(*cc)11435{11436case OP_SOD:11437case OP_SOM:11438case OP_NOT_WORD_BOUNDARY:11439case OP_WORD_BOUNDARY:11440case OP_EODN:11441case OP_EOD:11442case OP_DOLL:11443case OP_DOLLM:11444case OP_CIRC:11445case OP_CIRCM:11446case OP_NOT_UCP_WORD_BOUNDARY:11447case OP_UCP_WORD_BOUNDARY:11448cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11449break;1145011451case OP_NOT_DIGIT:11452case OP_DIGIT:11453case OP_NOT_WHITESPACE:11454case OP_WHITESPACE:11455case OP_NOT_WORDCHAR:11456case OP_WORDCHAR:11457case OP_ANY:11458case OP_ALLANY:11459case OP_ANYBYTE:11460case OP_NOTPROP:11461case OP_PROP:11462case OP_ANYNL:11463case OP_NOT_HSPACE:11464case OP_HSPACE:11465case OP_NOT_VSPACE:11466case OP_VSPACE:11467case OP_EXTUNI:11468case OP_NOT:11469case OP_NOTI:11470cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11471break;1147211473case OP_SET_SOM:11474PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);11475OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));11476allocate_stack(common, 1);11477OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);11478OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);11479cc++;11480break;1148111482case OP_CHAR:11483case OP_CHARI:11484if (common->mode == PCRE2_JIT_COMPLETE)11485cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks);11486else11487cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11488break;1148911490case OP_STAR:11491case OP_MINSTAR:11492case OP_PLUS:11493case OP_MINPLUS:11494case OP_QUERY:11495case OP_MINQUERY:11496case OP_UPTO:11497case OP_MINUPTO:11498case OP_EXACT:11499case OP_POSSTAR:11500case OP_POSPLUS:11501case OP_POSQUERY:11502case OP_POSUPTO:11503case OP_STARI:11504case OP_MINSTARI:11505case OP_PLUSI:11506case OP_MINPLUSI:11507case OP_QUERYI:11508case OP_MINQUERYI:11509case OP_UPTOI:11510case OP_MINUPTOI:11511case OP_EXACTI:11512case OP_POSSTARI:11513case OP_POSPLUSI:11514case OP_POSQUERYI:11515case OP_POSUPTOI:11516case OP_NOTSTAR:11517case OP_NOTMINSTAR:11518case OP_NOTPLUS:11519case OP_NOTMINPLUS:11520case OP_NOTQUERY:11521case OP_NOTMINQUERY:11522case OP_NOTUPTO:11523case OP_NOTMINUPTO:11524case OP_NOTEXACT:11525case OP_NOTPOSSTAR:11526case OP_NOTPOSPLUS:11527case OP_NOTPOSQUERY:11528case OP_NOTPOSUPTO:11529case OP_NOTSTARI:11530case OP_NOTMINSTARI:11531case OP_NOTPLUSI:11532case OP_NOTMINPLUSI:11533case OP_NOTQUERYI:11534case OP_NOTMINQUERYI:11535case OP_NOTUPTOI:11536case OP_NOTMINUPTOI:11537case OP_NOTEXACTI:11538case OP_NOTPOSSTARI:11539case OP_NOTPOSPLUSI:11540case OP_NOTPOSQUERYI:11541case OP_NOTPOSUPTOI:11542case OP_TYPESTAR:11543case OP_TYPEMINSTAR:11544case OP_TYPEPLUS:11545case OP_TYPEMINPLUS:11546case OP_TYPEQUERY:11547case OP_TYPEMINQUERY:11548case OP_TYPEUPTO:11549case OP_TYPEMINUPTO:11550case OP_TYPEEXACT:11551case OP_TYPEPOSSTAR:11552case OP_TYPEPOSPLUS:11553case OP_TYPEPOSQUERY:11554case OP_TYPEPOSUPTO:11555cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks);11556break;1155711558case OP_CLASS:11559case OP_NCLASS:11560if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)11561cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11562else11563cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11564break;1156511566#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3211567case OP_XCLASS:11568case OP_ECLASS:11569op_len = GET(cc, 1);11570if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)11571cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11572else11573cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11574break;11575#endif1157611577case OP_REF:11578case OP_REFI:11579op_len = PRIV(OP_lengths)[*cc];11580if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)11581cc = compile_ref_iterator_matchingpath(common, cc, parent);11582else11583{11584compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);11585cc += op_len;11586}11587break;1158811589case OP_DNREF:11590case OP_DNREFI:11591op_len = PRIV(OP_lengths)[*cc];11592if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)11593cc = compile_ref_iterator_matchingpath(common, cc, parent);11594else11595{11596compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11597compile_ref_matchingpath(common, cc, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);11598cc += op_len;11599}11600break;1160111602case OP_RECURSE:11603cc = compile_recurse_matchingpath(common, cc, parent);11604break;1160511606case OP_CALLOUT:11607case OP_CALLOUT_STR:11608cc = compile_callout_matchingpath(common, cc, parent);11609break;1161011611case OP_ASSERT:11612case OP_ASSERT_NOT:11613case OP_ASSERTBACK:11614case OP_ASSERTBACK_NOT:11615PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);11616cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);11617break;1161811619case OP_BRAMINZERO:11620PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);11621cc = bracketend(cc + 1);11622if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)11623{11624allocate_stack(common, 1);11625OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);11626}11627else11628{11629allocate_stack(common, 2);11630OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);11631OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);11632}11633BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();11634count_match(common);11635break;1163611637case OP_ASSERT_NA:11638case OP_ASSERTBACK_NA:11639case OP_ASSERT_SCS:11640case OP_ONCE:11641case OP_SCRIPT_RUN:11642case OP_BRA:11643case OP_CBRA:11644case OP_COND:11645case OP_SBRA:11646case OP_SCBRA:11647case OP_SCOND:11648cc = compile_bracket_matchingpath(common, cc, parent);11649break;1165011651case OP_BRAZERO:11652if (cc[1] > OP_ASSERTBACK_NOT)11653cc = compile_bracket_matchingpath(common, cc, parent);11654else11655{11656PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);11657cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);11658}11659break;1166011661case OP_BRAPOS:11662case OP_CBRAPOS:11663case OP_SBRAPOS:11664case OP_SCBRAPOS:11665case OP_BRAPOSZERO:11666cc = compile_bracketpos_matchingpath(common, cc, parent);11667break;1166811669case 
OP_MARK:11670PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);11671SLJIT_ASSERT(common->mark_ptr != 0);11672OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);11673allocate_stack(common, common->has_skip_arg ? 5 : 1);11674if (HAS_VIRTUAL_REGISTERS)11675OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);11676OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);11677OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));11678OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);11679OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);11680if (common->has_skip_arg)11681{11682OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);11683OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);11684OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);11685OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));11686OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);11687OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);11688}11689cc += 1 + 2 + cc[1];11690break;1169111692case OP_PRUNE:11693case OP_PRUNE_ARG:11694case OP_SKIP:11695case OP_SKIP_ARG:11696case OP_THEN:11697case OP_THEN_ARG:11698case OP_COMMIT:11699case OP_COMMIT_ARG:11700cc = compile_control_verb_matchingpath(common, cc, parent);11701break;1170211703case OP_FAIL:11704case OP_ACCEPT:11705case OP_ASSERT_ACCEPT:11706cc = compile_fail_accept_matchingpath(common, cc, parent);11707break;1170811709case OP_CLOSE:11710cc = compile_close_matchingpath(common, cc);11711break;1171211713case OP_SKIPZERO:11714cc = bracketend(cc + 1);11715break;1171611717default:11718SLJIT_UNREACHABLE();11719return;11720}11721if (cc == NULL)11722return;11723}1172411725if (has_then_trap)11726{11727/* Head item on backtrack. 
*/11728PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);11729BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;11730BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;11731common->then_trap = save_then_trap;11732}11733SLJIT_ASSERT(cc == ccend);11734}1173511736#undef PUSH_BACKTRACK11737#undef PUSH_BACKTRACK_NOVALUE11738#undef BACKTRACK_AS1173911740#define COMPILE_BACKTRACKINGPATH(current) \11741do \11742{ \11743compile_backtrackingpath(common, (current)); \11744if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \11745return; \11746} \11747while (0)1174811749#define CURRENT_AS(type) ((type *)current)1175011751static void compile_newline_move_back(compiler_common *common)11752{11753DEFINE_COMPILER;11754struct sljit_jump *jump;1175511756OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));11757jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0);11758/* All newlines are single byte, or their last byte11759is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. 
*/11760OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));11761OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));11762OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8);11763OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0);11764OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR << 8 | CHAR_NL);11765OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);11766#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3211767OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);11768#endif11769OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);11770JUMPHERE(jump);11771}1177211773static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)11774{11775DEFINE_COMPILER;11776PCRE2_SPTR cc = current->cc;11777PCRE2_UCHAR opcode;11778PCRE2_UCHAR type;11779sljit_u32 max = 0, exact;11780struct sljit_label *label = NULL;11781struct sljit_jump *jump = NULL;11782jump_list *jumplist = NULL;11783PCRE2_SPTR end;11784int private_data_ptr = PRIVATE_DATA(cc);11785int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);11786int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;11787int offset1 = (private_data_ptr == 0) ? 
STACK(1) : private_data_ptr + SSIZE_OF(sw);1178811789cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);1179011791switch(opcode)11792{11793case OP_STAR:11794case OP_UPTO:11795if (type == OP_EXTUNI)11796{11797SLJIT_ASSERT(private_data_ptr == 0);11798set_jumps(current->own_backtracks, LABEL());11799OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));11800free_stack(common, 1);11801CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);11802}11803else11804{11805if (CURRENT_AS(char_iterator_backtrack)->charpos.charpos_enabled)11806{11807OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11808OP1(SLJIT_MOV, TMP2, 0, base, offset1);1180911810jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);11811label = LABEL();11812if (type == OP_ANYNL)11813compile_newline_move_back(common);11814move_back(common, NULL, TRUE);1181511816OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));11817if (CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit != 0)11818OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit);11819CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);11820/* The range beginning must match, no need to compare. 
*/11821JUMPTO(SLJIT_JUMP, label);1182211823set_jumps(current->own_backtracks, LABEL());11824current->own_backtracks = NULL;11825}11826else11827{11828OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);1182911830if (opcode == OP_STAR && exact == 1)11831{11832if (type == OP_ANYNL)11833{11834OP1(SLJIT_MOV, TMP2, 0, base, offset1);11835compile_newline_move_back(common);11836}1183711838move_back(common, NULL, TRUE);11839jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);11840}11841else11842{11843if (type == OP_ANYNL)11844{11845OP1(SLJIT_MOV, TMP2, 0, base, offset1);11846jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);11847compile_newline_move_back(common);11848}11849else11850jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);1185111852move_back(common, NULL, TRUE);11853}1185411855OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11856JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);1185711858set_jumps(current->own_backtracks, LABEL());11859}1186011861JUMPHERE(jump);11862if (private_data_ptr == 0)11863free_stack(common, 2);11864}11865break;1186611867case OP_QUERY:11868OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11869OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);11870CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);11871jump = JUMP(SLJIT_JUMP);11872set_jumps(current->own_backtracks, LABEL());11873OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11874OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);11875JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);11876JUMPHERE(jump);11877if (private_data_ptr == 0)11878free_stack(common, 1);11879break;1188011881case OP_MINSTAR:11882OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11883if (exact == 0)11884{11885compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);11886OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11887}11888else if (exact > 1)11889OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);1189011891JUMPTO(SLJIT_JUMP, 
CURRENT_AS(char_iterator_backtrack)->matchingpath);11892set_jumps(exact > 0 ? current->own_backtracks : jumplist, LABEL());11893if (private_data_ptr == 0)11894free_stack(common, 1);11895break;1189611897case OP_MINUPTO:11898OP1(SLJIT_MOV, TMP1, 0, base, offset1);11899OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11900OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);1190111902if (exact == 0)11903{11904add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));1190511906OP1(SLJIT_MOV, base, offset1, TMP1, 0);11907compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);11908OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11909JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);1191011911set_jumps(jumplist, LABEL());11912}11913else11914{11915if (exact > 1)11916OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);11917OP1(SLJIT_MOV, base, offset1, TMP1, 0);11918JUMPTO(SLJIT_NOT_ZERO, CURRENT_AS(char_iterator_backtrack)->matchingpath);1191911920set_jumps(current->own_backtracks, LABEL());11921}1192211923if (private_data_ptr == 0)11924free_stack(common, 2);11925break;1192611927case OP_MINQUERY:11928OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11929OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);1193011931if (exact >= 1)11932{11933if (exact >= 2)11934OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);11935CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);11936set_jumps(current->own_backtracks, LABEL());11937}11938else11939{11940jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);11941compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);11942JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);11943set_jumps(jumplist, LABEL());11944JUMPHERE(jump);11945}1194611947if (private_data_ptr == 0)11948free_stack(common, 1);11949break;1195011951default:11952SLJIT_UNREACHABLE();11953break;11954}11955}1195611957static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common 
*current)11958{11959DEFINE_COMPILER;11960PCRE2_SPTR cc = current->cc;11961BOOL ref = (*cc == OP_REF || *cc == OP_REFI);11962PCRE2_UCHAR type;1196311964type = cc[PRIV(OP_lengths)[*cc]];1196511966if ((type & 0x1) == 0)11967{11968/* Maximize case. */11969set_jumps(current->own_backtracks, LABEL());11970OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));11971free_stack(common, 1);11972CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);11973return;11974}1197511976OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));11977CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);11978set_jumps(current->own_backtracks, LABEL());11979free_stack(common, ref ? 2 : 3);11980}1198111982static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)11983{11984DEFINE_COMPILER;11985recurse_entry *entry;1198611987if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)11988{11989entry = CURRENT_AS(recurse_backtrack)->entry;11990if (entry->backtrack_label == NULL)11991add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));11992else11993JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);11994CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);11995}11996else11997compile_backtrackingpath(common, current->top);1199811999set_jumps(current->own_backtracks, LABEL());12000}1200112002static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)12003{12004DEFINE_COMPILER;12005PCRE2_SPTR cc = current->cc;12006PCRE2_UCHAR bra = OP_BRA;12007struct sljit_jump *brajump = NULL;1200812009SLJIT_ASSERT(*cc != OP_BRAMINZERO);12010if (*cc == OP_BRAZERO)12011{12012bra = *cc;12013cc++;12014}1201512016if (bra == OP_BRAZERO)12017{12018SLJIT_ASSERT(current->own_backtracks == NULL);12019OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12020}1202112022if 
(CURRENT_AS(assert_backtrack)->framesize < 0)12023{12024set_jumps(current->own_backtracks, LABEL());1202512026if (bra == OP_BRAZERO)12027{12028OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);12029CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);12030free_stack(common, 1);12031}12032return;12033}1203412035if (bra == OP_BRAZERO)12036{12037if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)12038{12039OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);12040CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);12041free_stack(common, 1);12042return;12043}12044free_stack(common, 1);12045brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);12046}1204712048if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)12049{12050OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);12051add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12052OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));12053OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));12054OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);1205512056set_jumps(current->own_backtracks, LABEL());12057}12058else12059set_jumps(current->own_backtracks, LABEL());1206012061if (bra == OP_BRAZERO)12062{12063/* We know there is enough place on the stack. 
*/12064OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));12065OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);12066JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);12067JUMPHERE(brajump);12068}12069}1207012071static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)12072{12073DEFINE_COMPILER;12074int opcode, stacksize, alt_count, alt_max;12075int offset = 0;12076int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;12077int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;12078PCRE2_SPTR cc = current->cc;12079PCRE2_SPTR ccbegin;12080PCRE2_SPTR ccprev;12081PCRE2_UCHAR bra = OP_BRA;12082PCRE2_UCHAR ket;12083const assert_backtrack *assert;12084BOOL has_alternatives;12085BOOL needs_control_head = FALSE;12086BOOL has_vreverse;12087struct sljit_jump *brazero = NULL;12088struct sljit_jump *next_alt = NULL;12089struct sljit_jump *once = NULL;12090struct sljit_jump *cond = NULL;12091struct sljit_label *rmin_label = NULL;12092struct sljit_label *exact_label = NULL;12093struct sljit_jump *mov_addr = NULL;1209412095if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)12096{12097bra = *cc;12098cc++;12099}1210012101opcode = *cc;12102ccbegin = bracketend(cc) - 1 - LINK_SIZE;12103ket = *ccbegin;12104if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)12105{12106repeat_ptr = PRIVATE_DATA(ccbegin);12107repeat_type = PRIVATE_DATA(ccbegin + 2);12108repeat_count = PRIVATE_DATA(ccbegin + 3);12109SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);12110if (repeat_type == OP_UPTO)12111ket = OP_KETRMAX;12112if (repeat_type == OP_MINUPTO)12113ket = OP_KETRMIN;12114}12115ccbegin = cc;12116cc += GET(cc, 1);12117has_alternatives = *cc == OP_ALT;12118if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))12119has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL;12120if 
(opcode == OP_CBRA || opcode == OP_SCBRA)12121offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;12122if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))12123opcode = OP_SCOND;1212412125alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;1212612127/* Decoding the needs_control_head in framesize. */12128if (opcode == OP_ONCE)12129{12130needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;12131CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;12132}1213312134if (ket != OP_KET && repeat_type != 0)12135{12136/* TMP1 is used in OP_KETRMIN below. */12137OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12138free_stack(common, 1);12139if (repeat_type == OP_UPTO)12140OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);12141else12142OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);12143}1214412145if (ket == OP_KETRMAX)12146{12147if (bra == OP_BRAZERO)12148{12149OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12150free_stack(common, 1);12151brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12152}12153}12154else if (ket == OP_KETRMIN)12155{12156if (bra != OP_BRAMINZERO)12157{12158OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12159if (repeat_type != 0)12160{12161/* TMP1 was set a few lines above. */12162CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12163/* Drop STR_PTR for non-greedy plus quantifier. */12164if (opcode != OP_ONCE)12165free_stack(common, 1);12166}12167else if (opcode >= OP_SBRA || opcode == OP_ONCE)12168{12169/* Checking zero-length iteration. 
*/12170if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)12171CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12172else12173{12174OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12175CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12176}12177/* Drop STR_PTR for non-greedy plus quantifier. */12178if (opcode != OP_ONCE)12179free_stack(common, 1);12180}12181else12182JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12183}12184rmin_label = LABEL();12185if (repeat_type != 0)12186OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);12187}12188else if (bra == OP_BRAZERO)12189{12190OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12191free_stack(common, 1);12192brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12193}12194else if (repeat_type == OP_EXACT)12195{12196OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);12197exact_label = LABEL();12198}1219912200if (offset != 0)12201{12202if (common->capture_last_ptr != 0)12203{12204SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);12205OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12206OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12207OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);12208OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12209free_stack(common, 3);12210OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);12211OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);12212}12213else if (common->optimized_cbracket[offset >> 1] == 0)12214{12215OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12216OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12217free_stack(common, 2);12218OP1(SLJIT_MOV, 
SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);12219OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);12220}12221}12222else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS))12223{12224OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);12225OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12226OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);1222712228/* Nested scs blocks will not update this variable. */12229if (common->restore_end_ptr == 0)12230common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);12231}1223212233if (SLJIT_UNLIKELY(opcode == OP_ONCE))12234{12235int framesize = CURRENT_AS(bracket_backtrack)->u.framesize;1223612237SLJIT_ASSERT(framesize != 0);12238if (framesize > 0)12239{12240OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12241add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12242OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));12243}12244once = JUMP(SLJIT_JUMP);12245}12246else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))12247{12248if (has_alternatives)12249{12250/* Always exactly one alternative. 
*/12251OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12252free_stack(common, 1);1225312254alt_max = 2;12255next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12256}12257}12258else if (has_alternatives)12259{12260OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12261free_stack(common, 1);1226212263if (alt_max > 3)12264{12265sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);1226612267SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL);12268sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL());12269sljit_emit_op0(compiler, SLJIT_ENDBR);12270}12271else12272next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12273}1227412275COMPILE_BACKTRACKINGPATH(current->top);12276if (current->own_backtracks)12277set_jumps(current->own_backtracks, LABEL());1227812279if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))12280{12281/* Conditional block always has at most one alternative. */12282if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)12283{12284SLJIT_ASSERT(has_alternatives);12285assert = CURRENT_AS(bracket_backtrack)->u.assert;12286SLJIT_ASSERT(assert->framesize != 0);12287if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))12288{12289OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);12290add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12291OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));12292OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));12293OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);12294}12295cond = JUMP(SLJIT_JUMP);12296set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());12297}12298else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL)12299{12300SLJIT_ASSERT(has_alternatives);12301cond = 
JUMP(SLJIT_JUMP);12302set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());12303}12304else12305SLJIT_ASSERT(!has_alternatives);12306}1230712308if (has_alternatives)12309{12310alt_count = 1;12311do12312{12313current->top = NULL;12314current->own_backtracks = NULL;12315current->simple_backtracks = NULL;12316/* Conditional blocks always have an additional alternative, even if it is empty. */12317if (*cc == OP_ALT)12318{12319ccprev = cc + 1 + LINK_SIZE;12320cc += GET(cc, 1);1232112322has_vreverse = FALSE;1232312324switch (opcode)12325{12326case OP_ASSERTBACK:12327case OP_ASSERTBACK_NA:12328SLJIT_ASSERT(private_data_ptr != 0);12329OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);1233012331has_vreverse = (*ccprev == OP_VREVERSE);12332if (*ccprev == OP_REVERSE || has_vreverse)12333ccprev = compile_reverse_matchingpath(common, ccprev, current);12334break;12335case OP_ASSERT_SCS:12336OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12337break;12338case OP_ONCE:12339OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 
1 : 0));12340break;12341case OP_COND:12342case OP_SCOND:12343break;12344default:12345if (private_data_ptr != 0)12346OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12347else12348OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12349break;12350}1235112352compile_matchingpath(common, ccprev, cc, current);12353if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))12354return;1235512356switch (opcode)12357{12358case OP_ASSERTBACK_NA:12359if (has_vreverse)12360{12361SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));12362add_jump(compiler, ¤t->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));12363}1236412365if (PRIVATE_DATA(ccbegin + 1))12366OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12367break;12368case OP_ASSERT_NA:12369OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12370break;12371case OP_SCRIPT_RUN:12372match_script_run_common(common, private_data_ptr, current);12373break;12374}12375}1237612377/* Instructions after the current alternative is successfully matched. */12378/* There is a similar code in compile_bracket_matchingpath. */12379if (opcode == OP_ONCE)12380match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);1238112382stacksize = 0;12383if (repeat_type == OP_MINUPTO)12384{12385/* We need to preserve the counter. TMP2 will be used below. */12386OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);12387stacksize++;12388}12389if (ket != OP_KET || bra != OP_BRA)12390stacksize++;12391if (offset != 0)12392{12393if (common->capture_last_ptr != 0)12394stacksize++;12395if (common->optimized_cbracket[offset >> 1] == 0)12396stacksize += 2;12397}12398if (opcode != OP_ONCE)12399stacksize++;1240012401if (stacksize > 0)12402allocate_stack(common, stacksize);1240312404stacksize = 0;12405if (repeat_type == OP_MINUPTO)12406{12407/* TMP2 was set above. 
*/12408OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);12409stacksize++;12410}1241112412if (ket != OP_KET || bra != OP_BRA)12413{12414if (ket != OP_KET)12415OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);12416else12417OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);12418stacksize++;12419}1242012421if (offset != 0)12422stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);1242312424if (opcode != OP_ONCE)12425{12426if (alt_max <= 3)12427OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);12428else12429mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));12430}1243112432if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)12433{12434/* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */12435SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));12436OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);12437}1243812439JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);1244012441if (opcode != OP_ONCE)12442{12443if (alt_max <= 3)12444{12445JUMPHERE(next_alt);12446alt_count++;12447if (alt_count < alt_max)12448{12449SLJIT_ASSERT(alt_count == 2 && alt_max == 3);12450next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);12451}12452}12453else12454{12455sljit_set_label(mov_addr, LABEL());12456sljit_emit_op0(compiler, SLJIT_ENDBR);12457}12458}1245912460COMPILE_BACKTRACKINGPATH(current->top);12461if (current->own_backtracks)12462set_jumps(current->own_backtracks, LABEL());12463SLJIT_ASSERT(!current->simple_backtracks);12464}12465while (*cc == OP_ALT);1246612467if (cond != NULL)12468{12469SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);12470if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT)12471{12472assert = 
CURRENT_AS(bracket_backtrack)->u.assert;12473SLJIT_ASSERT(assert->framesize != 0);12474if (assert->framesize > 0)12475{12476OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);12477add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12478OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));12479OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));12480OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);12481}12482}12483JUMPHERE(cond);12484}1248512486/* Free the STR_PTR. */12487if (private_data_ptr == 0)12488free_stack(common, 1);12489}1249012491if (offset != 0)12492{12493/* Using both tmp register is better for instruction scheduling. */12494if (common->optimized_cbracket[offset >> 1] != 0)12495{12496OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12497OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12498free_stack(common, 2);12499OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);12500OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);12501}12502else12503{12504OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12505free_stack(common, 1);12506OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12507}12508}12509else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))12510{12511OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12512OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12513OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12514OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12515OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);12516free_stack(common, 4);12517}12518else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)12519{12520OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, 
SLJIT_MEM1(STACK_TOP), STACK(0));12521free_stack(common, 1);12522}12523else if (opcode == OP_ASSERT_SCS)12524{12525OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12526OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12527OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12528OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12529OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);12530free_stack(common, has_alternatives ? 3 : 2);1253112532set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());1253312534/* Nested scs blocks will not update this variable. */12535if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))12536common->restore_end_ptr = 0;12537}12538else if (opcode == OP_ONCE)12539{12540cc = ccbegin + GET(ccbegin, 1);12541stacksize = needs_control_head ? 1 : 0;1254212543if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)12544{12545/* Reset head and drop saved frame. */12546stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);12547}12548else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))12549{12550/* The STR_PTR must be released. */12551stacksize++;12552}1255312554if (stacksize > 0)12555free_stack(common, stacksize);1255612557JUMPHERE(once);12558/* Restore previous private_data_ptr */12559if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)12560OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));12561else if (ket == OP_KETRMIN)12562{12563OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12564/* See the comment below. 
*/12565free_stack(common, 2);12566OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12567}12568}1256912570if (repeat_type == OP_EXACT)12571{12572OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);12573OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);12574CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);12575}12576else if (ket == OP_KETRMAX)12577{12578OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12579if (bra != OP_BRAZERO)12580free_stack(common, 1);1258112582CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12583if (bra == OP_BRAZERO)12584{12585OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12586JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);12587JUMPHERE(brazero);12588free_stack(common, 1);12589}12590}12591else if (ket == OP_KETRMIN)12592{12593OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));1259412595/* OP_ONCE removes everything in case of a backtrack, so we don't12596need to explicitly release the STR_PTR. The extra release would12597affect badly the free_stack(2) above. */12598if (opcode != OP_ONCE)12599free_stack(common, 1);12600CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);12601if (opcode == OP_ONCE)12602free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);12603else if (bra == OP_BRAMINZERO)12604free_stack(common, 1);12605}12606else if (bra == OP_BRAZERO)12607{12608OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12609JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);12610JUMPHERE(brazero);12611}12612}1261312614static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)12615{12616DEFINE_COMPILER;12617int offset;12618struct sljit_jump *jump;12619PCRE2_SPTR cc;1262012621/* No retry on backtrack, just drop everything. 
*/12622if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)12623{12624cc = current->cc;1262512626if (*cc == OP_BRAPOSZERO)12627cc++;1262812629if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)12630{12631offset = (GET2(cc, 1 + LINK_SIZE)) << 1;12632OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12633OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12634OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);12635if (common->capture_last_ptr != 0)12636OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12637OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);12638if (common->capture_last_ptr != 0)12639OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);12640}12641set_jumps(current->own_backtracks, LABEL());12642free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);12643return;12644}1264512646OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);12647add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12648OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));1264912650if (current->own_backtracks)12651{12652jump = JUMP(SLJIT_JUMP);12653set_jumps(current->own_backtracks, LABEL());12654/* Drop the stack frame. 
*/12655free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);12656JUMPHERE(jump);12657}12658OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));12659}1266012661static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)12662{12663assert_backtrack backtrack;1266412665current->top = NULL;12666current->own_backtracks = NULL;12667current->simple_backtracks = NULL;12668if (current->cc[1] > OP_ASSERTBACK_NOT)12669{12670/* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */12671compile_bracket_matchingpath(common, current->cc, current);12672compile_bracket_backtrackingpath(common, current->top);12673}12674else12675{12676memset(&backtrack, 0, sizeof(backtrack));12677backtrack.common.cc = current->cc;12678backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;12679/* Manual call of compile_assert_matchingpath. 
*/12680compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);12681}12682SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);12683}1268412685static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)12686{12687DEFINE_COMPILER;12688PCRE2_UCHAR opcode = *current->cc;12689struct sljit_label *loop;12690struct sljit_jump *jump;1269112692if (opcode == OP_THEN || opcode == OP_THEN_ARG)12693{12694if (common->then_trap != NULL)12695{12696SLJIT_ASSERT(common->control_head_ptr != 0);1269712698OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);12699OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);12700OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);12701jump = JUMP(SLJIT_JUMP);1270212703loop = LABEL();12704OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12705JUMPHERE(jump);12706CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);12707CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);12708add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));12709return;12710}12711else if (!common->local_quit_available && common->in_positive_assertion)12712{12713add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));12714return;12715}12716}1271712718if (common->restore_end_ptr != 0 && opcode != OP_SKIP_ARG)12719OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);1272012721if (common->local_quit_available)12722{12723/* Abort match with a fail. 
*/12724if (common->quit_label == NULL)12725add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));12726else12727JUMPTO(SLJIT_JUMP, common->quit_label);12728return;12729}1273012731if (opcode == OP_SKIP_ARG)12732{12733SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);12734OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);12735OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));12736sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));1273712738if (common->restore_end_ptr == 0)12739{12740OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);12741add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));12742return;12743}1274412745jump = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);12746OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);12747OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);12748add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));12749JUMPHERE(jump);12750return;12751}1275212753if (opcode == OP_SKIP)12754OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12755else12756OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);12757add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));12758}1275912760static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)12761{12762DEFINE_COMPILER;12763struct sljit_jump *jump;12764struct sljit_label *label;1276512766OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12767jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));12768skip_valid_char(common);12769OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);12770JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);1277112772label = LABEL();12773sljit_set_label(jump, label);12774set_jumps(current->own_backtracks, label);12775}1277612777static SLJIT_INLINE void 
compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)12778{12779DEFINE_COMPILER;12780struct sljit_jump *jump;12781int framesize;12782int size;1278312784if (CURRENT_AS(then_trap_backtrack)->then_trap)12785{12786common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;12787return;12788}1278912790size = CURRENT_AS(then_trap_backtrack)->framesize;12791size = 3 + (size < 0 ? 0 : size);1279212793OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));12794free_stack(common, size);12795jump = JUMP(SLJIT_JUMP);1279612797set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());1279812799framesize = CURRENT_AS(then_trap_backtrack)->framesize;12800SLJIT_ASSERT(framesize != 0);1280112802/* STACK_TOP is set by THEN. */12803if (framesize > 0)12804{12805add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12806OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));12807}12808OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12809free_stack(common, 3);1281012811JUMPHERE(jump);12812OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);12813}1281412815static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)12816{12817DEFINE_COMPILER;12818then_trap_backtrack *save_then_trap = common->then_trap;1281912820while (current)12821{12822if (current->simple_backtracks != NULL)12823set_jumps(current->simple_backtracks, LABEL());12824switch(*current->cc)12825{12826case OP_SET_SOM:12827OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12828free_stack(common, 1);12829OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);12830break;1283112832case OP_STAR:12833case OP_MINSTAR:12834case OP_PLUS:12835case OP_MINPLUS:12836case OP_QUERY:12837case OP_MINQUERY:12838case OP_UPTO:12839case OP_MINUPTO:12840case OP_EXACT:12841case OP_POSSTAR:12842case OP_POSPLUS:12843case OP_POSQUERY:12844case OP_POSUPTO:12845case OP_STARI:12846case 
OP_MINSTARI:12847case OP_PLUSI:12848case OP_MINPLUSI:12849case OP_QUERYI:12850case OP_MINQUERYI:12851case OP_UPTOI:12852case OP_MINUPTOI:12853case OP_EXACTI:12854case OP_POSSTARI:12855case OP_POSPLUSI:12856case OP_POSQUERYI:12857case OP_POSUPTOI:12858case OP_NOTSTAR:12859case OP_NOTMINSTAR:12860case OP_NOTPLUS:12861case OP_NOTMINPLUS:12862case OP_NOTQUERY:12863case OP_NOTMINQUERY:12864case OP_NOTUPTO:12865case OP_NOTMINUPTO:12866case OP_NOTEXACT:12867case OP_NOTPOSSTAR:12868case OP_NOTPOSPLUS:12869case OP_NOTPOSQUERY:12870case OP_NOTPOSUPTO:12871case OP_NOTSTARI:12872case OP_NOTMINSTARI:12873case OP_NOTPLUSI:12874case OP_NOTMINPLUSI:12875case OP_NOTQUERYI:12876case OP_NOTMINQUERYI:12877case OP_NOTUPTOI:12878case OP_NOTMINUPTOI:12879case OP_NOTEXACTI:12880case OP_NOTPOSSTARI:12881case OP_NOTPOSPLUSI:12882case OP_NOTPOSQUERYI:12883case OP_NOTPOSUPTOI:12884case OP_TYPESTAR:12885case OP_TYPEMINSTAR:12886case OP_TYPEPLUS:12887case OP_TYPEMINPLUS:12888case OP_TYPEQUERY:12889case OP_TYPEMINQUERY:12890case OP_TYPEUPTO:12891case OP_TYPEMINUPTO:12892case OP_TYPEEXACT:12893case OP_TYPEPOSSTAR:12894case OP_TYPEPOSPLUS:12895case OP_TYPEPOSQUERY:12896case OP_TYPEPOSUPTO:12897/* Since classes has no backtracking path, this12898backtrackingpath was pushed by an iterator. 
*/12899case OP_CLASS:12900case OP_NCLASS:12901#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 812902case OP_XCLASS:12903case OP_ECLASS:12904#endif12905compile_iterator_backtrackingpath(common, current);12906break;1290712908case OP_REF:12909case OP_REFI:12910case OP_DNREF:12911case OP_DNREFI:12912compile_ref_iterator_backtrackingpath(common, current);12913break;1291412915case OP_RECURSE:12916compile_recurse_backtrackingpath(common, current);12917break;1291812919case OP_ASSERT:12920case OP_ASSERT_NOT:12921case OP_ASSERTBACK:12922case OP_ASSERTBACK_NOT:12923compile_assert_backtrackingpath(common, current);12924break;1292512926case OP_ASSERT_NA:12927case OP_ASSERTBACK_NA:12928case OP_ASSERT_SCS:12929case OP_ONCE:12930case OP_SCRIPT_RUN:12931case OP_BRA:12932case OP_CBRA:12933case OP_COND:12934case OP_SBRA:12935case OP_SCBRA:12936case OP_SCOND:12937compile_bracket_backtrackingpath(common, current);12938break;1293912940case OP_BRAZERO:12941if (current->cc[1] > OP_ASSERTBACK_NOT)12942compile_bracket_backtrackingpath(common, current);12943else12944compile_assert_backtrackingpath(common, current);12945break;1294612947case OP_BRAPOS:12948case OP_CBRAPOS:12949case OP_SBRAPOS:12950case OP_SCBRAPOS:12951case OP_BRAPOSZERO:12952compile_bracketpos_backtrackingpath(common, current);12953break;1295412955case OP_BRAMINZERO:12956compile_braminzero_backtrackingpath(common, current);12957break;1295812959case OP_MARK:12960OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));12961if (common->has_skip_arg)12962OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12963free_stack(common, common->has_skip_arg ? 
5 : 1);12964OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);12965if (common->has_skip_arg)12966OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);12967break;1296812969case OP_THEN:12970case OP_THEN_ARG:12971case OP_PRUNE:12972case OP_PRUNE_ARG:12973case OP_SKIP:12974case OP_SKIP_ARG:12975compile_control_verb_backtrackingpath(common, current);12976break;1297712978case OP_COMMIT:12979case OP_COMMIT_ARG:12980if (common->restore_end_ptr != 0)12981OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);1298212983if (!common->local_quit_available)12984OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);1298512986if (common->quit_label == NULL)12987add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));12988else12989JUMPTO(SLJIT_JUMP, common->quit_label);12990break;1299112992case OP_CALLOUT:12993case OP_CALLOUT_STR:12994case OP_FAIL:12995case OP_ACCEPT:12996case OP_ASSERT_ACCEPT:12997set_jumps(current->own_backtracks, LABEL());12998break;1299913000case OP_VREVERSE:13001compile_vreverse_backtrackingpath(common, current);13002break;1300313004case OP_THEN_TRAP:13005/* A virtual opcode for then traps. */13006compile_then_trap_backtrackingpath(common, current);13007break;1300813009default:13010SLJIT_UNREACHABLE();13011break;13012}13013current = current->prev;13014}13015common->then_trap = save_then_trap;13016}1301713018static SLJIT_INLINE void compile_recurse(compiler_common *common)13019{13020DEFINE_COMPILER;13021PCRE2_SPTR cc = common->start + common->currententry->start;13022PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 
0 : IMM2_SIZE);13023PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);13024uint32_t recurse_flags = 0;13025int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);13026int alt_count, alt_max, local_size;13027backtrack_common altbacktrack;13028jump_list *match = NULL;13029struct sljit_jump *next_alt = NULL;13030struct sljit_jump *accept_exit = NULL;13031struct sljit_label *quit;13032struct sljit_jump *mov_addr = NULL;1303313034/* Recurse captures then. */13035common->then_trap = NULL;1303613037SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);1303813039alt_max = no_alternatives(cc);13040alt_count = 0;1304113042/* Matching path. */13043SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);13044common->currententry->entry_label = LABEL();13045set_jumps(common->currententry->entry_calls, common->currententry->entry_label);1304613047sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);13048count_match(common);1304913050local_size = (alt_max > 1) ? 2 : 1;1305113052/* (Reversed) stack layout:13053[private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */1305413055allocate_stack(common, private_data_size + local_size);13056/* Save return address. */13057OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);1305813059copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);1306013061/* This variable is saved and restored all time when we enter or exit from a recursive context. 
*/13062OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);1306313064if (recurse_flags & recurse_flag_control_head_found)13065OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);1306613067if (alt_max > 1)13068OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);1306913070memset(&altbacktrack, 0, sizeof(backtrack_common));13071common->quit_label = NULL;13072common->accept_label = NULL;13073common->quit = NULL;13074common->accept = NULL;13075altbacktrack.cc = ccbegin;13076cc += GET(cc, 1);13077while (1)13078{13079altbacktrack.top = NULL;13080altbacktrack.own_backtracks = NULL;1308113082if (altbacktrack.cc != ccbegin)13083OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));1308413085compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);13086if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13087return;1308813089allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);13090OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);1309113092if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))13093{13094if (alt_max > 3)13095mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));13096else13097OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);13098}1309913100add_jump(compiler, &match, JUMP(SLJIT_JUMP));1310113102if (alt_count == 0)13103{13104/* Backtracking path entry. */13105SLJIT_ASSERT(common->currententry->backtrack_label == NULL);13106common->currententry->backtrack_label = LABEL();13107set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);1310813109sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);1311013111if (recurse_flags & recurse_flag_accept_found)13112accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);1311313114OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));13115/* Save return address. 
*/13116OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);1311713118copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);1311913120if (alt_max > 1)13121{13122OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));13123free_stack(common, 2);1312413125if (alt_max > 3)13126{13127sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);13128sljit_set_label(mov_addr, LABEL());13129sljit_emit_op0(compiler, SLJIT_ENDBR);13130}13131else13132next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);13133}13134else13135free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);13136}13137else if (alt_max > 3)13138{13139sljit_set_label(mov_addr, LABEL());13140sljit_emit_op0(compiler, SLJIT_ENDBR);13141}13142else13143{13144JUMPHERE(next_alt);13145if (alt_count + 1 < alt_max)13146{13147SLJIT_ASSERT(alt_count == 1 && alt_max == 3);13148next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);13149}13150}1315113152alt_count++;1315313154compile_backtrackingpath(common, altbacktrack.top);13155if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13156return;13157set_jumps(altbacktrack.own_backtracks, LABEL());1315813159if (*cc != OP_ALT)13160break;1316113162altbacktrack.cc = cc + 1 + LINK_SIZE;13163cc += GET(cc, 1);13164}1316513166/* No alternative is matched. 
*/1316713168quit = LABEL();1316913170copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);1317113172OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));13173free_stack(common, private_data_size + local_size);13174OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);13175OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);1317613177if (common->quit != NULL)13178{13179SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);1318013181set_jumps(common->quit, LABEL());13182OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);13183copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);13184JUMPTO(SLJIT_JUMP, quit);13185}1318613187if (recurse_flags & recurse_flag_accept_found)13188{13189JUMPHERE(accept_exit);13190free_stack(common, 2);1319113192/* Save return address. */13193OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);1319413195copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);1319613197OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));13198free_stack(common, private_data_size + local_size);13199OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);13200OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);13201}1320213203if (common->accept != NULL)13204{13205SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);1320613207set_jumps(common->accept, LABEL());1320813209OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);13210OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);1321113212allocate_stack(common, 2);13213OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);13214}1321513216set_jumps(match, LABEL());1321713218OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);1321913220copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, 
recurse_flags);1322113222OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));13223OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);13224OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);13225}1322613227#undef COMPILE_BACKTRACKINGPATH13228#undef CURRENT_AS1322913230#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \13231(PCRE2_JIT_INVALID_UTF)1323213233static int jit_compile(pcre2_code *code, sljit_u32 mode)13234{13235pcre2_real_code *re = (pcre2_real_code *)code;13236struct sljit_compiler *compiler;13237backtrack_common rootbacktrack;13238compiler_common common_data;13239compiler_common *common = &common_data;13240const sljit_u8 *tables = re->tables;13241void *allocator_data = &re->memctl;13242int private_data_size;13243PCRE2_SPTR ccend;13244executable_functions *functions;13245void *executable_func;13246sljit_uw executable_size, private_data_length, total_length;13247struct sljit_label *mainloop_label = NULL;13248struct sljit_label *continue_match_label;13249struct sljit_label *empty_match_found_label = NULL;13250struct sljit_label *empty_match_backtrack_label = NULL;13251struct sljit_label *reset_match_label;13252struct sljit_label *quit_label;13253struct sljit_jump *jump;13254struct sljit_jump *minlength_check_failed = NULL;13255struct sljit_jump *empty_match = NULL;13256struct sljit_jump *end_anchor_failed = NULL;13257jump_list *reqcu_not_found = NULL;1325813259SLJIT_ASSERT(tables);1326013261#if HAS_VIRTUAL_REGISTERS == 113262SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);13263#elif HAS_VIRTUAL_REGISTERS == 013264SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);13265#else13266#error "Invalid value for HAS_VIRTUAL_REGISTERS"13267#endif1326813269memset(&rootbacktrack, 0, 
sizeof(backtrack_common));13270memset(common, 0, sizeof(compiler_common));13271common->re = re;13272common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));13273rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);1327413275#ifdef SUPPORT_UNICODE13276common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;13277#endif /* SUPPORT_UNICODE */13278mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;1327913280common->start = rootbacktrack.cc;13281common->read_only_data_head = NULL;13282common->fcc = tables + fcc_offset;13283common->lcc = (sljit_sw)(tables + lcc_offset);13284common->mode = mode;13285common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);13286common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);13287common->nltype = NLTYPE_FIXED;13288switch(re->newline_convention)13289{13290case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;13291case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;13292case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;13293case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;13294case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;13295case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;13296default: return PCRE2_ERROR_INTERNAL;13297}13298common->nlmax = READ_CHAR_MAX;13299common->nlmin = 0;13300if (re->bsr_convention == PCRE2_BSR_UNICODE)13301common->bsr_nltype = NLTYPE_ANY;13302else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)13303common->bsr_nltype = NLTYPE_ANYCRLF;13304else13305{13306#ifdef BSR_ANYCRLF13307common->bsr_nltype = NLTYPE_ANYCRLF;13308#else13309common->bsr_nltype = NLTYPE_ANY;13310#endif13311}13312common->bsr_nlmax = READ_CHAR_MAX;13313common->bsr_nlmin = 0;13314common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;13315common->ctypes = (sljit_sw)(tables + 
ctypes_offset);13316common->name_count = re->name_count;13317common->name_entry_size = re->name_entry_size;13318common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;13319common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;13320#ifdef SUPPORT_UNICODE13321/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */13322common->utf = (re->overall_options & PCRE2_UTF) != 0;13323common->ucp = (re->overall_options & PCRE2_UCP) != 0;13324if (common->utf)13325{13326if (common->nltype == NLTYPE_ANY)13327common->nlmax = 0x2029;13328else if (common->nltype == NLTYPE_ANYCRLF)13329common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;13330else13331{13332/* We only care about the first newline character. */13333common->nlmax = common->newline & 0xff;13334}1333513336if (common->nltype == NLTYPE_FIXED)13337common->nlmin = common->newline & 0xff;13338else13339common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;1334013341if (common->bsr_nltype == NLTYPE_ANY)13342common->bsr_nlmax = 0x2029;13343else13344common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;13345common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;13346}13347else13348common->invalid_utf = FALSE;13349#endif /* SUPPORT_UNICODE */13350ccend = bracketend(common->start);1335113352/* Calculate the local space size on the stack. */13353common->ovector_start = LOCAL0;13354/* Allocate space for temporary data structures. */13355private_data_length = ccend - common->start;13356/* The chance of overflow is very low, but might happen on 32 bit. */13357if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32))13358return PCRE2_ERROR_NOMEMORY;1335913360private_data_length *= sizeof(sljit_s32);13361/* Align to 32 bit. 
*/13362total_length = ((re->top_bracket + 1) + (sljit_uw)(sizeof(sljit_s32) - 1)) & ~(sljit_uw)(sizeof(sljit_s32) - 1);13363if (~(sljit_uw)0 - private_data_length < total_length)13364return PCRE2_ERROR_NOMEMORY;1336513366total_length += private_data_length;13367common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data);13368if (!common->private_data_ptrs)13369return PCRE2_ERROR_NOMEMORY;1337013371memset(common->private_data_ptrs, 0, private_data_length);13372common->optimized_cbracket = ((sljit_u8 *)common->private_data_ptrs) + private_data_length;13373#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 113374memset(common->optimized_cbracket, 0, re->top_bracket + 1);13375#else13376memset(common->optimized_cbracket, 1, re->top_bracket + 1);13377#endif1337813379SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);13380#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 213381common->capture_last_ptr = common->ovector_start;13382common->ovector_start += sizeof(sljit_sw);13383#endif13384if (!check_opcode_types(common, common->start, ccend))13385{13386SLJIT_FREE(common->private_data_ptrs, allocator_data);13387return PCRE2_ERROR_JIT_UNSUPPORTED;13388}1338913390/* Checking flags and updating ovector_start. 
*/13391if (mode == PCRE2_JIT_COMPLETE &&13392(re->flags & PCRE2_LASTSET) != 0 &&13393(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)13394{13395common->req_char_ptr = common->ovector_start;13396common->ovector_start += sizeof(sljit_sw);13397}1339813399if (mode != PCRE2_JIT_COMPLETE)13400{13401common->start_used_ptr = common->ovector_start;13402common->ovector_start += sizeof(sljit_sw);13403if (mode == PCRE2_JIT_PARTIAL_SOFT)13404{13405common->hit_start = common->ovector_start;13406common->ovector_start += sizeof(sljit_sw);13407}13408}1340913410if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)13411{13412common->match_end_ptr = common->ovector_start;13413common->ovector_start += sizeof(sljit_sw);13414}1341513416#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD13417common->control_head_ptr = 1;13418#endif1341913420if (common->control_head_ptr != 0)13421{13422common->control_head_ptr = common->ovector_start;13423common->ovector_start += sizeof(sljit_sw);13424}1342513426if (common->has_set_som)13427{13428/* Saving the real start pointer is necessary. */13429common->start_ptr = common->ovector_start;13430common->ovector_start += sizeof(sljit_sw);13431}1343213433/* Aligning ovector to even number of sljit words. */13434if ((common->ovector_start & sizeof(sljit_sw)) != 0)13435common->ovector_start += sizeof(sljit_sw);1343613437if (common->start_ptr == 0)13438common->start_ptr = OVECTOR(0);1343913440/* Capturing brackets cannot be optimized if callouts are allowed. 
*/13441if (common->capture_last_ptr != 0)13442memset(common->optimized_cbracket, 0, re->top_bracket + 1);1344313444SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));13445common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);13446private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);1344713448if ((re->overall_options & PCRE2_ANCHORED) == 0 &&13449(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&13450!common->has_skip_in_assert_back)13451detect_early_fail(common, common->start, &private_data_size, 0, 0);1345213453set_private_data_ptrs(common, &private_data_size, ccend);1345413455SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);1345613457if (private_data_size > 65536)13458{13459SLJIT_FREE(common->private_data_ptrs, allocator_data);13460return PCRE2_ERROR_JIT_UNSUPPORTED;13461}1346213463if (common->has_then)13464{13465total_length = ccend - common->start;13466common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data);13467if (!common->then_offsets)13468{13469SLJIT_FREE(common->private_data_ptrs, allocator_data);13470return PCRE2_ERROR_NOMEMORY;13471}13472memset(common->then_offsets, 0, total_length);13473set_then_offsets(common, common->start, NULL);13474}1347513476compiler = sljit_create_compiler(allocator_data);13477if (!compiler)13478{13479SLJIT_FREE(common->private_data_ptrs, allocator_data);13480if (common->has_then)13481SLJIT_FREE(common->then_offsets, allocator_data);13482return PCRE2_ERROR_NOMEMORY;13483}13484common->compiler = compiler;1348513486/* Main pcre2_jit_exec entry. */13487SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);13488sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size);1348913490/* Register init. 
*/13491reset_ovector(common, (re->top_bracket + 1) * 2);13492if (common->req_char_ptr != 0)13493OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);1349413495OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);13496OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);13497OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));13498OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));13499OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));13500OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));13501OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));13502OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));13503OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);13504OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);1350513506if (common->early_fail_start_ptr < common->early_fail_end_ptr)13507reset_early_fail(common);1350813509if (mode == PCRE2_JIT_PARTIAL_SOFT)13510OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);13511if (common->mark_ptr != 0)13512OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);13513if (common->control_head_ptr != 0)13514OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);1351513516/* Main part of the matching */13517if ((re->overall_options & PCRE2_ANCHORED) == 0)13518{13519mainloop_label = mainloop_entry(common);13520continue_match_label = LABEL();13521/* Forward search if possible. 
*/13522if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)13523{13524if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))13525;13526else if ((re->flags & PCRE2_FIRSTSET) != 0)13527fast_forward_first_char(common);13528else if ((re->flags & PCRE2_STARTLINE) != 0)13529fast_forward_newline(common);13530else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)13531fast_forward_start_bits(common);13532}13533}13534else13535continue_match_label = LABEL();1353613537if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 &&13538(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)13539{13540OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);13541OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));13542minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);13543}13544if (common->req_char_ptr != 0)13545reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);1354613547/* Store the current STR_PTR in OVECTOR(0). */13548OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);13549/* Copy the limit of allowed recursions. */13550OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);13551if (common->capture_last_ptr != 0)13552OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);13553if (common->fast_forward_bc_ptr != NULL)13554OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);1355513556if (common->start_ptr != OVECTOR(0))13557OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);1355813559/* Copy the beginning of the string. 
*/13560if (mode == PCRE2_JIT_PARTIAL_SOFT)13561{13562jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);13563OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);13564JUMPHERE(jump);13565}13566else if (mode == PCRE2_JIT_PARTIAL_HARD)13567OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);1356813569compile_matchingpath(common, common->start, ccend, &rootbacktrack);13570if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13571{13572sljit_free_compiler(compiler);13573SLJIT_FREE(common->private_data_ptrs, allocator_data);13574if (common->has_then)13575SLJIT_FREE(common->then_offsets, allocator_data);13576PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13577return PCRE2_ERROR_NOMEMORY;13578}1357913580if ((re->overall_options & PCRE2_ENDANCHORED) != 0)13581end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);1358213583if (common->might_be_empty)13584{13585empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));13586empty_match_found_label = LABEL();13587}1358813589common->accept_label = LABEL();13590if (common->accept != NULL)13591set_jumps(common->accept, common->accept_label);1359213593/* This means we have a match. Update the ovector. 
*/13594copy_ovector(common, re->top_bracket + 1);13595common->quit_label = common->abort_label = LABEL();13596if (common->quit != NULL)13597set_jumps(common->quit, common->quit_label);13598if (common->abort != NULL)13599set_jumps(common->abort, common->abort_label);13600if (minlength_check_failed != NULL)13601SET_LABEL(minlength_check_failed, common->abort_label);1360213603sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);13604sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);1360513606if (common->failed_match != NULL)13607{13608SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);13609set_jumps(common->failed_match, LABEL());13610OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);13611JUMPTO(SLJIT_JUMP, common->abort_label);13612}1361313614if ((re->overall_options & PCRE2_ENDANCHORED) != 0)13615JUMPHERE(end_anchor_failed);1361613617if (mode != PCRE2_JIT_COMPLETE)13618{13619common->partialmatchlabel = LABEL();13620set_jumps(common->partialmatch, common->partialmatchlabel);13621return_with_partial_match(common, common->quit_label);13622}1362313624if (common->might_be_empty)13625empty_match_backtrack_label = LABEL();13626compile_backtrackingpath(common, rootbacktrack.top);13627if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13628{13629sljit_free_compiler(compiler);13630SLJIT_FREE(common->private_data_ptrs, allocator_data);13631if (common->has_then)13632SLJIT_FREE(common->then_offsets, allocator_data);13633PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13634return PCRE2_ERROR_NOMEMORY;13635}1363613637SLJIT_ASSERT(rootbacktrack.prev == NULL);13638reset_match_label = LABEL();1363913640if (mode == PCRE2_JIT_PARTIAL_SOFT)13641{13642/* Update hit_start only in the first time. 
*/13643jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);13644OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);13645OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);13646OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);13647JUMPHERE(jump);13648}1364913650/* Check we have remaining characters. */13651if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)13652{13653OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);13654}1365513656OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),13657(common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);1365813659if ((re->overall_options & PCRE2_ANCHORED) == 0)13660{13661if (common->ff_newline_shortcut != NULL)13662{13663/* There cannot be more newlines if PCRE2_FIRSTLINE is set. */13664if ((re->overall_options & PCRE2_FIRSTLINE) == 0)13665{13666if (common->match_end_ptr != 0)13667{13668OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);13669OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);13670CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);13671OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);13672}13673else13674CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);13675}13676}13677else13678CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);13679}1368013681/* No more remaining characters. 
*/13682if (reqcu_not_found != NULL)13683set_jumps(reqcu_not_found, LABEL());1368413685if (mode == PCRE2_JIT_PARTIAL_SOFT)13686CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);1368713688OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);13689JUMPTO(SLJIT_JUMP, common->quit_label);1369013691flush_stubs(common);1369213693if (common->might_be_empty)13694{13695JUMPHERE(empty_match);13696OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);13697OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));13698OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);13699JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);13700OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);13701JUMPTO(SLJIT_ZERO, empty_match_found_label);13702OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));13703CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);13704JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);13705}1370613707common->fast_forward_bc_ptr = NULL;13708common->early_fail_start_ptr = 0;13709common->early_fail_end_ptr = 0;13710common->currententry = common->entries;13711common->local_quit_available = TRUE;13712quit_label = common->quit_label;13713SLJIT_ASSERT(common->restore_end_ptr == 0);1371413715if (common->currententry != NULL)13716{13717/* A free bit for each private data. */13718common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;13719SLJIT_ASSERT(common->recurse_bitset_size > 0);13720common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;1372113722if (common->recurse_bitset != NULL)13723{13724do13725{13726/* Might add new entries. 
*/13727compile_recurse(common);13728if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13729break;13730flush_stubs(common);13731common->currententry = common->currententry->next;13732}13733while (common->currententry != NULL);1373413735SLJIT_FREE(common->recurse_bitset, allocator_data);13736}1373713738if (common->currententry != NULL)13739{13740/* The common->recurse_bitset has been freed. */13741SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);1374213743sljit_free_compiler(compiler);13744SLJIT_FREE(common->private_data_ptrs, allocator_data);13745if (common->has_then)13746SLJIT_FREE(common->then_offsets, allocator_data);13747PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13748return PCRE2_ERROR_NOMEMORY;13749}13750}1375113752common->local_quit_available = FALSE;13753common->quit_label = quit_label;13754SLJIT_ASSERT(common->restore_end_ptr == 0);1375513756/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */13757/* This is a (really) rare case. */13758set_jumps(common->stackalloc, LABEL());13759/* RETURN_ADDR is not a saved register. 
*/13760SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));13761sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);1376213763SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);1376413765OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0);13766OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);13767OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);13768OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));13769OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);1377013771sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));1377213773jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);13774OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);13775OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);13776OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);13777OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);13778OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);1377913780/* Allocation failed. */13781JUMPHERE(jump);13782/* We break the return address cache here, but this is a really rare case. */13783OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);13784JUMPTO(SLJIT_JUMP, common->quit_label);1378513786/* Call limit reached. 
*/13787set_jumps(common->calllimit, LABEL());13788OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);13789JUMPTO(SLJIT_JUMP, common->quit_label);1379013791if (common->revertframes != NULL)13792{13793set_jumps(common->revertframes, LABEL());13794do_revertframes(common);13795}13796if (common->wordboundary != NULL)13797{13798set_jumps(common->wordboundary, LABEL());13799check_wordboundary(common, FALSE);13800}13801if (common->ucp_wordboundary != NULL)13802{13803set_jumps(common->ucp_wordboundary, LABEL());13804check_wordboundary(common, TRUE);13805}13806if (common->anynewline != NULL)13807{13808set_jumps(common->anynewline, LABEL());13809check_anynewline(common);13810}13811if (common->hspace != NULL)13812{13813set_jumps(common->hspace, LABEL());13814check_hspace(common);13815}13816if (common->vspace != NULL)13817{13818set_jumps(common->vspace, LABEL());13819check_vspace(common);13820}13821if (common->casefulcmp != NULL)13822{13823set_jumps(common->casefulcmp, LABEL());13824do_casefulcmp(common);13825}13826if (common->caselesscmp != NULL)13827{13828set_jumps(common->caselesscmp, LABEL());13829do_caselesscmp(common);13830}13831if (common->reset_match != NULL || common->restart_match != NULL)13832{13833if (common->restart_match != NULL)13834{13835set_jumps(common->restart_match, LABEL());13836OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);13837}1383813839set_jumps(common->reset_match, LABEL());13840do_reset_match(common, (re->top_bracket + 1) * 2);13841/* The value of restart_match is in TMP1. 
*/13842CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);13843OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);13844JUMPTO(SLJIT_JUMP, reset_match_label);13845}13846#ifdef SUPPORT_UNICODE13847#if PCRE2_CODE_UNIT_WIDTH == 813848if (common->utfreadchar != NULL)13849{13850set_jumps(common->utfreadchar, LABEL());13851do_utfreadchar(common);13852}13853if (common->utfreadtype8 != NULL)13854{13855set_jumps(common->utfreadtype8, LABEL());13856do_utfreadtype8(common);13857}13858if (common->utfpeakcharback != NULL)13859{13860set_jumps(common->utfpeakcharback, LABEL());13861do_utfpeakcharback(common);13862}13863#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */13864#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 1613865if (common->utfreadchar_invalid != NULL)13866{13867set_jumps(common->utfreadchar_invalid, LABEL());13868do_utfreadchar_invalid(common);13869}13870if (common->utfreadnewline_invalid != NULL)13871{13872set_jumps(common->utfreadnewline_invalid, LABEL());13873do_utfreadnewline_invalid(common);13874}13875if (common->utfmoveback_invalid)13876{13877set_jumps(common->utfmoveback_invalid, LABEL());13878do_utfmoveback_invalid(common);13879}13880if (common->utfpeakcharback_invalid)13881{13882set_jumps(common->utfpeakcharback_invalid, LABEL());13883do_utfpeakcharback_invalid(common);13884}13885#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */13886if (common->getucd != NULL)13887{13888set_jumps(common->getucd, LABEL());13889do_getucd(common);13890}13891if (common->getucdtype != NULL)13892{13893set_jumps(common->getucdtype, LABEL());13894do_getucdtype(common);13895}13896#endif /* SUPPORT_UNICODE */1389713898SLJIT_FREE(common->private_data_ptrs, allocator_data);13899if (common->has_then)13900SLJIT_FREE(common->then_offsets, allocator_data);1390113902executable_func = sljit_generate_code(compiler, 0, NULL);13903executable_size = sljit_get_generated_code_size(compiler);13904sljit_free_compiler(compiler);1390513906if (executable_func == 
NULL)13907{13908PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13909return PCRE2_ERROR_NOMEMORY;13910}1391113912/* Reuse the function descriptor if possible. */13913if (re->executable_jit != NULL)13914functions = (executable_functions *)re->executable_jit;13915else13916{13917functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);13918if (functions == NULL)13919{13920/* This case is highly unlikely since we just recently13921freed a lot of memory. Not impossible though. */13922sljit_free_code(executable_func, NULL);13923PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13924return PCRE2_ERROR_NOMEMORY;13925}13926memset(functions, 0, sizeof(executable_functions));13927functions->top_bracket = re->top_bracket + 1;13928functions->limit_match = re->limit_match;13929re->executable_jit = functions;13930}1393113932/* Turn mode into an index. */13933if (mode == PCRE2_JIT_COMPLETE)13934mode = 0;13935else13936mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 
1 : 2;1393713938SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);13939functions->executable_funcs[mode] = executable_func;13940functions->read_only_data_heads[mode] = common->read_only_data_head;13941functions->executable_sizes[mode] = executable_size;13942return 0;13943}1394413945#endif1394613947/*************************************************13948* JIT compile a Regular Expression *13949*************************************************/1395013951/* This function used JIT to convert a previously-compiled pattern into machine13952code.1395313954Arguments:13955code a compiled pattern13956options JIT option bits1395713958Returns: 0: success or (*NOJIT) was used13959<0: an error code13960*/1396113962#define PUBLIC_JIT_COMPILE_OPTIONS \13963(PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)1396413965PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION13966pcre2_jit_compile(pcre2_code *code, uint32_t options)13967{13968pcre2_real_code *re = (pcre2_real_code *)code;13969#ifdef SUPPORT_JIT13970void *exec_memory;13971executable_functions *functions;13972static int executable_allocator_is_working = -1;1397313974if (executable_allocator_is_working == -1)13975{13976/* Checks whether the executable allocator is working. This check13977might run multiple times in multi-threaded environments, but the13978result should not be affected by it. */13979exec_memory = SLJIT_MALLOC_EXEC(32, NULL);13980if (exec_memory != NULL)13981{13982SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL);13983executable_allocator_is_working = 1;13984}13985else executable_allocator_is_working = 0;13986}13987#endif1398813989if (options & PCRE2_JIT_TEST_ALLOC)13990{13991if (options != PCRE2_JIT_TEST_ALLOC)13992return PCRE2_ERROR_JIT_BADOPTION;1399313994#ifdef SUPPORT_JIT13995return executable_allocator_is_working ? 
0 : PCRE2_ERROR_NOMEMORY;13996#else13997return PCRE2_ERROR_JIT_UNSUPPORTED;13998#endif13999}1400014001if (code == NULL)14002return PCRE2_ERROR_NULL;1400314004if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)14005return PCRE2_ERROR_JIT_BADOPTION;1400614007/* Support for invalid UTF was first introduced in JIT, with the option14008PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the14009compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the14010preferred feature, with the earlier option deprecated. However, for backward14011compatibility, if the earlier option is set, it forces the new option so that14012if JIT matching falls back to the interpreter, there is still support for14013invalid UTF. However, if this function has already been successfully called14014without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that14015non-invalid-supporting JIT code was compiled), give an error.1401614017If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following14018actions are needed:14019140201. Remove the definition from pcre2.h.in and from the list in14021PUBLIC_JIT_COMPILE_OPTIONS above.14022140232. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.14024140253. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.14026140274. Delete the following short block of code. 
The setting of "re" and14028"functions" can be moved into the JIT-only block below, but if that is14029done, (void)re and (void)functions will be needed in the non-JIT case, to14030avoid compiler warnings.14031*/1403214033#ifdef SUPPORT_JIT14034functions = (executable_functions *)re->executable_jit;14035#endif1403614037if ((options & PCRE2_JIT_INVALID_UTF) != 0)14038{14039if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)14040{14041#ifdef SUPPORT_JIT14042if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;14043#endif14044re->overall_options |= PCRE2_MATCH_INVALID_UTF;14045}14046}1404714048/* The above tests are run with and without JIT support. This means that14049PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring14050interpreter support) even in the absence of JIT. But now, if there is no JIT14051support, give an error return. */1405214053#ifndef SUPPORT_JIT14054return PCRE2_ERROR_JIT_BADOPTION;14055#else /* SUPPORT_JIT */1405614057/* There is JIT support. Do the necessary. 
*/1405814059if ((re->flags & PCRE2_NOJIT) != 0) return 0;1406014061if (!executable_allocator_is_working)14062return PCRE2_ERROR_NOMEMORY;1406314064if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)14065options |= PCRE2_JIT_INVALID_UTF;1406614067if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL14068|| functions->executable_funcs[0] == NULL)) {14069uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);14070int result = jit_compile(code, options & ~excluded_options);14071if (result != 0)14072return result;14073}1407414075if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL14076|| functions->executable_funcs[1] == NULL)) {14077uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);14078int result = jit_compile(code, options & ~excluded_options);14079if (result != 0)14080return result;14081}1408214083if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL14084|| functions->executable_funcs[2] == NULL)) {14085uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);14086int result = jit_compile(code, options & ~excluded_options);14087if (result != 0)14088return result;14089}1409014091return 0;1409214093#endif /* SUPPORT_JIT */14094}1409514096/* JIT compiler uses an all-in-one approach. This improves security,14097since the code generator functions are not exported. */1409814099#define INCLUDED_FROM_PCRE2_JIT_COMPILE1410014101#include "pcre2_jit_match.c"14102#include "pcre2_jit_misc.c"1410314104/* End of pcre2_jit_compile.c */141051410614107